Skip to content

Commit

Permalink
Fix some issues
Browse files Browse the repository at this point in the history
  • Loading branch information
junyuan-chen committed Apr 26, 2024
1 parent 058501e commit cf21cf3
Show file tree
Hide file tree
Showing 7 changed files with 241 additions and 137 deletions.
58 changes: 41 additions & 17 deletions src/data.jl
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ struct LPData{TF<:AbstractFloat, TG<:Union{Vector,Nothing},
ys::Vector{Any}
wgs::Vector{Any}
fes::Vector{Any}
ipanelfe::Union{Int,Nothing}
clus::Vector{Any}
pw::TW
nlag::Int
Expand Down Expand Up @@ -130,7 +131,7 @@ function _checklpdata(ys, ws, nlag)
length(ws) > 0 || throw(ArgumentError("ws cannot be empty"))
end

function LPData(ys, xs, ws, wgs, sts, fes, clus, pw, nlag, minhorz, subset,
function LPData(ys, xs, ws, wgs, sts, fes, ipanelfe, clus, pw, nlag, minhorz, subset,
groups::Nothing, checkrows, TF=Float64)
_checklpdata(ys, ws, nlag)
Tfull = size(ys[1],1)
Expand All @@ -154,10 +155,11 @@ function LPData(ys, xs, ws, wgs, sts, fes, clus, pw, nlag, minhorz, subset,
esampleT = nothing
_fillX!(X, xs, ws, sts, nlag, minhorz, TF)
end
return LPData(ys, wgs, fes, clus, pw, nlag, minhorz, subset, X, esampleT, groups)
return LPData(ys, wgs, fes, ipanelfe, clus, pw, nlag, minhorz, subset, X,
esampleT, groups)
end

function LPData(ys, xs, ws, wgs, sts, fes, clus, pw, nlag, minhorz, subset,
function LPData(ys, xs, ws, wgs, sts, fes, ipanelfe, clus, pw, nlag, minhorz, subset,
groups::Vector, checkrows, TF=Float64)
_checklpdata(ys, ws, nlag)
ng = length(groups)
Expand Down Expand Up @@ -203,7 +205,8 @@ function LPData(ys, xs, ws, wgs, sts, fes, clus, pw, nlag, minhorz, subset,
sum(esampleT) > nX || throw(ArgumentError(
"not enough observations for nlag=$nlag and minhorz=$minhorz"))
end
return LPData(ys, wgs, fes, clus, pw, nlag, minhorz, subset, X, esampleT, groups)
return LPData(ys, wgs, fes, ipanelfe, clus, pw, nlag, minhorz, subset, X,
esampleT, groups)
end

function _fillY!(Y, esampleT, aux, ys, nlag, horz, subset, isfirststage, TF)
Expand Down Expand Up @@ -273,7 +276,7 @@ function _makeYX(dt::LPData{TF,Nothing}, horz::Int, isfirststage::Bool=false) wh
_fillY!(Y, dt.ys, dt.nlag, horz, isfirststage, TF)
X = dt.Xfull[1:T,:]
end
return Y, X, Y, nothing, nothing, uweights(T), T, esampleT, 0
return Y, X, Y, nothing, nothing, nothing, uweights(T), T, esampleT, 0
end

# The case for panel data
Expand Down Expand Up @@ -467,7 +470,6 @@ function _makeYX(dt::LPData{TF,<:Vector}, horz::Int, isfirststage::Bool=false) w
if isempty(FE)
doffe = 0
else
_feresiduals!(Y, X, FE, W)
doffe = 0
for fe in FE
if !isempty(CLU) && any(x->isnested(fe, x.groups), CLU)
Expand All @@ -476,14 +478,21 @@ function _makeYX(dt::LPData{TF,<:Vector}, horz::Int, isfirststage::Bool=false) w
doffe += nunique(fe)
end
end
# Handle doffe via fe for intercepts added to wgs
# but not residualize panelid via fe
if isempty(dt.wgs) || dt.ipanelfe === nothing
_feresiduals!(Y, X, FE, W)
else
if length(FE) > 1
FE = FE[1:length(FE).!=dt.ipanelfe]
_feresiduals!(Y, X, FE, W)
end
end
end
# Scaling of weights comes after partialing out fe as in FixedEffectModels.jl
if !(W isa UnitWeights)
for col in eachcol(Y)
col .*= sqrt.(W)
end
for col in eachcol(X)
col .*= sqrt.(W)
end
Y .*= sqrt.(W)
X .*= sqrt.(W)
end
if isempty(dt.wgs)
pt = nothing
Expand Down Expand Up @@ -512,6 +521,11 @@ function _makeYX(dt::LPData{TF,<:Vector}, horz::Int, isfirststage::Bool=false) w
copyto!(view(pt.resid, :, 1:ny), gY)
gX = view(X, 1+(g-1)*gT:g*gT, :)
copyto!(view(pt.resid, :, ny+1:gny), gX)
if !(W isa UnitWeights)
# pt.resid is already scaled by W
pt.X[:,1] .= one(TF)
pt.X .*= sqrt.(view(W, 1+(g-1)*gT:g*gT))
end
# Solve the partial OLS
coefg = view(pt.coef, :, 1+(g-1)*gny:g*gny)
mul!(coefg, pt.X', pt.resid)
Expand All @@ -523,7 +537,7 @@ function _makeYX(dt::LPData{TF,<:Vector}, horz::Int, isfirststage::Bool=false) w
copyto!(gX, view(pt.resid, :, ny+1:gny))
end
end
return Y, X, Ypt, pt, CLU, W, T, esampleT, doffe
return Y, X, Ypt, pt, FE, CLU, W, T, esampleT, doffe
end

# Fitted values from first stage of 2SLS
Expand Down Expand Up @@ -574,18 +588,21 @@ function _fillfitted(fitted, groups::Vector, nY, nlag, Tfull, horz, esampleT::No
end

# Xendo is needed for computing residuals from 2SLS
function _makeXendo(dt::LPData{TF,Nothing}, esampleT::BitVector, horz, yfs::Vector) where TF
function _makeXendo(dt::LPData{TF,Nothing}, esampleT::BitVector, horz, yfs::Vector,
FE, W) where TF
nyfs = length(yfs)
Xendo = Matrix{TF}(undef, sum(esampleT), nyfs)
@inbounds for j in 1:nyfs
yf = vec(yfs[j], dt.subset, :x, horz, TF)
src = view(yf, dt.nlag+1:length(yf)-horz)
copyto!(view(Xendo,:,j), view(src, esampleT))
end
W isa UnitWeights || (Xendo .*= sqrt.(W))
return Xendo
end

function _makeXendo(dt::LPData{TF,Nothing}, esampleT::Nothing, horz, yfs::Vector) where TF
function _makeXendo(dt::LPData{TF,Nothing}, esampleT::Nothing, horz, yfs::Vector,
FE, W) where TF
nyfs = length(yfs)
T = size(dt.Xfull, 1) - horz + dt.minhorz
Xendo = Matrix{TF}(undef, T, nyfs)
Expand All @@ -594,10 +611,12 @@ function _makeXendo(dt::LPData{TF,Nothing}, esampleT::Nothing, horz, yfs::Vector
src = view(yf, dt.nlag+1:length(yf)-horz)
copyto!(view(Xendo,:,j), src)
end
W isa UnitWeights || (Xendo .*= sqrt.(W))
return Xendo
end

function _makeXendo(dt::LPData{TF,<:Vector}, esampleT::BitVector, horz, yfs::Vector) where TF
function _makeXendo(dt::LPData{TF,<:Vector}, esampleT::BitVector, horz, yfs::Vector,
FE, W) where TF
nyfs = length(yfs)
Xendo = Matrix{TF}(undef, sum(esampleT), nyfs)
@inbounds for j in 1:nyfs
Expand All @@ -617,10 +636,13 @@ function _makeXendo(dt::LPData{TF,<:Vector}, esampleT::BitVector, horz, yfs::Vec
k1 = k2 + 1
end
end
_feresiduals!(Xendo, FE, W)
W isa UnitWeights || (Xendo .*= sqrt.(W))
return Xendo
end

function _makeXendo(dt::LPData{TF,<:Vector}, esampleT::Nothing, horz, yfs::Vector) where TF
function _makeXendo(dt::LPData{TF,<:Vector}, esampleT::Nothing, horz, yfs::Vector,
FE, W) where TF
nyfs = length(yfs)
T = size(dt.Xfull, 1) - length(dt.groups) * (horz - dt.minhorz)
Xendo = Matrix{TF}(undef, T, nyfs)
Expand All @@ -637,6 +659,8 @@ function _makeXendo(dt::LPData{TF,<:Vector}, esampleT::Nothing, horz, yfs::Vecto
i1 = i2 + 1
end
end
_feresiduals!(Xendo, FE, W)
W isa UnitWeights || (Xendo .*= sqrt.(W))
return Xendo
end

Expand Down
81 changes: 45 additions & 36 deletions src/lp.jl
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,9 @@ function _getcols(data, ynames, xnames, wnames, wgnames,
any(in(wgnames), wnames) && throw(ArgumentError(
"a name in wgnames cannot also be in wnames"))
any(x->x isa Cum, wgnames) && throw(ArgumentError("Cum is not allowed in wgnames"))
# An intercept is always added when partialing out wgs
# Need addpanelidfe only for handling doffe
addpanelidfe = true
end
states===nothing || _checknames(states) || throw(ArgumentError("invalid states"*argmsg))
length(fes)==0 || _checknames(fes) || throw(ArgumentError("invalid fes"*argmsg))
Expand All @@ -263,6 +266,8 @@ function _getcols(data, ynames, xnames, wnames, wgnames,
panelweight === nothing || (panelweight = _toname(data, panelweight))
!addpanelidfe || panelid === nothing || panelid in fenames || push!(fenames, panelid)
addylag && union!(wnames, setdiff(ynames, wgnames))
isempty(wgnames) || length(fenames) == 1 ||
@warn "caution should be taken as variables in wgnames are not partialled out by the fixed effects other than panelid"

ys = Any[getcolumn(data, n) for n in ynames]
xs = Any[getcolumn(data, n) for n in xnames]
Expand All @@ -278,24 +283,25 @@ function _getcols(data, ynames, xnames, wnames, wgnames,
xnames = VarName[xnames..., :constant]
end

ipanelfe = panelid === nothing ? nothing : findfirst(==(panelid), fenames)
groups = panelid === nothing ? nothing : _group(getcolumn(data, panelid))
pw = panelweight === nothing ? nothing : getcolumn(data, panelweight)

return ynames, xnames, wnames, wgnames, stnames, fenames, panelid, panelweight, states,
ys, xs, ws, wgs, sts, fes, clus, pw, groups
return ynames, xnames, wnames, wgnames, stnames, fenames, panelid, ipanelfe,
panelweight, states, ys, xs, ws, wgs, sts, fes, clus, pw, groups
end

function _lp(dt::LPData, horz::Int, vce, yfs::Nothing, ix_iv)
Y, X, _, pt, CLU, W, T, esampleT, doffe = _makeYX(dt, horz)
Y, X, _, pt, FE, CLU, W, T, esampleT, doffe = _makeYX(dt, horz)
dofr = T - size(X,2) - doffe
pt === nothing || (dofr -= (size(pt.X,2)-1) * length(dt.groups))
m = OLS(Y, X, dofr)
return coef(m), vcov(m, vce), T, m
end

function _lp(dt::LPData, horz::Int, vce, yfs::Vector, ix_iv)
Y, Xhat, _, pt, CLU, W, T, esampleT, doffe = _makeYX(dt, horz)
Xendo = _makeXendo(dt, esampleT, horz, yfs)
Y, Xhat, _, pt, FE, CLU, W, T, esampleT, doffe = _makeYX(dt, horz)
Xendo = _makeXendo(dt, esampleT, horz, yfs, FE, W)
X = similar(Xhat)
copyto!(view(X,:,ix_iv), Xendo)
iexo = setdiff(1:size(X,2), ix_iv)
Expand All @@ -307,7 +313,7 @@ function _lp(dt::LPData, horz::Int, vce, yfs::Vector, ix_iv)
end

function _lp(dt::LPData, horz::Int, vce::ClusterCovariance, yfs::Nothing, ix_iv)
Y, X, _, pt, CLU, W, T, esampleT, doffe = _makeYX(dt, horz)
Y, X, _, pt, FE, CLU, W, T, esampleT, doffe = _makeYX(dt, horz)
vce = cluster(names(vce), (CLU...,))
dofr = T - size(X,2) - doffe
pt === nothing || (dofr -= (size(pt.X,2)-1) * length(dt.groups))
Expand All @@ -317,8 +323,8 @@ function _lp(dt::LPData, horz::Int, vce::ClusterCovariance, yfs::Nothing, ix_iv)
end

function _lp(dt::LPData, horz::Int, vce::ClusterCovariance, yfs::Vector, ix_iv)
Y, Xhat, _, pt, CLU, W, T, esampleT, doffe = _makeYX(dt, horz)
Xendo = _makeXendo(dt, esampleT, horz, yfs)
Y, Xhat, _, pt, FE, CLU, W, T, esampleT, doffe = _makeYX(dt, horz)
Xendo = _makeXendo(dt, esampleT, horz, yfs, FE, W)
X = similar(Xhat)
copyto!(view(X,:,ix_iv), Xendo)
iexo = setdiff(1:size(X,2), ix_iv)
Expand All @@ -331,8 +337,8 @@ function _lp(dt::LPData, horz::Int, vce::ClusterCovariance, yfs::Vector, ix_iv)
return coef(m), vcov(m, vce), T, m
end

function _normalize!(data, normalize, xnames, xs, ws, wgs, sts, fes, pw, nlag, minhorz,
subset, groups, checkrows; TF=Float64)
function _normalize!(data, normalize, xnames, xs, ws, wgs, sts, fes, ipanelfe, pw,
nlag, minhorz, subset, groups, checkrows; TF=Float64)
snames = normalize isa Pair ? (normalize[1],) : (p[1] for p in normalize)
ix_s = Vector{Int}(undef, length(snames))
for (i, n) in enumerate(snames)
Expand All @@ -344,8 +350,8 @@ function _normalize!(data, normalize, xnames, xs, ws, wgs, sts, fes, pw, nlag, m
nnames = normalize isa Pair ? (normalize[2],) : (p[2] for p in normalize)
yn = Any[getcolumn(data, n) for n in nnames]
# Data on clusters are not needed
dt = LPData(yn, xs, ws, wgs, sts, fes, Any[], pw, nlag, minhorz, subset, groups,
checkrows, TF)
dt = LPData(yn, xs, ws, wgs, sts, fes, ipanelfe, Any[], pw, nlag, minhorz, subset,
groups, checkrows, TF)
Bn, Vn, Tn, _ = _lp(dt, minhorz, nothing, nothing, nothing)
normmults = TF[Bn[ix,i] for (i,ix) in enumerate(ix_s)]
xs[ix_s] .*= normmults
Expand All @@ -354,14 +360,15 @@ function _normalize!(data, normalize, xnames, xs, ws, wgs, sts, fes, pw, nlag, m
return normnames, normtars, normmults
end

_normalize!(data, normalize::Nothing, xnames, xs, ws, wgs, sts, fes, pw, nlag, minhorz,
subset, groups, checkrows; TF=Float64) = (nothing, nothing, nothing)
_normalize!(data, normalize::Nothing, xnames, xs, ws, wgs, sts, fes, ipanelfe, pw,
nlag, minhorz, subset, groups, checkrows; TF=Float64) = (nothing, nothing, nothing)

function _firststage(nendo, niv, ys, xs, ws, wgs, sts, fes, clus, pw, nlag::Int, horz::Int,
subset::Union{BitVector,Nothing}, groups, testweakiv, vce, checkrows; TF=Float64)
dt = LPData(ys, xs, ws, wgs, sts, fes, clus, pw, nlag, horz, subset, groups,
function _firststage(nendo, niv, ys, xs, ws, wgs, sts, fes, ipanelfe, clus, pw,
nlag::Int, horz::Int, subset::Union{BitVector,Nothing}, groups,
testweakiv, vce, checkrows; TF=Float64)
dt = LPData(ys, xs, ws, wgs, sts, fes, ipanelfe, clus, pw, nlag, horz, subset, groups,
checkrows, TF)
Y, X, Ypt, pt, CLU, W, T, esampleT, doffe = _makeYX(dt, horz, true)
Y, X, Ypt, pt, FE, CLU, W, T, esampleT, doffe = _makeYX(dt, horz, true)
# Get first-stage estimates
bf = X'Ypt
ldiv!(cholesky!(X'X), bf)
Expand Down Expand Up @@ -400,7 +407,7 @@ function _firststage(nendo, niv, ys, xs, ws, wgs, sts, fes, clus, pw, nlag::Int,
return fitted, F_kp, p_kp
end

function _iv!(data, iv, firststagebyhorz, xnames, xs, ws, wgs, sts, fes, clus, pw,
function _iv!(data, iv, firststagebyhorz, xnames, xs, ws, wgs, sts, fes, ipanelfe, clus, pw,
nlag, minhorz, subset, groups, testweakiv, vce, checkrows; TF=Float64)
endonames = iv[1]
endonames isa VarIndex && (endonames = (endonames,))
Expand All @@ -427,7 +434,8 @@ function _iv!(data, iv, firststagebyhorz, xnames, xs, ws, wgs, sts, fes, clus, p
any(x->x isa Cum, endonames) &&
@warn "firststagebyhorz=false while endogenous variables contain Cum"
fitted, F_kp, p_kp = _firststage(nendo, niv, yfs, xfs, ws, wgs, sts,
fes, clus, pw, nlag, minhorz, subset, groups, testweakiv, vce, checkrows; TF=TF)
fes, ipanelfe, clus, pw, nlag, minhorz, subset,
groups, testweakiv, vce, checkrows; TF=TF)
# Replace the endogenous variable with the fitted values
xs[ix_iv] .= fitted
else
Expand All @@ -438,11 +446,11 @@ function _iv!(data, iv, firststagebyhorz, xnames, xs, ws, wgs, sts, fes, clus, p
return endonames, ivnames, ix_iv, yfs, xfs, F_kp, p_kp
end

_iv!(data, iv::Nothing, firststagebyhorz, xnames, xs, ws, wgs, sts, fes, clus, pw,
_iv!(data, iv::Nothing, firststagebyhorz, xnames, xs, ws, wgs, sts, fes, ipanelfe, clus, pw,
nlag, minhorz, subset, groups, testweakiv, vce, checkrows; TF=Float64) =
(nothing, nothing, nothing, nothing, nothing, nothing, nothing)

function _est(::LeastSquaresLP, data, xnames, ys, xs, ws, wgs, sts, fes, clus, pw,
function _est(::LeastSquaresLP, data, xnames, ys, xs, ws, wgs, sts, fes, ipanelfe, clus, pw,
nlag, minhorz, nhorz, vce, subset, groups,
iv, ix_iv, nendo, niv, yfs, xfs, firststagebyhorz, testweakiv, checkrows; TF=Float64)
ny = length(ys)
Expand All @@ -454,7 +462,7 @@ function _est(::LeastSquaresLP, data, xnames, ys, xs, ws, wgs, sts, fes, clus, p
M = Vector{OLS{TF}}(undef, nhorz)
firststagebyhorz = iv !== nothing && firststagebyhorz
if !firststagebyhorz && !any(x->x isa Cum, xs)
dt = LPData(ys, xs, ws, wgs, sts, fes, clus, pw, nlag, minhorz, subset, groups,
dt = LPData(ys, xs, ws, wgs, sts, fes, ipanelfe, clus, pw, nlag, minhorz, subset, groups,
checkrows, TF)
end
F_kps = firststagebyhorz && testweakiv ? Vector{Float64}(undef, nhorz) : nothing
Expand All @@ -464,17 +472,18 @@ function _est(::LeastSquaresLP, data, xnames, ys, xs, ws, wgs, sts, fes, clus, p
# Handle cases where all data need to be regenerated for each horizon
if firststagebyhorz
fitted, F_kpsi, p_kpsi = _firststage(nendo, niv, yfs, xfs, ws, wgs, sts,
fes, clus, pw, nlag, h, subset, groups, testweakiv, vce, checkrows; TF=TF)
fes, ipanelfe, clus, pw, nlag, h, subset, groups,
testweakiv, vce, checkrows; TF=TF)
if testweakiv
F_kps[i] = F_kpsi
p_kps[i] = p_kpsi
end
xs[ix_iv] .= fitted
dt = LPData(ys, xs, ws, wgs, sts, fes, clus, pw, nlag, h, subset, groups,
checkrows, TF)
dt = LPData(ys, xs, ws, wgs, sts, fes, ipanelfe, clus, pw, nlag, h,
subset, groups, checkrows, TF)
elseif any(x->x isa Cum, xs)
dt = LPData(ys, xs, ws, wgs, sts, fes, clus, pw, nlag, h, subset, groups,
checkrows, TF)
dt = LPData(ys, xs, ws, wgs, sts, fes, ipanelfe, clus, pw, nlag, h,
subset, groups, checkrows, TF)
end
Bh, Vh, T[i], M[i] = _lp(dt, h, vce, yfs, ix_iv)
B[:,:,i] = reshape(Bh, nr, ny, 1)
Expand All @@ -494,7 +503,7 @@ The input `data` must be `Tables.jl`-compatible.
# Keywords
- `xnames=()`: indices of contemporaneous regressors from `data`.
- `wnames=()`: indices of lagged control variables from `data`.
- `wgnames=()`: indices of lagged control variables from `data` that are interacted with `panelid`; only relevant with panel data.
- `wgnames=()`: indices of lagged control variables from `data` that are interacted with `panelid` and partialled out before the main estimation; only relevant with panel data; coefficient estimates are not computed.
- `nlag::Int=4`: number of lags to be included for the lagged control variables.
- `nhorz::Int=1`: total number of horizons to be estimated.
- `minhorz::Int=0`: the minimum horizon involved in estimation.
Expand Down Expand Up @@ -531,8 +540,8 @@ function lp(estimator, data, ynames;

clunames = vce isa ClusterCovariance ? names(vce) : ()

ynames, xnames, wnames, wgnames, stnames, fenames, panelid, panelweight, states,
ys, xs, ws, wgs, sts, fes, clus, pw, groups =
ynames, xnames, wnames, wgnames, stnames, fenames, panelid, ipanelfe,
panelweight, states, ys, xs, ws, wgs, sts, fes, clus, pw, groups =
_getcols(data, ynames, xnames, wnames, wgnames, states,
fes, clunames, panelid, panelweight, addpanelidfe, addylag, nocons)

Expand Down Expand Up @@ -561,16 +570,16 @@ function lp(estimator, data, ynames;
normalize !== nothing && iv !== nothing &&
throw(ArgumentError("options normalize and iv cannot be specified at the same time"))
normnames, normtars, normmults =
_normalize!(data, normalize, xnames, xs, ws, wgs, sts, fes, pw, nlag, minhorz,
subset, groups, checkrows, TF=TF)
_normalize!(data, normalize, xnames, xs, ws, wgs, sts, fes, ipanelfe, pw,
nlag, minhorz, subset, groups, checkrows, TF=TF)
endonames, ivnames, ix_iv, yfs, xfs, F_kp, p_kp =
_iv!(data, iv, firststagebyhorz, xnames, xs, ws, wgs, sts, fes, clus, pw, nlag,
minhorz, subset, groups, testweakiv, vce, checkrows, TF=TF)
_iv!(data, iv, firststagebyhorz, xnames, xs, ws, wgs, sts, fes, ipanelfe, clus, pw,
nlag, minhorz, subset, groups, testweakiv, vce, checkrows, TF=TF)
nendo = endonames === nothing ? 0 : length(endonames)
niv = ivnames === nothing ? 0 : length(ivnames)

B, V, T, er, F_kps, p_kps = _est(estimator, data, xnames, ys, xs, ws, wgs, sts,
fes, clus, pw, nlag, minhorz, nhorz, vce, subset, groups,
fes, ipanelfe, clus, pw, nlag, minhorz, nhorz, vce, subset, groups,
iv, ix_iv, nendo, niv, yfs, xfs, firststagebyhorz, testweakiv, checkrows, TF=TF)

if firststagebyhorz
Expand Down
Loading

0 comments on commit cf21cf3

Please sign in to comment.