R/newey.r

# !#/bin/R
# Program: Calculates LIML regression with applied Bekker variances following NEWEY 2006.
# Author: Nicholas Potter
# Date: 11/15/2011
#
# UPDATED:
#	20120511 - NAP: Correctly calculates bekker adjustment, clean up and finalize
#
# Using MROZ data set should give the following results:
# Var	Coef	 	Std Err		Bekker
# hours	.00178946 	.00114792	.00203246
# exper	-.092979	.145624		.254861
###########
library("foreign")
library("corpcor")
library("Matrix")
model_type = "liml"

data_raw = read.dta("~/data/stata/mroz.dta")
data_complete = data_raw[complete.cases(data_raw),]
data = subset(data_complete, age < 32)
N = nrow(data)

#Dependent variable (lhs)
y = matrix(cbind(data$lwage))
#Instrumented (endogenous variables)
X1 = matrix(cbind(data$hours))
#Instruments (Z1: excluded exogenous variables)
Z1 = as.matrix(cbind(data$educ, data$kidslt6, data$kidsge6, data$age, data$nwifeinc))
#Exogenous (X2 = Z2: included exogenous variables)
#X2 = matrix(cbind(1,data$exper),N,2)
X2 = matrix(cbind(data$exper))
Z2 = X2

# Define our dimensions
G1 = ncol(X1)
G2 = ncol(X2)
G = G1 + G2
K1 = ncol(Z1)
K2 = ncol(Z2)
K = K1 + K2

# Weight vector
# weights = matrix(data$f_wt_totsvy)
weights = matrix(rep(1,N))
wt = diag(N)
wt = as.matrix(diag(N))

# create unit matrix
I = diag(N)

A = matrix(c(y,X1,X2,Z1),N,K+G1+1)
Y = matrix(c(y,X1,X2),N,1+G)
X = matrix(c(X1,X2),N,G)
Z = matrix(c(Z1,Z2),N,K)

df.total = N
df.ess = ncol(Y)-1
df.rss = df.total - df.ess

AA = t(A) %*% wt %*% A
M = nrow(AA)

# Now define the commonly used matrices.
YY		= t(Y) %*% Y 
YYinv	= pseudoinverse(YY)
ZZ 		= t(Z) %*% Z
ZZinv	= pseudoinverse(ZZ) 
XX		= t(X) %*% X
XXinv	= pseudoinverse(XX)
XZ		= t(X) %*% Z
PZ		= pseudoinverse(Z %*% t(Z)) %*% (Z %*% t(Z))
MZ		= I-PZ
YPZY 	= t(Y) %*% PZ %*% Y
XPZX	= t(X) %*% PZ %*% X

# get lambda/alpha
HH		= YYinv %*% YPZY
eigenvalues = eigen(HH)
alpha = min(Re(eigenvalues$val))

# Betas
X.beta = pseudoinverse(XPZX- alpha*XX) %*% (XPZy - alpha*Xy)
Z.beta = ZZinv %*% t(XZ)

e = y - X %*% X.beta
rss = t(e) %*% wt %*% e
rss.ms = rss/df.rss
rss.es = rss - rss.ms
rss.V = rss/(N-df.ess)
s2 = rss/(N-df.ess)

if (model_type=="liml") {
	X.var = s2[1,1] * XMZXinv
} else if (model_type=="tsls") {
	X.var = s2[1,1] * XPZXinv
}
Z.var = sigma2[1,1] * ZZinv
Z.se = sqrt(Z.var)
X.se = sqrt(X.var)

Kn = sum(diag(PZ)**2)/K
Tn = K/N
s2 = rss/(N-G)
S2 = matrix(c(s2),N,1)
H = XPZX - alpha[1] * XX
Hinv = pseudoinverse(H)
HA = sum(diag(PZ)-Tn) * t(PZX) %*% (rss[1] * MZXbar/N)
HB = K * (Kn[1] - Tn) * sum(e*e-S2) * t(MZXbar) %*% MZXbar / (N*(1 - 2*Tn + Kn[1]*Tn))
SigmaB = s2[1] * (((1 - alpha[1])**2)*t(Xbar)%*%PZXbar + a2[1]*t(Xbar)%*%MZXbar) 
# according to Newey, should also add HB but it only matches TSP if HB is not included 
Sigma3 = SigmaB + HA + t(HA) + HB
Sigma2 = SigmaB + HA + t(HA)

X.varnewey = Hinv %*% Sigma2 %*% Hinv
X.senewey = sqrt(X.varnewey)



xxx


Xy = as.matrix(AA[2:(K+1),1])
XX = as.matrix(AA[2:(K+1), 2:(K+1)])
if (K1 > 0) {
    X1X1 = as.matrix(AA[2:(K1+1), 2:(K1+1)])
    }
if (L1 > 0) {
    Z1Z1 = as.matrix(AA[(K+2):M,(K+2):M])
    }
if (L2 > 0) {
    Z2Z2 = as.matrix(AA[(K1+2):(K+1), (K1+2):(K+1)])
    X2y = as.matrix(AA[(K1+2):(K+1), 1])
    }

# Get ZZ
if ((L1 > 0) && (L2 > 0)) {
    Z2Z1 = as.matrix(AA[(K1+2):(K+1),(K+2):M])
    ZZ2 = matrix(c(Z2Z1, Z2Z2),L,L2)
    ZZ1 = matrix(c(Z1Z1, Z2Z1),L1,L)
	ZZ = matrix(rbind(ZZ1, t(ZZ2)),L,L)
} else if (L1>0) {
	ZZ = Z1Z1
} else {
	ZZ = Z2Z2
}

if (K1>0) {
	X1Z1 = as.matrix(AA[(2:(K1+1)), ((K+2):M)])
	}

if ((K1>0) && (L2>0)) {
	X1Z2 = as.matrix(AA[2:(K1+1), (K1+2):(K+1)])
	X1Z = matrix(c(X1Z1, X1Z2),1,L)
	XZ = matrix(rbind(X1Z, t(ZZ2)),2,L)
} else if (K1>0) { 
	XZ = X1Z1
	X1Z= X1Z1
} else if (L1>0) {
	XZ = matrix(AA[2:(K+1),(K+2):M], AA[(2:K+1),(2:(K+1))])
} else {
	XZ = ZZ
}

if ((L1>0) & (L2>0)) {
	Zy = matrix(c(AA[(K+2):M, 1],AA[(K1+2):(K+1)]),L,1)
	ZY = matrix(rbind(AA[(K+2):M, 1:(K1+1)], AA[(K1+2):(K+1), 1:(K1+1)]), L, L2+1)
	Z2Y = matrix(c(AA[(K1+2):(K+1), 1:(K1+1)]),L2,K1+1)
} else if (L1>0) {
	Zy = AA[(K+2):M, 1]
	ZY = AA[(K+2):M, 1:(K1+1)]
} else {
	Zy = AA[(K1+2):(K+1), 1]
	ZY = AA[(K1+2):(K+1), 1:(K1+1)]
	Z2Y = ZY
}

YY  = AA[1:(K1+1), 1:(K1+1)]
yy  = AA[1,1]
ym    = sum(wt %*% y)/N
yyc   = t(y %*% t(ym)) %*% wt %*% y %*% ym

YYinv	= pseudoinverse(YY)
XXinv   = pseudoinverse(XX)
ZZinv   = pseudoinverse(ZZ)
PZ      = Z %*% ZZinv %*% t(Z)
MZ      = I-PZ
XPZX    = XZ %*% ZZinv %*% t(XZ)
XPZy	= t(X) %*% PZ %*% y
XPZXinv = pseudoinverse(XPZX)
YPZY	= t(Y) %*% PZ %*% Y

if (ncol(X)==ncol(Z)) {
	if (X==Z) {
		ZZinv = XXinv
		XPZXinv = XXinv
	} else {
		ZZinv = pseudoinverse(ZZ)
		XPZX  = XZ %*% ZZinv %*% t(XZ)
		XPZXinv=pseudoinverse(XPZX)
	}
}

QZZ = ZZ/N
QXX = XX/N
QXZ = XZ/N
QZy = Zy/N
QZ2Z2 = Z2Z2/N
QYY = YY/N
QZY = ZY/N
QZ2Y = Z2Y/N
QXy = Xy/N
QXPZX = XPZX/N
QZZinv = ZZinv*N


QWW = QYY - t(QZY) %*% QZZinv %*% QZY
WW = (YY - t(ZY) %*% ZZinv %*% ZY)
Omega = WW/(N-K-L)

if (K2>0) {
	Z2Z2inv = pseudoinverse(Z2Z2)
	QZ2Z2inv = pseudoinverse(QZ2Z2)
	QWW1 = QYY - t(QZ2Y) %*% QZ2Z2inv %*% QZ2Y
	YPY = YY - (t(Z2Y) %*% Z2Z2inv %*% Z2Y) - WW
} else {
	QWW1 = QYY
	YPY = YY - WW
}
# equivalent to YMY/N in kolesar
QWWsym = as.matrix(forceSymmetric(QWW))
# equivalent to YY/N in kolesar
QWW1sym = as.matrix(forceSymmetric(QWW1))

QWWsqrt = mpower(QWWsym, -0.5)
AA2 = QWWsqrt %*% QWW1 %*% QWWsqrt

eigenvalues = eigen(AA2, only.values=TRUE)
lambda = min(Re(eigenvalues$val))

# for now just set C = 2
C = 2
# set model_type
model_type = "liml"
if (model_type=="fuller") {
	kclass = (lambda-lambda*C/N)/(1-lambda*C/N)
} else if (model_type=="liml") {
	kclass = lambda
} else if (model_type=="tsls") {
	kclass = 0
}

XMZX    = t(X) %*% (I-kclass*MZ) %*% X 
XMZXinv = pseudoinverse(XMZX)



QXhXh = (1-kclass) * QXX + kclass * QXPZX
QXhXhinv = pseudoinverse(QXhXh)
#newey :: 1/(1-kclass2) = kclass from ivreg2
Y2 = matrix(c(y,X),N,K+1)
Y2Y2 = t(Y2) %*% Y2
Y2Y2inv = pseudoinverse(Y2Y2)
Y2PZY2 = t(Y2) %*% PZ %*% Y2
AAA = Y2Y2inv %*% Y2PZY2
eigenvalues2 = eigen(AAA)
kclass2 = min(Re(eigenvalues2$val))
X.beta2 = pseudoinverse(XPZX- kclass2*XX) %*% (XPZy - kclass2*Xy)
X.beta1 = t(t(X) %*% (I-kclass*MZ) %*% y) %*% XMZXinv
X.beta = t(QXy) %*% QXhXhinv * (1-kclass) + kclass * t(QZy) %*% QZZinv %*% t(QXZ) %*% QXhXhinv
Z.beta = ZZinv %*% t(XZ)
e = y - X %*% t(X.beta)
rss = t(e) %*% wt %*% e
rss.ms = rss/df.rss
rss.es = rss - rss.ms
rss.V = rss/(N-df.ess)
s2 = rss/(N-df.ess)

# LIML Variance
X.varliml = s2[1,1] * XMZXinv
# TSLS Variance
X.vartsls = s2[1,1] * XPZXinv

Z.var = sigma2[1,1] * ZZinv
X.seliml = sqrt(X.varliml)
X.setsls = sqrt(X.vartsls)

# Bekker coefficients
ak = K/N
al = L/N
h1 = ak/(1-ak)
h2 = ak*(1-al)/(1-ak-al)

# WORKS TO THIS POINT
PZ2 = t(Z1) %*% pseudoinverse(Z1 %*% t(Z1)) %*% Z1
PW = t(Z2) %*% pseudoinverse(Z2 %*% t(Z2)) %*% Z2

#QWW = t(Y) %*% (I - PZ - PW) %*% Y

Gamma = matrix(c(1, -X.beta[1,1], 0, 1), 2,2)
Sigma = t(Gamma) %*% Omega %*% Gamma
Lambda = t(Gamma) %*% (YPY/N-ak*Omega) %*% Gamma
X.varbekker = Sigma[1,1] / Lambda[2,2] + h2 * det(Sigma)/(Lambda[2,2]**2)
X.sebekker = sqrt(X.varbekker)

# equivalent to e above
u = y - X %*% X.beta2
# this alpha is equivalent to kclass2 above
alpha = t(e) %*% PZ %*% e / rss[1]
a2 = alpha * alpha
PZX = PZ %*% X
Xbar = X - e %*% (t(e) %*% X) / rss[1]
PZXbar = PZ %*% Xbar
MZXbar = (I-PZ) %*% Xbar

G = K
K = 6


X.varnewey3 = Hinv %*% Sigma3 %*% Hinv
X.senewey3 = sqrt(X.varnewey3)
potterzot/napr documentation built on Aug. 14, 2019, 6:32 a.m.