Nothing
ivreg <- function(formula, instruments, data, subset, na.action, weights, offset,
contrasts = NULL, model = TRUE, y = TRUE, x = FALSE, ...)
{
## set up model.frame() call
cl <- match.call()
if(missing(data)) data <- environment(formula)
mf <- match.call(expand.dots = FALSE)
m <- match(c("formula", "data", "subset", "na.action", "weights", "offset"), names(mf), 0)
mf <- mf[c(1, m)]
mf$drop.unused.levels <- TRUE
## handle instruments for backward compatibility
if(!missing(instruments)) {
formula <- Formula::as.Formula(formula, instruments)
cl$instruments <- NULL
cl$formula <- formula(formula)
} else {
formula <- Formula::as.Formula(formula)
}
stopifnot(length(formula)[1] == 1L, length(formula)[2] %in% 1:2)
## try to handle dots in formula
has_dot <- function(formula) inherits(try(terms(formula), silent = TRUE), "try-error")
if(has_dot(formula)) {
f1 <- formula(formula, rhs = 1)
f2 <- formula(formula, lhs = 0, rhs = 2)
if(!has_dot(f1) & has_dot(f2)) formula <- Formula::as.Formula(f1,
update(formula(formula, lhs = 0, rhs = 1), f2))
}
## call model.frame()
mf$formula <- formula
mf[[1]] <- as.name("model.frame")
mf <- eval(mf, parent.frame())
## extract response, terms, model matrices
Y <- model.response(mf, "numeric")
mt <- terms(formula, data = data)
mtX <- terms(formula, data = data, rhs = 1)
X <- model.matrix(mtX, mf, contrasts)
if(length(formula)[2] < 2L) {
mtZ <- NULL
Z <- NULL
} else {
mtZ <- delete.response(terms(formula, data = data, rhs = 2))
Z <- model.matrix(mtZ, mf, contrasts)
}
## weights and offset
weights <- model.weights(mf)
offset <- model.offset(mf)
if(is.null(offset)) offset <- 0
if(length(offset) == 1) offset <- rep(offset, NROW(Y))
offset <- as.vector(offset)
## call default interface
rval <- ivreg.fit(X, Y, Z, weights, offset, ...)
## enhance information stored in fitted model object
rval$call <- cl
rval$formula <- formula(formula)
rval$terms <- list(regressors = mtX, instruments = mtZ, full = mt)
rval$na.action <- attr(mf, "na.action")
rval$levels <- .getXlevels(mt, mf)
rval$contrasts <- list(regressors = attr(X, "contrasts"), instruments = attr(Z, "contrasts"))
if(model) rval$model <- mf
if(y) rval$y <- Y
if(x) rval$x <- list(regressors = X, instruments = Z, projected = rval$x)
else rval$x <- NULL
class(rval) <- "ivreg"
return(rval)
}
ivreg.fit <- function(x, y, z, weights, offset, ...)
{
## model dimensions
n <- NROW(y)
p <- ncol(x)
## defaults
if(missing(z)) z <- NULL
if(missing(weights)) weights <- NULL
if(missing(offset)) offset <- rep(0, n)
## sanity checks
stopifnot(n == nrow(x))
if(!is.null(z)) stopifnot(n == nrow(z))
if(!is.null(weights)) stopifnot(n == NROW(weights))
stopifnot(n == NROW(offset))
## project regressors x on image of instruments z
if(!is.null(z)) {
if(ncol(z) < ncol(x)) warning("more regressors than instruments")
auxreg <- if(is.null(weights)) lm.fit(z, x, ...) else lm.wfit(z, x, weights, ...)
xz <- as.matrix(auxreg$fitted.values)
# pz <- z %*% chol2inv(auxreg$qr$qr) %*% t(z)
colnames(xz) <- colnames(x)
} else {
xz <- x
# pz <- diag(NROW(x))
# colnames(pz) <- rownames(pz) <- rownames(x)
}
## main regression
fit <- if(is.null(weights)) lm.fit(xz, y, offset = offset, ...)
else lm.wfit(xz, y, weights, offset = offset, ...)
## model fit information
ok <- which(!is.na(fit$coefficients))
yhat <- drop(x[, ok, drop = FALSE] %*% fit$coefficients[ok])
names(yhat) <- names(y)
res <- y - yhat
ucov <- chol2inv(fit$qr$qr[1:length(ok), 1:length(ok), drop = FALSE])
colnames(ucov) <- rownames(ucov) <- names(fit$coefficients[ok])
rss <- if(is.null(weights)) sum(res^2) else sum(weights * res^2)
## hat <- diag(x %*% ucov %*% t(x) %*% pz)
## names(hat) <- rownames(x)
rval <- list(
coefficients = fit$coefficients,
residuals = res,
fitted.values = yhat,
weights = weights,
offset = if(identical(offset, rep(0, n))) NULL else offset,
n = n,
nobs = if(is.null(weights)) n else sum(weights > 0),
rank = fit$rank,
df.residual = fit$df.residual,
cov.unscaled = ucov,
sigma = sqrt(rss/fit$df.residual), ## NOTE: Stata divides by n here and uses z tests rather than t tests...
# hatvalues = hat,
x = xz
)
return(rval)
}
vcov.ivreg <- function(object, ...)
object$sigma^2 * object$cov.unscaled
bread.ivreg <- function (x, ...)
x$cov.unscaled * x$nobs
estfun.ivreg <- function (x, ...)
{
xmat <- model.matrix(x)
if(any(alias <- is.na(coef(x)))) xmat <- xmat[, !alias, drop = FALSE]
wts <- weights(x)
if(is.null(wts)) wts <- 1
res <- residuals(x)
rval <- as.vector(res) * wts * xmat
attr(rval, "assign") <- NULL
attr(rval, "contrasts") <- NULL
return(rval)
}
hatvalues.ivreg <- function(model, ...) {
xz <- model.matrix(model, component = "projected")
x <- model.matrix(model, component = "regressors")
z <- model.matrix(model, component = "instruments")
solve_qr <- function(x) chol2inv(qr.R(qr(x)))
diag(x %*% solve_qr(xz) %*% t(x) %*% z %*% solve_qr(z) %*% t(z))
}
terms.ivreg <- function(x, component = c("regressors", "instruments"), ...)
x$terms[[match.arg(component)]]
model.matrix.ivreg <- function(object, component = c("projected", "regressors", "instruments"), ...) {
component <- match.arg(component, c("projected", "regressors", "instruments"))
if(!is.null(object$x)) rval <- object$x[[component]]
else if(!is.null(object$model)) {
X <- model.matrix(object$terms$regressors, object$model, contrasts = object$contrasts$regressors)
Z <- if(is.null(object$terms$instruments)) NULL
else model.matrix(object$terms$instruments, object$model, contrasts = object$contrasts$instruments)
w <- weights(object)
XZ <- if(is.null(Z)) {
X
} else if(is.null(w)) {
lm.fit(Z, X)$fitted.values
} else {
lm.wfit(Z, X, w)$fitted.values
}
if(is.null(dim(XZ))) {
XZ <- matrix(XZ, ncol = 1L, dimnames = list(names(XZ), colnames(X)))
attr(XZ, "assign") <- attr(X, "assign")
}
rval <- switch(component,
"regressors" = X,
"instruments" = Z,
"projected" = XZ)
} else stop("not enough information in fitted model to return model.matrix")
return(rval)
}
predict.ivreg <- function(object, newdata, na.action = na.pass, ...)
{
if(missing(newdata)) fitted(object)
else {
mf <- model.frame(delete.response(object$terms$full), newdata,
na.action = na.action, xlev = object$levels)
X <- model.matrix(delete.response(object$terms$regressors), mf,
contrasts = object$contrasts$regressors)
ok <- !is.na(object$coefficients)
drop(X[, ok, drop = FALSE] %*% object$coefficients[ok])
}
}
print.ivreg <- function(x, digits = max(3, getOption("digits") - 3), ...)
{
cat("\nCall:\n", deparse(x$call), "\n\n", sep = "")
cat("Coefficients:\n")
print.default(format(coef(x), digits = digits), print.gap = 2, quote = FALSE)
cat("\n")
invisible(x)
}
summary.ivreg <- function(object, vcov. = NULL, df = NULL, diagnostics = FALSE, ...)
{
## weighted residuals
res <- object$residuals
y <- object$fitted.values + res
n <- NROW(res)
w <- object$weights
if(is.null(w)) w <- rep(1, n)
res <- res * sqrt(w)
## R-squared
rss <- sum(res^2)
if(attr(object$terms$regressors, "intercept")) {
tss <- sum(w * (y - weighted.mean(y, w))^2)
dfi <- 1
} else {
tss <- sum(w * y^2)
dfi <- 0
}
r.squared <- 1 - rss/tss
adj.r.squared <- 1 - (1 - r.squared) * ((n - dfi)/object$df.residual)
## degrees of freedom (for z vs. t test)
if(is.null(df)) df <- object$df.residual
if(!is.finite(df)) df <- 0
if(df > 0 & (df != object$df.residual)) {
df <- object$df.residual
}
## covariance matrix
if(is.null(vcov.))
vc <- vcov(object)
else {
if(is.function(vcov.)) vc <- vcov.(object)
else vc <- vcov.
}
## Wald test of each coefficient
cf <- lmtest::coeftest(object, vcov. = vc, df = df, ...)
attr(cf, "method") <- NULL
class(cf) <- "matrix"
## Wald test of all coefficients
Rmat <- if(attr(object$terms$regressors, "intercept"))
cbind(0, diag(length(na.omit(coef(object)))-1)) else diag(length(na.omit(coef(object))))
waldtest <- car::linearHypothesis(object, Rmat, vcov. = vcov., test = ifelse(df > 0, "F", "Chisq"), singular.ok = TRUE)
waldtest <- c(waldtest[2,3], waldtest[2,4], abs(waldtest[2,2]), if(df > 0) waldtest[2,1] else NULL)
## diagnostic tests
diag <- if(diagnostics) ivdiag(object, vcov. = vcov.) else NULL
rval <- list(
call = object$call,
terms = object$terms,
residuals = res,
weights <- object$weights,
coefficients = cf,
sigma = object$sigma,
df = c(object$rank, if(df > 0) df else Inf, object$rank), ## aliasing
r.squared = r.squared,
adj.r.squared = adj.r.squared,
waldtest = waldtest,
vcov = vc,
diagnostics = diag)
class(rval) <- "summary.ivreg"
return(rval)
}
print.summary.ivreg <- function(x, digits = max(3, getOption("digits") - 3),
signif.stars = getOption("show.signif.stars"), ...)
{
cat("\nCall:\n")
cat(paste(deparse(x$call), sep = "\n", collapse = "\n"), "\n\n", sep = "")
cat(if(!is.null(x$weights) && diff(range(x$weights))) "Weighted ", "Residuals:\n", sep = "")
if(NROW(x$residuals) > 5L) {
nam <- c("Min", "1Q", "Median", "3Q", "Max")
rq <- if(length(dim(x$residuals)) == 2)
structure(apply(t(x$residuals), 1, quantile), dimnames = list(nam, dimnames(x$residuals)[[2]]))
else structure(quantile(x$residuals), names = nam)
print(rq, digits = digits, ...)
} else {
print(x$residuals, digits = digits, ...)
}
cat("\nCoefficients:\n")
printCoefmat(x$coefficients, digits = digits, signif.stars = signif.stars,
signif.legend = signif.stars & is.null(x$diagnostics), na.print = "NA", ...)
if(!is.null(x$diagnostics)) {
cat("\nDiagnostic tests:\n")
printCoefmat(x$diagnostics, cs.ind = 1L:2L, tst.ind = 3L,
has.Pvalue = TRUE, P.values = TRUE, digits = digits,
signif.stars = signif.stars, na.print = "NA", ...)
}
cat("\nResidual standard error:", format(signif(x$sigma, digits)),
"on", x$df[2L], "degrees of freedom\n")
cat("Multiple R-Squared:", formatC(x$r.squared, digits = digits))
cat(",\tAdjusted R-squared:", formatC(x$adj.r.squared, digits = digits),
"\nWald test:", formatC(x$waldtest[1L], digits = digits),
"on", x$waldtest[3L], if(length(x$waldtest) > 3L) c("and", x$waldtest[4L]) else NULL,
"DF, p-value:", format.pval(x$waldtest[2L], digits = digits), "\n\n")
invisible(x)
}
anova.ivreg <- function(object, object2, test = "F", vcov = NULL, ...)
{
rval <- waldtest(object, object2, test = test, vcov = vcov)
if(is.null(vcov)) {
head <- attr(rval, "heading")
head[1] <- "Analysis of Variance Table\n"
rss <- sapply(list(object, object2), function(x) sum(residuals(x)^2))
dss <- c(NA, -diff(rss))
rval <- cbind(rval, cbind("RSS" = rss, "Sum of Sq" = dss))[,c(1L, 5L, 2L, 6L, 3L:4L)]
attr(rval, "heading") <- head
class(rval) <- c("anova", "data.frame")
}
return(rval)
}
update.ivreg <- function (object, formula., ..., evaluate = TRUE)
{
if(is.null(call <- getCall(object))) stop("need an object with call component")
extras <- match.call(expand.dots = FALSE)$...
if(!missing(formula.)) call$formula <- formula(update(Formula(formula(object)), formula.))
if(length(extras)) {
existing <- !is.na(match(names(extras), names(call)))
for (a in names(extras)[existing]) call[[a]] <- extras[[a]]
if(any(!existing)) {
call <- c(as.list(call), extras[!existing])
call <- as.call(call)
}
}
if(evaluate) eval(call, parent.frame())
else call
}
ivdiag <- function(obj, vcov. = NULL) {
## extract data
y <- model.response(model.frame(obj))
x <- model.matrix(obj, component = "regressors")
z <- model.matrix(obj, component = "instruments")
w <- weights(obj)
## names of "regressors" and "instruments"
xnam <- colnames(x)
znam <- colnames(z)
## relabel "instruments" to match order from "regressors"
fx <- attr(terms(obj, component = "regressors"), "factors")
fz <- attr(terms(obj, component = "instruments"), "factors")
fz <- fz[c(rownames(fx)[rownames(fx) %in% rownames(fz)], rownames(fz)[!(rownames(fz) %in% rownames(fx))]), , drop = FALSE]
nz <- apply(fz > 0, 2, function(x) paste(rownames(fz)[x], collapse = ":"))
nz <- nz[names(nz) != nz]
nz <- nz[nz %in% colnames(fx)]
if(length(nz) > 0L) znam[names(nz)] <- nz
## endogenous/instrument variables
endo <- which(!(xnam %in% znam))
inst <- which(!(znam %in% xnam))
if((length(endo) <= 0L) | (length(inst) <= 0L))
stop("no endogenous/instrument variables")
## return value
rval <- matrix(NA, nrow = length(endo) + 2L, ncol = 4L)
colnames(rval) <- c("df1", "df2", "statistic", "p-value")
rownames(rval) <- c(if(length(endo) > 1L) paste0("Weak instruments (", xnam[endo], ")") else "Weak instruments",
"Wu-Hausman", "Sargan")
## convenience functions
lmfit <- function(x, y, w = NULL) {
rval <- if(is.null(w)) lm.fit(x, y) else lm.wfit(x, y, w)
rval$x <- x
rval$y <- y
return(rval)
}
rss <- function(obj, weights = NULL) if(is.null(weights)) sum(obj$residuals^2) else sum(weights * obj$residuals^2)
wald <- function(obj0, obj1, vcov. = NULL, weights = NULL) {
df <- c(obj1$rank - obj0$rank, obj1$df.residual)
if(!is.function(vcov.)) {
w <- ((rss(obj0, w) - rss(obj1, w)) / df[1L]) / (rss(obj1, w)/df[2L])
} else {
if(NCOL(obj0$coefficients) > 1L) {
cf0 <- structure(as.vector(obj0$coefficients),
.Names = c(outer(rownames(obj0$coefficients), colnames(obj0$coefficients), paste, sep = ":")))
cf1 <- structure(as.vector(obj1$coefficients),
.Names = c(outer(rownames(obj1$coefficients), colnames(obj1$coefficients), paste, sep = ":")))
} else {
cf0 <- obj0$coefficients
cf1 <- obj1$coefficients
}
cf0 <- na.omit(cf0)
cf1 <- na.omit(cf1)
ovar <- which(!(names(cf1) %in% names(cf0)))
vc <- vcov.(lm(obj1$y ~ 0 + obj1$x, weights = w))
w <- t(cf1[ovar]) %*% solve(vc[ovar,ovar]) %*% cf1[ovar]
w <- w / df[1L]
}
pval <- pf(w, df[1L], df[2L], lower.tail = FALSE)
c(df, w, pval)
}
# Test for weak instruments
for(i in seq_along(endo)) {
aux0 <- lmfit(z[, -inst, drop = FALSE], x[, endo[i]], w)
aux1 <- lmfit(z, x[, endo[i]], w)
rval[i, ] <- wald(aux0, aux1, vcov. = vcov., weights = w)
}
## Wu-Hausman test for endogeneity
if(length(endo) > 1L) aux1 <- lmfit(z, x[, endo], w)
xfit <- as.matrix(aux1$fitted.values)
colnames(xfit) <- paste("fit", colnames(xfit), sep = "_")
auxo <- lmfit( x, y, w)
auxe <- lmfit(cbind(x, xfit), y, w)
rval[nrow(rval) - 1L, ] <- wald(auxo, auxe, vcov. = vcov., weights = w)
## Sargan test of overidentifying restrictions
r <- residuals(obj)
auxs <- lmfit(z, r, w)
rssr <- if(is.null(w)) sum((r - mean(r))^2) else sum(w * (r - weighted.mean(r, w))^2)
rval[nrow(rval), 1L] <- length(inst) - length(endo)
if(rval[nrow(rval), 1L] > 0L) {
rval[nrow(rval), 3L] <- length(r) * (1 - rss(auxs, w)/rssr)
rval[nrow(rval), 4L] <- pchisq(rval[nrow(rval), 3L], rval[nrow(rval), 1L], lower.tail = FALSE)
}
return(rval)
}
## If #Instruments = #Regressors then
## b = (Z'X)^{-1} Z'y
## and solves the estimating equations
## Z' (y - X beta) = 0
## For
## cov(y) = Omega
## the following holds
## cov(b) = (Z'X)^{-1} Z' Omega Z (X'Z)^{-1}
##
## Generally:
## b = (X' P_Z X)^{-1} X' P_Z y
## with estimating equations
## X' P_Z (y - X beta) = 0
## where P_Z is the usual projector (hat matrix wrt Z) and
## cov(b) = (X' P_Z X)^{-1} X' P_Z Omega P_Z X (X' P_Z X)^{-1}
## Thus meat is X' P_Z Omega P_Z X and bread i (X' P_Z X)^{-1}
##
## See
## http://www.stata.com/support/faqs/stat/2sls.html
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.