Nothing
#' Instrumental Variable Estimation with Lasso
#'
#' This function selects the instrumental variables in the first stage by
#' Lasso. First stage predictions are then used in the second stage as optimal
#' instruments to estimate the parameter vector. The function returns an element of class \code{rlassoIVselectZ}
#'
#' The implementation follows the procedure described in Belloni et al. (2012).
#' Option \code{post=TRUE} conducts post-lasso estimation, i.e. a refit of the
#' model with the selected variables, to estimate the optimal instruments. The
#' parameter vector of the structural equation is then fitted by two-stage
#' least square (tsls) estimation.
#'
#' @param x exogenous variables in the structural equation (matrix)
#' @param d endogenous variables in the structural equation (vector or matrix)
#' @param y outcome or dependent variable in the structural equation (vector or matrix)
#' @param z set of potential instruments for the endogenous variables.
#' Exogenous variables serve as their own instruments.
#' @param post logical. If \code{TRUE}, post-lasso estimation is conducted.
#' @param intercept logical. If \code{TRUE}, intercept is included in the second stage equation.
#' @param \dots arguments passed to the function \code{rlasso}.
#' @return An object of class \code{rlassoIVselectZ} containing at least the following
#' components: \item{coefficients}{estimated parameter vector}
#' \item{vcov}{variance-covariance matrix} \item{residuals}{
#' residuals} \item{samplesize}{sample size} \item{selected}{matrix of selected variables in the first stage for each endogenous variable}
#' @references D. Belloni, D. Chen, V. Chernozhukov and C. Hansen (2012).
#' Sparse models and methods for optimal instruments with an application to
#' eminent domain. \emph{Econometrica} 80 (6), 2369--2429.
#' @export
rlassoIVselectZ <- function(x, ...)
UseMethod("rlassoIVselectZ") # definition generic function
#' @export
#' @rdname rlassoIVselectZ
rlassoIVselectZ.default <- function(x, d, y, z, post = TRUE, intercept = TRUE, ...) {
d <- as.matrix(d)
if (is.vector(x)) x <- as.matrix(x)
n <- length(y)
kex <- dim(x)[2]
ke <- dim(d)[2]
if (is.null(colnames(d)))
colnames(d) <- paste("d", 1:ke, sep = "")
if (is.null(colnames(x)) & !is.null(x))
colnames(x) <- paste("x", 1:kex, sep = "")
Z <- cbind(z,x) # including the x-variables as instruments
kiv <- dim(Z)[2]
select.mat <- NULL # matrix with the selected variables
# first stage regression
Dhat <- NULL
flag.const <- 0
for (i in 1:ke) {
di <- d[, i]
#lasso.fit <- rlasso(di ~ Z, post = post, intercept = intercept, ...)
lasso.fit <- rlasso(y=di, x=Z, post = post, intercept = intercept, ...)
if (sum(lasso.fit$ind) == 0) {
dihat <- rep(mean(di), n) #dihat <- mean(di)
flag.const <- flag.const + 1
if (flag.const >1) message("No variables selected for two or more instruments leading to multicollinearity problems.")
#intercept <- FALSE # to avoid multicollineariry
select.mat <- cbind(select.mat, FALSE)
} else {
# dihat <- z%*%lasso.fit$coefficients
dihat <- predict(lasso.fit)
select.mat <- cbind(select.mat, lasso.fit$index)
}
Dhat <- cbind(Dhat, dihat)
}
colnames(select.mat) <- colnames(d)
#if (intercept) { #?
# Dhat <- cbind(Dhat, 1, x)
# d <- cbind(d, 1, x)
#} else {
# Dhat <- cbind(Dhat, x)
# d <- cbind(d, x)
#}
Dhat <- cbind(Dhat, x)
d <- cbind(d, x)
# calculation coefficients
#alpha.hat <- solve(t(Dhat) %*% d) %*% (t(Dhat) %*% y)
alpha.hat <- MASS::ginv(t(Dhat)%*%d)%*%(t(Dhat)%*%y)
# calcualtion of the variance-covariance matrix
residuals <- y - d %*% alpha.hat
#Omega.hat <- t(Dhat) %*% diag(as.vector(residuals^2)) %*% Dhat # Dhat.e <- Dhat*as.vector(residuals); Omega.hat <- t(Dhat.e)%*%Dhat.e
Omega.hat <- t(Dhat) %*% (Dhat*as.vector(residuals^2))
Q.hat.inv <- MASS::ginv(t(d) %*% Dhat) #solve(t(d)%*%Dhat)
vcov <- Q.hat.inv %*% Omega.hat %*% t(Q.hat.inv)
rownames(alpha.hat) <- c(colnames(d))
colnames(vcov) <- rownames(vcov) <- rownames(alpha.hat)
if (is.null(x)){
res <- list(coefficients = alpha.hat[1:ke, ], se = sqrt(diag(vcov))[1:ke],
vcov = vcov[1:ke, 1:ke, drop = FALSE], residuals = residuals, samplesize = n, selected = select.mat,
call = match.call())
}
else{
if (is.null(x) == FALSE){
res <- list(coefficients = alpha.hat[1:ke, ], se = sqrt(diag(vcov))[1:ke],
vcov = vcov[1:ke, 1:ke, drop = FALSE],
coefficients.controls = alpha.hat[(ke + 1):(ke + kex), ], se.controls = sqrt(diag(vcov))[(ke + 1):(ke + kex)],
vcov.controls = vcov[(ke + 1):(ke + kex), (ke + 1):(ke + kex), drop = FALSE],
residuals = residuals, samplesize = n, selected = select.mat,
call = match.call())
}
}
class(res) <- "rlassoIVselectZ"
return(res)
}
#' @rdname rlassoIVselectZ
#' @export
#' @param formula An object of class \code{Formula} of the form " y ~ x + d | x + z" with y the outcome variable,
#' d endogenous variable, z instrumental variables, and x exogenous variables.
#' @param data An optional data frame, list or environment (or object coercible by as.data.frame to a data frame) containing the variables in the model.
#' If not found in data, the variables are taken from environment(formula), typically the environment from which \code{rlassoIVselectZ} is called.
rlassoIVselectZ.formula <- function(formula, data, post=TRUE, intercept = TRUE, ...) {
mat <- f.formula(formula, data, all.categories = FALSE)
y <- mat$Y
x <- mat$X
d <- mat$D
z <- mat$Z
res <- rlassoIVselectZ(x=x,d=d,y=y,z=z, post=post, intercept=intercept, ...)
res$call <- match.call()
return(res)
}
################# Methods for rlassoIVselectZ
#' Methods for S3 object \code{rlassoIVselectZ}
#'
#' Objects of class \code{rlassoIVselectZ} are constructed by \code{rlassoIVselectZ}.
#' \code{print.rlassoIVselectZ} prints and displays some information about fitted \code{rlassoIVselectZ} objects.
#' \code{summary.rlassoIVselectZ} summarizes information of a fitted \code{rlassoIVselectZ} object.
#' \code{confint.rlassoIVselectZ} extracts the confidence intervals.
#' @param object an object of class \code{rlassoIVselectZ}
#' @param x an object of class \code{rlassoIVselectZ}
#' @param digits significant digits in printout
#' @param ... arguments passed to the print function and other methods
#' @param parm a specification of which parameters are to be given confidence intervals, either a vector of numbers or a vector of names. If missing, all parameters are considered.
#' @param level confidence level required.
#' @rdname methods.rlassoIVselectZ
#' @aliases methods.rlassoIVselectZ print.rlassoIVselectZ summary.rlassoIVselectZ
#' @export
print.rlassoIVselectZ <- function(x, digits = max(3L, getOption("digits") -
3L), ...) {
cat("\nCall:\n", paste(deparse(x$call), sep = "\n", collapse = "\n"),
"\n\n", sep = "")
if (length(coef(x))) {
cat("Coefficients:\n")
print.default(format(coef(x), digits = digits), print.gap = 2L,
quote = FALSE)
} else cat("No coefficients\n")
cat("\n")
invisible(coef(x))
}
#' @rdname methods.rlassoIVselectZ
#' @export
summary.rlassoIVselectZ <- function(object, digits = max(3L, getOption("digits") -
3L), ...) {
if (length(coef(object))) {
k <- length(object$coefficients)
table <- matrix(NA, ncol = 4, nrow = k)
rownames(table) <- names(object$coefficients)
colnames(table) <- c("coeff.", "se.", "t-value", "p-value")
table[, 1] <- object$coefficients
table[, 2] <- sqrt(diag(as.matrix(object$vcov)))
table[, 3] <- table[, 1]/table[, 2]
table[, 4] <- 2 * pnorm(-abs(table[, 3]))
print("Estimates and significance testing of the effect of target variables in the IV regression model")
printCoefmat(table, digits = digits, P.values = TRUE, has.Pvalue = TRUE)
cat("\n")
} else {
cat("No coefficients\n")
}
cat("\n")
invisible(table)
}
#' @rdname methods.rlassoIVselectZ
#' @export
confint.rlassoIVselectZ <- function(object, parm, level = 0.95, ...) {
n <- object$samplesize
k <- length(object$coefficients)
cf <- coef(object)
pnames <- names(cf)
if (missing(parm))
parm <- pnames else if (is.numeric(parm))
parm <- pnames[parm]
a <- (1 - level)/2
a <- c(a, 1 - a)
# fac <- qt(a, n-k)
fac <- qnorm(a)
pct <- format.perc(a, 3)
ci <- array(NA, dim = c(length(parm), 2L), dimnames = list(parm, pct))
ses <- sqrt(diag(as.matrix(object$vcov)))[parm]
ci[] <- cf[parm] + ses %o% fac
print(ci)
invisible(ci)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.