#' Inference in linear regression with a shift-share regressor
#'
#' Computes confidence intervals and p-values in a linear regression in which
#' the regressor of interest has a shift-share structure, as the instrument in
#' Bartik (1991). Several different inference methods can computed, as specified
#' by \code{method}.
#'
#' @template formula
#' @template shocks
#' @template method
#' @template value
#' @references{
#'
#' \cite{Bartik, Timothy J., Who Benefits from State and Local Economic
#' Development Policies?, Kalamazoo, MI: W.E. Upjohn Institute for Employment
#' Research, 1991.}
#'
#' \cite{Adão, Rodrigo, Kolesár, Michal, and Morales, Eduardo,
#' "Shift-Share Designs: Theory and Inference", Quarterly Journal of Economics
#' 2019, 134 (4), 1949-2010. \doi{10.1093/qje/qjz025}.}
#'
#' }
#' @examples
#' ## Use ADH data from Autor, Dorn, and Hanson (2013)
#' reg_ss(d_sh_empl ~ 1, X=IV, data=ADH$reg, W=ADH$W,
#' method=c("ehw", "akm", "akm0"))
#' @export
reg_ss <- function(formula, X, data, W, subset, weights, method, beta0=0,
alpha=0.05, region_cvar=NULL, sector_cvar=NULL) {
## construct model frame
cl <- mf <- match.call(expand.dots = FALSE)
m <- match(c("formula", "X", "data", "subset", "weights", "region_cvar"),
names(mf), 0L)
mf <- mf[c(1L, m)]
mf[[1L]] <- quote(stats::model.frame)
mf <- eval(mf, parent.frame())
y <- stats::model.response(mf, "numeric")
w <- as.vector(stats::model.weights(mf))
rc <- mf$"(region_cvar)"
if (!is.null(w) && !is.numeric(w))
stop("'weights' must be a numeric vector")
mt <- attr(mf, "terms")
Z <- NULL
if (!stats::is.empty.model(mt))
Z <- stats::model.matrix(mt, mf, contrasts=NULL)
ret <- reg_ss.fit(y, mf$"(X)", W, Z, w, method, beta0, alpha, rc,
sector_cvar)
ret$call <- cl
ret$terms <- mt
ret
}
#' Inference in a shift-share regression
#'
#' Basic computing engine to calculate confidence intervals and p-values in
#' shift-share designs using different inference methods, as specified by
#' \code{method}.
#' @param y Outcome variable, vector of length \code{N}, with each row
#' corresponding to a region.
#' @param Z Matrix of regional controls, matrix with \code{N} rows corresponding
#' to regions.
#' @template shocks
#' @template method
#' @template value
#' @param w vector of weights (length \code{N}) to be used in the fitting
#' process. If not \code{NULL}, weighted least squares is used with weights
#' \code{w}, i.e., \code{sum(w * residuals^2)} is minimized.
#' @export
reg_ss.fit <- function(y, X, W, Z, w=NULL, # nolint
method=c("akm", "akm0"), beta0=0,
alpha=0.05, region_cvar=NULL, sector_cvar=NULL) {
mm <- cbind(X, Z)
r <- drop_collinear(W, sector_cvar)
W <- r$W
sector_cvar <- r$sector_cvar
if (is.null(w)) {
ddX <- stats::lm.fit(y=X, x=Z)$residuals # \ddot{X}
ddY <- stats::lm.fit(y=y, x=Z)$residuals # \ddot{Y}
hX <- stats::lm.fit(y=ddX, x=W)$coefficients # \hat{\Xs}
wgt <- 1
r <- stats::lm.fit(mm, y)
} else {
ddX <- stats::lm.wfit(y=X, x=Z, w=w)$residuals
ddY <- stats::lm.wfit(y=y, x=Z, w=w)$residuals
hX <- stats::lm.wfit(y=ddX, x=W, w=w)$coefficients
wgt <- w
r <- stats::lm.wfit(mm, y, w)
}
betahat <- unname(r$coefficients[1])
n <- NROW(mm)
p <- r$rank
se.h <- se.r <- se.s <- se.akm <- se.akm0 <- NA
if ("all" %in% method)
method <- c("homosk", "ehw", "region_cluster", "akm", "akm0")
RX <- sum(wgt * ddX^2)
if ("homosk" %in% method) {
resvar <- sum(wgt * r$residuals^2) / r$df.residual
se.h <- sqrt(resvar / RX)
}
u <- wgt * r$residuals * ddX
if ("ehw" %in% method)
se.r <- sqrt((n / (n - p)) * drop(crossprod(u))) / RX
if (("region_cluster" %in% method) && is.null(region_cvar)) {
warning(paste0("Reporting NA for \"region_cluster\" Std. Error",
" because \"region_cvar\" not supplied."))
} else if ("region_cluster" %in% method) {
nc <- length(unique(region_cvar)) # # of clusters
se.s <- sqrt((nc / (nc-1)) * (n-1) / (n-p) *
drop(crossprod(tapply(u, factor(region_cvar),
sum)))) / RX
}
if ("akm" %in% method || "akm0" %in% method) {
cR <- hX*drop(crossprod(wgt * r$residuals, W))
cR0 <- hX*drop(crossprod(wgt * (ddY-ddX*beta0), W))
cW <- hX*drop(crossprod(wgt * ddX, W))
if (!is.null(sector_cvar) && length(sector_cvar) != length(cR))
stop("The length of \"sector_cvar\" is different ",
"from the number of sectors.")
if (!is.null(sector_cvar)) {
cR <- tapply(cR, factor(sector_cvar), sum)
cR0 <- tapply(cR0, factor(sector_cvar), sum)
cW <- tapply(cW, factor(sector_cvar), sum)
}
}
if ("akm" %in% method)
se.akm <- sqrt(sum(cR^2)) / RX
se0.akm0 <- cil.akm0 <- cir.akm0 <- NA
cv <- stats::qnorm(1-alpha/2)
if ("akm0" %in% method) {
se0.akm0 <- sqrt(sum(cR0^2)) / RX
## Now build CI
Q <- RX^2/cv^2 - sum(cW^2)
Q2 <- sum(cR*cW)/Q
mid <- betahat - Q2
dis <- Q2^2 + sum(cR^2) / Q
if (Q>0) {
cir.akm0 <- mid + sqrt(dis)
cil.akm0 <- mid - sqrt(dis)
se.akm0 <- sqrt(dis)/cv
} else if (dis >0) {
cir.akm0 <- mid - sqrt(dis)
cil.akm0 <- mid + sqrt(dis)
se.akm0 <- Inf
} else {
cir.akm0 <- Inf
cil.akm0 <- -Inf
se.akm0 <- Inf
}
}
se <- c(se.h, se.r, se.s, se.akm, se.akm0)
p <- 2 * (1-stats::pnorm(abs(betahat-beta0)/c(se[-5], se0.akm0)))
ci.l <- c(betahat-cv*se[-5], cil.akm0)
ci.r <- c(betahat+cv*se[-5], cir.akm0)
names(se) <- names(ci.l) <- names(ci.r) <- names(p) <-
c("Homoscedastic", "EHW", "Reg. cluster", "AKM", "AKM0")
structure(list(beta=betahat, se=se,
p=p, ci.l=ci.l, ci.r=ci.r), class="SSResults")
}
#' @export
print.SSResults <- function(x, digits = getOption("digits"), ...) {
fmt <- function(x) format(x, digits=digits, width=digits+1)
cat("Estimate:", fmt(x$beta))
s <- !is.na(x$se)
r <- cbind("Std. Error"=x$se[s], "p-value"=x$p[s], "Lower CI"=x$ci.l[s],
"Upper CI"=x$ci.r[s])
cat("\n\nInference:\n")
if (sum(r[, 3]<r[, 4]) > 0)
print(r[r[, 3]<r[, 4], , drop=FALSE], digits=digits) # nolint
## AKM0 in format (-Inf, U) \cup (L, Inf)
if (sum(r[, 3]>r[, 4]) > 0) {
r2 <- r[r[, 3]>r[, 4], , drop=FALSE] # nolint
r2 <- data.frame(r2[, 1:2, drop=FALSE],
"(-Inf", r2[, 4], "] + [", r2[, 3], "Inf )")
names(r2) <- c(colnames(r)[1:2], "CI", rep("", 4))
print(r2, digits=digits)
}
invisible(x)
}
drop_collinear <- function(W, sector_cvar) {
qrW <- qr(W)
if (qrW$rank < ncol(W)) {
warning("Share matrix is collinear")
## qrW$pivot gives the indices of the permuted columns
keep <- qrW$pivot[seq_len(qrW$rank)]
W <- W[, keep]
if (!is.null(sector_cvar))
sector_cvar <- sector_cvar[keep]
}
list(W=W, sector_cvar=sector_cvar)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.