# R/ols.R In ShiftShareSE: Inference in Regressions with Shift-Share Structure

#### Documented in reg_ssreg_ss.fit

#' Inference in linear regression with a shift-share regressor
#'
#' Computes confidence intervals and p-values in a linear regression in which
#' the regressor of interest has a shift-share structure, as the instrument in
#' Bartik (1991). Several different inference methods can computed, as specified
#' by \code{method}.
#'
#' @template formula
#' @template shocks
#' @template method
#' @template value
#' @references{
#'
#' \cite{Bartik, Timothy J., Who Benefits from State and Local Economic
#' Development Policies?, Kalamazoo, MI: W.E. Upjohn Institute for Employment
#' Research, 1991.}
#'
#' \cite{Adão, Rodrigo, Kolesár, Michal, and Morales, Eduardo,
#' "Shift-Share Designs: Theory and Inference", Quarterly Journal of Economics
#' 2019, 134 (4), 1949-2010. \doi{10.1093/qje/qjz025}.}
#'
#' }
#' @examples
#' ## Use ADH data from Autor, Dorn, and Hanson (2013)
#' reg_ss(d_sh_empl ~ 1, X=IV, data=ADH$reg, W=ADH$W,
#'          method=c("ehw", "akm", "akm0"))
#' @export
reg_ss <- function(formula, X, data, W, subset, weights, method, beta0=0,
alpha=0.05, region_cvar=NULL, sector_cvar=NULL) {

## construct model frame
cl <- mf <- match.call(expand.dots = FALSE)
m <- match(c("formula", "X", "data", "subset", "weights", "region_cvar"),
names(mf), 0L)
mf <- mf[c(1L, m)]
mf[[1L]] <- quote(stats::model.frame)
mf <- eval(mf, parent.frame())

y <- stats::model.response(mf, "numeric")
w <- as.vector(stats::model.weights(mf))
rc <- mf$"(region_cvar)" if (!is.null(w) && !is.numeric(w)) stop("'weights' must be a numeric vector") mt <- attr(mf, "terms") Z <- if (stats::is.empty.model(mt)) NULL else stats::model.matrix(mt, mf, contrasts=NULL) ret <- reg_ss.fit(y, mf$"(X)", W, Z, w, method, beta0, alpha, rc,
sector_cvar)

ret$call <- cl ret$terms <- mt
ret
}

#' Inference in a shift-share regression
#'
#' Basic computing engine to calculate confidence intervals and p-values in
#' shift-share designs using different inference methods, as specified by
#' \code{method}.
#' @param y Outcome variable, vector of length \code{N}, with each row
#'     corresponding to a region.
#' @param Z Matrix of regional controls, matrix with \code{N} rows corresponding
#'     to regions.
#' @template shocks
#' @template method
#' @template value
#' @param w vector of weights (length \code{N}) to be used in the fitting
#'     process. If not \code{NULL}, weighted least squares is used with weights
#'     \code{w}, i.e., \code{sum(w * residuals^2)} is minimized.
#' @export
reg_ss.fit <- function(y, X, W, Z, w=NULL, method=c("akm", "akm0"), beta0=0,
alpha=0.05, region_cvar=NULL, sector_cvar=NULL) {
mm <- cbind(X, Z)

r <- drop_collinear(W, sector_cvar)
W <- r$W sector_cvar <- r$sector_cvar

if (is.null(w)) {
ddX <- stats::lm.fit(y=X, x=Z)$residuals # \ddot{X} ddY <- stats::lm.fit(y=y, x=Z)$residuals # \ddot{Y}
hX <- stats::lm.fit(y=ddX, x=W)$coefficients # \hat{\Xs} wgt <- 1 r <- stats::lm.fit(mm, y) } else { ddX <- stats::lm.wfit(y=X, x=Z, w=w)$residuals
ddY <- stats::lm.wfit(y=y, x=Z, w=w)$residuals hX <- stats::lm.wfit(y=ddX, x=W, w=w)$coefficients
wgt <- w
r <- stats::lm.wfit(mm, y, w)
}

betahat <- unname(r$coefficients[1]) n <- NROW(mm) p <- r$rank

se.h <- se.r <- se.s <- se.akm <- se.akm0 <- NA

if("all" %in% method)
method <- c("homosk", "ehw", "region_cluster", "akm", "akm0")

RX <- sum(wgt * ddX^2)

if("homosk" %in% method) {
resvar <- sum(wgt * r$residuals^2) / r$df.residual
se.h <- sqrt(resvar / RX)
}

u <- wgt * r$residuals * ddX if("ehw" %in% method) se.r <- sqrt((n / (n - p)) * drop(crossprod(u))) / RX if (("region_cluster" %in% method) & is.null(region_cvar)) { warning(paste0("Reporting NA for \"region_cluster\" Std. Error", " because \"region_cvar\" not supplied.")) } else if ("region_cluster" %in% method) { nc <- length(unique(region_cvar)) # # of clusters se.s <- sqrt((nc/(nc-1)) * (n-1)/(n-p) * drop(crossprod(tapply(u, factor(region_cvar), sum)))) / RX } if ("akm" %in% method | "akm0" %in% method) { cR <- hX*drop(crossprod(wgt * r$residuals, W))
cR0 <- hX*drop(crossprod(wgt * (ddY-ddX*beta0), W))
cW <- hX*drop(crossprod(wgt * ddX, W))
if (!is.null(sector_cvar) & length(sector_cvar) != length(cR))
stop("The length of \"sector_cvar\" is different ",
"from the number of sectors.")

if (!is.null(sector_cvar)) {
cR <- tapply(cR, factor(sector_cvar), sum)
cR0 <- tapply(cR0, factor(sector_cvar), sum)
cW <- tapply(cW, factor(sector_cvar), sum)
}
}

if ("akm" %in% method)
se.akm <- sqrt(sum(cR^2)) / RX

se0.akm0 <- cil.akm0 <- cir.akm0 <- NA
cv <- stats::qnorm(1-alpha/2)

if ("akm0" %in% method) {
se0.akm0 <- sqrt(sum(cR0^2)) / RX

## Now build CI
Q <- RX^2/cv^2 - sum(cW^2)
Q2 <- sum(cR*cW)/Q
mid <- betahat -  Q2
dis <- Q2^2 + sum(cR^2) / Q

if (Q>0) {
cir.akm0 <- mid + sqrt(dis)
cil.akm0 <- mid - sqrt(dis)
se.akm0 <- sqrt(dis)/cv
} else if (dis >0)  {
cir.akm0 <- mid - sqrt(dis)
cil.akm0 <- mid + sqrt(dis)
se.akm0 <- Inf
} else  {
cir.akm0 <- Inf
cil.akm0 <- -Inf
se.akm0 <- Inf
}
}

se <- c(se.h, se.r, se.s, se.akm, se.akm0)
p <- 2*(1-stats::pnorm(abs(betahat-beta0)/c(se[-5], se0.akm0)))
ci.l <- c(betahat-cv*se[-5], cil.akm0)
ci.r <- c(betahat+cv*se[-5], cir.akm0)

names(se) <- names(ci.l) <- names(ci.r) <- names(p) <-
c("Homoscedastic", "EHW", "Reg. cluster", "AKM", "AKM0")

structure(list(beta=betahat, se=se,
p=p, ci.l=ci.l, ci.r=ci.r), class="SSResults")
}

#' @export
print.SSResults <- function(x, digits = getOption("digits"), ...) {

fmt <- function(x) format(x, digits=digits, width=digits+1)

cat("Estimate:", fmt(x$beta)) s <- !is.na(x$se)

r <- cbind("Std. Error"=x$se[s], "p-value"=x$p[s], "Lower CI"=x$ci.l[s], "Upper CI"=x$ci.r[s])
cat("\n\nInference:\n")
if (sum(r[, 3]<r[, 4]) > 0)
print(r[r[, 3]<r[, 4], , drop=FALSE], digits=digits) # nolint
## AKM0 in format (-Inf, U) \cup (L, Inf)
if (sum(r[, 3]>r[, 4]) > 0) {
r2 <- r[r[, 3]>r[, 4], , drop=FALSE] # nolint
r2 <- data.frame(r2[, 1:2, drop=FALSE],
"(-Inf", r2[, 4], "] + [", r2[, 3], "Inf )")
names(r2) <- c(colnames(r)[1:2], "CI", rep("", 4))
print(r2, digits=digits)
}

invisible(x)
}

drop_collinear <- function(W, sector_cvar) {
qrW <- qr(W)
if (qrW$rank < ncol(W)) { warning("Share matrix is collinear") ## qrW$pivot gives the indices of the permuted columns
keep <- qrW$pivot[seq_len(qrW$rank)]
W <- W[, keep]
if (!is.null(sector_cvar))
sector_cvar <- sector_cvar[keep]
}
list(W=W, sector_cvar=sector_cvar)
}


## Try the ShiftShareSE package in your browser

Any scripts or data that you put into this service are public.

ShiftShareSE documentation built on April 24, 2022, 9:05 a.m.