#' Honest inference in RD
#'
#' Calculate estimators and bias-aware CIs for the sharp or fuzzy RD parameter,
#' or for value of the conditional mean at a point.
#'
#' The bandwidth is calculated to be optimal for a given performance criterion,
#' as specified by \code{opt.criterion}. Alternatively, for local polynomial
#' estimators, the bandwidth can be specified by \code{h}. For
#' \code{kern="optimal"}, calculate optimal estimators under second-order Taylor
#' smoothness class (sharp RD only).
#'
#' @template RDFormula
#' @param cutoff specifies the RD cutoff in the running variable. For inference
#' at a point, specifies the point \eqn{x_0} at which to calculate the
#' conditional mean.
#' @param kern specifies the kernel function used in the local regression. It
#' can either be a string equal to \code{"triangular"}
#' (\eqn{k(u)=(1-|u|)_{+}}), \code{"epanechnikov"}
#' (\eqn{k(u)=(3/4)(1-u^2)_{+}}), or \code{"uniform"} (\eqn{k(u)=
#' (|u|<1)/2}), or else a kernel function. If equal to \code{"optimal"}, use
#' the finite-sample optimal linear estimator under Taylor smoothness class,
#' instead of a local linear estimator.
#' @param se.method method for estimating standard error of the estimate, one
#' of:
#'
#' \describe{
#' \item{"nn"}{Nearest neighbor method}
#'
#' \item{"EHW"}{Eicker-Huber-White, with residuals from local regression
#' (local polynomial estimators only).}
#'
#' \item{"supplied.var"}{Use conditional variance supplied by \code{sigmaY2}
#' instead of computing residuals. For fuzzy RD, \code{sigmaD2} and
#' \code{sigmaYD} also need to be supplied in this case.}
#'
#' }
#' @param J Number of nearest neighbors, if \code{se.method="nn"} is specified.
#' Otherwise ignored.
#' @param opt.criterion Optimality criterion that the bandwidth is designed to
#' optimize. The options are:
#'
#' \describe{
#'
#' \item{\code{"MSE"}}{Finite-sample maximum MSE}
#'
#' \item{\code{"FLCI"}}{Length of (fixed-length) two-sided
#' confidence intervals.}
#'
#' \item{\code{"OCI"}}{Given quantile of excess length of one-sided
#' confidence intervals}
#'
#' }
#'
#' The methods use conditional variance given by \code{sigmaY2}, if
#' supplied. For fuzzy RD, \code{sigmaD2} and \code{sigmaYD} also need to be
#' supplied in this case. Otherwise, the methods use preliminary variance
#' estimates based on assuming homoskedasticity on either side of the
#' cutoff.
#' @param beta Determines quantile of excess length to optimize, if bandwidth
#' optimizes given quantile of excess length of one-sided confidence
#' intervals (\code{opt.criterion="OCI"}); otherwise ignored.
#' @param alpha determines confidence level, \code{1-alpha} for
#' constructing/optimizing confidence intervals.
#' @param M Bound on second derivative of the conditional mean function, a
#' numeric vector of length one. For fuzzy RD, \code{M} needs to be a
#' numeric vector of length two, specifying the smoothness of the
#' conditional mean for the outcome and treatment, respectively.
#' @param sclass Smoothness class, either \code{"T"} for Taylor or \code{"H"}
#' for Hölder class.
#' @param h bandwidth, a scalar parameter. If not supplied, optimal bandwidth is
#' computed according to criterion given by \code{opt.criterion}.
#' @param weights Optional vector of weights to weight the observations (useful
#' for aggregated data). The weights are interpreted as the number of
#' observations that each aggregated data point averages over. Disregarded
#' if optimal kernel is used.
#' @param point.inference Do inference at a point determined by \code{cutoff}
#' instead of RD.
#' @param T0 Initial estimate of the treatment effect for calculating the
#' optimal bandwidth. Only relevant for fuzzy RD.
#' @param sigmaY2 Supply variance of outcome. Ignored when kernel is optimal.
#' @param sigmaD2 Supply variance of treatment (fuzzy RD only).
#' @param sigmaYD Supply covariance of treatment and outcome (fuzzy RD only).
#' @param clusterid Vector specifying cluster membership. If supplied,
#' \code{se.method="EHW"} is required, and standard errors use
#' cluster-robust variance formulas.
#' @return Returns an object of class \code{"RDResults"}. The function
#' \code{print} can be used to obtain and print a summary of the results. An
#' object of class \code{"RDResults"} is a list containing four components.
#' First, a data frame \code{"coefficients"} containing the following
#' columns:
#'
#' \describe{
#' \item{\code{term}}{type of parameter being estimated}
#'
#' \item{\code{estimate}}{point estimate}
#'
#' \item{\code{std.error}}{standard error of \code{estimate}}
#'
#' \item{\code{maximum.bias}}{maximum bias of \code{estimate}}
#'
#' \item{\code{conf.low}, \code{conf.high}}{lower (upper) end-point of a
#' two-sided CI based on \code{estimate}}
#'
#' \item{\code{conf.low.onesided}, \code{conf.high.onesided}}{lower (upper)
#' end-point of a one-sided CIs based on \code{estimate}}
#'
#' \item{\code{bandwidth}}{bandwidth used. If \code{kern="optimal"}, the
#' smoothing parameters \code{bandwidth.m} and \code{bandwidth.p} on
#' either side of the cutoff are reported instead}
#'
#' \item{\code{eff.obs}}{number of effective observations}
#'
#' \item{\code{leverage}}{maximal leverage of \code{estimate}}
#'
#' \item{\code{cv}}{critical value used to compute two-sided CIs}
#'
#' \item{\code{alpha}}{coverage level, as specified by option \code{alpha}}
#'
#' \item{\code{method}}{\code{sclass} is used}
#'
#' \item{\code{M}}{curvature bound used for worst-case bias
#' calculations. For fuzzy RD, equals
#' \code{(abs(estimate)*M.fs+M.rf)/first.stage}}
#'
#' \item{\code{M.rf}, \code{M.fs}}{curvature bound for the outcome (i.e.
#' reduced-form) and first-stage regressions. Fuzzy RD only.}
#'
#' \item{\code{first.stage}}{estimate of the first-stage coefficient.
#' Fuzzy RD only.}
#'
#' \item{\code{kernel}}{kernel used}
#'
#' \item{\code{p.value}}{p-value for testing the null of no effect}
#' }
#'
#' Second, a list called \code{"data"} containing the data used for
#' estimation. This is useful mostly for internal calculations. Third, an
#' object of class \code{"lm"} containing the local linear regression
#' estimates. Finally, a \code{call} object containing the matched call
#' called \code{"call"}.
#'
#' If \code{kern="optimal"}, the \code{"lm"} object is empty, and the
#' numeric vectors \code{"delta"} and \code{"omega"} are returned in
#' addition. These correspond to the parameters in the modulus problem used
#' to compute the optimal estimation weights.
#'
#'
#' @references{
#'
#' \cite{Timothy B. Armstrong and Michal Kolesár. Optimal inference in a class
#' of regression models. Econometrica, 86(2):655–683, March 2018.
#' \doi{10.3982/ECTA14434}}
#'
#' \cite{Timothy B. Armstrong and Michal Kolesár. Simple and honest confidence
#' intervals in nonparametric regression. Quantitative Economics, 11(1):1–39,
#' January 2020. \doi{10.3982/QE1199}}
#'
#' \cite{Michal Kolesár and Christoph Rothe. Inference in regression
#' discontinuity designs with a discrete running variable. American Economic
#' Review, 108(8):2277—-2304, August 2018. \doi{10.1257/aer.20160945}}
#'
#' }
#' @examples
#' RDHonest(voteshare ~ margin, data = lee08, kern = "uniform", M = 0.1, h = 10)
#' RDHonest(cn | retired ~ elig_year, data=rcp, cutoff=0, M=c(4, 0.4),
#' kern="triangular", opt.criterion="MSE", T0=0, h=3)
#' RDHonest(voteshare ~ margin, data = lee08, subset = margin>0,
#' kern = "uniform", M = 0.1, h = 10, point.inference=TRUE)
#' @export
RDHonest <- function(formula, data, subset, weights, cutoff=0, M,
kern="triangular", na.action, opt.criterion="MSE", h,
se.method="nn", alpha=0.05, beta=0.8, J=3, sclass="H",
T0=0, point.inference=FALSE, sigmaY2, sigmaD2, sigmaYD,
clusterid) {
## construct model frame
cl <- mf <- match.call(expand.dots = FALSE)
m <- match(c("formula", "data", "subset", "weights", "na.action", "sigmaY2",
"sigmaD2", "sigmaYD", "clusterid"), names(mf), 0L)
mf <- mf[c(1L, m)]
formula <- Formula::as.Formula(formula)
## at most 2 LHS and at most 2 RHS
stopifnot(length(formula)[1] <= 2L, length(formula)[2] <= 2L)
if (point.inference) {
method <- c("Value of conditional mean"="IP")
} else if (length(formula)[1]==2) {
method <- c("Fuzzy RD parameter"="FRD")
} else {
method <- c("Sharp RD parameter"="SRD")
}
mf$formula <- formula
## http://madrury.github.io/jekyll/update/statistics/2016/07/20/lm-in-R.html
mf$drop.unused.levels <- TRUE
mf[[1L]] <- quote(stats::model.frame)
mf <- eval(mf, parent.frame())
d <- NPRData(mf, cutoff, method, formula)
process_options(M, se.method, method, d, kern)
if (!is.null(d$covs) && (missing(M) || missing(h))) {
## remove covariates and compute bw w/o covariates
d0 <- d
d0$covs <- NULL
ret <- NPRHonest(d0, MROT(d0), kern, h, opt.criterion=opt.criterion,
alpha=alpha, beta=beta, se.method=se.method, J=J,
sclass=sclass, T0=T0)
## Compute covariate-adjusted outcome
d <- covariate_adjust(d, kern, ret$coefficients$bandwidth)
}
if (missing(M)) {
M <- MROT(d)
message("Using Armstong & Kolesar (2020) ROT for smoothness constant M")
}
if (kernel_type(kern)=="optimal") {
ret <- RDTOpt(d, M, opt.criterion, alpha, beta, se.method, J)
} else {
ret <- NPRHonest(d, M, kern, h, opt.criterion=opt.criterion,
alpha=alpha, beta=beta, se.method=se.method, J=J,
sclass=sclass, T0=T0)
}
ret$call <- ret$lm$call <- cl
ret$na.action <- attr(mf, "na.action")
if (!is.finite(ret$coefficients$leverage) || ret$coefficients$leverage>0.1)
message(paste0("Maximal leverage is large: ",
round(ret$coefficients$leverage, 2),
".\nInference may be inaccurate. ",
"Consider using bigger bandwidth."))
ret$coefficients$term <- names(method)
ret
}
covariate_adjust <- function(d, kern, h) {
if (is.null(d$Y_unadj)) d$Y_unadj <- d$Y
d0 <- d
d0$Y <- d0$Y_unadj
r <- NPReg(d0, h, kern, order=1, se.method="EHW")
be <- as.matrix(r$lm$coefficients)
L <- NCOL(d$covs)
d$Y <-d$Y_unadj-d$covs %*% be[seq_len(L)+NROW(be)-L, , drop=FALSE]
d
}
## @param T0 Initial estimate of the treatment effect for calculating the
## optimal bandwidth. Only relevant for Fuzzy RD.
## @param T0bias When evaluating the maximum bias of the estimate, use the
## estimate itself (if \code{T0bias==FALSE}), or use the preliminary
## estimate \code{T0} (if \code{T0bias==TRUE}). Only relevant for Fuzzy RD.
NPRHonest <- function(d, M, kern="triangular", h, opt.criterion, alpha=0.05,
beta=0.8, se.method="nn", J=3, sclass="H", T0=0,
T0bias=FALSE) {
if (missing(h)) {
d0 <- d
d0$covs <- d0$Y_unadj <- NULL
h <- OptBW(d0, M, kern, opt.criterion, alpha, beta, sclass, T0)
}
if (!is.null(d$Y_unadj)) d$Y <- d$Y_unadj
r1 <- NPReg(d, h, kern, order=1, se.method, J)
wt <- r1$est_w[r1$est_w!=0]
xx <- d$X[r1$est_w!=0]
## Are we at a boundary?
bd <- TRUE
if (inherits(d, "IP")) {
bd <- length(unique(xx>=0))==1
}
if (T0bias && inherits(d, "FRD")) {
## multiply bias and sd by r1$fs to make if free of first stage
r1$se <- r1$se*abs(r1$fs)
M <- unname(c((M[1]+M[2]*abs(T0)), M))
} else if (!T0bias && inherits(d, "FRD")) {
M <- unname(c((M[1]+M[2]*abs(r1$estimate)) / abs(r1$fs), M))
} else {
M[2:3] <- c(NA, NA)
}
## Determine bias
if (r1$eff.obs==0) {
## If bandwidths too small, big bias / sd
bias <- r1$se <- sqrt(.Machine$double.xmax/10)
} else if (sclass=="T") {
bias <- M[1]/2 * (sum(abs(wt*xx^2)))
} else if (sclass=="H" && bd) {
## At boundary we know form of least favorable function
bias <- M[1]/2 *
abs(sum(wt[xx<0]*xx[xx<0]^2)-sum(wt[xx>=0]*xx[xx>=0]^2))
} else {
## Else need to find numerically (same formula if order=2)
w2p <- function(s) abs(sum((wt * (xx-s))[xx>=s]))
w2m <- function(s) abs(sum((wt * (s-xx))[xx<=s]))
bp <- stats::integrate(function(s) vapply(s, w2p, numeric(1)), 0, h)
bm <- stats::integrate(function(s) vapply(s, w2m, numeric(1)), -h, 0)
bias <- M[1] * (bp$value+bm$value)
}
method <- switch(sclass, H="Holder", "Taylor")
d$est_w <- r1$est_w
d$sigma2 <- r1$sigma2
co <- data.frame(term=NA, estimate=r1$estimate, std.error=r1$se,
maximum.bias=bias, conf.low=NA, conf.high=NA,
conf.low.onesided=NA, conf.high.onesided=NA, bandwidth=h,
eff.obs=r1$eff.obs,
leverage=max(c(0, wt^2/d$w[r1$est_w!=0]^2))/
sum(wt^2/d$w[r1$est_w!=0]),
cv=NA, alpha=alpha, method=method, M=M[1], M.rf=M[2],
M.fs=M[3], first.stage=r1$fs, kernel=kernel_type(kern),
p.value=NA)
structure(list(coefficients=fill_coefs(co), data=d,
lm=r1$lm), class="RDResults")
}
## Same for RDTOpt and RD
fill_coefs <- function(co) {
B <- co$maximum.bias/co$std.error
cv <- CVb(B, co$alpha)
co[, c("conf.low", "conf.high", "conf.low.onesided",
"conf.high.onesided", "cv", "p.value")] <-
c(co$estimate-cv*co$std.error, co$estimate+cv*co$std.error,
co$estimate - (B + stats::qnorm(1-co$alpha))*co$std.error,
co$estimate + (B + stats::qnorm(1-co$alpha))*co$std.error,
cv, stats::pnorm(B-abs(co$estimate/co$std.error))+
stats::pnorm(-B-abs(co$estimate/co$std.error)))
co
}
## Optimal bandwidth selection in nonparametric regression
OptBW <- function(d, M, kern="triangular", opt.criterion, alpha=0.05, beta=0.8,
sclass="H", T0=0) {
## First check if sigma2 is supplied
if (is.null(d$sigma2))
d <- PrelimVar(d, se.initial="EHW")
## Objective function for optimizing bandwidth
obj <- function(h) {
r <- NPRHonest(d, M, kern, h, alpha=alpha, se.method="supplied.var",
sclass=sclass, T0=T0, T0bias=TRUE)$coefficients
switch(opt.criterion,
OCI=2*r$maximum.bias+
r$std.error * (stats::qnorm(1-alpha)+stats::qnorm(beta)),
MSE=r$maximum.bias^2+r$std.error^2,
FLCI=r$conf.high-r$conf.low)
}
if (inherits(d, "IP")) {
hmin <- sort(unique(abs(d$X)))[2]
} else {
hmin <- max(unique(d$X[d$p])[2], sort(unique(abs(d$X[d$m])))[2])
}
## Optimize piecewise constant function using modification of golden
## search. In fact, the criterion may not be unimodal, so proceed with
## caution. (For triangular kernel, it appears unimodal)
if (kernel_type(kern)=="uniform") {
supp <- sort(unique(abs(d$X)))
h <- gss(obj, supp[supp>=hmin])
} else {
h <- abs(stats::optimize(obj, interval=c(hmin, max(abs(d$X))),
tol=.Machine$double.eps^0.75)$minimum)
}
h
}
#' @export
print.RDResults <- function(x, digits = getOption("digits"), ...) {
if (!is.null(x$call)) {
cat("\nCall:\n", paste(deparse(x$call), sep = "\n", collapse = "\n"),
"\n\n", sep = "")
}
fmt <- function(x) format(x, digits=digits, width=digits+1)
y <- x$coefficients
cat("Inference for ", y$term, " (using ", y$method,
" class), confidence level ", 100 * (1-y$alpha), "%:\n", sep="")
nm <- c("Estimate", "Std. Error", "Maximum Bias")
colnames(y)[2:4] <- nm
y$"Confidence Interval" <- paste0("(", fmt(y$conf.low), ", ",
fmt(y$conf.high), ")")
y$OCI <- paste0("(-Inf, ", fmt(y$conf.high.onesided), "), (",
fmt(y$conf.low.onesided), ", Inf)")
print.data.frame(y[, c(nm, "Confidence Interval"), ],
digits=digits)
cat("\nOnesided CIs: ", y$OCI)
cat("\nNumber of effective observations:", fmt(y$eff.obs))
par <- paste0(tolower(substr(y$term, 1, 1)), substring(y$term, 2))
cat("\nMaximal leverage for ", par, ": ", fmt(y$leverage),
sep="")
if (!is.null(y$first.stage) && !is.na(y$first.stage))
cat("\nFirst stage estimate:", fmt(y$first.stage),
"\nFirst stage smoothness constant M:", fmt(y$M.fs),
"\nReduced form smoothness constant M:", fmt(y$M.rf))
else if (!is.null(y[["M"]])) {
cat("\nSmoothness constant M:", fmt(y$M))
}
cat("\nP-value:", fmt(y$p.value), "\n")
if (!is.null(y$bandwidth)) {
cat("\nBased on local regression with bandwidth: ", fmt(y$bandwidth),
", kernel: ", y$kernel, "\nRegression coefficients:\n", sep="")
print.default(format(stats::coef(x$lm), digits = max(3L, digits - 3L)),
print.gap = 2L, quote = FALSE)
} else {
cat("\nSmoothing parameters below and above cutoff: ",
fmt(y$bandwidth.m), ", ", fmt(y$bandwidth.p), sep="")
}
if (inherits(x$na.action, "omit"))
cat(length(x$na.action), "observations with missing values dropped\n")
invisible(x)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.