Nothing
#' MSE bandwidth selection for geometrical RD design
#'
#' @title Local Polynomial RD Estimation on Distance-Based Running Variables
#'
#' @description
#' \code{rd2d.dist} implements distance-based local polynomial boundary regression discontinuity (RD) point estimators with robust bias-corrected pointwise confidence intervals and
#' uniform confidence bands, developed in Cattaneo, Titiunik and Yu (2025a) with a companion software article Cattaneo, Titiunik and Yu (2025b). For robust bias-correction, see Calonico, Cattaneo, Titiunik (2014).
#'
#' Companion commands are: \code{rdbw2d.dist} for data-driven bandwidth selection.
#'
#' For other packages of RD designs, visit
#' <https://rdpackages.github.io/>
#' @param Y Dependent variable; a numeric vector of length \eqn{N}, where \eqn{N} is the sample size.
#' @param D Distance-based scores \eqn{\mathbf{D}_i=(\mathbf{D}_{i}(\mathbf{b}_1),\cdots,\mathbf{D}_{i}(\mathbf{b}_J))}; dimension is \eqn{N \times J} where \eqn{N} = sample size and \eqn{J} = number of cutoffs; non-negative values means data point in treatment group and negative values means data point in control group.
#' @param h Bandwidth(s); if \eqn{c=h} then same bandwidth is used for both groups; if a matrix of size \eqn{J \times 2} is provided, each row contains \eqn{(h_{\text{control}}, h_{\text{tr}})} for the evaluation point; if not specified, bandwidths are selected via \code{rdbw2d.dist()}.
#' @param b Optional evaluation points; a matrix or data frame specifying boundary points \eqn{\mathbf{b}_j = (b_{1j}, b_{2j})}, dimension \eqn{J \times 2}.
#' @param p Polynomial order for point estimation. Default is \code{p = 1}.
#' @param q Polynomial order for bias-corrected estimation. Must satisfy \eqn{q \geq p}. Default is \code{q = p + 1}.
#' @param kink Logical; whether to apply kink adjustment. Options: \code{"on"} or \code{"off"} (default).
#' @param kernel Kernel function to use. Options are \code{"unif"}, \code{"uniform"} (uniform), \code{"triag"}, \code{"triangular"} (triangular, default), and \code{"epan"}, \code{"epanechnikov"} (Epanechnikov).
#' @param level Nominal confidence level for intervals/bands, between 0 and 100 (default is 95).
#' @param cbands Logical. If \code{TRUE}, also compute uniform confidence bands (default is \code{FALSE}).
#' @param side Type of confidence interval. Options: \code{"two"} (two-sided, default), \code{"left"} (left tail), or \code{"right"} (right tail).
#' @param repp Number of bootstrap repetitions used for critical value simulation. Default is \code{1000}.
#' @param bwselect Bandwidth selection strategy. Options:
#' \itemize{
#' \item \code{"mserd"}. One common MSE-optimal bandwidth selector for the boundary RD treatment effect estimator for each evaluation point (default).
#' \item \code{"imserd"}. IMSE-optimal bandwidth selector for the boundary RD treatment effect estimator based on all evaluation points.
#' \item \code{"msetwo"}. Two different MSE-optimal bandwidth selectors (control and treatment) for the boundary RD treatment effect estimator for each evaluation point.
#' \item \code{"imsetwo"}. Two IMSE-optimal bandwidth selectors (control and treatment) for the boundary RD treatment effect estimator based on all evaluation points.
#' \item \code{"user provided"}. User-provided bandwidths. If \code{h} is not \code{NULL}, then \code{bwselect} is overwritten to \code{"user provided"}.
#' }
#' @param vce Variance-covariance estimator for standard errors.
#' Options:
#' \describe{
#' \item{\code{"hc0"}}{Heteroskedasticity-robust variance estimator without small sample adjustment (White robust).}
#' \item{\code{"hc1"}}{Heteroskedasticity-robust variance estimator with degrees-of-freedom correction (default).}
#' \item{\code{"hc2"}}{Heteroskedasticity-robust variance estimator using leverage adjustments.}
#' \item{\code{"hc3"}}{More conservative heteroskedasticity-robust variance estimator (similar to jackknife correction).}
#' }
#' @param rbc Logical. Whether to apply robust bias correction. Options: \code{"on"} (default) or \code{"off"}. When \code{kink = off}, turn on \code{rbc} means setting \code{q} to \code{p + 1}.
#' When \code{kink = on}, turn on \code{rbc} means shrinking the bandwidth selector to be proportional to \eqn{N^{-1/3}}.
#' @param bwcheck If a positive integer is provided, the preliminary bandwidth used in the calculations is enlarged so that at least \code{bwcheck} observations are used. The program stops with “not enough observations” if sample size \eqn{N} < \code{bwcheck}. Default is \code{50 + p + 1}.
#' @param masspoints Strategy for handling mass points in the running variable.
#' Options:
#' \describe{
#' \item{\code{"check"}}{Check for repeated values and adjust inference if needed (default).}
#' \item{\code{"adjust"}}{Adjust bandwidths to guarantee a sufficient number of unique support points.}
#' \item{\code{"off"}}{Ignore mass points completely.}
#' }
#' @param C Cluster ID variable used for cluster-robust variance estimation. Default is \code{C = NULL}.
#' @param scaleregul Scaling factor for the regularization term in bandwidth selection. Default is \code{1}.
#' @param cqt Constant controlling subsample fraction for initial bias estimation. Default is \code{0.5}.
#'
#' @return An object of class \code{"rd2d.dist"}, a list containing:
#' \describe{
#' \item{\code{results}}{A data frame with point estimates, variances, p-values, confidence intervals, confidence bands, and bandwidths at each evaluation point.
#' \describe{
#' \item{\code{b1}}{First coordinate of the evaluation point.}
#' \item{\code{b2}}{Second coordinate of the evaluation point.}
#' \item{\code{Est.p}}{Point estimate of \eqn{\widehat{\tau}_{\text{dist},p}(\mathbf{b})} with polynomial order \eqn{p}.}
#' \item{\code{Se.p}}{Standard error of \eqn{\widehat{\tau}_{\text{dist},p}(\mathbf{b})}.}
#' \item{\code{Est.q}}{Bias-corrected estimate \eqn{\widehat{\tau}_{\text{dist},q}(\mathbf{b})} with polynomial order \eqn{q}.}
#' \item{\code{Se.q}}{Standard error of \eqn{\widehat{\tau}_{\text{dist},q}(\mathbf{b})}.}
#' \item{\code{pvalue}}{Two-sided p-value based on \eqn{T_{\text{dist},q}(\mathbf{b})}.}
#' \item{\code{CI.lower}}{Lower bound of confidence interval.}
#' \item{\code{CI.upper}}{Upper bound of confidence interval.}
#' \item{\code{CB.lower}}{Lower bound of uniform confidence band (if \code{cbands=TRUE}).}
#' \item{\code{CB.upper}}{Upper bound of uniform confidence band (if \code{cbands=TRUE}).}
#' \item{\code{h0}}{Bandwidth used for control group (\eqn{D_i(\mathbf{b}) < 0}).}
#' \item{\code{h1}}{Bandwidth used for treatment group (\eqn{D_i(\mathbf{b}) \geq 0}).}
#' \item{\code{Nh0}}{Effective sample size for control group.}
#' \item{\code{Nh1}}{Effective sample size for treatment group.}
#' }
#' }
#' \item{\code{results.A0}}{Same structure as \code{results} but for control group outcomes.}
#' \item{\code{results.A1}}{Same structure as \code{results} but for treatment group outcomes.}
#' \item{\code{tau.hat}}{Vector of point estimates \eqn{\widehat{\tau}_p(\mathbf{b})}.}
#' \item{\code{se.hat}}{Standard errors corresponding to \eqn{\widehat{\tau}_p(\mathbf{b})}.}
#' \item{\code{cb}}{Confidence intervals and uniform bands.}
#' \item{\code{cov.q}}{Covariance matrix for bias-corrected estimates \eqn{\widehat{\tau}_{\text{dist},q}(\mathbf{b})} for all point evaluations \eqn{\mathbf{b}}.}
#' \item{\code{opt}}{List of options used in the function call.}
#' }
#'
#' @seealso \code{\link{rdbw2d.dist}}, \code{\link{rd2d}}, \code{\link{print.rd2d.dist}}, \code{\link{summary.rd2d.dist}}
#'
#' @author
#' Matias D. Cattaneo, Princeton University. \email{cattaneo@princeton.edu} \cr
#' Rocío Titiunik, Princeton University. \email{titiunik@princeton.edu} \cr
#' Ruiqi Rae Yu, Princeton University. \email{rae.yu@princeton.edu}
#'
#' @references
#' \itemize{
#' \item{\href{https://rdpackages.github.io/references/Cattaneo-Titiunik-Yu_2025_BoundaryRD.pdf}{Cattaneo, M. D., Titiunik, R., Yu, R. R. (2025a).}
#' Estimation and Inference in Boundary Discontinuity Designs}
#' \item{\href{https://rdpackages.github.io/references/Cattaneo-Titiunik-Yu_2025_rd2d.pdf}{Cattaneo, M. D., Titiunik, R., Yu, R. R. (2025b).}
#' rd2d: Causal Inference in Boundary Discontinuity Designs}
#' \item{\href{https://rdpackages.github.io/references/Calonico-Cattaneo-Titiunik_2014_ECMA.pdf}{Calonico, S., Cattaneo, M. D., Titiunik, R. (2014)}
#' Robust Nonparametric Confidence Intervals for Regression-Discontinuity Designs}
#' }
#'
#' @examples
#' set.seed(123)
#' n <- 5000
#'
#' # Generate running variables x1 and x2
#' x1 <- rnorm(n)
#' x2 <- rnorm(n)
#'
#' # Define treatment assignment: treated if x1 >= 0
#' d <- as.numeric(x1 >= 0)
#'
#' # Generate outcome variable y with some treatment effect
#' y <- 3 + 2 * x1 + 1.5 * x2 + 1.5 * d + rnorm(n, sd = 0.5)
#'
#' # Define evaluation points (e.g., at the origin and another point)
#' eval <- data.frame(x.1 = c(0, 0), x.2 = c(0, 1))
#'
#' # Compute Euclidean distances to evaluation points
#' dist.a <- sqrt((x1 - eval$x.1[1])^2 + (x2 - eval$x.2[1])^2)
#' dist.b <- sqrt((x1 - eval$x.1[2])^2 + (x2 - eval$x.2[2])^2)
#'
#' # Combine distances into a matrix
#' D <- as.data.frame(cbind(dist.a, dist.b))
#'
#' # Assign positive distances for treatment group, negative for control
#' d_expanded <- matrix(rep(2 * d - 1, times = ncol(D)), nrow = nrow(D), ncol = ncol(D))
#' D <- D * d_expanded
#'
#' # Run the rd2d.dist function
#' result <- rd2d.dist(y, D, b = eval)
#'
#' # View the estimation results
#' print(result)
#' summary(result)
#' @export
rd2d.dist <- function(Y, D, h = NULL, b = NULL, p = 1, q = 2, kink = c("off", "on"),
kernel = c("tri","triangular", "epa","epanechnikov","uni","uniform","gau","gaussian"),
level = 95, cbands = TRUE, side = c("two", "left", "right"), repp = 1000,
bwselect = c("mserd", "imserd", "msetwo", "imsetwo", "user provided"),
vce = c("hc1","hc0","hc2","hc3"), rbc = c("on", "off"),
bwcheck = 50 + p + 1, masspoints = c("check","adjust","off"),
C = NULL, scaleregul = 1, cqt = 0.5){
# Input error handling
bwselect <- match.arg(bwselect)
kernel <- match.arg(kernel)
vce <- match.arg(vce)
masspoints <- match.arg(masspoints)
side <- match.arg(side)
kink <- match.arg(kink)
rbc <- match.arg(rbc)
# Check Errors
exit=0
if (length(Y) != nrow(D)) {
print("Y and rows of D must have the same length")
exit <- 1
}
if (kernel!="gau" & kernel!="gaussian" & kernel!="uni" & kernel!="uniform" & kernel!="tri" & kernel!="triangular" & kernel!="epa" & kernel!="epanechnikov" & kernel!="" ){
print("kernel incorrectly specified")
exit <- 1
}
if (!is.null(b)){
if (nrow(b) != ncol(D) || ncol(b) != 2){
print("b must have 2 columns and the same number of rows as D's number of columns")
exit <- 1
}
}
# level must be numeric in (0, 100)
if (!is.numeric(level) || level <= 0 || level >= 100) {
print("level must be a numeric value between 0 and 100")
exit <- 1
}
# repp must be a positive integer
if (!is.numeric(repp) || repp < 1 || repp != as.integer(repp)) {
print("repp must be a positive integer")
exit <- 1
}
# h must be either a positive scalar or a matrix/data.frame with same rows as b and 4 columns
if (!is.null(h)) {
if (length(h) == 1) {
if (!is.numeric(h) || h <= 0) {
print("If h is a scalar, it must be a positive numeric value")
exit <- 1
}
} else if (!(is.matrix(h) || is.data.frame(h)) ||
nrow(h) != ncol(D) || ncol(h) != 2) {
print("If h is not a scalar, it must be a matrix or data frame with the same number of rows as b and 2 columns")
exit <- 1
}
}
if (is.null(h) & bwselect == "user provided"){
exit <- 1
print("Please provide bandwidths.")
}
if (!is.null(C) && !(vce %in% c("hc0", "hc1"))) {
warning("When C is specified, vce must be 'hc0' or 'hc1'. Resetting vce to 'hc1'.")
vce <- "hc1"
}
if (exit>0) stop()
############################ Data Preparation ################################
neval <- ncol(D)
if (!is.null(b)){
eval <- as.data.frame(b)
} else {
eval <- matrix(NA, nrow = neval, ncol = 2)
eval <- as.data.frame(eval)
}
colnames(eval) <- c("x.1", "x.2")
# D <- as.data.frame(D)
na.ok <- complete.cases(D)
D <- D[na.ok,,drop = FALSE]
Y <- Y[na.ok]
C <- C[na.ok]
d <- (D[,1] >= 0)
N <- length(Y)
N.1 <- sum(d)
N.0 <- N - N.1
if (is.null(p)) p <- 1
kernel <- tolower(kernel)
kernel.type <- "Epanechnikov"
if (kernel=="triangular" | kernel=="tri") kernel.type <- "Triangular"
if (kernel=="uniform" | kernel=="uni") kernel.type <- "Uniform"
if (kernel=="gaussian" | kernel=="gau") kernel.type <- "Gaussian"
min_sample_size <- bwcheck
if (is.null(bwcheck)) min_sample_size <- 50 + p + 1
if (N < min_sample_size){
warning("Not enough observations to perform RDD calculations. ")
stop()
}
################################ Bandwidth ###################################
if (is.null(h)){
bws <- rdbw2d.dist(Y = Y, D = D, b = b, p = p, kink = kink, kernel = kernel, bwselect = bwselect,
vce = vce, bwcheck = bwcheck, masspoints = masspoints,
C = C, scaleregul = scaleregul, cqt = cqt)
bws <- bws$bws
hgrid <- bws[,3]
hgrid.1 <- bws[,4]
} else {
bwselect <- "user provided"
# standardize bandwidth
if (length(h) == 1){
hgrid <- rep(h, neval)
hgrid.1 <- rep(h, neval)
} else {
hgrid <- h[,1]
hgrid.1 <- h[,2]
}
}
hfull <- cbind(hgrid, hgrid.1)
###################### Point estimation and inference ########################
# kink adjustment
# if (kink =="on"){
# if (bwselect == "mserd" | bwselect == "imserd"){
# hfull <- hfull* N^(-1/4) / N^(-1/(2 * p + 4))
# }
# if (bwselect == "msetwo" | bwselect == "imsetwo"){
# hfull[,1] <- hfull[,1] * N.0^(-1/4) / N.0^(-1/(2 * p + 4))
# hfull[,2] <- hfull[,2] * N.1^(-1/4) / N.1^(-1/(2 * p + 4))
# }
# # if user provides bandwidth, then do not adjust
# }
distfit.p <- rd2d_dist_fit(Y = Y, D = D, h = hfull, p = p, b = b, kernel = kernel,
vce = vce, bwcheck = bwcheck, masspoints = masspoints, C = C, cbands = FALSE)
estimate.p <- distfit.p$Estimate
tau.hat.p <- estimate.p$mu1 - estimate.p$mu0
se.hat.p <- sqrt(estimate.p$se0^2 + estimate.p$se1^2)
h0.p <- estimate.p$h0
h1.p <- estimate.p$h1
eN0.p <- estimate.p$N0
eN1.p <- estimate.p$N1
M.vec <- distfit.p$M.vec
M.0.vec <- distfit.p$M.0.vec
M.1.vec <- distfit.p$M.1.vec
# robust bias correction
hfull.rbc <- hfull
if (kink == "on"){
if (q > p){
q <- p
print("q is taken to be p with kink on.")
}
if (rbc == "on"){
if (bwselect == "mserd" | bwselect == "imserd"){
hfull.rbc <- hfull* M.vec^(-1/3) / M.vec^(-1/4)
}
if (bwselect == "msetwo" | bwselect == "imsetwo"){
hfull.rbc[,1] <- hfull[,1] * M.0.vec^(-1/3) / M.0.vec^(-1/4)
hfull.rbc[,2] <- hfull[,2] * M.1.vec^(-1/3) / M.1.vec^(-1/4)
}
}
} else{
if (q == p){
if (rbc == "on"){
q <- p + 1
print("Set q = p + 1 for robust bias correction.")
}
}
}
distfit.q <- rd2d_dist_fit(Y = Y, D = D, h = hfull.rbc, p = q, b = b, kernel = kernel,
vce = vce, bwcheck = bwcheck, masspoints = masspoints, C = C, cbands = cbands)
estimate.q <- distfit.q$Estimate
tau.hat.q <- estimate.q$mu1 - estimate.q$mu0
se.hat.q <- sqrt(estimate.q$se0^2 + estimate.q$se1^2)
h0.q <- estimate.q$h0
h1.q <- estimate.q$h1
eN0.q <- estimate.q$N0
eN1.q <- estimate.q$N1
zvalues <- tau.hat.q/se.hat.q
pvalues <- 2 * pnorm(abs(zvalues),lower.tail = FALSE)
if (side == "two"){
zval <- qnorm((level + 100)/ 200)
CI.lower <- tau.hat.q - zval * se.hat.q
CI.upper <- tau.hat.q + zval * se.hat.q
}
if (side == "left"){
zval <- qnorm(level / 100)
CI.upper <- tau.hat.q + zval * se.hat.q
CI.lower <- rep(-Inf, length(CI.upper))
}
if (side == "right"){
zval <- qnorm(level / 100)
CI.lower <- tau.hat.q - zval * se.hat.q
CI.upper <- rep(Inf, length(CI.lower))
}
# to store matrices
Indicators.0 <- distfit.q$Indicators.0
Indicators.1 <- distfit.q$Indicators.1
inv.designs.0 <- distfit.q$inv.designs.0
inv.designs.1 <- distfit.q$inv.designs.1
sig.halfs.0 <- distfit.q$sig.halfs.0
sig.halfs.1 <- distfit.q$sig.halfs.1
resd.0 <- distfit.q$resd.0
resd.1 <- distfit.q$resd.1
############################## covariance ####################################
cov.us = matrix(NA,neval,neval)
clusters <- unique(C)
g <- length(clusters)
k <- q + 1
if (cbands){
for (i in 1:neval) {
for (j in i:neval) {
cov.0 = NA
cov.1 = NA
ind.i.0 <- Indicators.0[[i]]
ind.j.0 <- Indicators.0[[j]]
ind.0 <- ind.i.0 * ind.j.0
ind.i.1 <- Indicators.1[[i]]
ind.j.1 <- Indicators.1[[j]]
ind.1 <- ind.i.1 * ind.j.1
slice.i.0 <- as.logical(ind.0[ind.i.0])
slice.j.0 <- as.logical(ind.0[ind.j.0])
slice.i.1 <- as.logical(ind.1[ind.i.1])
slice.j.1 <- as.logical(ind.1[ind.j.1])
sigma0.half.i <- sig.halfs.0[[i]]
sigma0.half.j <- sig.halfs.0[[j]]
sigma1.half.i <- sig.halfs.1[[i]]
sigma1.half.j <- sig.halfs.1[[j]]
resd.i.0 <- unname(resd.0[[i]][slice.i.0])
resd.j.0 <- unname(resd.0[[j]][slice.j.0])
resd.i.1 <- unname(resd.1[[i]][slice.i.1])
resd.j.1 <- unname(resd.1[[j]][slice.j.1])
vec <- rep(0,q+1)
vec[1] <- 1
if (is.null(C)){
cov.0 <- matrix(vec, nrow = 1) %*% t(sigma0.half.i[slice.i.0,,drop = FALSE]) %*% sigma0.half.j[slice.j.0,,drop = FALSE] %*% matrix(vec, ncol=1)
cov.0 <- cov.0[1,1]
cov.1 <- matrix(vec, nrow = 1) %*% t(sigma1.half.i[slice.i.1,,drop = FALSE]) %*% sigma1.half.j[slice.j.1,,drop = FALSE] %*% matrix(vec, ncol=1)
cov.1 <- cov.1[1,1]
} else {
M.0 <- matrix(0, k, k)
M.1 <- matrix(0, k, k)
for (l in 1:g){
M.0 <- M.0 + matrix(sigma0.half.i[l,], ncol = 1) %*% matrix(sigma0.half.j[l,], nrow = 1)
M.1 <- M.1 + matrix(sigma1.half.i[l,], ncol = 1) %*% matrix(sigma1.half.j[l,], nrow = 1)
}
cov.0 <- matrix(vec, nrow = 1) %*% M.0 %*% matrix(vec, ncol=1)
cov.1 <- matrix(vec, nrow = 1) %*% M.1 %*% matrix(vec, ncol=1)
}
cov.us[i,j] <- cov.0 + cov.1
cov.us[j,i] <- cov.0 + cov.1
}
}
}
cov.hat.q <- NA
cb.hat.q <- list(CI.l = CI.lower, CI.r = CI.upper, CB.l = rep(NA, length(CI.lower)), CB.r = rep(NA, length(CI.lower)))
CB.lower <- NA
CB.upper <- NA
cval <- NA
if (cbands){
cov.hat.q <- cov.us
cb.hat.q <- rd2d_cb(tau.hat.q, cov.hat.q, repp, side, level)
CB.lower <- cb.hat.q$CB.l
CB.upper <- cb.hat.q$CB.r
cval <- cb.hat.q$cval
}
clustered <- !is.null(C)
############################### outputs ######################################
main <- cbind(eval[,1], eval[,2], tau.hat.p, se.hat.p, tau.hat.q, se.hat.q, zvalues, pvalues,
CI.lower, CI.upper, CB.lower, CB.upper, hfull[,1], hfull[,2],
hfull.rbc[,1], hfull.rbc[,2], eN0.p, eN1.p)
main <- as.data.frame(main)
colnames(main) <- c("b1","b2","Est.p","Se.p","Est.q","Se.q", "z", "P>|z|",
"CI.lower","CI.upper","CB.lower", "CB.upper", "h0", "h1",
"h0.rbc", "h1.rbc", "Nh0", "Nh1")
main.A0 <- cbind(eval[,1], eval[,2], estimate.p$mu0, estimate.p$se0, estimate.q$mu0, estimate.q$se0, hfull[,1], hfull.rbc[,1], eN0.p)
main.A0 <- as.data.frame(main.A0)
colnames(main.A0) <- c("b1","b2","Est.p","Se.p","Est.q","Se.q","h0", "h0.rbc","Nh0")
main.A1 <- cbind(eval[,1], eval[,2], estimate.p$mu1, estimate.p$se1, estimate.q$mu1, estimate.q$se1, hfull[,2], hfull.rbc[,2], eN1.p)
main.A1 <- as.data.frame(main.A1)
colnames(main.A1) <- c("b1","b2","Est.p","Se.p","Est.q","Se.q","h1", "h1.rbc","Nh1")
rdmodel <- "rd2d.dist"
out <- list(results = main, results.A0 = main.A0, results.A1 = main.A1,
opt=list(b = eval, p = p, q = q, kernel=kernel.type, kink = kink, N=N, N.0 = N.0, rbc = rbc,
N.1 = N.1, M = M.vec, M.0 = M.0.vec, M.1 = M.1.vec, neval=neval, bwselect = bwselect,
vce = vce, bwcheck = bwcheck, masspoints = masspoints, C = C, clustered = clustered,
scaleregul = scaleregul, cqt = cqt,
level = level, repp = repp, side = side,cbands = cbands,cval = cval,
h0 = hfull[,1], h1 = hfull[,2], h0.rbc = hfull.rbc[,1], h1.rbc = hfull.rbc[,2],
Nh0 = eN0.p, Nh1 = eN1.p),
cov.q=cov.us, rdmodel = rdmodel)
out$call <- match.call()
class(out) <- "rd2d.dist"
return(out)
}
################################################################################
#' Print Method for 2D Local Polynomial RD Estimation (Distance-Based)
#'
#' @description
#' Prints the results of a 2D local polynomial regression discontinuity (RD) estimation using distance-based evaluation, as obtained from \code{\link{rd2d.dist}}.
#'
#' @param x An object of class \code{rd2d.dist}, returned by \code{\link{rd2d.dist}}.
#' @param ... Additional arguments passed to the method (currently ignored).
#'
#' @return
#' No return value. This function is called for its side effects: it prints the \code{\link{rd2d.dist}} results.
#'
#' @author
#' Matias D. Cattaneo, Princeton University. \email{cattaneo@princeton.edu} \cr
#' Rocío Titiunik, Princeton University. \email{titiunik@princeton.edu} \cr
#' Ruiqi Rae Yu, Princeton University. \email{rae.yu@princeton.edu}
#'
#' @seealso
#' \code{\link{rd2d.dist}} for estimation using distance-based methods in 2D local polynomial RD designs.
#'
#' Supported methods: \code{\link{print.rd2d.dist}}, \code{\link{summary.rd2d.dist}}.
#'
#' @export
#'
print.rd2d.dist <- function(x,...) {
cat(paste(x$rdmodel, "\n", sep = ""))
cat(paste("\n", sep = ""))
cat(sprintf("Number of Obs. %d\n", x$opt$N))
cat(sprintf("BW type %s\n", paste(x$opt$bwselect, "rot", sep = "-")))
cat(sprintf("Kernel %s\n", paste(tolower(x$opt$kernel), "rad", sep = "-")))
cat(sprintf("VCE method %s\n", paste(x$opt$vce, ifelse(x$opt$clustered, "-clustered", ""),sep = "")))
cat(sprintf("Masspoints %s\n", x$opt$masspoints))
cat("\n")
cat(sprintf("Number of Obs. %-10d %-10d\n", x$opt$N.0, x$opt$N.1))
cat(sprintf("Estimand (deriv) %-10d %-10d\n", 0, 0))
cat(sprintf("Order est. (p) %-10d %-10d\n", x$opt$p, x$opt$p))
cat(sprintf("Order rbc. (q) %-10d %-10d\n", x$opt$q, x$opt$q))
cat("\n")
}
################################################################################
#' Summary Method for 2D Local Polynomial RD Estimation (Distance-Based)
#'
#' @description
#' Summarizes estimation and bandwidth results from a 2D local polynomial regression discontinuity (RD) design using distance-based methods, as returned by \code{\link{rd2d.dist}}.
#'
#' @param object An object of class \code{rd2d.dist}, returned by \code{\link{rd2d.dist}}.
#' @param ... Optional arguments. Supported options include:
#' \itemize{
#' \item \code{CBuniform}: Logical. If \code{TRUE}, displays uniform confidence bands;
#' if \code{FALSE} (default), displays pointwise confidence intervals.
#' \item \code{subset}: Integer vector of indices of evaluation points to display.
#' Defaults to all evaluation points.
#' \item \code{output}: Character. Use \code{"main"} to display estimation results,
#' or \code{"bw"} to display bandwidth information. Default is \code{"main"}.
#' \item \code{sep_main}: Integer vector of length seven controlling the column widths of the output table when \code{output = "main"}.
#' Default is \code{c(4, 7, 7, 7, 7, 7, 17)}. When \code{b} is not provided, output table has five columns, the second and third entry
#' of \code{sep_main} are not used, and can be any place holder.
#' \item \code{sep_bw}: Integer vector of length seven controlling the column widths of the output table when \code{output = "bw"}.
#' Default is \code{c(4, rep(8,6))}. When \code{b} is not provided, output table has five columns, the second and third entry
#' of \code{sep_bw} are not used, and can be any place holder.
#' \item \code{WBATE}: Integer vector of weights for aggregated average treatment effect (WBATE). Should have non-negative entries
#' summing up to one. If provided, an extra row for WBATE is added to the output table.
#' \item \code{LTE}: Logical. If \code{TRUE}, an extra row for largest treatment effect (LTE) is added to the output table.
#' }
#'
#' @return No return value. This function is called for its side effects: it prints a formatted summary of \code{\link{rd2d.dist}} results.
#'
#' @author
#' Matias D. Cattaneo, Princeton University. \email{cattaneo@princeton.edu} \cr
#' Rocío Titiunik, Princeton University. \email{titiunik@princeton.edu} \cr
#' Ruiqi Rae Yu, Princeton University. \email{rae.yu@princeton.edu}
#'
#' @seealso \code{\link{rd2d.dist}} for estimation using distance-based 2D local polynomial RD design.
#'
#' Supported methods: \code{\link{print.rd2d.dist}}, \code{\link{summary.rd2d.dist}}.
#'
#' @export
summary.rd2d.dist <- function(object, ...) {
x <- object
args <- list(...)
if (is.null(args[['CBuniform']])) {
CBuniform <- FALSE
} else {
CBuniform <- TRUE
}
if (is.null(args[['subset']])) {
subset <- NULL
} else {
subset <- args[['subset']]
}
if (is.null(args[['output']])) {
output <- "main"
} else {
output <- args[['output']]
}
if (is.null(args[['sep_main']])) {
sep_main <- c(5, 7, 7, 7, 7, 7, 17)
} else {
sep_main <- args[['sep_main']]
}
if (is.null(args[['sep_bw']])) {
sep_bw <- c(4, rep(8,6))
} else {
sep_bw <- args[['sep_bw']]
}
if (is.null(args[['WBATE']])){
WBATE <- NULL
} else {
WBATE <- args[['WBATE']]
WBATE <- WBATE / sum(WBATE)
}
if (is.null(args[['LTE']])){
LTE <- FALSE
} else {
LTE <- args[['LTE']]
}
# compatibility between 'LTE' and 'cbands'
if (LTE & !x$opt$cbands){
warning("Rerun rd2d with cbands = TRUE.")
stop()
}
cat(paste(x$rdmodel, "\n", sep = ""))
cat(paste("\n", sep = ""))
cat(sprintf("Number of Obs. %d\n", x$opt$N))
cat(sprintf("BW type %s\n", paste(x$opt$bwselect, "rot", sep = "-")))
cat(sprintf("Kernel %s\n", paste(tolower(x$opt$kernel), "rad", sep = "-")))
cat(sprintf("VCE method %s\n", paste(x$opt$vce, ifelse(x$opt$clustered, "-clustered", ""),sep = "")))
cat(sprintf("Masspoints %s\n", x$opt$masspoints))
cat("\n")
cat(sprintf("Number of Obs. %-10d %-10d\n", x$opt$N.0, x$opt$N.1))
cat(sprintf("Estimand (deriv) %-10d %-10d\n", 0, 0))
cat(sprintf("Order est. (p) %-10d %-10d\n", x$opt$p, x$opt$p))
cat(sprintf("Order rbc. (q) %-10d %-10d\n", x$opt$q, x$opt$q))
cat("\n")
results <- data.frame(
b1 = x$opt$b[, 1],
b2 = x$opt$b[, 2],
Coef = x$results$Est.p,
Zvalues = x$results$z,
Pvalues = x$results[["P>|z|"]],
CILower = if (CBuniform) x$results$CB.lower else x$results$CI.lower,
CIUpper = if (CBuniform) x$results$CB.upper else x$results$CI.upper,
h0 = x$opt$h0,
h1 = x$opt$h1,
Nh0 = x$opt$Nh0,
Nh1 = x$opt$Nh1
)
if (!is.null(WBATE)){
est_WBATE <- sum(WBATE * x$results$Est.p)
se_WBATE <- sqrt(WBATE %*% x$cov.q %*% WBATE)[1,1]
zvalue_WBATE <- sum(WBATE * x$results$Est.q)/se_WBATE
pvalue_WBATE <- 2 * pnorm(abs(zvalue_WBATE),lower.tail = FALSE)
if (x$opt$side == "two"){
zval <- qnorm((x$opt$level + 100)/ 200)
CI.lower_WBATE <- sum(WBATE * x$results$Est.q) - zval * se_WBATE
CI.upper_WBATE <- sum(WBATE * x$results$Est.q) + zval * se_WBATE
}
if (x$opt$side == "left"){
zval <- qnorm(x$opt$level / 100)
CI.upper_WBATE <- x$results$Est.q + zval * se_WBATE
CI.lower_WBATE <- rep(-Inf, length(CI.upper_WBATE))
}
if (x$opt$side == "right"){
zval <- qnorm(x$opt$level / 100)
CI.lower_WBATE <- x$results$Est.q - zval * se_WBATE
CI.upper_WBATE <- rep(Inf, length(CI.lower_WBATE))
}
}
if (LTE){
est_LTE.p <- max(x$results$Est.p)
CI.lower_LTE <- max(x$results$Est.q - x$opt$cval * x$results$Se.q)
CI.upper_LTE <- max(x$results$Est.q + x$opt$cval * x$results$Se.q)
}
eval.specified <- !all(is.na(x$opt$b)) # TRUE if at least one b is not NA
neval <- nrow(results)
if (is.null(subset)) {
subset <- seq_len(neval)
} else {
if (!all(subset %in% seq_len(neval))) {
warning("Invalid subset provided. Resetting to default: 1:neval")
subset <- seq_len(neval)
}
}
if (output == "main") {
if (eval.specified) {
headers <- c("ID", "b1", "b2", "Est.", "z", "P > |z|", sprintf("%d%% CI", x$opt$level))
col_widths <- sep_main
} else {
headers <- c("ID", "Est.", "z", "P > |z|", sprintf("%d%% CI", x$opt$level))
col_widths <- sep_main[c(1,4,5,6,7)]
}
if (CBuniform) {
headers[length(headers)] <- sprintf("%d%% Unif. CB", x$opt$level)
}
cat(strrep("=", sum(col_widths) + 2 * (length(headers) - 1)), "\n")
formatted_headers <- mapply(function(h, w) formatC(h, width = w, format = "s"), headers, col_widths)
cat(paste(formatted_headers, collapse = " "), "\n")
cat(strrep("=", sum(col_widths) + 2 * (length(headers) - 1)), "\n")
for (i in seq_len(neval)) {
if (i %in% subset) {
if (eval.specified) {
row_vals <- c(
formatC(i, width = col_widths[1], format = "d"),
formatC(results$b1[i], format = "f", digits = 3, width = col_widths[2]),
formatC(results$b2[i], format = "f", digits = 3, width = col_widths[3]),
formatC(results$Coef[i], format = "f", digits = 4, width = col_widths[4]),
formatC(results$Zvalues[i], format = "f", digits = 4, width = col_widths[5]),
formatC(results$Pvalues[i], format = "f", digits = 4, width = col_widths[6]),
formatC(
paste0("[", formatC(results$CILower[i], format = "f", digits = 4),
", ", formatC(results$CIUpper[i], format = "f", digits = 4), "]"),
width = col_widths[7], format = "s"
)
)
} else {
row_vals <- c(
formatC(i, width = col_widths[1], format = "d"),
formatC(results$Coef[i], format = "f", digits = 4, width = col_widths[2]),
formatC(results$Zvalues[i], format = "f", digits = 4, width = col_widths[3]),
formatC(results$Pvalues[i], format = "f", digits = 4, width = col_widths[4]),
formatC(
paste0("[", formatC(results$CILower[i], format = "f", digits = 4),
", ", formatC(results$CIUpper[i], format = "f", digits = 4), "]"),
width = col_widths[5], format = "s"
)
)
}
cat(paste(row_vals, collapse = " "), "\n")
}
}
if (!is.null(WBATE)){
cat(paste(rep("-", sum(col_widths) + 2 * (length(headers) - 1)), collapse = ""), "\n")
if (eval.specified) {
index_formatted <- formatC("WBATE", width = col_widths[1], format = "s")
b1_formatted <- formatC("", width = col_widths[2], format = "s")
b2_formatted <- formatC("", width = col_widths[3], format = "s")
coef_formatted <- formatC(est_WBATE, format = "f", digits = 4, width = col_widths[4])
zvalues_formatted <- formatC(zvalue_WBATE, format = "f", digits = 4, width = col_widths[5])
pvalues_formatted <- formatC(pvalue_WBATE, format = "f", digits = 4, width = col_widths[6])
ci_formatted <- formatC(paste0("[", formatC(CI.lower_WBATE, format = "f", digits = 4),
", ", formatC(CI.upper_WBATE, format = "f", digits = 4), "]"),
width = col_widths[7], format = "s")
row_vals <- c(index_formatted, b1_formatted, b2_formatted, coef_formatted, zvalues_formatted, pvalues_formatted, ci_formatted)
} else {
index_formatted <- formatC("WBATE", width = col_widths[1], format = "s")
coef_formatted <- formatC(est_WBATE, format = "f", digits = 4, width = col_widths[2])
zvalues_formatted <- formatC(zvalue_WBATE, format = "f", digits = 4, width = col_widths[3])
pvalues_formatted <- formatC(pvalue_WBATE, format = "f", digits = 4, width = col_widths[4])
ci_formatted <- formatC(paste0("[", formatC(CI.lower_WBATE, format = "f", digits = 4),
", ", formatC(CI.upper_WBATE, format = "f", digits = 4), "]"),
width = col_widths[5], format = "s")
row_vals <- c(index_formatted, coef_formatted, zvalues_formatted, pvalues_formatted, ci_formatted)
}
cat(paste(row_vals, collapse = " "), "\n")
}
if (LTE){
cat(paste(rep("-", sum(col_widths) + 2 * (length(headers) - 1)), collapse = ""), "\n")
if (eval.specified) {
index_formatted <- formatC("LTE", width = col_widths[1], format = "s")
b1_formatted <- formatC("", width = col_widths[2], format = "s")
b2_formatted <- formatC("", width = col_widths[3], format = "s")
coef_formatted <- formatC(est_LTE.p, format = "f", digits = 4, width = col_widths[4])
zvalues_formatted <- formatC("", width = col_widths[5], format = "f")
pvalues_formatted <- formatC("", width = col_widths[6], format = "f")
ci_formatted <- formatC(paste0("[", formatC(CI.lower_LTE, format = "f", digits = 4),
", ", formatC(CI.upper_LTE, format = "f", digits = 4), "]"),
width = col_widths[7], format = "s")
row_vals <- c(index_formatted, b1_formatted, b2_formatted, coef_formatted, zvalues_formatted, pvalues_formatted, ci_formatted)
} else {
index_formatted <- formatC("LTE", width = col_widths[1], format = "s")
coef_formatted <- formatC(est_LTE.p, format = "f", digits = 4, width = col_widths[2])
zvalues_formatted <- formatC("", width = col_widths[3], format = "f")
pvalues_formatted <- formatC("", width = col_widths[4], format = "f")
ci_formatted <- formatC(paste0("[", formatC(CI.lower_LTE, format = "f", digits = 4),
", ", formatC(CI.upper_LTE, format = "f", digits = 4), "]"),
width = col_widths[5], format = "s")
row_vals <- c(index_formatted, coef_formatted, zvalues_formatted, pvalues_formatted, ci_formatted)
}
cat(paste(row_vals, collapse = " "), "\n")
}
cat(strrep("=", sum(col_widths) + 2 * (length(headers) - 1)), "\n")
} else if (output == "bw") {
if (eval.specified) {
headers <- c("ID", "b1", "b2", "h0", "h1", "Nh0", "Nh1")
col_widths <- sep_bw
} else {
headers <- c("ID", "h0", "h1", "Nh0", "Nh1")
col_widths <- sep_bw[c(1,4,5,6,7)]
}
cat(strrep("=", sum(col_widths)), "\n")
if (eval.specified) {
group_headers <- c(
formatC(str_pad("Bdy Points", width = 2 + col_widths[2] + col_widths[3], side = "both"), width = col_widths[1] + col_widths[2] + col_widths[3], format = "s"),
formatC(str_pad("Bandwidths", width = 2 + col_widths[5], side = "both"), width = col_widths[4] + col_widths[5], format = "s"),
formatC(str_pad("Eff. N", width = 3 + col_widths[7], side = "both"), width = col_widths[6] + col_widths[7], format = "s")
)
} else {
group_headers <- c(
formatC(" ", width = col_widths[1], format = "s", flag = "-"),
formatC(str_pad("Bandwidths", width = 2 + col_widths[3], side = "both"), width = col_widths[2] + col_widths[3], format = "s"),
formatC(str_pad("Eff. N", width = 3 + col_widths[5], side = "both"), width = col_widths[4] + col_widths[5], format = "s")
)
}
cat(paste(group_headers, collapse = ""), "\n")
formatted_headers <- mapply(function(h, w) formatC(h, width = w, format = "s"), headers, col_widths)
cat(paste(formatted_headers, collapse = ""), "\n")
cat(strrep("=", sum(col_widths)), "\n")
for (j in seq_len(neval)) {
if (j %in% subset) {
if (eval.specified) {
row_vals <- c(
formatC(j, width = col_widths[1], format = "d"),
formatC(results$b1[j], format = "f", digits = 3, width = col_widths[2]),
formatC(results$b2[j], format = "f", digits = 3, width = col_widths[3]),
formatC(results$h0[j], format = "f", digits = 3, width = col_widths[4]),
formatC(results$h1[j], format = "f", digits = 3, width = col_widths[5]),
formatC(results$Nh0[j], format = "d", width = col_widths[6]),
formatC(results$Nh1[j], format = "d", width = col_widths[7])
)
} else {
row_vals <- c(
formatC(j, width = col_widths[1], format = "d"),
formatC(results$h0[j], format = "f", digits = 3, width = col_widths[2]),
formatC(results$h1[j], format = "f", digits = 3, width = col_widths[3]),
formatC(results$Nh0[j], format = "d", width = col_widths[4]),
formatC(results$Nh1[j], format = "d", width = col_widths[5])
)
}
cat(paste(row_vals, collapse = ""), "\n")
}
}
cat(strrep("=", sum(col_widths)), "\n")
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.