#' Nearest Neighborhood Estimation of Conditional Standard Deviation
#'
#' Standard deviation is calculated by the formula (23) of Armstrong and Kolesár (2020)
#'
#' Currently, this is not being used, although this works for general dimensions;
#' debugging to be done.
#'
#' @inheritParams CI_adpt
#' @param N the number of nearest neighbors; default is \code{N = 3}.
#'
#' @return a list containing conditional standard deviation estimates for treated
#' observations (\code{sigma.t}) and control observations (\code{sigma.c})
#'
#' @references{
#'
#' \cite{Armstrong, Timothy B., and Michal Kolesár. 2020.
#' "Simple and honest confidence intervals in nonparametric regression."
#' Quantitative Economics 11 (1): 1–39.}
#'
#' }
#'
#' @export
#'
sigmaNN2 <- function(Xt, Xc, Yt, Yc, N = 3){ # N = 3 might not be suitable for d > 1 case
# Euclidean distance between each observations based on X's
# Returns a n_q by n_q matrix for q = {t,c}
dist.t <- as.matrix(stats::dist(Xt, diag = TRUE, upper = TRUE))
dist.c <- as.matrix(stats::dist(Xc, diag = TRUE, upper = TRUE))
# Prevent choosing itself
diag(dist.t) <- Inf
diag(dist.c) <- Inf
nt <- nrow(Xt)
nc <- nrow(Xc)
yt.nb <- matrix(0, nt, N)
yc.nb <- matrix(0, nc, N)
for(i in 1:nt){
order.i <- order(dist.t[i, ])
yt.nb[i, ] <- Yt[order.i[1:N]]
}
for(i in 1:nc){
order.i <- order(dist.c[i, ])
yc.nb[i, ] <- Yc[order.i[1:N]]
}
diff.t <- Yt - rowMeans(yt.nb)
diff.c <- Yc - rowMeans(yc.nb)
# Formula (23) of Armstrong and Kolesár (2020)
sigma.t <- sqrt((N / (N + 1)) * diff.t^2)
sigma.c <- sqrt((N / (N + 1)) * diff.c^2)
return(list(sigma.t = sigma.t, sigma.c = sigma.c))
}
#' Nearest Neighborhood Estimation of Conditional Standard Deviation
#'
#' Standard deviation is calculated by the formula (23) of Armstrong and Kolesár (2020)
#'
#' For now, this function works only for one-dimensional cases.
#'
#' @inheritParams CI_adpt
#' @param N the number of nearest neighbors; default is \code{N = 3}.
#' @param t.dir treatment direction; \code{t.dir = "left"} if \eqn{x < 0} is treated.
#' Otherwise, \code{t.dir = "right"}.
#'
#' @return a list containing conditional standard deviation estimates for treated
#' observations (\code{sigma.t}) and control observations (\code{sigma.c})
#'
#' @references{
#'
#' \cite{Armstrong, Timothy B., and Michal Kolesár. 2020.
#' "Simple and honest confidence intervals in nonparametric regression."
#' Quantitative Economics 11 (1): 1–39.}
#'
#' }
#'
#' @export
#'
#' @examples n <- 500
#' d <- 1
#' X <- matrix(rnorm(n * d), nrow = n, ncol = d)
#' tind <- X[, 1] < 0
#' Xt <- X[tind == 1, ,drop = FALSE]
#' Xc <- X[tind == 0, ,drop = FALSE]
#' sigma <- rnorm(n)^2 + 1
#' sigma_t <- sigma[tind == 1]
#' sigma_c <- sigma[tind == 0]
#' Yt = 1 + rnorm(length(sigma_t), mean = 0, sd = sigma_t)
#' Yc = rnorm(length(sigma_c), mean = 0, sd = sigma_c)
#' sigmaNN(Xt, Xc, Yt, Yc, t.dir = "left")
sigmaNN <- function(Xt, Xc, Yt, Yc, t.dir = c("left", "right"), N = 3){
t.dir <- match.arg(t.dir)
RDH.sigmaNN <- function (X, Y, J, weights = rep(1L, length(X))){
n <- length(X)
sigma2 <- matrix(nrow = n, ncol = NCOL(Y)^2)
for (k in seq_along(X)) {
s <- max(k - J, 1):k
d <- sort(abs(c(X[s[-length(s)]], X[k:min(k + J, n)][-1]) -
X[k]))[J]
ind <- (abs(X - X[k]) <= d)
ind[k] <- FALSE
Jk <- sum(weights[ind])
sigma2[k, ] <- Jk/(Jk + weights[k]) * if (NCOL(Y) >
1)
as.vector(outer(Y[k, ] - colSums(weights[ind] *
Y[ind, ])/Jk, Y[k, ] - colSums(weights[ind] *
Y[ind, ])/Jk))
else (Y[k] - sum(weights[ind] * Y[ind])/Jk)^2
}
drop(sigma2)
}
y <- c(Yt, Yc)
x <- c(as.vector(Xt), as.vector(Xc))
d <- RDHonest::RDData(data.frame(y = y, x = x), 0)
if(t.dir == "left"){
# Flip p and m since RDHonest assumes x > 0 is treated
sigma.t <- RDH.sigmaNN(d$Xm, d$Ym, N)
sigma.c <- RDH.sigmaNN(d$Xp, d$Yp, N)
}else{
sigma.t <- RDH.sigmaNN(d$Xp, d$Yp, N)
sigma.c <- RDH.sigmaNN(d$Xm, d$Ym, N)
}
return(list(sigma.t = sqrt(sigma.t), sigma.c = sqrt(sigma.c)))
}
#' Standard Deviation Calculation by Silverman's Rule
#'
#' Standard deviation is calculated using the first-stage bandwidth chosen by
#' Silverman's Rule, as in RDHonest::NPRPrelimVar.fit.
#'
#' This works only for cases with one-dimensional running variables.
#'
#' @inheritParams CI_adpt
#' @param t.dir treatment direction; \code{t.dir = "left"} if
#' \eqn{x < 0} is treated. Otherwise, \code{t.dir = "right"}.
#'
#' @return a list containing conditional standard deviation estimates for treated
#' observations (\code{sigma.t}) and control observations (\code{sigma.c})
#' @export
#'
#' @examples X <- matrix(rnorm(500 * 1), nrow = 500, ncol = 1)
#' tind <- X[, 1] < 0
#' Xt <- X[tind == 1, ,drop = FALSE]
#' Xc <- X[tind == 0, ,drop = FALSE]
#' sigma <- rep(1, 500)
#' sigma_t <- sigma[tind == 1]
#' sigma_c <- sigma[tind == 0]
#' Yt = 1 + rnorm(length(sigma_t), mean = 0, sd = sigma_t)
#' Yc = rnorm(length(sigma_c), mean = 0, sd = sigma_c)
#' sigmaSvm(Xt, Xc, Yt, Yc, "left")
sigmaSvm <- function(Xt, Xc, Yt, Yc, t.dir = c("left", "right")){
Xt <- as.vector(Xt)
Xc <- as.vector(Xc)
X <- c(Xt, Xc)
Y <- c(Yt, Yc)
# Minimum value of meaningful bandwidth
hmin <- max(sort(abs(unique(Xt)))[2], sort(abs(unique(Xc)))[2])
h1 <- max(1.84 * stats::sd(X)/sum(length(X))^(1/5), hmin)
# First-stage nonparametric regression
# order = 0, since we don't assume bounded 2nd derivative
drf <- data.frame(y = Y, x = X)
d <- RDHonest::RDData(drf, 0)
r1 <- RDHonest::NPRreg.fit(d = d, h = h1, order = 0, se.method = "EHW")
r1$sigma2p <- r1$sigma2p * length(r1$sigma2p)/(length(r1$sigma2p) - 1)
r1$sigma2m <- r1$sigma2m * length(r1$sigma2m)/(length(r1$sigma2m) - 1)
if(t.dir == "left"){
# Flip p and m since RDHonest assumes x > 0 is treated
sigma.c <- rep(mean(r1$sigma2p), length(d$Xp))
sigma.t <- rep(mean(r1$sigma2m), length(d$Xm))
}else{
sigma.t <- rep(mean(r1$sigma2p), length(d$Xp))
sigma.c <- rep(mean(r1$sigma2m), length(d$Xm))
}
return(list(sigma.t = sqrt(sigma.t), sigma.c = sqrt(sigma.c)))
}
#' Standard Deviation Calculation by Silverman's Rule (Tesing Version for Multi-dim)
#'
#' Standard deviation is calculated using Silverman's Rule,
#' as in RDHonest::NPRPrelimVar.fit
#'
#' This works only for one-dimensional cases.
#'
#' @inheritParams CI_adpt
#'
#' @return a list containing conditional standard deviation estimates for treated
#' observations (\code{sigma.t}) and control observations (\code{sigma.c})
#' @export
#'
#' @examples n <- 500
#' d <- 1
#' X <- matrix(rnorm(n * d), nrow = n, ncol = d)
#' tind <- X[, 1] < 0
#' Xt <- X[tind == 1, ,drop = FALSE]
#' Xc <- X[tind == 0, ,drop = FALSE]
#' sigma <- rep(1, n)
#' sigma_t <- sigma[tind == 1]
#' sigma_c <- sigma[tind == 0]
#' Yt = 1 + rnorm(length(sigma_t), mean = 0, sd = sigma_t)
#' Yc = rnorm(length(sigma_c), mean = 0, sd = sigma_c)
#' sigmaSvm.test(Xt, Xc, Yt, Yc)
sigmaSvm.test <- function(Xt, Xc, Yt, Yc){
# Convert running variable data into matrices (for dim = 1)
if(!is.matrix(Xt)) Xt <- matrix(Xt, ncol = 1)
if(!is.matrix(Xc)) Xc <- matrix(Xc, ncol = 1)
d <- ncol(Xt)
n <- nrow(Xt) + nrow(Xc)
# Minimum bandwidth configuration
Xmin.vec <- numeric(d)
for(i in 1:d){
Xt.i <- Xt[, i]
Xc.i <- Xc[, i]
Xmin.vec[i] <- max(sort(abs(unique(Xt.i)))[2], sort(abs(unique(Xc.i)))[2])
}
X <- rbind(Xt, Xc)
Y <- c(Yt, Yc)
# Calculating the initial bandwidth using multivariate kernel density rule of thumb
# Based on Table 6.3 and (6.43) of Scott (1992)
# When d = 1, equivalent to h1 defined in Section 4.2 of Imbens and Kalyanaraman (2012),
# except for the minimum bandwidth configuration
h.vec <- numeric(d)
for(i in 1:d){
X.i <- X[, i]
h.vec[i] <- max(1.74 * (4 / ((d + 2) * n))^(1/(d + 4)) * stats::sd(X.i), Xmin.vec[i])
}
# Local constant regression
if(d == 1){
bwres.t <- np::npregbw(Yt ~ as.vector(Xt), bws = h.vec, bandwidth.compute = FALSE)
npreg.res.t <- np::npreg(Yt ~ as.vector(Xt), bws = bwres.t, ckertype = "uniform",
newdata = data.frame(Xt = 0))
bwres.c <- np::npregbw(Yc ~ as.vector(Xc), bws = h.vec, bandwidth.compute = FALSE)
npreg.res.c <- np::npreg(Yc ~ as.vector(Xc), bws = bwres.c, ckertype = "uniform",
newdata = data.frame(Xc = 0))
resid.t <- (Yt - npreg.res.t$mean)[abs(Xt[, 1]) < h.vec[1]]
resid.c <- (Yc - npreg.res.c$mean)[abs(Xc[, 1]) < h.vec[1]]
}else if(d == 2){
Xt1 <- Xt[, 1]
Xt2 <- Xt[, 2]
Xc1 <- Xc[, 1]
Xc2 <- Xc[, 2]
bwres.t <- np::npregbw(Yt ~ Xt1 + Xt2, bws = h.vec, bandwidth.compute = FALSE)
npreg.res.t <- np::npreg(Yt ~ Xt1 + Xt2, bws = bwres.t, ckertype = "uniform",
newdata = data.frame(Xt1 = 0, Xt2 = 0))
bwres.c <- np::npregbw(Yc ~ Xc1 + Xc2, bws = h.vec, bandwidth.compute = FALSE)
npreg.res.c <- np::npreg(Yc ~ Xc1 + Xc2, bws = bwres.c, ckertype = "uniform",
newdata = data.frame(Xc1 = 0, Xc2 = 0))
resid.t <- (Yt - npreg.res.t$mean)[abs(Xt[, 1]) < h.vec[1] & abs(Xt[, 2]) < h.vec[2]]
resid.c <- (Yc - npreg.res.c$mean)[abs(Xc[, 1]) < h.vec[1] & abs(Xc[, 2]) < h.vec[2]]
}
# Estimation of conditional variance under homoskedasticity
sigma2t <- resid.t^2 * length(resid.t) / (length(resid.t) - d)
sigma2c <- resid.c^2 * length(resid.c) / (length(resid.c) - d)
sigma.t <- rep(sqrt(mean(sigma2t)), nrow(Xt))
sigma.c <- rep(sqrt(mean(sigma2c)), nrow(Xc))
return(list(sigma.t = sigma.t, sigma.c = sigma.c))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.