Nothing
#' Changes-in-Changes Estimator
#'
#' Implements the Changes-in-Changes (CIC) estimator of Athey and Imbens (2006)
#' for the average treatment effect on the treated in a two-group, two-period
#' difference-in-differences setting.
#'
#' @param y_00 Numeric vector. Outcomes for the control group in the
#' pre-treatment period.
#' @param y_01 Numeric vector. Outcomes for the control group in the
#' post-treatment period.
#' @param y_10 Numeric vector. Outcomes for the treated group in the
#' pre-treatment period.
#' @param y_11 Numeric vector. Outcomes for the treated group in the
#' post-treatment period.
#' @param se Logical. If \code{TRUE} (default), compute analytic standard
#' errors using the asymptotic variance from Theorem 5.1 of Athey and
#' Imbens (2006). Ignored (with a message) when \code{discrete = TRUE},
#' because Theorem 5.1 is derived under the continuous-distribution
#' assumption.
#' @param boot Logical. If \code{TRUE}, also compute bootstrap standard
#' errors. Default is \code{FALSE}.
#' @param boot_iters Integer. Number of bootstrap iterations. Default 500.
#' @param seed Integer or \code{NULL}. Random seed for bootstrap.
#' @param discrete Logical. If \code{FALSE} (default), use the continuous
#' CIC estimator (Theorem~3.1 of Athey and Imbens 2006), which applies
#' \eqn{F^{-1}_{01}(F_{00}(y_{10,i}))} to each pre-treatment treated
#' observation. If \code{TRUE}, use the discrete CIC estimator
#' (Theorem~4.1), which integrates the counterfactual over the quantile
#' band \eqn{[F_{00}(y^-), F_{00}(y)]} to handle mass points in the
#' outcome distribution. Use \code{discrete = TRUE} when the outcome
#' takes a small number of distinct values (e.g., integer counts).
#'
#' @return An object of class \code{"cic"} containing:
#' \item{tau}{The CIC average treatment effect estimate.}
#' \item{se}{Analytic standard error (if \code{se = TRUE}).}
#' \item{z}{z-statistic.}
#' \item{pval}{Two-sided p-value.}
#' \item{counterfactual_mean}{Mean of the counterfactual distribution.}
#' \item{tau_did}{The standard DID estimate for comparison.}
#' \item{N}{Total sample size.}
#' \item{n}{Named vector of group sample sizes.}
#' \item{boot_se}{Bootstrap standard error (if \code{boot = TRUE}).}
#' \item{ecdfs}{List of empirical CDF objects for each group.}
#'
#' @details
#' The CIC estimator constructs a counterfactual distribution for the treated
#' group in the post-treatment period by applying the transformation:
#' \deqn{Y^{N,CIC}_{11} = F^{-1}_{Y,01}(F_{Y,00}(Y_{10}))}
#'
#' The average treatment effect is then:
#' \deqn{\hat{\tau}^{CIC} = \frac{1}{N_{11}} \sum Y_{11,i}
#' - \frac{1}{N_{10}} \sum F^{-1}_{Y,01}(F_{Y,00}(Y_{10,i}))}
#'
#' The analytic variance follows Theorem 5.1 of Athey and Imbens (2006):
#' \deqn{Var(\sqrt{N} \hat{\tau}^{CIC}) = V^p/\alpha_{00} + V^q/\alpha_{01}
#' + V^r/\alpha_{10} + V^s/\alpha_{11}}
#'
#' @references
#' Athey, S. and Imbens, G. W. (2006). Identification and Inference in
#' Nonlinear Difference-in-Differences Models. \emph{Econometrica},
#' 74(2), 431--497. \doi{10.1111/j.1468-0262.2006.00668.x}
#'
#' @examples
#' # Workers' compensation example (Meyer, Viscusi, and Durbin 1995)
#' if (requireNamespace("wooldridge", quietly = TRUE)) {
#' data("injury", package = "wooldridge")
#' result <- cic(
#' y_00 = injury$ldurat[injury$highearn == 0 & injury$afchnge == 0],
#' y_01 = injury$ldurat[injury$highearn == 0 & injury$afchnge == 1],
#' y_10 = injury$ldurat[injury$highearn == 1 & injury$afchnge == 0],
#' y_11 = injury$ldurat[injury$highearn == 1 & injury$afchnge == 1]
#' )
#' print(result)
#' }
#'
#' @export
cic <- function(y_00, y_01, y_10, y_11,
se = TRUE, boot = FALSE, boot_iters = 500L, seed = NULL,
discrete = FALSE) {
# Input validation
stopifnot(is.numeric(y_00), is.numeric(y_01),
is.numeric(y_10), is.numeric(y_11))
stopifnot(length(y_00) > 0, length(y_01) > 0,
length(y_10) > 0, length(y_11) > 0)
# Build empirical CDFs
ec_00 <- make_ecdf(y_00)
ec_01 <- make_ecdf(y_01)
ec_10 <- make_ecdf(y_10)
ec_11 <- make_ecdf(y_11)
N_00 <- ec_00$n; N_01 <- ec_01$n
N_10 <- ec_10$n; N_11 <- ec_11$n
N <- N_00 + N_01 + N_10 + N_11
# Counterfactual values via continuous or discrete CIC formula
cf_values <- compute_cic_cf(y_10, ec_00, ec_01, discrete = discrete)
counterfactual_mean <- mean(cf_values)
tau <- mean(y_11) - counterfactual_mean
# Standard DID for comparison
tau_did <- (mean(y_11) - mean(y_10)) - (mean(y_01) - mean(y_00))
# Analytic standard errors (Theorem 5.1, continuous case only)
se_val <- NA_real_
z_val <- NA_real_
pval <- NA_real_
V_components <- NULL
if (discrete && se) {
message("Analytic standard errors (Theorem 5.1) assume a continuous ",
"outcome distribution and are not valid when discrete = TRUE. ",
"Use boot = TRUE for inference.")
se <- FALSE
}
if (se) {
ecdfs <- list(ec_00 = ec_00, ec_01 = ec_01,
ec_10 = ec_10, ec_11 = ec_11)
V_components <- compute_variance_components(
y_00, y_01, y_10, y_11, ecdfs)
V_p <- V_components$V_p
V_q <- V_components$V_q
V_r <- V_components$V_r
V_s <- V_components$V_s
a_00 <- N_00 / N; a_01 <- N_01 / N
a_10 <- N_10 / N; a_11 <- N_11 / N
avar <- V_p / a_00 + V_q / a_01 + V_r / a_10 + V_s / a_11
se_val <- sqrt(avar) / sqrt(N)
z_val <- tau / se_val
pval <- 2 * stats::pnorm(-abs(z_val))
}
# Bootstrap
boot_se_val <- NA_real_
if (boot) {
if (!is.null(seed)) set.seed(seed)
boot_taus <- replicate(boot_iters, {
b00 <- sample(y_00, N_00, replace = TRUE)
b01 <- sample(y_01, N_01, replace = TRUE)
b10 <- sample(y_10, N_10, replace = TRUE)
b11 <- sample(y_11, N_11, replace = TRUE)
bec_00 <- make_ecdf(b00)
bec_01 <- make_ecdf(b01)
bcf <- compute_cic_cf(b10, bec_00, bec_01, discrete = discrete)
mean(b11) - mean(bcf)
})
boot_se_val <- stats::sd(boot_taus)
}
structure(list(
tau = tau,
se = se_val,
z = z_val,
pval = pval,
counterfactual_mean = counterfactual_mean,
tau_did = tau_did,
N = N,
n = c(N_00 = N_00, N_01 = N_01, N_10 = N_10, N_11 = N_11),
boot_se = boot_se_val,
V_components = V_components,
ecdfs = list(ec_00 = ec_00, ec_01 = ec_01,
ec_10 = ec_10, ec_11 = ec_11),
cf_values = cf_values,
discrete = discrete
), class = "cic")
}
#' @export
print.cic <- function(x, digits = 4, ...) {
estimator_label <- if (isTRUE(x$discrete)) {
"discrete (Theorem 4.1)"
} else {
"continuous (Theorem 3.1)"
}
cat("\nChanges-in-Changes Estimator\n")
cat(strrep("-", 40), "\n")
cat(" Estimator:", estimator_label, "\n")
cat(" tau^CIC =", formatC(x$tau, digits = digits, format = "f"), "\n")
cat(" tau^DID =", formatC(x$tau_did, digits = digits, format = "f"), "\n")
if (!is.na(x$se)) {
cat(" SE =", formatC(x$se, digits = digits, format = "f"), "\n")
cat(" z-stat =", formatC(x$z, digits = digits, format = "f"), "\n")
cat(" p-value =", format.pval(x$pval, digits = digits), "\n")
}
if (!is.na(x$boot_se)) {
cat(" boot SE =", formatC(x$boot_se, digits = digits, format = "f"), "\n")
}
cat(" N =", x$N,
sprintf("(%d/%d/%d/%d)", x$n[1], x$n[2], x$n[3], x$n[4]), "\n")
invisible(x)
}
#' @export
summary.cic <- function(object, ...) {
print.cic(object, ...)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.