Nothing
#' Synthetic Control Changes-in-Changes Estimator
#'
#' Combines synthetic control methods with the Changes-in-Changes estimator.
#' First constructs a synthetic control unit from donor units using elastic
#' net regularization, then applies the CIC estimator using the synthetic
#' control as the comparison group.
#'
#' @param y_treated Numeric vector. Outcome for the treated unit across all
#' time periods (pre and post).
#' @param y_donors Numeric matrix. Outcomes for donor units, with rows as
#' time periods (matching \code{y_treated}) and columns as donor units.
#' @param treatment_period Integer. The index (row number) of the first
#' treatment period. Periods 1 to \code{treatment_period - 1} are
#' pre-treatment.
#' @param alpha Elastic net mixing parameter. \code{alpha = 1} (default) is
#' lasso; \code{alpha = 0} is ridge.
#' @param boot Logical. Compute bootstrap standard errors. Default \code{TRUE}.
#' The bootstrap re-estimates the elastic net weights in every iteration
#' to account for first-stage estimation uncertainty.
#' @param boot_iters Integer. Number of bootstrap iterations. Default 500.
#' @param seed Integer or \code{NULL}. Random seed for reproducibility.
#'
#' @return An object of class \code{"sc_cic"} inheriting from \code{"cic"},
#' with components:
#' \item{tau}{The SC-CIC average treatment effect estimate.}
#' \item{se}{Bootstrap standard error (if \code{boot = TRUE}). Note:
#' analytic standard errors from Athey and Imbens (2006) Theorem 5.1
#' are \emph{not} provided for \code{sc_cic}, because they do not
#' account for first-stage synthetic control estimation uncertainty.
#' Use \code{\link{cic}} directly if analytic SEs are needed for a
#' pre-specified control group.}
#' \item{z}{z-statistic (bootstrap-based).}
#' \item{pval}{Two-sided p-value (bootstrap-based).}
#' \item{boot_se}{Same as \code{se} (for compatibility with \code{cic}).}
#' \item{tau_did}{The SC-DID estimate for comparison.}
#' \item{sc_weights}{Named vector of synthetic control weights (including
#' intercept).}
#' \item{sc_fitted}{Synthetic control outcome across all time periods.}
#' \item{donors_selected}{Names of donor units with nonzero weights.}
#' \item{pre_fit_rmse}{Root mean squared error of pre-treatment fit.}
#'
#' @details
#' The procedure works in two steps:
#'
#' \strong{Step 1: Synthetic Control Construction.}
#' In the pre-treatment period, the treated unit's outcome is regressed on
#' the donor units' outcomes using elastic net (via \code{\link[glmnet]{cv.glmnet}}).
#' This yields a sparse set of weights that construct a synthetic control unit
#' as a weighted combination of donors.
#'
#' \strong{Step 2: CIC Estimation.}
#' The CIC estimator is applied with the synthetic control as the "control
#' group" and the treated unit as the "treatment group."
#'
#' \strong{Inference.}
#' Because the synthetic control is an estimated object, the analytic
#' asymptotic variance of Athey and Imbens (2006) does not directly apply.
#' Instead, \code{sc_cic} provides bootstrap standard errors that
#' re-estimate the elastic net weights in each bootstrap iteration,
#' thereby accounting for first-stage estimation uncertainty. The bootstrap
#' resamples time periods (with replacement) within the pre-treatment and
#' post-treatment windows separately, preserving the panel structure.
#'
#' @references
#' Athey, S. and Imbens, G. W. (2006). Identification and Inference in
#' Nonlinear Difference-in-Differences Models. \emph{Econometrica},
#' 74(2), 431--497.
#'
#' Abadie, A., Diamond, A., and Hainmueller, J. (2010). Synthetic Control
#' Methods for Comparative Case Studies. \emph{Journal of the American
#' Statistical Association}, 105(490), 493--505.
#'
#' @examples
#' \donttest{
#' # Basque Country example
#' if (requireNamespace("Synth", quietly = TRUE)) {
#' data("basque", package = "Synth")
#' gdp <- reshape(basque[, c("regionno", "year", "gdpcap")],
#' idvar = "year", timevar = "regionno", direction = "wide")
#' y_treated <- gdp[, "gdpcap.17"]
#' donors <- as.matrix(gdp[, grep("gdpcap\\.", names(gdp))])
#' donors <- donors[, !colnames(donors) %in% c("gdpcap.17", "gdpcap.1")]
#' valid <- complete.cases(y_treated, donors)
#' result <- sc_cic(y_treated[valid], donors[valid, ],
#' treatment_period = 16, seed = 42)
#' print(result)
#' }
#' }
#'
#' @export
sc_cic <- function(y_treated, y_donors, treatment_period,
alpha = 1, boot = TRUE,
boot_iters = 500L, seed = NULL) {
# Input validation
stopifnot(is.numeric(y_treated))
if (!is.matrix(y_donors)) y_donors <- as.matrix(y_donors)
stopifnot(nrow(y_donors) == length(y_treated))
stopifnot(treatment_period > 1, treatment_period <= length(y_treated))
T_total <- length(y_treated)
pre_idx <- seq_len(treatment_period - 1)
post_idx <- seq(treatment_period, T_total)
T_pre <- length(pre_idx)
T_post <- length(post_idx)
# Remove donors with NAs in pre-period
good_cols <- colSums(is.na(y_donors[pre_idx, , drop = FALSE])) == 0
y_donors_clean <- y_donors[, good_cols, drop = FALSE]
# Step 1: Construct synthetic control via elastic net
sc_result <- .fit_sc(y_treated, y_donors_clean, pre_idx, post_idx, alpha, seed)
# Step 2: Apply CIC (no analytic SE — not valid for two-stage estimator)
cic_result <- cic(sc_result$y_00, sc_result$y_01,
sc_result$y_10, sc_result$y_11,
se = FALSE, boot = FALSE)
# Pre-treatment fit diagnostics
pre_fit_rmse <- sqrt(mean((sc_result$y_10 - sc_result$y_00)^2))
# Distributional alignment check (KS test on pre-treatment SC vs treated)
ks_result <- tryCatch(
stats::ks.test(sc_result$y_00, sc_result$y_10),
error = function(e) NULL
)
ks_pval <- if (!is.null(ks_result)) ks_result$p.value else NA_real_
if (!is.na(ks_pval) && ks_pval < 0.10) {
warning("Pre-treatment distributional mismatch detected (KS test p = ",
round(ks_pval, 3), "). ",
"The synthetic control matches the treated unit's mean but not ",
"its distribution. CIC estimates may be unreliable. ",
"Inspect plot_qq() for details.",
call. = FALSE)
}
# Effective number of donors (non-zero weights excluding intercept)
n_donors_eff <- sum(sc_result$weights[-1] != 0)
# Bootstrap: re-estimate SC weights in each iteration
boot_se_val <- NA_real_
boot_taus <- NULL
if (boot) {
if (!is.null(seed)) set.seed(seed)
boot_taus <- replicate(boot_iters, {
# Resample pre-treatment periods (with replacement)
b_pre <- sample(pre_idx, T_pre, replace = TRUE)
# Resample post-treatment periods (with replacement)
b_post <- sample(post_idx, T_post, replace = TRUE)
# Re-estimate SC weights on resampled pre-treatment data
b_sc <- tryCatch({
.fit_sc(y_treated, y_donors_clean, b_pre, b_post, alpha, seed = NULL)
}, error = function(e) NULL)
if (is.null(b_sc)) return(NA_real_)
# Compute CIC tau on resampled data
b_cic <- tryCatch({
cic(b_sc$y_00, b_sc$y_01, b_sc$y_10, b_sc$y_11,
se = FALSE, boot = FALSE)$tau
}, error = function(e) NA_real_)
b_cic
})
boot_taus <- boot_taus[!is.na(boot_taus)]
if (length(boot_taus) > 10) {
boot_se_val <- stats::sd(boot_taus)
}
}
# Assemble result
se_val <- boot_se_val
z_val <- if (!is.na(se_val) && se_val > 0) cic_result$tau / se_val else NA_real_
pval <- if (!is.na(z_val)) 2 * stats::pnorm(-abs(z_val)) else NA_real_
nonzero_idx <- which(sc_result$weights != 0)
structure(list(
tau = cic_result$tau,
se = se_val,
z = z_val,
pval = pval,
counterfactual_mean = cic_result$counterfactual_mean,
tau_did = cic_result$tau_did,
N = cic_result$N,
n = cic_result$n,
boot_se = boot_se_val,
boot_taus = boot_taus,
V_components = NULL,
ecdfs = cic_result$ecdfs,
cf_values = cic_result$cf_values,
sc_weights = sc_result$weights,
sc_fitted = sc_result$fitted,
donors_selected = names(sc_result$weights)[nonzero_idx],
pre_fit_rmse = pre_fit_rmse,
ks_pval = ks_pval,
n_donors_eff = n_donors_eff,
cv_fit = sc_result$cv_fit,
y_treated = y_treated,
treatment_period = treatment_period
), class = c("sc_cic", "cic"))
}
#' Fit synthetic control via elastic net (internal)
#' @keywords internal
.fit_sc <- function(y_treated, y_donors, pre_idx, post_idx, alpha, seed) {
X_pre <- y_donors[pre_idx, , drop = FALSE]
y_pre <- y_treated[pre_idx]
if (!is.null(seed)) set.seed(seed)
cv_fit <- glmnet::cv.glmnet(X_pre, y_pre, alpha = alpha)
coefs <- stats::coef(cv_fit, s = "lambda.min")
w <- as.numeric(coefs)
names(w) <- c("intercept", colnames(y_donors))
# Construct SC for all periods
X_all <- cbind(1, y_donors)
X_all[is.na(X_all)] <- 0
fitted <- as.numeric(X_all %*% w)
list(
weights = w,
fitted = fitted,
cv_fit = cv_fit,
y_00 = fitted[pre_idx],
y_01 = fitted[post_idx],
y_10 = y_treated[pre_idx],
y_11 = y_treated[post_idx]
)
}
#' @export
print.sc_cic <- function(x, digits = 4, ...) {
cat("\nSynthetic Control + Changes-in-Changes Estimator\n")
cat(strrep("-", 50), "\n")
cat("SC donors selected:", x$n_donors_eff, "\n")
cat("Pre-treatment RMSE:", formatC(x$pre_fit_rmse, digits = digits, format = "f"), "\n")
if (!is.na(x$ks_pval)) {
cat("Pre-treatment KS p: ", formatC(x$ks_pval, digits = 3, format = "f"),
if (x$ks_pval < 0.10) " [WARNING: distributional mismatch]" else " [OK]",
"\n", sep = "")
}
cat(strrep("-", 50), "\n")
cat(" tau^CIC =", formatC(x$tau, digits = digits, format = "f"), "\n")
cat(" tau^DID =", formatC(x$tau_did, digits = digits, format = "f"), "\n")
if (!is.na(x$se)) {
cat(" boot SE =", formatC(x$se, digits = digits, format = "f"), "\n")
cat(" z-stat =", formatC(x$z, digits = digits, format = "f"), "\n")
cat(" p-value =", format.pval(x$pval, digits = digits), "\n")
}
cat(" N =", x$N,
sprintf("(%d/%d/%d/%d)", x$n[1], x$n[2], x$n[3], x$n[4]), "\n")
invisible(x)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.