Nothing
#' Simulation Study for SC-CIC
#'
#' Generates data under controlled DGPs and evaluates SC-CIC performance.
#'
#' @param n_sims Integer. Number of simulation replications.
#' @param T_pre Integer. Number of pre-treatment periods.
#' @param T_post Integer. Number of post-treatment periods.
#' @param J Integer. Number of donor units.
#' @param tau_true Numeric. True average treatment effect.
#' @param dgp Character. Data generating process. See Details.
#' @param alpha Elastic net mixing parameter for SC construction.
#' @param boot_iters Integer. Bootstrap iterations per simulation.
#' @param seed Integer. Random seed.
#' @param verbose Logical. Print progress.
#'
#' @return A data frame with simulation results.
#'
#' @details
#' Four DGPs are available, designed to test different aspects of SC-CIC:
#'
#' \strong{DGP 1: \code{"linear"} — Baseline.}
#' Outcomes are linear in a common factor and unit-specific loadings.
#' DID is correctly specified. CIC matches DID. SC fits well.
#' Purpose: verify the method works in the easy case.
#'
#' \strong{DGP 2: \code{"nonlinear"} — CIC advantage.}
#' Cross-sectional DGP (not SC). N observations per cell.
#' Control and treated have different distributions of unobservables.
#' The production function is nonlinear and changes over time.
#' DID is biased due to the nonlinear distributional shift; CIC is correct.
#' Purpose: demonstrate the advantage of CIC over DID.
#' Note: this tests \code{cic()}, not \code{sc_cic()}.
#'
#' \strong{DGP 3: \code{"sc_good"} — SC with good distributional fit.}
#' The treated unit is a true sparse combination of donors plus noise.
#' SC recovers the weights well; the distributional dynamics are similar.
#' Purpose: show SC-CIC works when SC fit is good.
#'
#' \strong{DGP 4: \code{"sc_bad"} — SC with mean-only fit.}
#' The SC matches the treated mean, but donors have much lower variance
#' than the treated unit. The distributional transport is wrong.
#' Purpose: show SC-CIC fails when distributional assumptions are violated.
#'
#' @examples
#' # Quick example (runs in seconds)
#' r <- simulate_sccic(n_sims = 2, dgp = "nonlinear", tau_true = 1, boot_iters = 5, verbose = FALSE)
#' summarize_simulation(r, tau_true = 1)
#'
#' \donttest{
#' # Full simulation
#' r <- simulate_sccic(n_sims = 200, dgp = "nonlinear", tau_true = 1)
#' summarize_simulation(r, tau_true = 1)
#' }
#'
#' @export
simulate_sccic <- function(n_sims = 500, T_pre = 25, T_post = 15, J = 15,
tau_true = 1,
dgp = c("linear", "nonlinear", "sc_good", "sc_bad"),
alpha = 1, boot_iters = 200, seed = 42,
verbose = TRUE) {
dgp <- match.arg(dgp)
if (!is.null(seed)) set.seed(seed)
# For cross-sectional DGP, use N per cell instead of T
is_cs <- (dgp == "nonlinear")
results <- data.frame(
sim = integer(n_sims),
tau_cic = numeric(n_sims),
tau_did = numeric(n_sims),
se_cic = numeric(n_sims),
ci_lower = numeric(n_sims),
ci_upper = numeric(n_sims),
covers = logical(n_sims),
pre_rmse = numeric(n_sims)
)
for (s in seq_len(n_sims)) {
if (verbose && s %% 50 == 0) cat("Simulation", s, "/", n_sims, "\n")
res <- tryCatch({
if (is_cs) {
.run_cs_sim(T_pre, T_post, tau_true, dgp)
} else {
.run_sc_sim(T_pre, T_post, J, tau_true, dgp, alpha, boot_iters)
}
}, error = function(e) NULL)
if (is.null(res)) {
results[s, ] <- c(s, rep(NA, 7))
next
}
results$sim[s] <- s
results$tau_cic[s] <- res$tau_cic
results$tau_did[s] <- res$tau_did
results$se_cic[s] <- res$se
results$ci_lower[s] <- res$tau_cic - 1.96 * res$se
results$ci_upper[s] <- res$tau_cic + 1.96 * res$se
results$covers[s] <- !is.na(res$se) &&
(res$tau_cic - 1.96 * res$se) <= tau_true &&
tau_true <= (res$tau_cic + 1.96 * res$se)
results$pre_rmse[s] <- res$pre_rmse
}
attr(results, "dgp") <- dgp
attr(results, "tau_true") <- tau_true
results
}
#' Run one cross-sectional CIC simulation (DGP: nonlinear)
#' @keywords internal
.run_cs_sim <- function(N_cell, N_cell_post, tau_true, dgp) {
# Cross-sectional: N_cell obs per group-period cell
# Control group: U ~ N(0, 1)
# Treated group: U ~ N(0.5, 1) (shifted distribution)
# Production function:
# h(u, t=0) = u
# h(u, t=1) = u + 0.4 * u^2 (monotone for u > -1.25, covers ~90% of N(0,1))
# Treatment: adds tau_true to h(U, 1) for treated in post-period
U_00 <- stats::rnorm(N_cell, 0, 1)
U_01 <- stats::rnorm(N_cell_post, 0, 1)
U_10 <- stats::rnorm(N_cell, 0.5, 1)
U_11 <- stats::rnorm(N_cell_post, 0.5, 1)
# Apply production function
gamma <- 0.4
y_00 <- U_00 # h(u, 0) = u
y_01 <- U_01 + gamma * U_01^2 # h(u, 1) = u + gamma*u^2
y_10 <- U_10 # h(u, 0) = u
y_11 <- U_11 + gamma * U_11^2 + tau_true # h(u, 1) + tau
# True counterfactual for treated in post: h(U_11, 1) without treatment
# = U_11 + gamma * U_11^2
# True tau = mean(y_11) - mean(U_11 + gamma * U_11^2) = tau_true (by construction)
result <- cic(y_00, y_01, y_10, y_11, se = TRUE, boot = FALSE)
list(tau_cic = result$tau, tau_did = result$tau_did,
se = result$se, pre_rmse = 0)
}
#' Run one SC-CIC simulation
#' @keywords internal
.run_sc_sim <- function(T_pre, T_post, J, tau_true, dgp, alpha, boot_iters) {
T_total <- T_pre + T_post
pre_idx <- seq_len(T_pre)
post_idx <- seq(T_pre + 1, T_total)
# Common time factor (smooth trend + noise)
f_t <- 0.1 * seq_len(T_total) + cumsum(stats::rnorm(T_total, 0, 0.2))
# Donor fixed effects and factor loadings
mu_d <- stats::rnorm(J, 0, 1)
lambda_d <- stats::runif(J, 0.5, 1.5)
sigma <- 0.5
# Treated unit: sparse convex combination of donors
n_active <- min(4, J)
active <- sample(J, n_active)
w_true <- numeric(J)
w_true[active] <- stats::runif(n_active, 0.2, 0.6)
w_true <- w_true / sum(w_true)
mu_tr <- sum(w_true * mu_d) + 0.3
lambda_tr <- sum(w_true * lambda_d)
# Generate latent values for all units
latent_d <- matrix(NA, T_total, J)
for (j in seq_len(J)) {
latent_d[, j] <- mu_d[j] + lambda_d[j] * f_t + stats::rnorm(T_total, 0, sigma)
}
latent_tr <- mu_tr + lambda_tr * f_t + stats::rnorm(T_total, 0, sigma)
# Apply production functions based on DGP
if (dgp == "linear") {
# h(u, t) = u for all t. DID is correct. CIC matches DID.
y_donors <- latent_d
y_treated <- latent_tr
} else if (dgp == "sc_good") {
# Linear dynamics with GOOD SC fit.
# Same as linear DGP but with more pre-treatment periods and lower
# noise so the SC fits well and CIC/DID both work.
# The SC is a linear combination of linear outcomes, so it preserves
# the distributional dynamics. CIC assumptions are approximately met.
y_donors <- latent_d
y_treated <- latent_tr
} else if (dgp == "sc_bad") {
# Nonlinear production function applied to ALL units.
# h(u, pre) = u, h(u, post) = u + gamma * u^2
# The SC is sum(w_j * h(L_j, post)) which is NOT h(sum(w_j * L_j), post).
# This means the SC's distributional transport is WRONG even though
# all units share the same production function.
# PURPOSE: demonstrate that SC (a linear operation) breaks CIC when
# the outcome is nonlinear in the latent variable.
gamma <- 0.3
y_donors <- latent_d
y_donors[post_idx, ] <- latent_d[post_idx, ] +
gamma * latent_d[post_idx, ]^2
y_treated <- latent_tr
y_treated[post_idx] <- latent_tr[post_idx] +
gamma * latent_tr[post_idx]^2
}
colnames(y_donors) <- paste0("d", seq_len(J))
# Add treatment effect to treated in post-period
y_treated[post_idx] <- y_treated[post_idx] + tau_true
# Run SC-CIC
result <- sc_cic(y_treated, y_donors,
treatment_period = T_pre + 1,
alpha = alpha, boot = TRUE,
boot_iters = boot_iters, seed = NULL)
list(tau_cic = result$tau, tau_did = result$tau_did,
se = result$boot_se, pre_rmse = result$pre_fit_rmse)
}
#' Summarize simulation results
#'
#' @param results Data frame from \code{simulate_sccic}.
#' @param tau_true True treatment effect.
#' @return Prints summary statistics and returns them invisibly.
#' @export
summarize_simulation <- function(results, tau_true) {
valid <- !is.na(results$tau_cic)
r <- results[valid, ]
dgp <- attr(results, "dgp")
if (is.null(dgp)) dgp <- "unknown"
cat("\n=== Simulation Summary (DGP:", dgp, ") ===\n")
cat("Replications:", sum(valid), "/", nrow(results), "\n")
cat("True tau:", tau_true, "\n\n")
# CIC
bias_cic <- mean(r$tau_cic) - tau_true
rmse_cic <- sqrt(mean((r$tau_cic - tau_true)^2))
se_mean <- mean(r$se_cic, na.rm = TRUE)
coverage <- mean(r$covers, na.rm = TRUE)
cat("CIC:\n")
cat(" Mean estimate:", round(mean(r$tau_cic), 4), "\n")
cat(" Bias: ", round(bias_cic, 4), "\n")
cat(" RMSE: ", round(rmse_cic, 4), "\n")
cat(" Mean SE: ", round(se_mean, 4), "\n")
cat(" 95% CI cover: ", round(coverage * 100, 1), "%\n\n")
# DID
bias_did <- mean(r$tau_did) - tau_true
rmse_did <- sqrt(mean((r$tau_did - tau_true)^2))
cat("DID:\n")
cat(" Mean estimate:", round(mean(r$tau_did), 4), "\n")
cat(" Bias: ", round(bias_did, 4), "\n")
cat(" RMSE: ", round(rmse_did, 4), "\n\n")
if (any(!is.na(r$pre_rmse) & r$pre_rmse > 0)) {
cat("Pre-treatment fit:\n")
cat(" Mean RMSE: ", round(mean(r$pre_rmse[r$pre_rmse > 0], na.rm = TRUE), 4), "\n")
}
invisible(list(bias_cic = bias_cic, rmse_cic = rmse_cic,
coverage = coverage, bias_did = bias_did, rmse_did = rmse_did))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.