R/diagnostics.R

Defines functions check_data plot_qq check_support loo_donors sensitivity_alpha plot_qte quantile_te sc_weights plot_distributions plot.sc_cic

Documented in check_data check_support loo_donors plot_distributions plot_qq plot_qte plot.sc_cic quantile_te sc_weights sensitivity_alpha

#' Plot Pre-treatment Fit for SC-CIC
#'
#' Plots the treated unit against the synthetic control over time,
#' with a vertical line at the treatment period.
#'
#' @param x An object of class \code{"sc_cic"}.
#' @param ... Additional arguments passed to \code{\link[base]{plot}}.
#'
#' @return Invisible. Called for its side effect of producing a plot.
#'
#' @details
#' The plot shows the treated unit (solid line) and synthetic control
#' (dashed line) over all time periods, with a vertical dashed line
#' marking the start of treatment. Good pre-treatment fit is a necessary
#' (but not sufficient) condition for valid SC-CIC inference.
#'
#' @export
plot.sc_cic <- function(x, ...) {
  y_tr <- x$y_treated
  y_sc <- x$sc_fitted
  tp <- x$treatment_period
  T_total <- length(y_tr)

  if (is.null(y_tr) || is.null(y_sc)) {
    stop("Plot requires y_treated and sc_fitted in the sc_cic object. ",
         "Re-run sc_cic() to obtain these.")
  }

  time <- seq_len(T_total)
  ylim <- range(c(y_tr, y_sc), na.rm = TRUE)

  plot(time, y_tr, type = "l", lwd = 2, col = "black",
       ylim = ylim, xlab = "Time period", ylab = "Outcome",
       main = "SC-CIC: Treated vs Synthetic Control", ...)
  lines(time, y_sc, lwd = 2, col = "steelblue", lty = 2)
  abline(v = tp - 0.5, lty = 3, col = "red", lwd = 1.5)
  legend("topleft", legend = c("Treated", "Synthetic Control", "Treatment"),
         col = c("black", "steelblue", "red"), lty = c(1, 2, 3), lwd = c(2, 2, 1.5),
         bty = "n", cex = 0.9)

  invisible(x)
}

#' Plot Counterfactual Distribution
#'
#' Plots the empirical CDFs of the four group-period cells used in the
#' CIC estimator, illustrating the distributional transport.
#'
#' @param x An object of class \code{"cic"} or \code{"sc_cic"}.
#' @param ... Additional arguments (currently unused).
#'
#' @return Invisible. Called for its side effect of producing a plot.
#'
#' @export
plot_distributions <- function(x, ...) {
  ecdfs <- x$ecdfs
  if (is.null(ecdfs)) stop("No ecdf objects found in result.")

  ec_00 <- ecdfs$ec_00; ec_01 <- ecdfs$ec_01
  ec_10 <- ecdfs$ec_10; ec_11 <- ecdfs$ec_11

  all_vals <- c(ec_00$values, ec_01$values, ec_10$values, ec_11$values)
  xlim <- range(all_vals)

  oldpar <- par(mfrow = c(1, 2), mar = c(4, 4, 3, 1))
  on.exit(par(oldpar))

  # Control group CDFs
  plot(ec_00$values, ec_00$cdf, type = "s", lwd = 2, col = "steelblue",
       xlim = xlim, ylim = c(0, 1),
       xlab = "Outcome", ylab = "CDF", main = "Control group (G=0)")
  lines(ec_01$values, ec_01$cdf, type = "s", lwd = 2, col = "coral")
  legend("bottomright", legend = c("Pre (T=0)", "Post (T=1)"),
         col = c("steelblue", "coral"), lwd = 2, bty = "n", cex = 0.8)

  # Treated group CDFs + counterfactual
  plot(ec_10$values, ec_10$cdf, type = "s", lwd = 2, col = "steelblue",
       xlim = xlim, ylim = c(0, 1),
       xlab = "Outcome", ylab = "CDF", main = "Treated group (G=1)")
  lines(ec_11$values, ec_11$cdf, type = "s", lwd = 2, col = "coral")

  # Counterfactual CDF
  if (!is.null(x$cf_values)) {
    cf_sorted <- sort(x$cf_values)
    cf_cdf <- seq_along(cf_sorted) / length(cf_sorted)
    lines(cf_sorted, cf_cdf, type = "s", lwd = 2, col = "gray40", lty = 2)
    legend("bottomright",
           legend = c("Pre (T=0)", "Post (T=1)", "Counterfactual"),
           col = c("steelblue", "coral", "gray40"), lwd = 2,
           lty = c(1, 1, 2), bty = "n", cex = 0.8)
  } else {
    legend("bottomright", legend = c("Pre (T=0)", "Post (T=1)"),
           col = c("steelblue", "coral"), lwd = 2, bty = "n", cex = 0.8)
  }

  invisible(x)
}

#' Extract Synthetic Control Weights
#'
#' Returns a data frame of donor weights from an SC-CIC fit,
#' sorted by absolute weight. Useful for inspecting which donors
#' contribute to the synthetic control.
#'
#' @param x An object of class \code{"sc_cic"}.
#' @param nonzero_only Logical. If \code{TRUE} (default), only return
#'   donors with nonzero weights.
#'
#' @return A data frame with columns \code{donor} and \code{weight}.
#'
#' @export
sc_weights <- function(x, nonzero_only = TRUE) {
  if (!inherits(x, "sc_cic")) stop("x must be an sc_cic object.")
  w <- x$sc_weights
  df <- data.frame(donor = names(w), weight = unname(w),
                   stringsAsFactors = FALSE)
  if (nonzero_only) df <- df[df$weight != 0, ]
  df <- df[order(-abs(df$weight)), ]
  rownames(df) <- NULL
  df
}

#' Compute Quantile Treatment Effects
#'
#' Estimates quantile treatment effects from a CIC fit by comparing
#' quantiles of the actual post-treatment treated distribution with
#' quantiles of the counterfactual distribution.
#'
#' @param x An object of class \code{"cic"} or \code{"sc_cic"}.
#' @param probs Numeric vector of quantiles at which to compute effects.
#'   Default is \code{seq(0.05, 0.95, 0.05)}.
#'
#' @return A data frame with columns \code{quantile}, \code{actual},
#'   \code{counterfactual}, and \code{qte} (quantile treatment effect).
#'
#' @details
#' The quantile treatment effect at quantile \eqn{q} is:
#' \deqn{\hat{\tau}_q = \hat{F}^{-1}_{Y^I,11}(q) - \hat{F}^{-1}_{Y^N,11}(q)}
#' where \eqn{\hat{F}^{-1}_{Y^N,11}} is the CIC counterfactual distribution.
#'
#' @export
quantile_te <- function(x, probs = seq(0.05, 0.95, 0.05)) {
  if (!inherits(x, "cic")) stop("x must be a cic or sc_cic object.")

  ec_11 <- x$ecdfs$ec_11
  cf <- x$cf_values
  if (is.null(cf)) stop("No counterfactual values in object.")

  # Counterfactual ecdf
  ec_cf <- make_ecdf(cf)

  actual_q <- vapply(probs, function(q) ecdf_inv(ec_11, q), numeric(1))
  cf_q <- vapply(probs, function(q) ecdf_inv(ec_cf, q), numeric(1))

  data.frame(
    quantile = probs,
    actual = actual_q,
    counterfactual = cf_q,
    qte = actual_q - cf_q
  )
}

#' Plot Quantile Treatment Effects
#'
#' @param x An object of class \code{"cic"} or \code{"sc_cic"}.
#' @param probs Numeric vector of quantiles.
#' @param ... Additional arguments passed to \code{\link[base]{plot}}.
#'
#' @return Invisible. Called for its side effect of producing a plot.
#'
#' @export
plot_qte <- function(x, probs = seq(0.05, 0.95, 0.05), ...) {
  qte_df <- quantile_te(x, probs)

  plot(qte_df$quantile, qte_df$qte, type = "b", pch = 19,
       lwd = 2, col = "steelblue",
       xlab = "Quantile", ylab = "Treatment Effect",
       main = "CIC Quantile Treatment Effects", ...)
  abline(h = 0, lty = 2, col = "red")
  abline(h = x$tau, lty = 3, col = "gray40")
  legend("topleft", legend = c("QTE", "Zero", "ATE"),
         col = c("steelblue", "red", "gray40"),
         lty = c(1, 2, 3), pch = c(19, NA, NA), lwd = 2,
         bty = "n", cex = 0.9)

  invisible(qte_df)
}

#' Sensitivity Analysis for SC-CIC
#'
#' Re-estimates the SC-CIC treatment effect over a grid of elastic net
#' penalty parameters, showing sensitivity to the regularization choice.
#'
#' @param y_treated Numeric vector. Treated unit outcomes.
#' @param y_donors Numeric matrix. Donor unit outcomes.
#' @param treatment_period Integer. First treatment period index.
#' @param alphas Numeric vector. Grid of alpha values to evaluate.
#'   Default is \code{seq(0, 1, 0.2)}.
#' @param seed Integer or \code{NULL}. Random seed.
#'
#' @return A data frame with columns \code{alpha}, \code{tau_cic},
#'   \code{tau_did}, \code{n_donors}, and \code{pre_rmse}.
#'
#' @export
sensitivity_alpha <- function(y_treated, y_donors, treatment_period,
                              alphas = seq(0, 1, 0.2), seed = 42) {
  results <- data.frame(
    alpha = alphas,
    tau_cic = numeric(length(alphas)),
    tau_did = numeric(length(alphas)),
    n_donors = integer(length(alphas)),
    pre_rmse = numeric(length(alphas))
  )

  for (i in seq_along(alphas)) {
    res <- tryCatch({
      sc_cic(y_treated, y_donors, treatment_period,
             alpha = alphas[i], boot = FALSE, seed = seed)
    }, error = function(e) NULL)

    if (!is.null(res)) {
      results$tau_cic[i] <- res$tau
      results$tau_did[i] <- res$tau_did
      results$n_donors[i] <- sum(res$sc_weights[-1] != 0)
      results$pre_rmse[i] <- res$pre_fit_rmse
    } else {
      results$tau_cic[i] <- NA
      results$tau_did[i] <- NA
      results$n_donors[i] <- NA
      results$pre_rmse[i] <- NA
    }
  }

  results
}

#' Leave-One-Out Donor Analysis
#'
#' Re-estimates SC-CIC dropping one donor at a time to assess
#' sensitivity to individual donors.
#'
#' @param y_treated Numeric vector. Treated unit outcomes.
#' @param y_donors Numeric matrix. Donor unit outcomes.
#' @param treatment_period Integer. First treatment period index.
#' @param alpha Elastic net mixing parameter.
#' @param seed Integer or \code{NULL}. Random seed.
#'
#' @return A data frame with one row per donor, showing the SC-CIC
#'   estimate when that donor is excluded.
#'
#' @export
loo_donors <- function(y_treated, y_donors, treatment_period,
                       alpha = 1, seed = 42) {
  if (!is.matrix(y_donors)) y_donors <- as.matrix(y_donors)
  J <- ncol(y_donors)
  dnames <- colnames(y_donors)
  if (is.null(dnames)) dnames <- paste0("donor", seq_len(J))

  results <- data.frame(
    excluded = dnames,
    tau_cic = numeric(J),
    tau_did = numeric(J),
    pre_rmse = numeric(J),
    stringsAsFactors = FALSE
  )

  for (j in seq_len(J)) {
    res <- tryCatch({
      sc_cic(y_treated, y_donors[, -j, drop = FALSE], treatment_period,
             alpha = alpha, boot = FALSE, seed = seed)
    }, error = function(e) NULL)

    if (!is.null(res)) {
      results$tau_cic[j] <- res$tau
      results$tau_did[j] <- res$tau_did
      results$pre_rmse[j] <- res$pre_fit_rmse
    } else {
      results$tau_cic[j] <- NA
      results$tau_did[j] <- NA
      results$pre_rmse[j] <- NA
    }
  }

  results
}

#' Check Support Condition
#'
#' Warns if the treated unit's pre-treatment outcomes fall outside the
#' range of the synthetic control's pre-treatment outcomes, which can
#' cause the CIC distributional transport to extrapolate.
#'
#' @param x An object of class \code{"sc_cic"}.
#' @return Logical. \code{TRUE} if support condition is satisfied.
#' @export
check_support <- function(x) {
  if (!inherits(x, "sc_cic") && !inherits(x, "cic")) {
    stop("x must be a cic or sc_cic object.")
  }

  ec_00 <- x$ecdfs$ec_00
  ec_10 <- x$ecdfs$ec_10

  range_00 <- range(ec_00$values)
  range_10 <- range(ec_10$values)

  ok <- range_10[1] >= range_00[1] && range_10[2] <= range_00[2]

  if (!ok) {
    warning("Support condition may be violated: ",
            sprintf("treated pre-treatment range [%.3f, %.3f] ",
                    range_10[1], range_10[2]),
            sprintf("is not contained in control range [%.3f, %.3f]. ",
                    range_00[1], range_00[2]),
            "CIC counterfactual may involve extrapolation.",
            call. = FALSE)
  }

  invisible(ok)
}

#' Q-Q Plot of Treated vs Synthetic Control (Pre-treatment)
#'
#' Produces a quantile-quantile plot comparing the pre-treatment
#' distributions of the treated unit and the synthetic control. This
#' assesses whether the synthetic control tracks the treated unit's
#' \emph{distributional} dynamics, not just its mean---a necessary
#' condition for CIC validity. Points on the 45-degree line indicate
#' identical distributions.
#'
#' @param x An object of class \code{"sc_cic"} or \code{"cic"}.
#' @param ... Additional arguments passed to \code{\link[base]{plot}}.
#'
#' @return Invisible. Called for its side effect of producing a plot.
#'
#' @export
plot_qq <- function(x, ...) {
  ec_00 <- x$ecdfs$ec_00  # control/SC pre-treatment

  ec_10 <- x$ecdfs$ec_10  # treated pre-treatment

  q_control <- sort(ec_00$raw)
  q_treated <- sort(ec_10$raw)

  # Align lengths via quantile interpolation
  n <- min(length(q_control), length(q_treated))
  probs <- seq(0, 1, length.out = n)
  qc <- stats::quantile(ec_00$raw, probs = probs)
  qt <- stats::quantile(ec_10$raw, probs = probs)

  rng <- range(c(qc, qt))
  plot(qc, qt, pch = 19, cex = 0.8, col = "steelblue",
       xlim = rng, ylim = rng,
       xlab = "Control / SC quantiles (pre-treatment)",
       ylab = "Treated quantiles (pre-treatment)",
       main = "Pre-treatment Q-Q Plot", ...)
  abline(0, 1, lty = 2, col = "red", lwd = 1.5)

  invisible(x)
}

#' Validate CIC Input Data
#'
#' Checks for common data issues and issues informative warnings.
#' Called internally by \code{\link{cic}} when input is potentially
#' problematic. Also available for manual use.
#'
#' @param y_00,y_01,y_10,y_11 Numeric vectors of outcomes.
#' @return Invisible \code{TRUE}. Produces warnings for potential issues.
#' @export
check_data <- function(y_00, y_01, y_10, y_11) {
  cells <- list(y_00 = y_00, y_01 = y_01, y_10 = y_10, y_11 = y_11)

  for (nm in names(cells)) {
    v <- cells[[nm]]
    n <- length(v)

    # Small sample warning
    if (n < 10) {
      warning(sprintf("%s has only %d observations. CIC estimates may be unreliable.", nm, n),
              call. = FALSE)
    }

    # Ties / discrete data warning
    n_unique <- length(unique(v))
    tie_frac <- 1 - n_unique / n
    if (tie_frac > 0.5) {
      warning(sprintf("%s has %.0f%% tied values (%d unique out of %d). ",
                      nm, tie_frac * 100, n_unique, n),
              "CIC is designed for continuous outcomes. ",
              "With discrete data, the point estimate is an upper bound ",
              "on the treatment effect (see Athey and Imbens 2006, Section 4).",
              call. = FALSE)
    }

    # NA check
    if (any(is.na(v))) {
      warning(sprintf("%s contains %d NA values. These will cause errors.",
                      nm, sum(is.na(v))),
              call. = FALSE)
    }
  }

  invisible(TRUE)
}

Try the sccic package in your browser

Any scripts or data that you put into this service are public.

sccic documentation built on April 10, 2026, 5:07 p.m.