R/variance.R

Defines functions compute_variance_components

Documented in compute_variance_components

#' Compute Variance Components for CIC Inference
#'
#' Implements the analytic asymptotic variance from Theorem 5.1 of
#' Athey and Imbens (2006). The variance has four components corresponding
#' to the four group-period subsamples.
#'
#' @param y_00,y_01,y_10,y_11 Numeric vectors of outcomes for each
#'   group-period combination (raw observations, not unique values).
#' @param ecdfs List of empirical CDF objects from \code{make_ecdf}.
#'
#' @return A list with components \code{V_p}, \code{V_q}, \code{V_r},
#'   \code{V_s}.
#' @keywords internal
compute_variance_components <- function(y_00, y_01, y_10, y_11, ecdfs) {
  ec_00 <- ecdfs$ec_00
  ec_01 <- ecdfs$ec_01
  ec_10 <- ecdfs$ec_10
  ec_11 <- ecdfs$ec_11

  N_00 <- length(y_00); N_01 <- length(y_01)
  N_10 <- length(y_10); N_11 <- length(y_11)

  # --- Influence function p(y, z) ---
  # p(y, z) = [1/f_{01}(F^{-1}_{01}(F_{00}(z)))] * (1{y <= z} - F_{00}(z))
  # p_1(y) = E[p(y, Y_{10})] computed over unique Y_{10} values weighted by mass

  p_func <- function(y, z) {
    q <- ecdf_eval(ec_00, z)
    inv_val <- ecdf_inv(ec_01, q)
    f_val <- ecdf_density(ec_01, inv_val)
    if (f_val < 1e-10) return(0)
    (1 / f_val) * (as.numeric(y <= z) - q)
  }

  # E[p(y, Y_{10})] = sum over unique y_10 values weighted by density
  p_expectation <- function(y) {
    total <- 0
    for (j in seq_along(ec_10$values)) {
      total <- total + p_func(y, ec_10$values[j]) * ecdf_density(ec_10, ec_10$values[j])
    }
    total
  }

  # V^p = E[E[p(Y_{00}, Y_{10}) | Y_{00}]^2]
  V_p <- mean(vapply(y_00, function(yi) p_expectation(yi)^2, numeric(1)))

  # --- Influence function q(y, z) ---
  # q(y, z) = [1/f_{01}(F^{-1}_{01}(F_{00}(z)))] * (1{F_{01}(y) <= F_{00}(z)} - F_{00}(z))
  q_func <- function(y, z) {
    q_00z <- ecdf_eval(ec_00, z)
    inv_val <- ecdf_inv(ec_01, q_00z)
    f_val <- ecdf_density(ec_01, inv_val)
    if (f_val < 1e-10) return(0)
    (1 / f_val) * (as.numeric(ecdf_eval(ec_01, y) <= q_00z) - q_00z)
  }

  q_expectation <- function(y) {
    total <- 0
    for (j in seq_along(ec_10$values)) {
      total <- total + q_func(y, ec_10$values[j]) * ecdf_density(ec_10, ec_10$values[j])
    }
    total
  }

  V_q <- mean(vapply(y_01, function(yi) q_expectation(yi)^2, numeric(1)))

  # --- Influence function r(y) ---
  # r(y) = F^{-1}_{01}(F_{00}(y)) - E[F^{-1}_{01}(F_{00}(Y_{10}))]
  cf_mean <- 0
  for (j in seq_along(ec_10$values)) {
    cf_mean <- cf_mean +
      ecdf_inv(ec_01, ecdf_eval(ec_00, ec_10$values[j])) *
      ecdf_density(ec_10, ec_10$values[j])
  }

  r_func <- function(y) {
    ecdf_inv(ec_01, ecdf_eval(ec_00, y)) - cf_mean
  }

  V_r <- mean(vapply(y_10, function(yi) r_func(yi)^2, numeric(1)))

  # --- Influence function s(y) ---
  # s(y) = y - E[Y_{11}]
  mean_11 <- mean(y_11)
  V_s <- mean((y_11 - mean_11)^2)

  list(V_p = V_p, V_q = V_q, V_r = V_r, V_s = V_s)
}

Try the sccic package in your browser

Any scripts or data that you put into this service are public.

sccic documentation built on April 10, 2026, 5:07 p.m.