R/ecdf_utils.R

Defines functions ecdf_density compute_cic_cf integral_quantile ecdf_eval_left ecdf_inv ecdf_eval make_ecdf

Documented in compute_cic_cf ecdf_density ecdf_eval ecdf_eval_left ecdf_inv integral_quantile make_ecdf

#' Empirical CDF and Inverse CDF Utilities
#'
#' Internal functions for computing empirical distribution functions
#' and their inverses, used by the CIC estimator.
#'
#' @name ecdf_utils
#' @keywords internal
NULL

#' Compute empirical CDF from a sample
#'
#' Returns sorted unique values and their empirical CDF values.
#'
#' @param x Numeric vector of observations.
#' @return A list with components:
#'   \item{values}{Sorted unique values of \code{x}.}
#'   \item{cdf}{Empirical CDF evaluated at each unique value.}
#'   \item{n}{Total number of observations (including duplicates).}
#'   \item{raw}{The original (unsorted) observations.}
#' @keywords internal
make_ecdf <- function(x) {
  raw <- x
  n <- length(x)
  values <- sort(unique(x))
  cdf <- vapply(values, function(v) sum(x <= v) / n, numeric(1))
  list(values = values, cdf = cdf, n = n, raw = raw)
}

#' Evaluate empirical CDF at a point
#'
#' @param ec An ecdf object from \code{make_ecdf}.
#' @param y The point at which to evaluate the CDF.
#' @return The empirical CDF value F(y).
#' @keywords internal
ecdf_eval <- function(ec, y) {
  idx <- sum(ec$values <= y)
  if (idx == 0L) return(0)
  ec$cdf[idx]
}

#' Evaluate inverse empirical CDF (quantile function)
#'
#' Computes \eqn{F^{-1}(q) = \inf\{y : F(y) \ge q\}}.
#'
#' @param ec An ecdf object from \code{make_ecdf}.
#' @param q A quantile in [0, 1].
#' @return The smallest value y such that F(y) >= q.
#' @keywords internal
ecdf_inv <- function(ec, q) {
  if (q <= 0) return(ec$values[1])
  idx <- match(TRUE, ec$cdf >= q)
  if (is.na(idx)) return(ec$values[length(ec$values)])
  ec$values[idx]
}

#' Left limit of empirical CDF
#'
#' Returns \eqn{F(x^-) = \Pr(Y < x)}, the strict left limit of the CDF at x.
#' Used by the discrete CIC estimator.
#'
#' @param ec An ecdf object from \code{make_ecdf}.
#' @param x The point at which to evaluate the left limit.
#' @return \eqn{\Pr(Y < x)}.
#' @keywords internal
ecdf_eval_left <- function(ec, x) {
  idx <- sum(ec$values < x)
  if (idx == 0L) return(0)
  ec$cdf[idx]
}

#' Integral of empirical quantile function
#'
#' Computes \eqn{\int_a^b Q(u) \, du} where \eqn{Q} is the empirical
#' quantile function of \code{ec}, i.e., \eqn{Q(u) = \inf\{y : F(y) \ge u\}}.
#' Used by the discrete CIC estimator to average the counterfactual
#' over a quantile band.
#'
#' @param ec An ecdf object from \code{make_ecdf}.
#' @param a Lower integration limit in \eqn{[0, 1]}.
#' @param b Upper integration limit in \eqn{[0, 1]}.
#' @return Numeric scalar equal to the integral.
#' @keywords internal
integral_quantile <- function(ec, a, b) {
  if (b <= a) return(0)
  vals <- ec$values
  cdfs <- ec$cdf
  k    <- length(vals)
  result <- 0
  for (j in seq_len(k)) {
    left_j  <- if (j == 1L) 0 else cdfs[j - 1L]
    right_j <- cdfs[j]
    overlap <- max(0, min(right_j, b) - max(left_j, a))
    result  <- result + vals[j] * overlap
  }
  result
}

#' Compute CIC counterfactual values
#'
#' Applies the CIC mapping to each element of \code{y_10}: either the
#' continuous formula (Theorem~3.1) or the discrete formula (Theorem~4.1)
#' of Athey and Imbens (2006).
#'
#' For the continuous case the counterfactual is
#' \eqn{F^{-1}_{01}(F_{00}(y))}. For the discrete case each observation
#' at value \eqn{x} receives the mean of \eqn{Q_{01}(U)} for
#' \eqn{U \sim \mathrm{Uniform}[F_{00}(x^-), F_{00}(x)]}, i.e.,
#' \eqn{\int_{F_{00}(x^-)}^{F_{00}(x)} Q_{01}(u)\,du \;/\; \Delta_x}
#' where \eqn{\Delta_x = F_{00}(x) - F_{00}(x^-)} is the probability mass
#' of \eqn{Y_{00}} at \eqn{x}.
#'
#' @param y_10 Numeric vector. Treated group, pre-treatment outcomes.
#' @param ec_00,ec_01 Empirical CDF objects from \code{make_ecdf}.
#' @param discrete Logical. If \code{TRUE}, use the discrete CIC formula
#'   (Theorem~4.1). Default \code{FALSE}.
#' @return Numeric vector of counterfactual values, one per element of
#'   \code{y_10}.
#' @keywords internal
compute_cic_cf <- function(y_10, ec_00, ec_01, discrete = FALSE) {
  if (!discrete) {
    return(vapply(y_10, function(yi)
      ecdf_inv(ec_01, ecdf_eval(ec_00, yi)), numeric(1)))
  }
  vapply(y_10, function(x) {
    f_left  <- ecdf_eval_left(ec_00, x)
    f_right <- ecdf_eval(ec_00, x)
    delta   <- f_right - f_left
    if (delta < 1e-14) {
      # x is not an atom of Y_00: use continuous fallback
      ecdf_inv(ec_01, f_right)
    } else {
      integral_quantile(ec_01, f_left, f_right) / delta
    }
  }, numeric(1))
}

#' Estimate density at a point using finite differences
#'
#' Uses the approach from Athey and Imbens (2006), Section 5.
#'
#' @param ec An ecdf object from \code{make_ecdf}.
#' @param y The point at which to evaluate the density.
#' @param bandwidth Bandwidth for finite differences. Default uses \eqn{n^{-1/3}}
#'   as recommended in Athey and Imbens (2006).
#' @return Estimated density f(y).
#' @keywords internal
ecdf_density <- function(ec, y, bandwidth = NULL) {
  if (is.null(bandwidth)) bandwidth <- ec$n^(-1/3)
  midpoint <- (min(ec$values) + max(ec$values)) / 2

  if (y %in% ec$values) {
    pos <- match(y, ec$values)
    if (pos == 1L) {
      return(ec$cdf[1])
    } else {
      return(ec$cdf[pos] - ec$cdf[pos - 1L])
    }
  }

  if (y <= midpoint) {
    (ecdf_eval(ec, y + bandwidth) - ecdf_eval(ec, y)) / bandwidth
  } else {
    (ecdf_eval(ec, y) - ecdf_eval(ec, y - bandwidth)) / bandwidth
  }
}

Try the sccic package in your browser

Any scripts or data that you put into this service are public.

sccic documentation built on April 10, 2026, 5:07 p.m.