R/matching_sensitivity.R

Defines functions plot.sensitivity_analysis print.summary.sensitivity_analysis summary.sensitivity_analysis print.sensitivity_analysis .rosenbaum_bounds sensitivity_analysis

Documented in plot.sensitivity_analysis print.sensitivity_analysis .rosenbaum_bounds sensitivity_analysis summary.sensitivity_analysis

# ==============================================================================
# Rosenbaum Sensitivity Analysis for Matched Pairs
# ==============================================================================

#' Rosenbaum Sensitivity Analysis
#'
#' Assesses how sensitive a matched comparison is to hidden bias using
#' Rosenbaum bounds on the Wilcoxon signed-rank statistic.
#'
#' @param result A matching_result object from [match_couples()] or
#'   [greedy_couples()]
#' @param left Original left (treated) dataset
#' @param right Original right (control) dataset
#' @param outcome_var Name of the outcome column in `left` and `right`
#' @param gamma Numeric vector of sensitivity parameters
#'   (default: seq(1, 3, by = 0.25)). Gamma = 1 means no hidden bias.
#' @param alternative Direction of the test: "greater" (default), "less",
#'   or "two.sided"
#' @param left_id Name of ID column in left (default: "id")
#' @param right_id Name of ID column in right (default: "id")
#'
#' @return An S3 object of class `sensitivity_analysis` containing:
#' \describe{
#'   \item{results}{Tibble with columns: gamma, t_stat, p_upper, p_lower}
#'   \item{n_pairs}{Number of matched pairs analyzed}
#'   \item{critical_gamma}{Smallest gamma at which p_upper > 0.05}
#'   \item{alternative}{Direction of test}
#' }
#'
#' @details
#' Rosenbaum (2002, Chapter 4) bounds quantify how much hidden bias (an
#' unobserved confounder) would be needed to explain away the observed
#' treatment effect. The sensitivity parameter Gamma represents the maximum
#' ratio of treatment odds between two matched units:
#'
#' - Gamma = 1: No hidden bias (standard Wilcoxon test)
#' - Gamma = 2: One unit could be twice as likely to receive treatment
#'   due to an unobserved factor
#'
#' The function computes upper and lower bounds on the p-value of the
#' Wilcoxon signed-rank test under each level of hidden bias. A finding is
#' "insensitive to bias" if p_upper remains below 0.05 even at large Gamma.
#'
#' @references
#' Rosenbaum, P.R. (2002). Observational Studies, 2nd edition. Springer.
#'
#' @examples
#' set.seed(42)
#' left <- data.frame(id = 1:20, x = rnorm(20), outcome = rnorm(20, 1, 1))
#' right <- data.frame(id = 21:40, x = rnorm(20), outcome = rnorm(20, 0, 1))
#' result <- match_couples(left, right, vars = "x")
#' sens <- sensitivity_analysis(result, left, right, outcome_var = "outcome")
#' print(sens)
#'
#' @export
sensitivity_analysis <- function(result,
                                 left,
                                 right,
                                 outcome_var,
                                 gamma = seq(1, 3, by = 0.25),
                                 alternative = c("greater", "less",
                                                 "two.sided"),
                                 left_id = "id",
                                 right_id = "id") {

  alternative <- match.arg(alternative)

  # --- Validation ---
  if (!inherits(result, "matching_result")) {
    stop("result must be a matching_result object", call. = FALSE)
  }
  if (!is.character(outcome_var) || length(outcome_var) != 1) {
    stop("outcome_var must be a single variable name", call. = FALSE)
  }
  if (!(outcome_var %in% names(left))) {
    stop(sprintf("outcome_var '%s' not found in left", outcome_var),
         call. = FALSE)
  }
  if (!(outcome_var %in% names(right))) {
    stop(sprintf("outcome_var '%s' not found in right", outcome_var),
         call. = FALSE)
  }
  if (!is.numeric(gamma) || any(gamma < 1)) {
    stop("gamma must be numeric with all values >= 1", call. = FALSE)
  }

  # --- Extract matched pair outcomes ---
  pairs <- result$pairs
  if (nrow(pairs) == 0) {
    stop("No matched pairs to analyze", call. = FALSE)
  }

  # Merge outcomes
  left_outcomes <- merge(
    data.frame(left_id = pairs$left_id, stringsAsFactors = FALSE),
    left[, c(left_id, outcome_var), drop = FALSE],
    by.x = "left_id", by.y = left_id, all.x = TRUE
  )[[outcome_var]]

  right_outcomes <- merge(
    data.frame(right_id = pairs$right_id, stringsAsFactors = FALSE),
    right[, c(right_id, outcome_var), drop = FALSE],
    by.x = "right_id", by.y = right_id, all.x = TRUE
  )[[outcome_var]]

  # Pair differences
  d <- left_outcomes - right_outcomes
  n <- length(d)

  # Remove zero differences (standard Wilcoxon convention)
  nonzero <- d != 0
  d <- d[nonzero]
  n <- length(d)

  if (n == 0) {
    stop("All pair differences are zero; cannot compute sensitivity bounds",
         call. = FALSE)
  }

  # Ranks of absolute differences
  ranks <- rank(abs(d))

  # Observed test statistic: sum of ranks where d > 0
  t_obs <- sum(ranks[d > 0])

  # --- Compute bounds for each gamma ---
  results <- tibble::tibble(
    gamma = numeric(length(gamma)),
    t_stat = numeric(length(gamma)),
    p_upper = numeric(length(gamma)),
    p_lower = numeric(length(gamma))
  )

  for (i in seq_along(gamma)) {
    g <- gamma[i]
    bounds <- .rosenbaum_bounds(t_obs, ranks, n, g, alternative)
    results$gamma[i] <- g
    results$t_stat[i] <- t_obs
    results$p_upper[i] <- bounds$p_upper
    results$p_lower[i] <- bounds$p_lower
  }

  # Critical gamma: smallest where p_upper > 0.05
  sig_idx <- which(results$p_upper > 0.05)
  critical_gamma <- if (length(sig_idx) > 0) {
    results$gamma[sig_idx[1]]
  } else {
    Inf  # Still significant at all tested gamma
  }

  out <- list(
    results = results,
    n_pairs = n,
    t_observed = t_obs,
    critical_gamma = critical_gamma,
    alternative = alternative
  )
  class(out) <- "sensitivity_analysis"
  out
}

#' Compute Rosenbaum bounds via normal approximation
#'
#' Under hidden bias Gamma, each pair's probability of the treated unit
#' having the larger value is between 1/(1+Gamma) and Gamma/(1+Gamma).
#'
#' @return List with p_upper and p_lower.
#' @keywords internal
.rosenbaum_bounds <- function(t_obs, ranks, n, gamma, alternative) {
  p_plus <- gamma / (1 + gamma)   # Upper bound on treatment probability
  p_minus <- 1 / (1 + gamma)     # Lower bound on treatment probability

  # Expected value and variance of T+ under p
  # E[T+] = sum(ranks * p), Var[T+] = sum(ranks^2 * p * (1-p))
  e_upper <- sum(ranks * p_plus)
  var_upper <- sum(ranks^2 * p_plus * (1 - p_plus))

  e_lower <- sum(ranks * p_minus)
  var_lower <- sum(ranks^2 * p_minus * (1 - p_minus))

  sd_upper <- sqrt(var_upper)
  sd_lower <- sqrt(var_lower)

  # Continuity-corrected normal approximation
  switch(alternative,
    greater = {
      z_upper <- (t_obs - 0.5 - e_upper) / sd_upper
      z_lower <- (t_obs - 0.5 - e_lower) / sd_lower
      p_upper <- stats::pnorm(z_upper, lower.tail = FALSE)
      p_lower <- stats::pnorm(z_lower, lower.tail = FALSE)
    },
    less = {
      z_upper <- (t_obs + 0.5 - e_lower) / sd_lower
      z_lower <- (t_obs + 0.5 - e_upper) / sd_upper
      p_upper <- stats::pnorm(z_upper, lower.tail = TRUE)
      p_lower <- stats::pnorm(z_lower, lower.tail = TRUE)
    },
    two.sided = {
      # Two-sided: use the more conservative bound
      z_upper_right <- (t_obs - 0.5 - e_upper) / sd_upper
      z_lower_left <- (t_obs + 0.5 - e_lower) / sd_lower

      p_upper <- 2 * min(
        stats::pnorm(z_upper_right, lower.tail = FALSE),
        stats::pnorm(z_lower_left, lower.tail = TRUE)
      )
      p_upper <- min(p_upper, 1)

      z_lower_right <- (t_obs - 0.5 - e_lower) / sd_lower
      z_upper_left <- (t_obs + 0.5 - e_upper) / sd_upper

      p_lower <- 2 * min(
        stats::pnorm(z_lower_right, lower.tail = FALSE),
        stats::pnorm(z_upper_left, lower.tail = TRUE)
      )
      p_lower <- min(p_lower, 1)
    }
  )

  list(p_upper = p_upper, p_lower = p_lower)
}

#' Print method for sensitivity analysis
#'
#' @param x A sensitivity_analysis object
#' @param ... Additional arguments (ignored)
#'
#' @return Invisibly returns the input object `x`.
#' @export
#' @method print sensitivity_analysis
print.sensitivity_analysis <- function(x, ...) {
  cat("Rosenbaum Sensitivity Analysis\n")
  cat("==============================\n\n")

  cat("Matched pairs:", x$n_pairs, "\n")
  cat("Alternative:", x$alternative, "\n")
  cat("Test statistic (T+):", sprintf("%.1f", x$t_observed), "\n\n")

  cat("Gamma    p (upper bound)\n")
  cat("-----    ---------------\n")
  for (i in seq_len(nrow(x$results))) {
    sig <- if (x$results$p_upper[i] < 0.05) " *" else ""
    cat(sprintf("%-8.2f %.4f%s\n",
                x$results$gamma[i], x$results$p_upper[i], sig))
  }

  cat("\n* significant at alpha = 0.05\n")

  if (is.finite(x$critical_gamma)) {
    cat(sprintf(
      "\nInsensitive to hidden bias up to Gamma = %.2f\n",
      x$critical_gamma - 0.25  # Last significant gamma
    ))
    cat(sprintf(
      "To explain away the effect, hidden bias would need Gamma >= %.2f\n",
      x$critical_gamma
    ))
  } else {
    cat("\nResult is insensitive to bias at all tested Gamma values\n")
  }

  invisible(x)
}

#' Summary method for sensitivity analysis
#'
#' @param object A sensitivity_analysis object
#' @param ... Additional arguments (ignored)
#'
#' @return A list containing summary statistics (invisibly)
#' @export
#' @method summary sensitivity_analysis
summary.sensitivity_analysis <- function(object, ...) {
  out <- list(
    n_pairs = object$n_pairs,
    t_observed = object$t_observed,
    critical_gamma = object$critical_gamma,
    alternative = object$alternative,
    p_at_gamma_1 = object$results$p_upper[object$results$gamma == 1],
    max_gamma_tested = max(object$results$gamma)
  )
  class(out) <- "summary.sensitivity_analysis"
  out
}

#' @export
print.summary.sensitivity_analysis <- function(x, ...) {
  cat("Sensitivity Analysis Summary\n")
  cat("============================\n\n")
  cat("Matched pairs:", x$n_pairs, "\n")
  cat("Alternative:", x$alternative, "\n")
  if (length(x$p_at_gamma_1) > 0) {
    cat("p-value (no hidden bias):", sprintf("%.4f", x$p_at_gamma_1), "\n")
  }
  cat("Critical Gamma:", if (is.finite(x$critical_gamma)) {
    sprintf("%.2f", x$critical_gamma)
  } else {
    paste0("> ", x$max_gamma_tested)
  }, "\n")

  invisible(x)
}

#' Plot method for sensitivity analysis (base graphics)
#'
#' @param x A sensitivity_analysis object
#' @param alpha Significance level (default: 0.05)
#' @param ... Additional arguments passed to plot
#'
#' @return The sensitivity_analysis object (invisibly)
#' @export
#' @method plot sensitivity_analysis
plot.sensitivity_analysis <- function(x, alpha = 0.05, ...) {
  df <- x$results

  graphics::plot(df$gamma, df$p_upper,
                 type = "b", pch = 19, col = "steelblue",
                 xlab = expression(Gamma),
                 ylab = "Upper bound p-value",
                 main = "Rosenbaum Sensitivity Analysis",
                 ...)
  graphics::abline(h = alpha, lty = 2, col = "firebrick")

  invisible(x)
}

Try the couplr package in your browser

Any scripts or data that you put into this service are public.

couplr documentation built on March 3, 2026, 5:08 p.m.