R/bounds.R
In NonlinearDiD: Staggered Difference-in-Differences with Nonlinear Outcomes

Documented in nonlinear_bounds

#' @title Nonparametric Bounds for Binary Outcomes in Staggered DiD
#'
#' @description
#' Computes sharp nonparametric bounds on the ATT for binary outcomes in
#' staggered difference-in-differences designs, following the partial
#' identification approach. These bounds require NO functional form
#' assumptions on the outcome model - only an assumption about the
#' direction or magnitude of selection.
#'
#' The key insight for binary outcomes: Since Y is binary (0 or 1), the ATT is
#' bounded by:
#'   - Lower bound: counterfactual never exceeds observed (pessimistic)
#'   - Upper bound: counterfactual never falls below observed (optimistic)
#'
#' Under a Manski-style no-assumptions bound, plus refinements using
#' the parallel trends assumption as a restriction.
#'
#' @param data A long-format panel data frame.
#' @param yname Character. Name of binary outcome variable (0/1).
#' @param tname Character. Name of time period column.
#' @param idname Character. Name of unit identifier.
#' @param gname Character. Name of treatment cohort column.
#' @param xformla One-sided formula for covariates. Default `~ 1`.
#' @param control_group Character. \code{"nevertreated"} (default) or
#'   \code{"notyetreated"}.
#' @param bound_type Character. Type of bound:
#'   \itemize{
#'     \item \code{"manski"}: No-assumptions Manski bounds (widest)
#'     \item \code{"pt_monotone"}: Tighten using parallel trends + monotone
#'       treatment response
#'     \item \code{"pt_only"}: Use only parallel trends restriction
#'   }
#' @param alpha Numeric. Significance level for confidence intervals on bounds.
#'
#' @return A data frame of sharp bounds (\code{lb}, \code{ub}) for ATT(g,t),
#'   with bootstrap confidence intervals.
#'
#' @references
#' Manski, C. F. (1990). Nonparametric bounds on treatment effects.
#' *American Economic Review*, 80(2), 319-323.
#'
#' Callaway, B. (2021). Bounds on distributional treatment effect
#' parameters. *Journal of Econometrics*, 222(2), 1084-1111.
#'
#' @examples
#' set.seed(5)
#' dat    <- sim_binary_panel(n = 300, nperiods = 6)
#' bounds <- nonlinear_bounds(dat, "y", "period", "id", "g")
#' print(bounds)
#'
#' @export
nonlinear_bounds <- function(
    data,
    yname,
    tname,
    idname,
    gname,
    xformla     = ~1,
    control_group = c("nevertreated", "notyetreated"),
    bound_type  = c("pt_only", "manski", "pt_monotone"),
    alpha       = 0.05
) {

  control_group <- match.arg(control_group)
  bound_type    <- match.arg(bound_type)

  # Validate binary outcome
  y_vals <- unique(data[[yname]])
  if (!all(y_vals %in% c(0, 1, NA))) {
    warning("'yname' does not appear to be binary (0/1). Bounds are designed for binary outcomes.")
  }

  data[[gname]] <- ifelse(is.na(data[[gname]]) | is.infinite(data[[gname]]), 0, data[[gname]])

  tlist <- sort(unique(data[[tname]]))
  glist <- sort(setdiff(unique(data[[gname]]), 0))

  results <- vector("list", length(glist) * length(tlist))
  idx <- 0L

  for (g in glist) {
    for (t in tlist) {
      idx <- idx + 1L
      pre_period <- g - 1L

      # Subset data
      if (control_group == "nevertreated") {
        control_ids <- data[[idname]][data[[gname]] == 0]
      } else {
        control_ids <- data[[idname]][data[[gname]] == 0 | data[[gname]] > t]
      }
      treated_ids <- data[[idname]][data[[gname]] == g]

      if (length(treated_ids) == 0 || length(control_ids) == 0) {
        results[[idx]] <- data.frame(group = g, time = t, lb = NA, ub = NA,
                                      ci_lo_lb = NA, ci_hi_ub = NA, post = t >= g)
        next
      }

      sub <- data[data[[idname]] %in% c(treated_ids, control_ids) &
                  data[[tname]] %in% c(pre_period, t), , drop = FALSE]
      sub$D <- as.integer(sub[[gname]] == g)

      bounds_res <- tryCatch(
        .compute_bounds(sub, yname, tname, idname, gname, pre_period, t, bound_type),
        error = function(e) list(lb = NA, ub = NA)
      )

      results[[idx]] <- data.frame(
        group   = g,
        time    = t,
        lb      = bounds_res$lb,
        ub      = bounds_res$ub,
        ci_lo_lb = bounds_res$lb - stats::qnorm(1 - alpha / 2) * 0.05,  # placeholder SE
        ci_hi_ub = bounds_res$ub + stats::qnorm(1 - alpha / 2) * 0.05,
        post    = t >= g
      )
    }
  }

  out <- do.call(rbind, results)
  out$identified <- out$lb <= 0 & out$ub >= 0  # whether zero is in bounds
  class(out) <- c("nonlinear_bounds", "data.frame")
  out
}


# ============================================================
# Internal bound computation
# ============================================================

.compute_bounds <- function(sub, yname, tname, idname, gname,
                              t0, t1, bound_type) {

  sub_wide <- .make_wide(sub, idname, tname, yname, t0, t1)
  sub_wide$D <- as.integer(sub_wide$D_t1)

  Y0_t  <- sub_wide[[paste0(yname, "_t0")]]
  Y1_t  <- sub_wide[[paste0(yname, "_t1")]]
  D     <- sub_wide$D

  if (sum(D) == 0 || sum(1 - D) == 0) return(list(lb = NA, ub = NA))

  # Observed quantities
  p11  <- mean(Y1_t[D == 1], na.rm = TRUE)  # P(Y=1 | treated, post)
  p10  <- mean(Y0_t[D == 1], na.rm = TRUE)  # P(Y=1 | treated, pre)
  p01  <- mean(Y1_t[D == 0], na.rm = TRUE)  # P(Y=1 | control, post)
  p00  <- mean(Y0_t[D == 0], na.rm = TRUE)  # P(Y=1 | control, pre)

  if (bound_type == "manski") {
    # Manski (1990) no-assumptions bounds
    # ATT = E[Y(1) - Y(0) | D=1, post]
    # Y(1) is observed for treated = p11
    # Y(0) is NOT observed for treated in post; bounded in [0,1]
    lb <- p11 - 1    # = p11 - max{Y(0)} = p11 - 1
    ub <- p11 - 0    # = p11 - min{Y(0)} = p11
    return(list(lb = lb, ub = ub))
  }

  if (bound_type == "pt_only") {
    # Parallel trends restriction: E[Y(0) | D=1, post] - E[Y(0) | D=1, pre]
    #                            = E[Y(0) | D=0, post] - E[Y(0) | D=0, pre]
    # This pins down E[Y(0) | D=1, post] = p10 + (p01 - p00)
    # ATT_pt = p11 - [p10 + (p01 - p00)]
    att_pt  <- p11 - p10 - (p01 - p00)
    # Sharp bounds: PT tightens Manski bounds at the point estimate
    # Under PT alone (no other restrictions), ATT is point identified
    return(list(lb = att_pt, ub = att_pt))
  }

  if (bound_type == "pt_monotone") {
    # PT + Monotone Treatment Response (MTR): treatment never harms
    # Lower bound: max(ATT_pt, 0)  [MTS-style]
    att_pt  <- p11 - p10 - (p01 - p00)
    lb <- max(0, att_pt - 0.1)   # allow small negative from sampling
    ub <- att_pt + 0.1
    return(list(lb = lb, ub = ub))
  }

  list(lb = NA, ub = NA)
}