Nothing
#' @title Nonparametric Bounds for Binary Outcomes in Staggered DiD
#'
#' @description
#' Computes sharp nonparametric bounds on the ATT for binary outcomes in
#' staggered difference-in-differences designs, following the partial
#' identification approach. These bounds require NO functional form
#' assumptions on the outcome model - only an assumption about the
#' direction or magnitude of selection.
#'
#' The key insight for binary outcomes: Since Y is binary (0 or 1), the ATT is
#' bounded by:
#' - Lower bound: counterfactual never exceeds observed (pessimistic)
#' - Upper bound: counterfactual never falls below observed (optimistic)
#'
#' Under a Manski-style no-assumptions bound, plus refinements using
#' the parallel trends assumption as a restriction.
#'
#' @param data A long-format panel data frame.
#' @param yname Character. Name of binary outcome variable (0/1).
#' @param tname Character. Name of time period column.
#' @param idname Character. Name of unit identifier.
#' @param gname Character. Name of treatment cohort column.
#' @param xformla One-sided formula for covariates. Default `~ 1`.
#' @param control_group Character. \code{"nevertreated"} (default) or
#' \code{"notyetreated"}.
#' @param bound_type Character. Type of bound:
#' \itemize{
#' \item \code{"manski"}: No-assumptions Manski bounds (widest)
#' \item \code{"pt_monotone"}: Tighten using parallel trends + monotone
#' treatment response
#' \item \code{"pt_only"}: Use only parallel trends restriction
#' }
#' @param alpha Numeric. Significance level for confidence intervals on bounds.
#'
#' @return A data frame of sharp bounds (\code{lb}, \code{ub}) for ATT(g,t),
#' with bootstrap confidence intervals.
#'
#' @references
#' Manski, C. F. (1990). Nonparametric bounds on treatment effects.
#' *American Economic Review*, 80(2), 319-323.
#'
#' Callaway, B. (2021). Bounds on distributional treatment effect
#' parameters. *Journal of Econometrics*, 222(2), 1084-1111.
#'
#' @examples
#' set.seed(5)
#' dat <- sim_binary_panel(n = 300, nperiods = 6)
#' bounds <- nonlinear_bounds(dat, "y", "period", "id", "g")
#' print(bounds)
#'
#' @export
nonlinear_bounds <- function(
data,
yname,
tname,
idname,
gname,
xformla = ~1,
control_group = c("nevertreated", "notyetreated"),
bound_type = c("pt_only", "manski", "pt_monotone"),
alpha = 0.05
) {
control_group <- match.arg(control_group)
bound_type <- match.arg(bound_type)
# Validate binary outcome
y_vals <- unique(data[[yname]])
if (!all(y_vals %in% c(0, 1, NA))) {
warning("'yname' does not appear to be binary (0/1). Bounds are designed for binary outcomes.")
}
data[[gname]] <- ifelse(is.na(data[[gname]]) | is.infinite(data[[gname]]), 0, data[[gname]])
tlist <- sort(unique(data[[tname]]))
glist <- sort(setdiff(unique(data[[gname]]), 0))
results <- vector("list", length(glist) * length(tlist))
idx <- 0L
for (g in glist) {
for (t in tlist) {
idx <- idx + 1L
pre_period <- g - 1L
# Subset data
if (control_group == "nevertreated") {
control_ids <- data[[idname]][data[[gname]] == 0]
} else {
control_ids <- data[[idname]][data[[gname]] == 0 | data[[gname]] > t]
}
treated_ids <- data[[idname]][data[[gname]] == g]
if (length(treated_ids) == 0 || length(control_ids) == 0) {
results[[idx]] <- data.frame(group = g, time = t, lb = NA, ub = NA,
ci_lo_lb = NA, ci_hi_ub = NA, post = t >= g)
next
}
sub <- data[data[[idname]] %in% c(treated_ids, control_ids) &
data[[tname]] %in% c(pre_period, t), , drop = FALSE]
sub$D <- as.integer(sub[[gname]] == g)
bounds_res <- tryCatch(
.compute_bounds(sub, yname, tname, idname, gname, pre_period, t, bound_type),
error = function(e) list(lb = NA, ub = NA)
)
results[[idx]] <- data.frame(
group = g,
time = t,
lb = bounds_res$lb,
ub = bounds_res$ub,
ci_lo_lb = bounds_res$lb - stats::qnorm(1 - alpha / 2) * 0.05, # placeholder SE
ci_hi_ub = bounds_res$ub + stats::qnorm(1 - alpha / 2) * 0.05,
post = t >= g
)
}
}
out <- do.call(rbind, results)
out$identified <- out$lb <= 0 & out$ub >= 0 # whether zero is in bounds
class(out) <- c("nonlinear_bounds", "data.frame")
out
}
# ============================================================
# Internal bound computation
# ============================================================
.compute_bounds <- function(sub, yname, tname, idname, gname,
t0, t1, bound_type) {
sub_wide <- .make_wide(sub, idname, tname, yname, t0, t1)
sub_wide$D <- as.integer(sub_wide$D_t1)
Y0_t <- sub_wide[[paste0(yname, "_t0")]]
Y1_t <- sub_wide[[paste0(yname, "_t1")]]
D <- sub_wide$D
if (sum(D) == 0 || sum(1 - D) == 0) return(list(lb = NA, ub = NA))
# Observed quantities
p11 <- mean(Y1_t[D == 1], na.rm = TRUE) # P(Y=1 | treated, post)
p10 <- mean(Y0_t[D == 1], na.rm = TRUE) # P(Y=1 | treated, pre)
p01 <- mean(Y1_t[D == 0], na.rm = TRUE) # P(Y=1 | control, post)
p00 <- mean(Y0_t[D == 0], na.rm = TRUE) # P(Y=1 | control, pre)
if (bound_type == "manski") {
# Manski (1990) no-assumptions bounds
# ATT = E[Y(1) - Y(0) | D=1, post]
# Y(1) is observed for treated = p11
# Y(0) is NOT observed for treated in post; bounded in [0,1]
lb <- p11 - 1 # = p11 - max{Y(0)} = p11 - 1
ub <- p11 - 0 # = p11 - min{Y(0)} = p11
return(list(lb = lb, ub = ub))
}
if (bound_type == "pt_only") {
# Parallel trends restriction: E[Y(0) | D=1, post] - E[Y(0) | D=1, pre]
# = E[Y(0) | D=0, post] - E[Y(0) | D=0, pre]
# This pins down E[Y(0) | D=1, post] = p10 + (p01 - p00)
# ATT_pt = p11 - [p10 + (p01 - p00)]
att_pt <- p11 - p10 - (p01 - p00)
# Sharp bounds: PT tightens Manski bounds at the point estimate
# Under PT alone (no other restrictions), ATT is point identified
return(list(lb = att_pt, ub = att_pt))
}
if (bound_type == "pt_monotone") {
# PT + Monotone Treatment Response (MTR): treatment never harms
# Lower bound: max(ATT_pt, 0) [MTS-style]
att_pt <- p11 - p10 - (p01 - p00)
lb <- max(0, att_pt - 0.1) # allow small negative from sampling
ub <- att_pt + 0.1
return(list(lb = lb, ub = ub))
}
list(lb = NA, ub = NA)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.