test-error-handling.R
In did: Treatment Effects with Multiple Periods and Groups

# =============================================================================
# Tests for stop(), warning(), and message() conditions
# =============================================================================

# Shared setup
set.seed(20260401)
sp <- did::reset.sim()
data_eh <- did::build_sim_dataset(sp)

# =============================================================================
# att_gt() validation errors
# =============================================================================

test_that("att_gt errors on invalid est_method string", {
  expect_error(
    att_gt(yname = "Y", data = data_eh, tname = "period", idname = "id",
           gname = "G", est_method = "bad", bstrap = FALSE),
    "must be one of"
  )
})

test_that("att_gt errors on non-character non-function est_method", {
  expect_error(
    att_gt(yname = "Y", data = data_eh, tname = "period", idname = "id",
           gname = "G", est_method = 42, bstrap = FALSE),
    "must be a character string"
  )
})

test_that("att_gt errors on invalid fix_weights value", {
  expect_error(
    att_gt(yname = "Y", data = data_eh, tname = "period", idname = "id",
           gname = "G", fix_weights = "bad", bstrap = FALSE),
    "must be NULL or one of"
  )
})

test_that("att_gt rejects non-exact control_group and base_period values in both modes", {
  for (fm in c(FALSE, TRUE)) {
    expect_error(
      att_gt(yname = "Y", data = data_eh, tname = "period", idname = "id",
             gname = "G", control_group = "NotYetTreated", faster_mode = fm,
             bstrap = FALSE),
      "control_group must be either"
    )
    expect_error(
      att_gt(yname = "Y", data = data_eh, tname = "period", idname = "id",
             gname = "G", base_period = "Universal", faster_mode = fm,
             bstrap = FALSE),
      "base_period must be either"
    )
  }
})

test_that("att_gt rejects negative or non-numeric anticipation in both modes", {
  for (fm in c(FALSE, TRUE)) {
    expect_error(
      att_gt(yname = "Y", data = data_eh, tname = "period", idname = "id",
             gname = "G", anticipation = -1, faster_mode = fm, bstrap = FALSE),
      "anticipation must be non-negative"
    )
    expect_error(
      att_gt(yname = "Y", data = data_eh, tname = "period", idname = "id",
             gname = "G", anticipation = "1", faster_mode = fm, bstrap = FALSE),
      "anticipation must be numeric"
    )
  }
})

test_that("att_gt rejects argument-referenced internal variable names in both modes", {
  reserved_names <- c(".w", ".rowid", ".G", ".C", "post",
                      "asif_never_treated", "treated_first_period")
  reserved_data <- data_eh
  for (nm in reserved_names) {
    reserved_data[[nm]] <- seq_len(nrow(reserved_data))
  }
  msg <- "reserved for internal use"

  for (fm in c(FALSE, TRUE)) {
    for (nm in reserved_names) {
      expect_error(
        att_gt(yname = nm, data = reserved_data, tname = "period",
               idname = "id", gname = "G", faster_mode = fm,
               bstrap = FALSE),
        msg
      )
      expect_error(
        att_gt(yname = "Y", xformla = as.formula(paste("~", nm)),
               data = reserved_data, tname = "period", idname = "id",
               gname = "G", faster_mode = fm, bstrap = FALSE),
        msg
      )
      expect_error(
        att_gt(yname = "Y", data = reserved_data, tname = "period",
               idname = "id", gname = "G", weightsname = nm,
               faster_mode = fm, bstrap = FALSE),
        msg
      )
    }
  }
})

test_that("att_gt errors on panel=TRUE without idname in both modes", {
  for (fm in c(FALSE, TRUE)) {
    expect_error(
      att_gt(yname = "Y", data = data_eh, tname = "period",
             gname = "G", bstrap = FALSE, faster_mode = fm),
      "Must provide idname when panel = TRUE"
    )
  }
})

test_that("att_gt still runs with panel=FALSE and no idname in both modes", {
  res <- list()
  for (fm in c(FALSE, TRUE)) {
    res[[as.character(fm)]] <- suppressWarnings(
      att_gt(yname = "Y", data = data_eh, tname = "period", gname = "G",
             panel = FALSE, bstrap = FALSE, faster_mode = fm)
    )
    expect_true(all(is.finite(res[[as.character(fm)]]$att)))
  }
  expect_equal(res[["FALSE"]]$att, res[["TRUE"]]$att, tolerance = 1e-10)
})

test_that("att_gt errors on invalid alp", {
  for (bad_alp in list(1.5, 0, 1, -0.05, c(0.05, 0.1), "0.05", NA_real_)) {
    expect_error(
      att_gt(yname = "Y", data = data_eh, tname = "period", idname = "id",
             gname = "G", bstrap = FALSE, alp = bad_alp),
      "alp must be a single number strictly between 0 and 1"
    )
  }
})

test_that("att_gt errors on invalid biters when bootstrapping", {
  for (bad_biters in list(-5, 0, 2.5, c(100, 200), "100", NA_real_)) {
    expect_error(
      att_gt(yname = "Y", data = data_eh, tname = "period", idname = "id",
             gname = "G", bstrap = TRUE, biters = bad_biters),
      "biters must be a single positive whole number"
    )
  }
  # biters is not used (and so not validated) when bstrap = FALSE
  res <- suppressWarnings(
    att_gt(yname = "Y", data = data_eh, tname = "period", idname = "id",
           gname = "G", bstrap = FALSE, biters = -5)
  )
  expect_s3_class(res, "MP")
})

test_that("att_gt errors on fix_weights with panel=FALSE", {
  expect_error(
    att_gt(yname = "Y", data = data_eh, tname = "period", idname = "id",
           gname = "G", fix_weights = "base_period", panel = FALSE,
           bstrap = FALSE),
    "not supported for repeated cross sections"
  )
})

test_that("att_gt warns on extra args with built-in est_method", {
  expect_warning(
    att_gt(yname = "Y", data = data_eh, tname = "period", idname = "id",
           gname = "G", est_method = "reg", bstrap = FALSE, extra_arg = 1),
    "Extra arguments"
  )
})

test_that("att_gt messages about anticipation", {
  expect_message(
    suppressWarnings(
      att_gt(yname = "Y", data = data_eh, tname = "period", idname = "id",
             gname = "G", anticipation = 1, bstrap = FALSE)
    ),
    "anticipation ="
  )
})

test_that("att_gt computes clustered SEs without the bootstrap (no 'requires bootstrap' warning)", {
  # Clustering at the individual level (idname) without the bootstrap returns the unit-level analytical
  # standard errors -- no "requires bootstrap" warning. A coarser cluster variable yields the analytical
  # cluster-robust SE (covered in test-cluster-analytic.R).
  w <- capture_warnings(
    res <- att_gt(yname = "Y", data = data_eh, tname = "period", idname = "id",
                  gname = "G", clustervars = "id", bstrap = FALSE)
  )
  expect_false(any(grepl("Clustered standard errors require", w)))
  expect_false(any(grepl("could not be computed analytically", w)))
  expect_true(any(is.finite(res$se)))
})

# =============================================================================
# pre_process_did / pre_process_did2 validation errors
# =============================================================================

test_that("att_gt errors on missing column name (slower mode)", {
  expect_error(
    att_gt(yname = "nonexistent", data = data_eh, tname = "period",
           idname = "id", gname = "G", bstrap = FALSE, faster_mode = FALSE),
    "not found"
  )
})

test_that("att_gt errors on missing column name (faster_mode)", {
  expect_error(
    att_gt(yname = "nonexistent", data = data_eh, tname = "period",
           idname = "id", gname = "G", bstrap = FALSE, faster_mode = TRUE),
    "nonexistent"
  )
})

test_that("att_gt errors on non-numeric tname", {
  bad_data <- data_eh
  bad_data$period <- as.character(bad_data$period)
  expect_error(
    att_gt(yname = "Y", data = bad_data, tname = "period", idname = "id",
           gname = "G", bstrap = FALSE),
    "must be numeric"
  )
})

test_that("att_gt errors on non-numeric gname", {
  bad_data <- data_eh
  bad_data$G <- as.character(bad_data$G)
  expect_error(
    att_gt(yname = "Y", data = bad_data, tname = "period", idname = "id",
           gname = "G", bstrap = FALSE),
    "must be numeric"
  )
})

test_that("att_gt errors on non-numeric outcome variable in both modes", {
  bad_chr <- data_eh
  bad_chr$Y <- as.character(bad_chr$Y)
  bad_fac <- data_eh
  bad_fac$Y <- factor(ifelse(bad_fac$Y > 0, "hi", "lo"))
  for (fm in c(FALSE, TRUE)) {
    expect_error(
      att_gt(yname = "Y", data = bad_chr, tname = "period", idname = "id",
             gname = "G", bstrap = FALSE, faster_mode = fm),
      "The outcome variable 'Y' must be numeric"
    )
    expect_error(
      att_gt(yname = "Y", data = bad_fac, tname = "period", idname = "id",
             gname = "G", bstrap = FALSE, faster_mode = fm),
      "The outcome variable 'Y' must be numeric"
    )
  }
})

test_that("att_gt still accepts logical and integer outcomes in both modes", {
  log_data <- data_eh
  log_data$Y <- log_data$Y > median(log_data$Y)
  int_data <- data_eh
  int_data$Y <- as.integer(round(int_data$Y))
  for (fm in c(FALSE, TRUE)) {
    res_log <- suppressWarnings(
      att_gt(yname = "Y", data = log_data, tname = "period", idname = "id",
             gname = "G", bstrap = FALSE, faster_mode = fm)
    )
    expect_true(all(is.finite(res_log$att)))
    res_int <- suppressWarnings(
      att_gt(yname = "Y", data = int_data, tname = "period", idname = "id",
             gname = "G", bstrap = FALSE, faster_mode = fm)
    )
    expect_true(all(is.finite(res_int$att)))
  }
})

test_that("att_gt errors on treatment reversals (faster_mode)", {
  bad_data <- data_eh
  # Make one unit switch groups across time
  target_id <- bad_data$id[1]
  periods <- sort(unique(bad_data$period))
  bad_data$G[bad_data$id == target_id & bad_data$period == periods[1]] <- 3
  bad_data$G[bad_data$id == target_id & bad_data$period == periods[2]] <- 4
  expect_error(
    att_gt(yname = "Y", data = bad_data, tname = "period", idname = "id",
           gname = "G", bstrap = FALSE, faster_mode = TRUE),
    "same across all periods|time-invariant|must be the same"
  )
})

test_that("att_gt warns on missing data dropped", {
  bad_data <- data_eh
  bad_data$Y[1:5] <- NA
  expect_warning(
    att_gt(yname = "Y", data = bad_data, tname = "period", idname = "id",
           gname = "G", bstrap = FALSE),
    "dropped|missing"
  )
})

test_that("att_gt warns on small groups", {
  # Create data with a very small treated group
  small_data <- data_eh
  treated_ids <- unique(small_data$id[small_data$G > 0])
  # Keep only 2 treated units and some controls
  keep_ids <- c(treated_ids[1:2], unique(small_data$id[small_data$G == 0])[1:50])
  small_data <- small_data[small_data$id %in% keep_ids, ]
  expect_warning(
    att_gt(yname = "Y", data = small_data, tname = "period", idname = "id",
           gname = "G", bstrap = FALSE),
    "very few observations"
  )
})

test_that("att_gt handles data with .w column (slower mode)", {
  # The .w column gets dropped during column selection in pre_process_did
  # before the collision check, so this should run without error
  bad_data <- data.frame(data_eh)
  bad_data$.w <- 1
  result <- att_gt(yname = "Y", data = bad_data, tname = "period", idname = "id",
                   gname = "G", bstrap = FALSE, faster_mode = FALSE)
  expect_s3_class(result, "MP")
})

test_that("att_gt messages on time-varying weights (panel)", {
  tv_data <- data_eh
  tv_data$tw <- tv_data$period + runif(nrow(tv_data))
  expect_message(
    att_gt(yname = "Y", data = tv_data, tname = "period", idname = "id",
           gname = "G", weightsname = "tw", bstrap = FALSE),
    "Time-varying weights"
  )
})

test_that("clustervars contract enforced in all faster_mode x bstrap combinations", {
  # ?att_gt promises an error for more than two cluster variables (one of which
  # must be idname). Both pre-processors must reject the input with identical
  # wording (also matching mboot()) regardless of bstrap -- the analytical
  # (bstrap = FALSE) slow path used to silently cluster on the first extra
  # variable only.
  d <- data_eh
  d$cl1 <- d$id %% 10  # time-invariant within unit
  d$cl2 <- d$id %% 7
  msg <- "At most one cluster variable \\(beyond 'idname'\\) is supported"
  for (fm in c(FALSE, TRUE)) {
    for (bs in c(FALSE, TRUE)) {
      # three cluster variables (idname + two extras)
      expect_error(
        att_gt(yname = "Y", data = d, tname = "period", idname = "id",
               gname = "G", clustervars = c("id", "cl1", "cl2"),
               bstrap = bs, faster_mode = fm),
        msg
      )
      # two cluster variables, neither of which is idname
      expect_error(
        att_gt(yname = "Y", data = d, tname = "period", idname = "id",
               gname = "G", clustervars = c("cl1", "cl2"),
               bstrap = bs, faster_mode = fm),
        msg
      )
    }
  }
})

test_that("one extra cluster variable still works in all faster_mode x bstrap combinations", {
  # clustered DGP: units within a cluster share a common per-period shock, so
  # the clustered SE genuinely differs from the unclustered one
  set.seed(20260609)
  n_cl <- 40L
  sz <- rep(c(2L, 4L), length.out = n_cl)
  N <- sum(sz)
  cl <- rep(seq_len(n_cl), times = sz)
  gC <- sample(c(2L, 3L, 0L), n_cl, replace = TRUE)[cl]
  eta <- matrix(rnorm(n_cl * 4L, 0, 1.5), n_cl, 4L)
  d <- data.frame(id = rep(seq_len(N), each = 4L), period = rep(1:4, N),
                  cl = rep(cl, each = 4L), G = rep(gC, each = 4L))
  d$Y <- eta[cbind(d$cl, d$period)] + 0.3 * d$period +
    (d$G != 0 & d$period >= d$G) + rnorm(nrow(d))
  for (fm in c(FALSE, TRUE)) {
    se_cl <- NULL
    for (bs in c(FALSE, TRUE)) {
      res <- suppressWarnings(
        att_gt(yname = "Y", data = d, tname = "period", idname = "id",
               gname = "G", clustervars = c("id", "cl"), bstrap = bs,
               biters = 100, cband = FALSE, faster_mode = fm)
      )
      expect_s3_class(res, "MP")
      expect_true(any(is.finite(res$se)))
      if (!bs) se_cl <- res$se
    }
    # the analytical (bstrap = FALSE) path actually clusters: the SE differs
    # from the unclustered analytical SE
    res_iid <- att_gt(yname = "Y", data = d, tname = "period", idname = "id",
                      gname = "G", bstrap = FALSE, faster_mode = fm)
    ok <- is.finite(se_cl) & is.finite(res_iid$se)
    expect_true(any(ok))
    expect_gt(max(abs(se_cl[ok] - res_iid$se[ok])), 1e-8)
  }
})

# =============================================================================
# aggte() validation errors
# =============================================================================

test_that("aggte errors on invalid type", {
  mp_tmp <- att_gt(yname = "Y", data = data_eh, tname = "period", idname = "id",
                   gname = "G", bstrap = FALSE)
  expect_error(
    aggte(mp_tmp, type = "invalid"),
    "must be one of"
  )
})

test_that("aggte errors when ATTs contain NA and na.rm=FALSE", {
  sp <- did::reset.sim()
  data <- did::build_sim_dataset(sp)
  mp_na <- suppressWarnings(suppressMessages(
    att_gt(yname = "Y", data = data, tname = "period", idname = "id",
           gname = "G", bstrap = FALSE)
  ))
  # Inject NA directly — no need to rely on estimation failure
  mp_na$att[1] <- NA
  expect_error(
    aggte(mp_na, type = "dynamic", na.rm = FALSE),
    "Missing values"
  )
})

# =============================================================================
# Wald pre-test warnings
# =============================================================================

test_that("att_gt handles singular covariance for Wald test gracefully", {
  # When covariance matrix is singular, W and Wpval should be NULL
  # and the result should still be a valid MP object
  small_sp <- did::reset.sim()
  small_sp$n <- 50
  small_data <- did::build_sim_dataset(small_sp)
  result <- suppressWarnings(
    att_gt(yname = "Y", data = small_data, tname = "period", idname = "id",
           gname = "G", bstrap = FALSE)
  )
  expect_s3_class(result, "MP")
  # W should be either a valid number or NULL (when singular)
  expect_true(is.null(result$W) || is.numeric(result$W))
})

# =============================================================================
# compute.att_gt warnings
# =============================================================================

test_that("att_gt warns on overlap violations", {
  # Create data where propensity score is near 1 (near-perfect separation)
  sep_data <- data_eh
  # Make covariate perfectly predict treatment for some cells
  sep_data$X_sep <- ifelse(sep_data$G > 0, 100, -100)
  expect_warning(
    att_gt(yname = "Y", xformla = ~X_sep, data = sep_data, tname = "period",
           idname = "id", gname = "G", est_method = "dr", bstrap = FALSE),
    "overlap condition"
  )
})

test_that("slow path warns once per failed cell, matching fast-path wording, with accurate Wald diagnosis", {
  # Each failed (g,t) cell must produce exactly ONE warning with identical text in
  # both modes: the tryCatch wrapper must not re-warn the internal overlap/singular
  # sentinels after the diagnostic warning already fired. And when pre-treatment
  # cells existed but were all dropped for NA/zero variance, the Wald warning must
  # say so instead of claiming no pre-treatment cells exist.
  set.seed(20260610)
  d <- expand.grid(id = 1:200, period = 1:3)
  gvals <- c(0, 2, 3)
  d$G <- gvals[(d$id - 1) %% 3 + 1]
  d$Xsep <- 1 * (d$G > 0)  # separates treated from never-treated: overlap fails in all 4 cells
  d$Y <- 0.1 * d$period + (d$G > 0 & d$period >= d$G) + rnorm(nrow(d), 0, 0.5)

  w_slow <- capture_warnings(suppressMessages(
    att_gt(yname = "Y", xformla = ~Xsep, data = d, tname = "period", idname = "id",
           gname = "G", est_method = "dr", faster_mode = FALSE, bstrap = FALSE)))
  w_fast <- capture_warnings(suppressMessages(
    att_gt(yname = "Y", xformla = ~Xsep, data = d, tname = "period", idname = "id",
           gname = "G", est_method = "dr", faster_mode = TRUE, bstrap = FALSE)))

  # identical warning sets across modes (one per failed cell + the Wald warning)
  expect_identical(w_slow, w_fast)
  # exactly one warning per failed cell, with the fast-path wording
  expect_identical(sum(grepl("overlap condition violated for group", w_slow)), 4L)
  # no leaked internal sentinel via the tryCatch wrapper
  expect_false(any(grepl("Error computing internal 2x2 DiD", w_slow)))
  # accurate Wald diagnosis: pre-treatment cells existed but had NA/zero variance
  expect_true(any(grepl("missing or zero variance", w_slow)))
  expect_false(any(grepl("treated early in the panel", w_slow)))
})

test_that("att_gt warns when no pre-treatment periods for Wald test", {
  # Create data where a group is first treated in period 2 (only 1 pre-treatment period)
  # With base_period="varying", there may be no pre-treatment ATTs for the Wald test
  early_data <- data.frame(
    id = rep(1:100, each = 3),
    period = rep(1:3, 100),
    G = rep(c(rep(2, 50), rep(0, 50)), each = 3),
    Y = rnorm(300)
  )
  expect_warning(
    att_gt(yname = "Y", data = early_data, tname = "period", idname = "id",
           gname = "G", bstrap = FALSE),
    "pre-treatment|Wald"
  )
})

test_that("empty-cell warning names the actually-empty base period under base_period='universal' in both modes", {
  # Group 3's universal base period is 2; group 3 has observations in every
  # OTHER period. The slow path used to name the cell's current period
  # (1/3/4) -- periods where the group does have units -- instead of the empty
  # base period (2), which the fast path reported correctly.
  set.seed(20260610)
  d <- expand.grid(rep = 1:30, period = 1:4, G = c(0, 3))
  d$Y <- rnorm(nrow(d)) + 0.1 * d$period + (d$G > 0 & d$period >= d$G)
  d <- d[!(d$G == 3 & d$period == 2), ]  # group 3 empty only in its base period
  for (fm in c(FALSE, TRUE)) {
    w <- capture_warnings(suppressMessages(
      att_gt(yname = "Y", data = d, tname = "period", gname = "G",
             panel = FALSE, base_period = "universal", bstrap = FALSE,
             faster_mode = fm)
    ))
    # one warning per non-base cell (t = 1, 3, 4), each naming period 2
    expect_identical(
      sum(grepl("No units in group 3 in time period 2", w, fixed = TRUE)), 3L,
      info = paste("faster_mode =", fm)
    )
    expect_false(any(grepl("No units in group 3 in time period [134]", w)),
                 info = paste("faster_mode =", fm))
  }
})

# =============================================================================
# pre-processing warning parity across modes
# =============================================================================

test_that("'no never-treated group' warning is identical across modes and discloses the period filtering", {
  # Both modes drop all periods >= (latest cohort - anticipation); the fast-mode
  # warning used to omit that a chunk of the data was discarded.
  set.seed(20260609)
  d <- data.frame(
    id = rep(1:60, each = 4),
    period = rep(1:4, 60),
    G = rep(c(rep(2, 30), rep(4, 30)), each = 4)
  )
  d$Y <- rnorm(nrow(d))
  msgs <- list()
  for (fm in c(FALSE, TRUE)) {
    w <- capture_warnings(suppressMessages(
      att_gt(yname = "Y", data = d, tname = "period", idname = "id",
             gname = "G", control_group = "nevertreated", bstrap = FALSE,
             faster_mode = fm)
    ))
    m <- grep("No never-treated group", w, value = TRUE)
    expect_length(m, 1L)
    expect_match(m, "filtered out", fixed = TRUE)
    msgs[[as.character(fm)]] <- m
  }
  expect_identical(msgs[["FALSE"]], msgs[["TRUE"]])
})

test_that("balanced-panel coercion warning counts dropped units identically in both modes", {
  # 5 units each miss one period's row: 5 UNITS (20 rows) are dropped while
  # coercing to a balanced panel. The slow path used to report the unit count
  # as 'observations' (a 4x undercount of the rows actually removed).
  drop_ids <- unique(data_eh$id)[1:5]
  d <- data_eh[!(data_eh$id %in% drop_ids & data_eh$period == max(data_eh$period)), ]
  expected <- "5 units are missing in some periods. Converting to balanced panel by dropping them."
  for (fm in c(FALSE, TRUE)) {
    w <- capture_warnings(suppressMessages(
      att_gt(yname = "Y", data = d, tname = "period", idname = "id",
             gname = "G", panel = TRUE, allow_unbalanced_panel = FALSE,
             bstrap = FALSE, faster_mode = fm)
    ))
    expect_true(any(w == expected), info = paste("faster_mode =", fm))
    expect_false(any(grepl("observations while converting", w)),
                 info = paste("faster_mode =", fm))
  }
})
Any scripts or data that you put into this service are public.
did documentation built on June 13, 2026, 5:07 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
did
Treatment Effects with Multiple Periods and Groups

tests/testthat/test-error-handling.R
In did: Treatment Effects with Multiple Periods and Groups

Try the did package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

did Treatment Effects with Multiple Periods and Groups

tests/testthat/test-error-handling.R In did: Treatment Effects with Multiple Periods and Groups

Try the did package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

did
Treatment Effects with Multiple Periods and Groups

tests/testthat/test-error-handling.R
In did: Treatment Effects with Multiple Periods and Groups