tests/testthat/test-dcee-numeric-golden-other-learners.R

# Shared tiny DGM (fast + deterministic)
.make_tiny_dgm <- function(n = 40, Tt = 4, seed = 123) {
    set.seed(seed)
    df <- data.frame(
        userid = rep(seq_len(n), each = Tt),
        dp     = rep(seq_len(Tt), times = n)
    )
    df$avail <- 1L
    df$prob_A <- 0.5
    df$X <- rnorm(n * Tt, 0, 1)
    df$Z <- rbinom(n * Tt, 1, 0.4)
    df$A <- rbinom(n * Tt, 1, df$prob_A) * df$avail

    # Distal Y (constant per id): linear signal in mean X, mean A, and mean A*X
    c0 <- 0.2
    c1 <- 0.7
    c2 <- 0.4
    c3 <- 0.9
    dY <- aggregate(cbind(X, A) ~ userid, data = df, FUN = mean)
    dAX <- aggregate(I(A * X) ~ userid, data = df, FUN = mean)
    dY <- merge(dY, dAX, by = "userid")
    names(dY) <- c("userid", "mX", "mA", "mAX")
    set.seed(seed + 1)
    dY$eps <- rnorm(n, 0, 0.1)
    dY$Y <- with(dY, c0 + c1 * mX + c2 * mA + c3 * mAX + eps)
    merge(df, dY[c("userid", "Y")], by = "userid", all.x = TRUE)
}

# "DCEE goldens for different learners"

test_that("GAM (mgcv): goldens for beta / se / V and lincomb", {
    skip_on_cran()
    skip_if_not_installed("mgcv")

    dat <- .make_tiny_dgm()

    # Fixed seed for any internal RNG (usually not needed for GAM, but cheap to set)
    set.seed(20201)
    fit <- dcee(
        data = dat,
        id = "userid", outcome = "Y", treatment = "A", rand_prob = "prob_A",
        moderator_formula = ~Z, # => beta: Intercept, Z
        control_formula = ~ s(X) + Z, # spline in nuisance
        availability = "avail",
        control_reg_method = "gam",
        cross_fit = FALSE,
        verbose = FALSE
    )

    # --- First time: uncomment to capture goldens ---
    # cat("GAM beta_hat = "); dput(unclass(fit$fit$beta_hat)); cat("\n")
    # cat("GAM beta_se  = "); dput(unclass(fit$fit$beta_se));  cat("\n")
    # cat("GAM beta_V   = "); dput(fit$fit$beta_varcov);       cat("\n")

    # >>>> PASTE YOUR REAL NUMBERS BELOW (placeholders shown) <<<<
    expected_beta <- structure(c(0.151147323591678, -0.102361877063079), .Names = c("Intercept", "Z")) # PLACEHOLDER
    expected_se <- structure(c(0.0887917218842927, 0.112450883649567), .Names = c("Intercept", "Z")) # PLACEHOLDER
    expected_V <- matrix(c(0.00788396987517758, -0.00545529153606339, -0.00545529153606339, 0.0126452012335686), 2,
        byrow = TRUE,
        dimnames = list(c("Intercept", "Z"), c("Intercept", "Z"))
    ) # PLACEHOLDER

    expect_equal(unclass(fit$fit$beta_hat), expected_beta, tolerance = 1e-8)
    expect_equal(unclass(fit$fit$beta_se), expected_se, tolerance = 1e-8)
    expect_equal(fit$fit$beta_varcov, expected_V, tolerance = 1e-8)

    # lincomb: extract Z effect via [0, 1]
    s <- summary(fit, lincomb = c(0, 1), conf_level = 0.95)
    df <- fit$df
    tcrit <- stats::qt(0.975, df)
    est <- expected_beta["Z"]
    se <- expected_se["Z"]
    LCL <- est - tcrit * se
    UCL <- est + tcrit * se
    LCL_col <- grep("LCL$", names(s$lincomb), value = TRUE)
    UCL_col <- grep("UCL$", names(s$lincomb), value = TRUE)
    expect_equal(as.numeric(s$lincomb$Estimate), as.numeric(est), tolerance = 1e-8)
    expect_equal(as.numeric(s$lincomb$`Std. Error`), as.numeric(se), tolerance = 1e-8)
    expect_equal(as.numeric(s$lincomb[[LCL_col]]), as.numeric(LCL), tolerance = 1e-8)
    expect_equal(as.numeric(s$lincomb[[UCL_col]]), as.numeric(UCL), tolerance = 1e-8)
})

test_that("randomForest: goldens for beta / se / V and lincomb", {
    skip_on_cran()
    skip_if_not_installed("randomForest")

    dat <- .make_tiny_dgm()

    set.seed(30301)
    fit <- dcee(
        data = dat,
        id = "userid", outcome = "Y", treatment = "A", rand_prob = "prob_A",
        moderator_formula = ~Z,
        control_formula = ~ X + Z,
        availability = "avail",
        control_reg_method = "rf",
        cross_fit = FALSE,
        verbose = FALSE,
        ntree = 300, mtry = 2, nodesize = 5
    )

    # cat("RF beta_hat = "); dput(unclass(fit$fit$beta_hat)); cat("\n")
    # cat("RF beta_se  = "); dput(unclass(fit$fit$beta_se));  cat("\n")
    # cat("RF beta_V   = "); dput(fit$fit$beta_varcov);       cat("\n")

    expected_beta <- structure(c(0.150921347945536, -0.144731366766928), .Names = c("Intercept", "Z")) # PLACEHOLDER
    expected_se <- structure(c(0.0692415601249499, 0.0946123423050273), .Names = c("Intercept", "Z")) # PLACEHOLDER
    expected_V <- matrix(c(0.00479439364853705, -0.00320667976988224, -0.00320667976988223, 0.00895149531644365), 2,
        byrow = TRUE,
        dimnames = list(c("Intercept", "Z"), c("Intercept", "Z"))
    ) # PLACEHOLDER

    expect_equal(unclass(fit$fit$beta_hat), expected_beta, tolerance = 1e-8)
    expect_equal(unclass(fit$fit$beta_se), expected_se, tolerance = 1e-8)
    expect_equal(fit$fit$beta_varcov, expected_V, tolerance = 1e-8)

    s <- summary(fit, lincomb = c(0, 1), conf_level = 0.95)
    df <- fit$df
    tcrit <- stats::qt(0.975, df)
    est <- expected_beta["Z"]
    se <- expected_se["Z"]
    expect_equal(as.numeric(s$lincomb$Estimate), as.numeric(est), tolerance = 1e-8)
    expect_equal(as.numeric(s$lincomb$`Std. Error`), as.numeric(se), tolerance = 1e-8)
})

test_that("ranger: goldens for beta / se / V and lincomb", {
    skip_on_cran()
    skip_if_not_installed("ranger")

    dat <- .make_tiny_dgm()

    set.seed(40401)
    fit <- dcee(
        data = dat,
        id = "userid", outcome = "Y", treatment = "A", rand_prob = "prob_A",
        moderator_formula = ~Z,
        control_formula = ~ X + Z,
        availability = "avail",
        control_reg_method = "ranger",
        cross_fit = FALSE,
        verbose = FALSE,
        num.trees = 400, mtry = 2, min.node.size = 5, seed = 40401
    )

    # cat("ranger beta_hat = "); dput(unclass(fit$fit$beta_hat)); cat("\n")
    # cat("ranger beta_se  = "); dput(unclass(fit$fit$beta_se));  cat("\n")
    # cat("ranger beta_V   = "); dput(fit$fit$beta_varcov);       cat("\n")

    expected_beta <- structure(c(0.147337094915664, -0.131825565893961), .Names = c("Intercept", "Z")) # PLACEHOLDER
    expected_se <- structure(c(0.0694862479952816, 0.0954002422648467), .Names = c("Intercept", "Z")) # PLACEHOLDER
    expected_V <- matrix(c(0.00482833866046178, -0.00330272964784973, -0.00330272964784973, 0.00910120622419144), 2,
        byrow = TRUE,
        dimnames = list(c("Intercept", "Z"), c("Intercept", "Z"))
    ) # PLACEHOLDER

    expect_equal(unclass(fit$fit$beta_hat), expected_beta, tolerance = 1e-8)
    expect_equal(unclass(fit$fit$beta_se), expected_se, tolerance = 1e-8)
    expect_equal(fit$fit$beta_varcov, expected_V, tolerance = 1e-8)

    s <- summary(fit, lincomb = c(0, 1), conf_level = 0.95)
    expect_equal(nrow(s$lincomb), 1L)
})

test_that("SuperLearner: goldens for beta / se / V and lincomb", {
    skip_on_cran()
    skip_if_not_installed("SuperLearner")

    suppressMessages(library(SuperLearner))

    dat <- .make_tiny_dgm()

    # Use a deterministic, minimal library; disable shuffling in CV control

    set.seed(50501)
    fit <- suppressPackageStartupMessages(dcee(
        data = dat,
        id = "userid", outcome = "Y", treatment = "A", rand_prob = "prob_A",
        moderator_formula = ~Z,
        control_formula = ~ X + Z,
        availability = "avail",
        control_reg_method = "sl",
        cross_fit = FALSE,
        verbose = FALSE,
        # pass through to SuperLearner via ...
        cvControl = SuperLearner::SuperLearner.CV.control(V = 3, shuffle = FALSE)
    ))

    # cat("SL beta_hat = "); dput(unclass(fit$fit$beta_hat)); cat("\n")
    # cat("SL beta_se  = "); dput(unclass(fit$fit$beta_se));  cat("\n")
    # cat("SL beta_V   = "); dput(fit$fit$beta_varcov);       cat("\n")

    expected_beta <- structure(c(0.157693969082668, -0.0905976419823446), .Names = c("Intercept", "Z")) # PLACEHOLDER
    expected_se <- structure(c(0.089170606422377, 0.109801833613013), .Names = c("Intercept", "Z")) # PLACEHOLDER
    expected_V <- matrix(c(0.00795139704973446, -0.00546213131030219, -0.00546213131030219, 0.0120564426647797), 2,
        byrow = TRUE,
        dimnames = list(c("Intercept", "Z"), c("Intercept", "Z"))
    ) # PLACEHOLDER

    expect_equal(unclass(fit$fit$beta_hat), expected_beta, tolerance = 1e-8)
    expect_equal(unclass(fit$fit$beta_se), expected_se, tolerance = 1e-8)
    expect_equal(fit$fit$beta_varcov, expected_V, tolerance = 1e-8)

    s <- summary(fit, lincomb = c(0, 1))
    expect_equal(nrow(s$lincomb), 1L)
})

Try the MRTAnalysis package in your browser

Any scripts or data that you put into this service are public.

MRTAnalysis documentation built on Sept. 9, 2025, 5:41 p.m.