test-mcee-missing-data-error.R
In MRTAnalysis: Assessing Proximal, Distal, and Mediated Causal Excursion Effects for Micro-Randomized Trials

test_that("mcee errors cleanly on missing/NaN/Inf in core, moderator, and control vars", {
    # Purpose: whenever a column handed to mcee() holds NA or Inf, the call is
    # expected to stop with a message naming the column and the affected rows.
    set.seed(42)

    # ---- clean simulated data: 6 subjects, unequal numbers of decision points ----
    obs_per_id <- c(5, 7, 6, 6, 8, 5)
    id <- rep(seq_along(obs_per_id), obs_per_id)
    dp <- sequence(obs_per_id) # 1..T_i within each subject
    n_row <- length(dp)
    dp_std <- scale(dp) # standardized decision point, reused for M and Y

    # Order of the random draws (I, A, M, noise) is kept fixed for reproducibility.
    I <- rbinom(n_row, 1, 0.9)
    A <- rbinom(n_row, 1, 0.6)
    M <- rbinom(n_row, 1, plogis(-0.2 + 0.3 * A + 0.1 * dp_std))
    y_raw <- 0.5 * A + 0.6 * M + 0.08 * dp_std + rnorm(n_row, 0, 0.2)
    # Every row of a subject carries that subject's mean of y_raw.
    Y <- ave(y_raw, id, FUN = function(v) rep(mean(v), length(v)))

    clean <- data.frame(id, dp, I, A, M, Y, check.names = FALSE)

    # Non-constant per-row weights: increasing in dp, normalized to sum to 1
    # within each subject.
    dp_max <- ave(clean[["dp"]], clean[["id"]], FUN = max)
    raw_w <- 0.3 + 0.7 * (clean[["dp"]] / dp_max)
    row_w <- ave(raw_w, clean[["id"]], FUN = function(v) v / sum(v))

    # All three scenarios share every argument except the data set and the
    # time-varying effect formula, so the call is wrapped once.
    ctrl_form <- ~ dp + M
    fit_mcee <- function(dat, effect_form) {
        mcee(
            data = dat, id = "id", dp = "dp", outcome = "Y",
            treatment = "A", mediator = "M",
            availability = "I",
            time_varying_effect_form = effect_form,
            control_formula_with_mediator = ctrl_form,
            control_reg_method = "glm",
            rand_prob = 0.5,
            weight_per_row = row_w,
            verbose = FALSE
        )
    }

    # Scenario 1: NA in the decision-point column (rows 3 and 10).
    na_in_dp <- clean
    na_in_dp[["dp"]] <- replace(na_in_dp[["dp"]], c(3, 10), NA_integer_)
    expect_error(
        fit_mcee(na_in_dp, ~1),
        regexp = "Missing/NaN/Inf.*dp.*rows\\s+3,\\s+10.*does not support handling missing data",
        ignore.case = TRUE
    )

    # Scenario 2: Inf in dp while dp also serves as the effect moderator
    # (rows 2 and 9).
    inf_in_dp <- clean
    inf_in_dp[["dp"]] <- replace(inf_in_dp[["dp"]], c(2, 9), Inf)
    expect_error(
        fit_mcee(inf_in_dp, ~dp),
        regexp = "Missing/NaN/Inf.*dp.*rows\\s+2,\\s+9",
        ignore.case = TRUE
    )

    # Scenario 3: NA in M, which is passed as the mediator and also appears in
    # the control formula (rows 4, 12 and 25).
    na_in_m <- clean
    na_in_m[["M"]] <- replace(na_in_m[["M"]], c(4, 12, 25), NA_real_)
    expect_error(
        fit_mcee(na_in_m, ~1),
        regexp = "Missing/NaN/Inf.*M.*rows\\s+4,\\s+12,\\s+25",
        ignore.case = TRUE
    )
})

test_that("mcee_general errors cleanly on missing in config formula variables", {
    # Purpose: an NA in a covariate that is referenced only by one nuisance
    # config formula is expected to make mcee_general() stop with a message
    # naming that covariate and the affected rows.
    set.seed(99)

    # ---- clean simulated data: 6 subjects, unequal numbers of decision points ----
    obs_per_id <- c(5, 7, 6, 6, 8, 5)
    id <- rep(seq_along(obs_per_id), obs_per_id)
    dp <- sequence(obs_per_id)
    n_row <- length(dp)
    dp_std <- scale(dp)

    # Order of the random draws (I, A, M, noise, Z) is kept fixed.
    I <- rbinom(n_row, 1, 0.9)
    A <- rbinom(n_row, 1, 0.6)
    M <- rbinom(n_row, 1, plogis(-0.2 + 0.3 * A + 0.1 * dp_std))
    y_raw <- 0.5 * A + 0.6 * M + 0.08 * dp_std + rnorm(n_row, 0, 0.2)
    Y <- ave(y_raw, id, FUN = function(v) rep(mean(v), length(v)))
    Z <- rnorm(n_row) # additional covariate, used only by the q config below

    clean <- data.frame(id, dp, I, A, M, Y, Z, check.names = FALSE)

    # Per-row weights, normalized to sum to 1 within each subject.
    dp_max <- ave(clean[["dp"]], clean[["id"]], FUN = max)
    raw_w <- 0.3 + 0.7 * (clean[["dp"]] / dp_max)
    row_w <- ave(raw_w, clean[["id"]], FUN = function(v) v / sum(v))

    # Z is the only column with NA (rows 6 and 13).
    na_in_z <- clean
    na_in_z[["Z"]] <- replace(na_in_z[["Z"]], c(6, 13), NA_real_)

    # Nuisance-model configurations; only `q` references Z.
    # NOTE(review): the original comments state the GLM family is picked
    # automatically (binomial for p/q, gaussian for eta/mu/nu) - confirm
    # against the mcee_general documentation.
    nuis_cfg <- list(
        p = list(method = "glm", formula = ~dp),
        q = list(method = "glm", formula = ~ dp + Z),
        eta = list(method = "glm", formula = ~dp),
        mu = list(method = "glm", formula = ~ dp + M),
        nu = list(method = "glm", formula = ~dp)
    )

    expected_msg <- "Missing/NaN/Inf.*Z.*rows\\s+6,\\s+13"
    expect_error(
        mcee_general(
            data = na_in_z,
            id = "id", dp = "dp", outcome = "Y",
            treatment = "A", mediator = "M",
            availability = "I",
            time_varying_effect_form = ~1,
            config_p = nuis_cfg$p, config_q = nuis_cfg$q,
            config_eta = nuis_cfg$eta, config_mu = nuis_cfg$mu,
            config_nu = nuis_cfg$nu,
            weight_per_row = row_w,
            verbose = FALSE
        ),
        regexp = expected_msg,
        ignore.case = TRUE
    )
})

test_that("mcee_userfit_nuisance errors cleanly on missing in supplied nuisance vectors", {
    # Purpose: the data frame itself is clean here; the NA / Inf values live in
    # the user-supplied nuisance prediction vectors, and the call is expected
    # to stop with a message naming the offending argument and rows.
    set.seed(123)

    # ---- clean simulated data: 6 subjects, unequal numbers of decision points ----
    obs_per_id <- c(5, 7, 6, 6, 8, 5)
    id <- rep(seq_along(obs_per_id), obs_per_id)
    dp <- sequence(obs_per_id)
    n_row <- length(dp)
    dp_std <- scale(dp)

    # Order of the random draws (I, A, M, noise) is kept fixed.
    I <- rbinom(n_row, 1, 0.9)
    A <- rbinom(n_row, 1, 0.6)
    M <- rbinom(n_row, 1, plogis(-0.2 + 0.3 * A + 0.1 * dp_std))
    y_raw <- 0.5 * A + 0.6 * M + 0.08 * dp_std + rnorm(n_row, 0, 0.2)
    Y <- ave(y_raw, id, FUN = function(v) rep(mean(v), length(v)))

    clean <- data.frame(id, dp, I, A, M, Y, check.names = FALSE)

    # Per-row weights, normalized to sum to 1 within each subject.
    dp_max <- ave(clean[["dp"]], clean[["id"]], FUN = max)
    raw_w <- 0.3 + 0.7 * (clean[["dp"]] / dp_max)
    row_w <- ave(raw_w, clean[["id"]], FUN = function(v) v / sum(v))

    # Well-behaved nuisance predictions, one value per row. p1 and q1 come
    # from plogis() and therefore lie strictly inside (0, 1).
    nuis <- list(
        p1 = plogis(-0.1 + 0.02 * dp),
        q1 = plogis(-0.2 + 0.02 * dp + 0.3 * M),
        eta1 = 0.4 + 0.1 * dp,
        eta0 = 0.3 + 0.05 * dp,
        mu1 = 0.4 + 0.15 * dp + 0.2 * M,
        mu0 = 0.3 + 0.10 * dp + 0.1 * M,
        nu1 = 0.35 + 0.12 * dp,
        nu0 = 0.25 + 0.08 * dp
    )

    # The two scenarios differ only in p1, nu0 and the effect formula.
    fit_userfit <- function(effect_form, p1_vec, nu0_vec) {
        mcee_userfit_nuisance(
            data = clean,
            id = "id", dp = "dp", outcome = "Y",
            treatment = "A", mediator = "M",
            availability = "I",
            time_varying_effect_form = effect_form,
            p1 = p1_vec, q1 = nuis$q1,
            eta1 = nuis$eta1, eta0 = nuis$eta0,
            mu1 = nuis$mu1, mu0 = nuis$mu0,
            nu1 = nuis$nu1, nu0 = nu0_vec,
            weight_per_row = row_w,
            verbose = FALSE
        )
    }

    # Scenario 1: NA in p1 at rows 2, 11 and 17.
    p1_with_na <- replace(nuis$p1, c(2, 11, 17), NA_real_)
    expect_error(
        fit_userfit(~dp, p1_vec = p1_with_na, nu0_vec = nuis$nu0),
        regexp = "Missing/NaN/Inf detected in 'p1'.*rows\\s+2,\\s+11,\\s+17.*does not support handling missing data",
        ignore.case = TRUE
    )

    # Scenario 2: Inf in nu0 at rows 3 and 9.
    nu0_with_inf <- replace(nuis$nu0, c(3, 9), Inf)
    expect_error(
        fit_userfit(~1, p1_vec = nuis$p1, nu0_vec = nu0_with_inf),
        regexp = "Missing/NaN/Inf detected in 'nu0'.*rows\\s+3,\\s+9",
        ignore.case = TRUE
    )
})

test_that("mcee_* missing-data message aggregates multiple offenders", {
    # Purpose: when two different columns contain NA, a single error message is
    # expected to report both of them (in either order).
    set.seed(7)

    # ---- balanced design: 5 subjects x 5 decision points, always available ----
    n_id <- 5
    obs_per_id <- rep(5, n_id)
    id <- rep(seq_len(n_id), obs_per_id)
    dp <- sequence(obs_per_id)
    n_row <- length(dp)

    # Order of the random draws (A, M, noise) is kept fixed.
    I <- rep(1, n_row)
    A <- rbinom(n_row, 1, 0.5)
    M <- rbinom(n_row, 1, 0.5)
    y_raw <- 0.2 * A + 0.3 * M + rnorm(n_row, 0, .1)
    # Every row of a subject carries that subject's mean of y_raw.
    Y <- ave(y_raw, id, FUN = function(v) rep(mean(v), length(v)))

    clean <- data.frame(id, dp, I, A, M, Y)

    # NA in the outcome (rows 4, 7) and in the mediator (rows 3, 8).
    two_bad_cols <- clean
    two_bad_cols[["Y"]] <- replace(two_bad_cols[["Y"]], c(4, 7), NA_real_)
    two_bad_cols[["M"]] <- replace(two_bad_cols[["M"]], c(3, 8), NA_real_)

    unit_w <- rep(1, nrow(two_bad_cols))

    # The pattern requires two "<variable> ... rows ..." reports separated by
    # arbitrary text, so it accepts either reporting order.
    both_reported <- "(Y.*rows\\s+4,\\s+7|M.*rows\\s+3,\\s+8).*(Y.*rows\\s+4,\\s+7|M.*rows\\s+3,\\s+8)"

    expect_error(
        mcee(
            data = two_bad_cols, id = "id", dp = "dp", outcome = "Y",
            treatment = "A", mediator = "M",
            availability = "I",
            time_varying_effect_form = ~1,
            control_formula_with_mediator = ~ dp + M,
            control_reg_method = "glm",
            rand_prob = 0.5,
            weight_per_row = unit_w,
            verbose = FALSE
        ),
        regexp = both_reported,
        ignore.case = TRUE
    )
})

Any scripts or data that you put into this service are public.

MRTAnalysis documentation built on Sept. 9, 2025, 5:41 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

MRTAnalysis
Assessing Proximal, Distal, and Mediated Causal Excursion Effects for Micro-Randomized Trials

tests/testthat/test-mcee-missing-data-error.R
In MRTAnalysis: Assessing Proximal, Distal, and Mediated Causal Excursion Effects for Micro-Randomized Trials

Try the MRTAnalysis package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

MRTAnalysis Assessing Proximal, Distal, and Mediated Causal Excursion Effects for Micro-Randomized Trials

tests/testthat/test-mcee-missing-data-error.R In MRTAnalysis: Assessing Proximal, Distal, and Mediated Causal Excursion Effects for Micro-Randomized Trials

Try the MRTAnalysis package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

MRTAnalysis
Assessing Proximal, Distal, and Mediated Causal Excursion Effects for Micro-Randomized Trials

tests/testthat/test-mcee-missing-data-error.R
In MRTAnalysis: Assessing Proximal, Distal, and Mediated Causal Excursion Effects for Micro-Randomized Trials