context("Test tmleCommunity on individual-level exposure")
# ---------------------------------------------------------------------------------
# TEST SET 1. TESTS FOR FITTING BINARY EXPOSURE A IN IID DATA
# ---------------------------------------------------------------------------------
get.iid.dat.Abin <- function(ndata = 100000, rndseed = NULL, is.Y.bin = TRUE) {
require(simcausal)
D <- DAG.empty()
D <- D +
node("W1", distr = "rbern", prob = 0.5) +
node("W2", distr = "rbern", prob = 0.3) +
node("W3", distr = "rnorm", mean = 0, sd = 0.3) +
node("W4", distr = "runif", min = 0, max = 1) +
node("W3W4", distr = "rconst", const = W3 * W4) +
node("A", distr = "rbern", prob = plogis(0.86 * W1 + W2 + 1.32 * W3 - W4 - 0.45 *W3W4 + 0.1)) +
node("Y", distr = "rnorm", mean = (2.8 * A + 2 * W1 + 1.5 * W2 + 0.5 * W3 - 3 * W4 - 3.5), sd = 1) +
node("Y1", distr = "rnorm", mean = (2.8 * 1 + 2 * W1 + 1.5 * W2 + 0.5 * W3 - 3 * W4 - 3.5), sd = 1) +
node("Y0", distr = "rnorm", mean = (2.8 * 0 + 2 * W1 + 1.5 * W2 + 0.5 * W3 - 3 * W4 - 3.5), sd = 1)
D <- set.DAG(D)
Odata <- sim(D, n = ndata, rndseed = rndseed)
psi0.Y <- mean(Odata$Y1) - mean(Odata$Y0)
print("psi0.Y: " %+% psi0.Y)
return(list(psi0.Y = psi0.Y, Odata = Odata))
}
`%+%` <- function(a, b) paste0(a, b)
ndata <- 10000
rndseed <- 12345
indPop.iid.bA.cY <- get.iid.dat.Abin(ndata = 1000000, rndseed = rndseed, is.Y.bin = FALSE)$Odata
psi0.Y <- mean(indPop.iid.bA.cY$Y1) - mean(indPop.iid.bA.cY$Y0) # 2.80026
indSample.iid.bA.cY <- get.iid.dat.Abin(ndata = ndata, rndseed = rndseed)$Odata
N <- NROW(indSample.iid.bA.cY)
indSample.iid.bA.cY <- indSample.iid.bA.cY[, c("W1", "W2", "W3", "W4", "A", "Y")]
Qform.corr <- "Y ~ W1 + W2 + W3 + W4 + A" # correct Q
Qform.mis <- "Y ~ W3 + A" # incorrect Q
gform.corr <- "A ~ W1 + W2 + W3 * W4" # correct g
gform.mis <- "A ~ W1 + W3" # incorrect g
#***************************************
# Test 1.1 speedglm & glm
#***************************************
## Test 1.1.1 Correctly specified Qform & gform (+ tmle.intercept)
test_that("fit TMLE for binary A with speedglm, with correctly specified Qform & gform", {
tmleCom_Options(Qestimator = "speedglm__glm", gestimator = "speedglm__glm", maxNperBin = N)
tmleCom_res <-
tmleCommunity(data = indSample.iid.bA.cY, Ynode = "Y", Anodes = "A",
WEnodes = c("W1", "W2", "W3", "W4"), f_gstar1 = 1, f_gstar2 = 0,
Qform = Qform.corr, hform.g0 = gform.corr, hform.gstar = gform.corr)
estimates <- tmleCom_res$ATE$estimates # psi0 = 2.80026
variances <- tmleCom_res$ATE$vars
expect_equal(estimates["tmle", ], 2.799876, tolerance = 1e-6)
expect_equal(estimates["iptw", ], 2.783021, tolerance = 1e-6)
expect_equal(estimates["gcomp", ], 2.779068, tolerance = 1e-6)
expect_equal(as.vector(variances), c(0.000491327, 0.027397109, 0.000491327))
expect_type(tmleCom_res$EY_gstar1$h.g0_GenericModel, "environment")
})
## Test 1.1.2 Misspecified Qform (+ tmle.intercept)
test_that("fit TMLE for binary A with speedglm, with misspecified Qform", {
tmleCom_Options(Qestimator = "speedglm__glm", gestimator = "speedglm__glm", maxNperBin = N)
tmleCom_res <-
tmleCommunity(data = indSample.iid.bA.cY, Ynode = "Y", Anodes = "A",
WEnodes = c("W1", "W2", "W3", "W4"), f_gstar1 = 1, f_gstar2 = 0,
Qform = Qform.mis, hform.g0 = gform.corr, hform.gstar = gform.corr)
estimates <- tmleCom_res$ATE$estimates # psi0 = 2.80026
expect_equal(estimates["tmle", ], 2.801727, tolerance = 1e-6)
expect_equal(estimates["iptw", ], 2.783021, tolerance = 1e-6)
expect_equal(estimates["gcomp", ], 3.696836, tolerance = 1e-6)
expect_lt(abs(estimates["tmle", ] - psi0.Y), abs(estimates["gcomp", ] - psi0.Y))
})
## Test 1.1.3 Misspecified gform (+ tmle.intercept)
# No big influence on the IPTW estimator since only the consistence estimation of
# the weights h_gstar / h_gN is required, even if g0 is misspecified
test_that("fit TMLE for binary A with speedglm, with misspecified gform", {
tmleCom_Options(Qestimator = "speedglm__glm", gestimator = "speedglm__glm", maxNperBin = N)
tmleCom_res <-
tmleCommunity(data = indSample.iid.bA.cY, Ynode = "Y", Anodes = "A",
WEnodes = c("W1", "W2", "W3", "W4"), f_gstar1 = 1, f_gstar2 = 0,
Qform = Qform.corr, hform.g0 = gform.mis, hform.gstar = gform.mis)
estimates <- tmleCom_res$ATE$estimates # psi0 = 2.80026
expect_equal(estimates["tmle", ], 2.777445, tolerance = 1e-6)
expect_equal(estimates["iptw", ], 3.307761, tolerance = 1e-6)
expect_equal(estimates["gcomp", ], 2.779068, tolerance = 1e-6)
expect_lt(abs(estimates["tmle", ] - psi0.Y), abs(estimates["iptw", ] - psi0.Y))
})
#***************************************
## Test 1.2 SuperLearner
#***************************************
## Test 1.2.1 SuperLearners with only main terms (+ tmle.intercept)
test_that("fit TMLE for binary A with SuperLearner, using SL.glm, SL.step, SL.glm.interaction,
when both Qform and gform only use main terms", {
require("SuperLearner")
tmleCom_Options(Qestimator = "SuperLearner", gestimator = "SuperLearner", maxNperBin = N,
SL.library = c("SL.glm", "SL.step", "SL.glm.interaction"))
tmleCom_res <-
tmleCommunity(data = indSample.iid.bA.cY, Ynode = "Y", Anodes = "A",
WEnodes = c("W1", "W2", "W3", "W4"), f_gstar1 = 1, f_gstar2 = 0,
Qform = NULL, hform.g0 = NULL, hform.gstar = NULL, rndseed = 12345)
estimates <- tmleCom_res$ATE$estimates # psi0 = 2.80026
expect_equal(estimates["tmle", ], 2.799282, tolerance = 1e-6)
expect_equal(estimates["iptw", ], 2.801273, tolerance = 1e-6)
expect_equal(estimates["gcomp", ], 2.780339, tolerance = 1e-6)
})
#***************************************
# Test 1.3 sl3
#***************************************
## Test 1.3.1 sl3::Lrnr_glm_fast with only main terms (+ tmle.intercept)
test_that("fit TMLE estimator for binary A with sl3, using sl3::Lrnr_glm_fast and main term formulae", {
require("sl3"); require("SuperLearner")
tmleCom_Options(Qestimator = "speedglm__glm", gestimator = "sl3_pipelines", maxNperBin = N, nbins = 5,
sl3_learner = list(glm_fast = sl3::make_learner(sl3::Lrnr_glm_fast)),
sl3_metalearner = sl3::make_learner(sl3::Lrnr_optim, loss_function = sl3::loss_loglik_binomial,
learner_function = sl3::metalearner_logistic_binomial))
tmleCom_res <-
tmleCommunity(data = indSample.iid.bA.cY, Ynode = "Y", Anodes = "A",
WEnodes = c("W1", "W2", "W3", "W4"), f_gstar1 = 1, f_gstar2 = 0, rndseed = 12345)
estimates <- tmleCom_res$ATE$estimates # psi0 = 2.80026
expect_equal(estimates["tmle", ], 2.799400, tolerance = 1e-6)
expect_equal(estimates["iptw", ], 2.820582, tolerance = 1e-6)
expect_equal(estimates["gcomp", ], 2.779068, tolerance = 1e-6)
})
#***************************************
# Test 1.4 h2o
#***************************************
## Test 1.4.1 h2o.glm.wrapper with only main terms (+ tmle.intercept)
test_that("fit TMLE estimator for binary A with h2o, using h2o.glm.wrapper and main term formulae", {
# require("h2oEnsemble")
tmleCom_Options(Qestimator = "h2o__ensemble", gestimator = "h2o__ensemble", maxNperBin = N,
h2olearner = c("h2o.glm.wrapper"), nbins = 5)
tmleCom_res <-
tmleCommunity(data = indSample.iid.bA.cY, Ynode = "Y", Anodes = "A",
WEnodes = c("W1", "W2", "W3", "W4"), f_gstar1 = 1, f_gstar2 = 0, rndseed = 12345)
estimates <- tmleCom_res$ATE$estimates # psi0 = 2.80026
expect_equal(estimates["tmle", ], 2.800916, tolerance = 1e-6)
expect_equal(estimates["iptw", ], 2.875755, tolerance = 1e-6)
expect_equal(estimates["gcomp", ], 2.800763, tolerance = 1e-6)
h2o.removeAll()
})
# ---------------------------------------------------------------------------------
# TEST SET 2. TESTS FOR FITTING CONTINUOUS EXPOSURE A IN IID DATA
# ---------------------------------------------------------------------------------
`%+%` <- function(a, b) paste0(a, b)
data("indSample.iid.cA.cY_list", package = "tmleCommunity")
indSample.iid.cA.cY <- indSample.iid.cA.cY_list$indSample.iid.cA.cY
N <- nrow(indSample.iid.cA.cY)
psi0.Y <- indSample.iid.cA.cY_list$psi0.Y
psi0.Ygstar <- indSample.iid.cA.cY_list$psi0.Ygstar
define_f.gstar <- function(shift.val, truncBD, rndseed = NULL) {
shift.const <- shift.val
trunc.const <- truncBD
f.gstar <- function(data, ...) {
print("shift.const: " %+% shift.const)
set.seed(rndseed)
A.mu <- 0.86 * data[,"W1"] + 0.41 * data[,"W2"] - 0.34 * data[,"W3"] + 0.93 * data[,"W4"]
untrunc.A <- rnorm(n = nrow(data), mean = A.mu + shift.const, sd = 1)
r.new.A <- exp(0.8 * shift.const * (untrunc.A - A.mu - shift.const / 3))
trunc.A <- ifelse(r.new.A > trunc.const, untrunc.A - shift.const, untrunc.A)
return(trunc.A)
}
return(f.gstar)
}
f.gstar <- define_f.gstar(shift = indSample.iid.cA.cY_list$shift.val,
truncBD = indSample.iid.cA.cY_list$truncBD)
Qform.corr <- "Y ~ W1 + W2 + W3 + W4 + A" # correct Q
Qform.mis <- "Y ~ W3 + A" # incorrect Q
gform.corr <- "A ~ W1 + W2 + W3 + W4" # correct g
gform.mis <- "A ~ W3" # incorrect g
#***************************************
# Test 2.1 speedglm & glm
#***************************************
## Test 2.1.1 Correctly specified Qform & gform (+ tmle.intercept + equal.mass + 5 nbins)
test_that("fit TMLE for cont A with speedglm, with correctly specified Qform & gform (5 bins)", {
tmleCom_Options(Qestimator = "speedglm__glm", gestimator = "speedglm__glm", maxNperBin = N)
tmleCom_res <-
tmleCommunity(data = indSample.iid.cA.cY, Ynode = "Y", Anodes = "A",
WEnodes = c("W1", "W2", "W3", "W4"), f_gstar1 = f.gstar, rndseed = 12345,
Qform = Qform.corr, hform.g0 = gform.corr, hform.gstar = gform.corr)
estimates <- tmleCom_res$EY_gstar1$estimates # psi0 = 3.50856
expect_equal(estimates["tmle", ], 3.452077, tolerance = 0.005)
expect_equal(estimates["iptw", ], 3.435237, tolerance = 0.005)
expect_equal(estimates["gcomp", ], 3.453169, tolerance = 0.005)
expect_equal(tmleCom_res$EY_gstar2, NULL)
})
## Test 2.1.2 Misspecified gform (+ tmle.intercept + equal.mass + 5 nbins)
test_that("fit TMLE for cont A with speedglm, with misspecified gform (5 bins)", {
tmleCom_Options(maxNperBin = N)
tmleCom_res <-
tmleCommunity(data = indSample.iid.cA.cY, Ynode = "Y", Anodes = "A",
WEnodes = c("W1", "W2", "W3", "W4"), f_gstar1 = f.gstar, rndseed = 12345,
Qform = Qform.corr, hform.g0 = gform.mis, hform.gstar = gform.mis)
estimates <- tmleCom_res$EY_gstar1$estimates # psi0 = 3.50856
expect_equal(estimates["tmle", ], 3.453457, tolerance = 0.005)
expect_equal(estimates["iptw", ], 3.311372, tolerance = 0.005)
expect_equal(estimates["gcomp", ], 3.454157, tolerance = 0.005)
expect_lt(abs(estimates["tmle", ] - psi0.Ygstar), abs(estimates["iptw", ] - psi0.Ygstar))
})
## Test 2.1.3 Misspecified gform (+ tmle.intercept + equal.mass + 20 nbins)
test_that("fit TMLE for cont A with speedglm, with misspecified gform (20 bins)", {
tmleCom_Options(maxNperBin = N, nbins = 20)
tmleCom_res <-
tmleCommunity(data = indSample.iid.cA.cY, Ynode = "Y", Anodes = "A",
WEnodes = c("W1", "W2", "W3", "W4"), f_gstar1 = f.gstar, rndseed = 12345,
Qform = Qform.corr, hform.g0 = gform.mis, hform.gstar = gform.mis)
estimates <- tmleCom_res$EY_gstar1$estimates # psi0 = 3.50856
expect_equal(estimates["tmle", ], 3.450999, tolerance = 0.005)
expect_equal(estimates["iptw", ], 3.306703, tolerance = 0.005)
expect_equal(estimates["gcomp", ], 3.454069, tolerance = 0.005)
expect_lt(abs(estimates["tmle", ] - psi0.Ygstar), abs(estimates["iptw", ] - psi0.Ygstar))
})
#***************************************
## Test 2.2 SuperLearner
#***************************************
## Test 2.2.1 SL.glm, SL.step, SL.glm.interaction (+ tmle.intercept + equal.mass + 10 nbins)
test_that("fit TMLE for cont A with SuperLearner, using SL.glm, SL.bayesglm, SL.gam", {
require("SuperLearner")
tmleCom_Options(Qestimator = "SuperLearner", gestimator = "SuperLearner", maxNperBin = N,
SL.library = c("SL.glm", "SL.bayesglm", "SL.gam"), nbins = 10)
tmleCom_res <-
tmleCommunity(data = indSample.iid.cA.cY, Ynode = "Y", Anodes = "A",
WEnodes = c("W1", "W2", "W3", "W4"), f_gstar1 = f.gstar, rndseed = 12345)
estimates <- tmleCom_res$EY_gstar1$estimates # psi0 = 3.50856
expect_equal(estimates["tmle", ], 3.454295, tolerance = 0.005)
expect_equal(estimates["iptw", ], 3.447997, tolerance = 0.005)
expect_equal(estimates["gcomp", ], 3.453953, tolerance = 0.005)
})
#***************************************
## Test 2.2 sl3
#***************************************
## Test 2.3.1 sl3::Lrnr_glm_fast (+ tmle.intercept + equal.mass + 10 nbins)
test_that("fit TMLE for cont A with sl3, using sl3::Lrnr_glm_fast", {
require("sl3"); require("SuperLearner")
tmleCom_Options(Qestimator = "speedglm__glm", gestimator = "sl3_pipelines", maxNperBin = N, nbins = 10,
sl3_learner = list(glm_fast = sl3::make_learner(sl3::Lrnr_glm_fast)),
sl3_metalearner = sl3::make_learner(sl3::Lrnr_optim, loss_function = sl3::loss_loglik_binomial,
learner_function = sl3::metalearner_logistic_binomial))
tmleCom_res <-
tmleCommunity(data = indSample.iid.cA.cY, Ynode = "Y", Anodes = "A",
WEnodes = c("W1", "W2", "W3", "W4"), f_gstar1 = f.gstar, rndseed = 12345)
estimates <- tmleCom_res$EY_gstar1$estimates # psi0 = 3.50856
expect_equal(estimates["tmle", ], 3.458052, tolerance = 0.005)
expect_equal(estimates["iptw", ], 3.494268, tolerance = 0.005)
expect_equal(estimates["gcomp", ], 3.454461, tolerance = 0.005)
})
#***************************************
# Test 2.3 f_gstar1 = f_gstar2 = NULL
#***************************************
test_that("fit TMLE for cont A with speedglm, when f_gstar1 = f_gstar2 = NULL", {
tmleCom_Options(maxNperBin = N)
expect_message(
tmleCom_res <-
tmleCommunity(data = indSample.iid.cA.cY, Ynode = "Y", Anodes = "A", verbose = TRUE,
WEnodes = c("W1", "W2", "W3", "W4"), f_gstar1 = NULL, f_gstar2 = NULL,
Qform = Qform.corr, hform.g0 = gform.corr, hform.gstar = gform.corr),
regexp = "skip g0 & gstar fitting procedure and directly set h_gstar_h_gN = 1 for each observation")
estimates <- tmleCom_res$EY_gstar1$estimates
expect_equivalent(as.vector(estimates), rep(mean(indSample.iid.cA.cY$Y), 3))
expect_type(tmleCom_res$EY_gstar1$h.g0_GenericModel, "NULL")
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.