tests/testthat/test-bootstrap.R

# load required functions and packages
library("testthat")
suppressWarnings(library("SuperLearner"))

# generate the data -- note that this is a simple setting, for speed -----------
set.seed(4747)
p <- 2
n <- 5e4
x <- as.data.frame(replicate(p, stats::rnorm(n, 0, 1)))
# apply the function to the covariates
y <- 1 + 0.5 * x[, 1] + 0.75 * x[, 2] + stats::rnorm(n, 0, 1)
true_var <- 1 + .5 ^ 2 + .75 ^ 2
# note that true difference in R-squareds for variable j, under independence, is
# beta_j^2 * var(x_j) / var(y)
r2_one <- 0.5 ^ 2 * 1 / true_var
r2_two <- 0.75 ^ 2 * 1 / true_var

# set up a library for SuperLearner
learners <- c("SL.glm")
V <- 2

# test the bootstrap -----------------------------------------------------------
set.seed(1234)
test_that("Bootstrap without cross-fitting works", {
  # a small number of bootstraps, for speed
  expect_warning(
    est <- vim(Y = y, X = x, run_regression = TRUE, SL.library = learners,
               cvControl = list(V = V), indx = 1, type = "r_squared",
               sample_splitting = FALSE, env = environment(),
               bootstrap = TRUE, b = 10, boot_interval_type = "perc")
  )
  expect_true(est$ci[1] <= r2_one & est$ci[2] >= r2_one)
})

cluster_id <- 1:length(y)
set.seed(0714)
test_that("Clustered bootstrap without cross-fitting produces similar results
          to unclustered.", {
  # a small number of bootstraps, for speed
  expect_warning(
    est <- vim(Y = y, X = x, run_regression = TRUE, SL.library = learners,
               cvControl = list(V = V), indx = 1, type = "r_squared",
               sample_splitting = FALSE, env = environment(),
               bootstrap = TRUE, b = 10, boot_interval_type = "perc",
               clustered = TRUE, cluster_id = cluster_id)
  )
  expect_true(est$ci[1] <= r2_one & est$ci[2] >= r2_one)
})
set.seed(4747)
test_that("Bootstrap with cross-fitting works", {
  # small number of boostraps, for speed
  expect_warning(
    est <- cv_vim(Y = y, X = x, run_regression = TRUE, SL.library = learners,
                  cvControl = list(V = V), indx = 1, V = V, type = "r_squared",
                  sample_splitting = FALSE, env = environment(),
                  bootstrap = TRUE, b = 10, cross_fitted_se = FALSE,
                  boot_interval_type = "perc")
  )
  # check that the estimate is nearly correct
  expect_true(est$ci[1] <= r2_one & est$ci[2] >= r2_one)
})
set.seed(5949)
test_that("Clustered bootstrap with cross-fitting produces similar
          estimates to unclustered", {
  # small number of boostraps, for speed
  expect_warning(
    est <- cv_vim(Y = y, X = x, run_regression = TRUE, SL.library = learners,
                  cvControl = list(V = V), indx = 1, V = V, type = "r_squared",
                  sample_splitting = FALSE, env = environment(),
                  bootstrap = TRUE, b = 10, cross_fitted_se = FALSE,
                  boot_interval_type = "perc", clustered = TRUE,
                  cluster_id = cluster_id)
  )
  # check that the estimate is nearly correct
  expect_true(est$ci[1] <= r2_one & est$ci[2] >= r2_one)
})
bdwilliamson/npvi documentation built on Feb. 1, 2024, 10:46 p.m.