tests/testthat/test-bootstrap.R

# quick, initial unit testing to check broad implementation and detect if code breaks
# bootstrap functions still under development, so likely to change in the future

test_that("nonparametric() works as intended", {

  skip_on_cran()

  indices <- 1:100
  R <- 10

  expect_error(nonparametric(indices = indices, R = R, seed = "12"),
               "Argument 'seed' must either be NULL or an integer")
  expect_error(nonparametric(indices = indices, R = R, seed = TRUE),
               "Argument 'seed' must either be NULL or an integer")
  expect_error(nonparametric(indices = indices, R = R, seed = 1.3),
               "Argument 'seed' must either be NULL or an integer")

  set.seed(40)
  n1 <- nonparametric(indices = indices, R = R, replacement = TRUE, seed = NULL)
  n2 <- nonparametric(indices = indices, R = R, replacement = TRUE, seed = 40)
  expect_identical(n1, n2)
  expect_type(n1, "list")
  expect_identical(class(n1), "list")
  expect_identical(length(n1), 10L)
  listlengths <- sapply(X = n1, FUN = length)
  expect_identical(listlengths, rep(100L, times = 10))

  n3 <- nonparametric(indices = indices, R = R, replacement = FALSE, seed = NULL)
  expect_type(n3, "list")
  expect_identical(class(n3), "list")
  expect_identical(length(n3), 10L)
  listlengths <- sapply(X = n3, FUN = length)
  expect_identical(listlengths, rep(100L, times = 10))
  # without resampling, must be unique and with resample size = orig. size identical
  identicals <- sapply(X = n3, FUN = function(x) setequal(x, indices))
  expect_identical(identicals, rep(TRUE, times = 10))

})

test_that("count_indices() works as intended", {

  skip_on_cran()

  indices <- 1:10
  R <- 5
  n1 <- nonparametric(indices = indices, R = R, replacement = TRUE, seed = 10)
  # count_indices returns a list with two matrices ()
  c1 <- count_indices(resamples = n1, indices = indices) # all
  c2 <- count_indices(resamples = n1, indices = c(1, 3, 5)) # only subset
  # replacement FALSE
  n2 <- nonparametric(indices = indices, R = R, replacement = FALSE, seed = 10)
  c3 <- count_indices(resamples = n2, indices = indices) # all, should be 1 each

  expect_type(c1, "list")
  expect_type(c2, "list")
  expect_type(c3, "list")
  expect_identical(class(c1), "list")
  expect_identical(class(c2), "list")
  expect_identical(class(c3), "list")
  expect_length(c1, 2)
  expect_length(c2, 2)
  expect_length(c3, 2)
  expect_named(c1, c("count_all", "count_clean"))
  expect_named(c2, c("count_all", "count_clean"))
  expect_named(c3, c("count_all", "count_clean"))
  expect_type(c1$count_all, "integer")
  expect_type(c2$count_all, "integer")
  expect_type(c3$count_all, "integer")
  expect_identical(class(c1$count_all), c("matrix", "array"))
  expect_identical(class(c2$count_all), c("matrix", "array"))
  expect_identical(class(c3$count_all), c("matrix", "array"))
  expect_type(c1$count_clean, "integer")
  expect_type(c2$count_clean, "integer")
  expect_type(c3$count_clean, "integer")
  expect_identical(class(c1$count_clean), c("matrix", "array"))
  expect_identical(class(c2$count_clean), c("matrix", "array"))
  expect_identical(class(c3$count_clean), c("matrix", "array"))
  expect_identical(dim(c1$count_all), c(as.integer(R), length(indices)))
  expect_identical(dim(c2$count_all), c(as.integer(R), as.integer(max(c(1, 3, 5)))))
  expect_identical(dim(c3$count_all), c(as.integer(R), length(indices)))
  expect_identical(dim(c1$count_clean), c(as.integer(R), length(indices)))
  expect_identical(dim(c2$count_clean), c(as.integer(R), length(c(1, 3, 5))))
  expect_identical(dim(c3$count_clean), c(as.integer(R), length(indices)))

  # for no replacement, expect only ones
  matnorepl <- matrix(1L, nrow = R, ncol = length(indices))
  colnames(matnorepl) <- paste("o", 1:10, sep = "")
  expect_identical(c3$count_all, matnorepl)
  expect_identical(c3$count_clean, matnorepl)

  # snapshot to detect changes of input
  expect_snapshot_output(n1)
  expect_snapshot_output(n2)
  expect_snapshot_output(c1)
  expect_snapshot_output(c2)
  expect_snapshot_output(c3)

})

test_that("nonparametric_resampling() works correctly", {

  skip_on_cran()

  data <- mtcars
  resamples <- nonparametric(indices = 1:NROW(data), R = 5, replacement = TRUE,
                             seed = 1)
  # use these resamples of indices to actually get resample of mtcars
  df1 <- nonparametric_resampling(df = data, resample = resamples[[1]])

  expect_snapshot_output(df1)

  # do manual check of two observations
  # index 1 was drawn twice, this is Mazda RX4
  expect_true("Mazda RX4" %in% rownames(df1))
  expect_true("Mazda RX4.1" %in% rownames(df1))
  expect_true(!("Mazda RX4.2" %in% rownames(df1)))
  # index 3 was not drawn at all, this is Datsun 710
  expect_true(!("Datsun 710" %in% rownames(df1)))

})

test_that("case_resampling() works correctly", {

  skip_on_cran()

  library(future)
  library(parallel)
  library(doFuture)
  library(foreach)

  # setup
  p <- generate_param(3, 2, 3, sigma = 2, intercept = TRUE, seed = 42)
  d <- generate_data(parameters = p, n = 1000)$data
  r <- outlier_detection(data = d, formula = p$setting$formula,
                         ref_dist = "normal", sign_level = 0.05,
                         initial_est = "robustified", iterations = 3)

  set.seed(10)
  cr1 <- case_resampling(robust2sls_object = r, R = 10)
  # only one iteration
  cr2 <- case_resampling(robust2sls_object = r, R = 10, m = 1)
  # only one coefficient by number
  cr3 <- case_resampling(robust2sls_object = r, R = 10, coef = 1, m = 1)
  # only one coefficient by name
  cr4 <- case_resampling(robust2sls_object = r, R = 10, coef = "x2", m = 1)

  # same ones but in parallel
  set.seed(10)
  ncores <- min(max(parallel::detectCores() - 1, 1), 2)
  doFuture::registerDoFuture()
  cl <- parallel::makeCluster(ncores)
  parallel::clusterCall(cl = cl, function(x) .libPaths(x), .libPaths())
  future::plan(future::cluster, workers = cl)
  cr11 <- case_resampling(robust2sls_object = r, R = 10, parallel = TRUE)
  # only one iteration
  cr21 <- case_resampling(robust2sls_object = r, R = 10, m = 1, parallel = TRUE)
  # only one coefficient by number
  cr31 <- case_resampling(robust2sls_object = r, R = 10, coef = 1, m = 1,
                          parallel = TRUE)
  # only one coefficient by name
  cr41 <- case_resampling(robust2sls_object = r, R = 10, coef = "x2", m = 1,
                          parallel = TRUE)
  stopCluster(cl)
  future::plan(future::sequential)

  # first, ensure that same results whether parallel or not
  expect_identical(cr1, cr11)
  expect_identical(cr2, cr21)
  expect_identical(cr3, cr31)
  expect_identical(cr4, cr41)

  # check output structure
  expect_type(cr1, "list")
  expect_type(cr2, "list")
  expect_type(cr3, "list")
  expect_type(cr4, "list")
  expect_identical(class(cr1), "r2sls_boot")
  expect_identical(class(cr2), "r2sls_boot")
  expect_identical(class(cr3), "r2sls_boot")
  expect_identical(class(cr4), "r2sls_boot")
  expect_length(cr1, 3)
  expect_length(cr2, 3)
  expect_length(cr3, 3)
  expect_length(cr4, 3)
  expect_named(cr1, c("boot", "resamples", "original"))
  expect_named(cr2, c("boot", "resamples", "original"))
  expect_named(cr3, c("boot", "resamples", "original"))
  expect_named(cr4, c("boot", "resamples", "original"))
  expect_identical(class(cr1$boot), "data.frame")
  expect_identical(class(cr2$boot), "data.frame")
  expect_identical(class(cr3$boot), "data.frame")
  expect_identical(class(cr4$boot), "data.frame")
  expect_identical(NCOL(cr1$boot), 9L)
  expect_identical(NCOL(cr2$boot), 9L)
  expect_identical(NCOL(cr3$boot), 4L)
  expect_identical(NCOL(cr4$boot), 4L)
  expect_identical(NROW(cr1$boot), 44L)
  expect_identical(NROW(cr2$boot), 11L)
  expect_identical(NROW(cr3$boot), 11L)
  expect_identical(NROW(cr4$boot), 11L)
  expect_identical(colnames(cr1$boot), c("X.Intercept.", "x1", "x2", "x3", "x4", "x5", "m", "gauge", "r"))
  expect_identical(colnames(cr2$boot), c("X.Intercept.", "x1", "x2", "x3", "x4", "x5", "m", "gauge", "r"))
  expect_identical(colnames(cr3$boot), c("X.Intercept.", "m", "gauge", "r"))
  expect_identical(colnames(cr4$boot), c("x2", "m", "gauge", "r"))
  expect_type(cr1$resamples, "list")
  expect_type(cr2$resamples, "list")
  expect_type(cr3$resamples, "list")
  expect_type(cr4$resamples, "list")
  expect_identical(class(cr1$resamples), "list")
  expect_identical(class(cr2$resamples), "list")
  expect_identical(class(cr3$resamples), "list")
  expect_identical(class(cr4$resamples), "list")
  expect_length(cr1$resamples, 10L)
  expect_length(cr2$resamples, 10L)
  expect_length(cr3$resamples, 10L)
  expect_length(cr4$resamples, 10L)
  expect_identical(class(cr1$original), "robust2sls")
  expect_identical(class(cr2$original), "robust2sls")
  expect_identical(class(cr3$original), "robust2sls")
  expect_identical(class(cr4$original), "robust2sls")
  # $original saves original obj, so should be same across cr1, cr2, cr3, cr4
  expect_identical(cr1$original, cr2$original)
  expect_identical(cr1$original, cr3$original)
  expect_identical(cr1$original, cr4$original)

  # save as snapshot
  expect_snapshot_output(cr1)
  expect_snapshot_output(cr2)
  expect_snapshot_output(cr3)
  expect_snapshot_output(cr4)

  # need to check saturated and convergence (both parallel and not)
  set.seed(10)
  r <- outlier_detection(data = d, formula = p$setting$formula,
                         ref_dist = "normal", sign_level = 0.05,
                         initial_est = "saturated", iterations = "convergence",
                         convergence_criterion = 0.5, split = 0.5)
  cr5 <- case_resampling(robust2sls_object = r, R = 10, m = "convergence")
  ncores <- min(max(parallel::detectCores() - 1, 1), 2)
  doFuture::registerDoFuture()
  future::plan(future::cluster, workers = ncores)
  set.seed(10)
  cr6 <- case_resampling(robust2sls_object = r, R = 10, m = "convergence",
                         parallel = TRUE)
  future::plan(future::sequential)

  expect_snapshot_output(cr5)
  expect_snapshot_output(cr6)
  expect_identical(cr5, cr6)

})

test_that("extract_boot() works correctly", {

  skip_on_cran()

  p <- generate_param(3, 2, 3, sigma = 2, intercept = TRUE, seed = 42)
  d <- generate_data(parameters = p, n = 1000)$data
  r1 <- outlier_detection(data = d, formula = p$setting$formula,
                         ref_dist = "normal", sign_level = 0.05,
                         initial_est = "robustified", iterations = 3)
  r2 <- outlier_detection(data = d, formula = p$setting$formula,
                          ref_dist = "normal", sign_level = 0.05,
                          initial_est = "robustified",
                          iterations = "convergence", convergence_criterion = 1)

  R <- 10
  set.seed(10)
  cr1 <- case_resampling(robust2sls_object = r1, R = R)
  set.seed(10)
  cr2 <- case_resampling(robust2sls_object = r2, R = R, m = "convergence")

  ex1 <- extract_boot(r2sls_boot = cr1, iteration = 0)
  ex2 <- extract_boot(r2sls_boot = cr1, iteration = 1)
  ex3 <- extract_boot(r2sls_boot = cr2, iteration = "convergence")

  expect_identical(class(ex1), "data.frame")
  expect_identical(class(ex2), "data.frame")
  expect_identical(class(ex3), "data.frame")
  expect_type(ex1, "list")
  expect_type(ex2, "list")
  expect_type(ex3, "list")
  expect_identical(NROW(ex1), as.integer(R+1))
  expect_identical(NROW(ex2), as.integer(R+1))
  expect_identical(NROW(ex3), as.integer(R+1))
  expect_identical(NCOL(ex1), 9L)
  expect_identical(NCOL(ex2), 9L)
  expect_identical(NCOL(ex3), 9L)
  expect_identical(ex1$m, rep("m0", times = (R+1)))
  expect_identical(ex2$m, rep("m1", times = (R+1)))
  # for convergence, the recorded iteration m can vary across resamples
  expect_identical(ex1$r, as.double(0:R))
  expect_identical(ex2$r, as.double(0:R))
  expect_identical(ex3$r, as.double(0:R))

  expect_snapshot_output(ex1)
  expect_snapshot_output(ex2)
  expect_snapshot_output(ex3)

})

test_that("evaluate_boot() works correctly", {

  skip_on_cran()

  p <- generate_param(3, 2, 3, sigma = 2, intercept = TRUE, seed = 42)
  d <- generate_data(parameters = p, n = 1000)$data
  r1 <- outlier_detection(data = d, formula = p$setting$formula,
                          ref_dist = "normal", sign_level = 0.05,
                          initial_est = "robustified", iterations = 3)
  r2 <- outlier_detection(data = d, formula = p$setting$formula,
                          ref_dist = "normal", sign_level = 0.05,
                          initial_est = "robustified",
                          iterations = "convergence", convergence_criterion = 1)

  R <- 10
  set.seed(10)
  cr1 <- case_resampling(robust2sls_object = r1, R = R)

  ev0 <- evaluate_boot(r2sls_boot = cr1, iterations = 0)
  ev1 <- evaluate_boot(r2sls_boot = cr1, iterations = 1)
  ev2 <- evaluate_boot(r2sls_boot = cr1, iterations = 2)
  ev3 <- evaluate_boot(r2sls_boot = cr1, iterations = 3)
  ev4 <- evaluate_boot(r2sls_boot = cr1, iterations = 0:3)

  expect_identical(class(ev0), "data.frame")
  expect_identical(class(ev1), "data.frame")
  expect_identical(class(ev2), "data.frame")
  expect_identical(class(ev3), "data.frame")
  expect_identical(class(ev4), "data.frame")
  expect_identical(NROW(ev0), 1L)
  expect_identical(NROW(ev1), 1L)
  expect_identical(NROW(ev2), 1L)
  expect_identical(NROW(ev3), 1L)
  expect_identical(NROW(ev4), 4L)
  expect_identical(NCOL(ev0), 9L)
  expect_identical(NCOL(ev1), 9L)
  expect_identical(NCOL(ev2), 9L)
  expect_identical(NCOL(ev3), 9L)
  expect_identical(NCOL(ev4), 9L)
  expect_identical(ev0$m, 0)
  expect_identical(ev1$m, 1)
  expect_identical(ev2$m, 2)
  expect_identical(ev3$m, 3)
  expect_identical(ev4$m, c(0L, 1L, 2L, 3L))
  binding <- rbind(ev0, ev1, ev2, ev3)
  binding$m <- as.integer(binding$m)
  expect_identical(ev4, binding)

  expect_snapshot_output(ev0)
  expect_snapshot_output(ev1)
  expect_snapshot_output(ev2)
  expect_snapshot_output(ev3)
  expect_snapshot_output(ev4)

  # try saturated
  r2 <- outlier_detection(data = d, formula = p$setting$formula,
                          ref_dist = "normal", sign_level = 0.05,
                          initial_est = "saturated", iterations = 1)
  set.seed(10)
  cr2 <- case_resampling(robust2sls_object = r2, R = R)
  ev5 <- evaluate_boot(r2sls_boot = cr2, iterations = 1)

  expect_identical(class(ev5), "data.frame")
  expect_identical(NROW(ev5), 1L)
  expect_identical(NCOL(ev5), 9L)
  expect_identical(ev5$m, 1)
  expect_snapshot_output(ev5)


})

Try the robust2sls package in your browser

Any scripts or data that you put into this service are public.

robust2sls documentation built on Jan. 11, 2023, 5:13 p.m.