tests/testthat/test-bootci.R

n <- 1000
mu <- 10
sigma <- 1

set.seed(888)
rand_nums <- rnorm(n, mu, sigma)
dat <- data.frame(x = rand_nums)

set.seed(456765)
bt_norm <-
  bootstraps(dat, times = 1000, apparent = TRUE) %>%
  dplyr::mutate(
    stats = purrr::map(splits, ~ get_stats(.x))
  )

test_that("Bootstrap estimate of mean is close to estimate of mean from normal distribution", {
  skip_if_not_installed("broom")
  skip_on_cran()
  ttest <- broom::tidy(t.test(rand_nums))
  ttest_lower_conf <- broom::tidy(t.test(rand_nums, conf.level = 0.8))
  single_pct_res <- int_pctl(bt_norm, stats)

  single_t_res <- int_t(bt_norm, stats)

  single_bca_res <- int_bca(bt_norm, stats, .fn = get_stats)

  single_bca_res_lower_conf <- int_bca(bt_norm, stats, .fn = get_stats, alpha = 0.2)

  expect_equal(ttest$conf.low,
               single_pct_res$.lower,
               tolerance = 0.001
  )
  expect_equal(unname(ttest$estimate),
               single_pct_res$.estimate,
               tolerance = 0.001
  )
  expect_equal(ttest$conf.high,
               single_pct_res$.upper,
               tolerance = 0.001
  )

  expect_equal(ttest$conf.low,
               single_t_res$.lower,
               tolerance = 0.001
  )
  expect_equal(unname(ttest$estimate),
               single_t_res$.estimate,
               tolerance = 0.001
  )
  expect_equal(ttest$conf.high,
               single_pct_res$.upper,
               tolerance = 0.001
  )

  expect_equal(ttest$conf.low,
               single_bca_res$.lower,
               tolerance = 0.001
  )
  expect_equal(unname(ttest$estimate),
               single_bca_res$.estimate,
               tolerance = 0.001
  )
  expect_equal(ttest$conf.high,
               single_bca_res$.upper,
               tolerance = 0.001
  )

  expect_equal(ttest_lower_conf$conf.low,
               single_bca_res_lower_conf$.lower,
               tolerance = 0.001
  )
  expect_equal(unname(ttest_lower_conf$estimate),
               single_bca_res_lower_conf$.estimate,
               tolerance = 0.001
  )
  expect_equal(ttest_lower_conf$conf.high,
               single_bca_res_lower_conf$.upper,
               tolerance = 0.001
  )
})

# ------------------------------------------------------------------------------

test_that("Wrappers -- selection of multiple variables works", {
  skip_if_not_installed("broom")
  skip_if_not_installed("modeldata")
  data("attrition", package = "modeldata")
  func <- function(split, ...) {
    lm(Age ~ HourlyRate + DistanceFromHome, data = analysis(split)) %>% tidy()
  }

  # generate boostrap resamples
  set.seed(888)
  bt_resamples <- bootstraps(attrition, times = 1000, apparent = TRUE) %>%
    mutate(res = purrr::map(splits, func))

  attrit_tidy <-
    lm(Age ~ HourlyRate + DistanceFromHome, data = attrition) %>%
    tidy(conf.int = TRUE) %>%
    dplyr::arrange(term)

  pct_res <-
    int_pctl(bt_resamples, res) %>%
    inner_join(attrit_tidy, by = "term")
  expect_equal(pct_res$conf.low, pct_res$.lower, tolerance = .01)
  expect_equal(pct_res$conf.high, pct_res$.upper, tolerance = .01)


  t_res <-
    int_t(bt_resamples, res) %>%
    inner_join(attrit_tidy, by = "term")
  expect_equal(t_res$conf.low, t_res$.lower, tolerance = .01)
  expect_equal(t_res$conf.high, t_res$.upper, tolerance = .01)


  bca_res <-
    int_bca(bt_resamples, res, .fn = func) %>%
    inner_join(attrit_tidy, by = "term")
  expect_equal(bca_res$conf.low, bca_res$.lower, tolerance = .01)
  expect_equal(bca_res$conf.high, bca_res$.upper, tolerance = .01)
})

# ------------------------------------------------------------------------------

test_that("Upper & lower confidence interval does not contain NA", {
  bad_stats <- function(split, ...) {
    tibble(
      term = "mean",
      estimate = NA_real_,
      std.error = runif(1)
    )
  }

  set.seed(888)
  bt_resamples <- bootstraps(data.frame(x = 1:100), times = 1000, apparent = TRUE) %>%
    mutate(res = purrr::map(splits, bad_stats))

  expect_snapshot(
    int_pctl(bt_resamples, res),
    error = TRUE
  )

  expect_snapshot(
    int_t(bt_resamples, res),
    error = TRUE
  )

  expect_snapshot(
    int_bca(bt_resamples, res, .fn = bad_stats),
    error = TRUE
  )
})

# ------------------------------------------------------------------------------

set.seed(456765)
bt_small <-
  bootstraps(dat, times = 10, apparent = TRUE) %>%
  dplyr::mutate(
    stats = purrr::map(splits, ~ get_stats(.x)),
    junk = 1:11
  )

test_that(
  "Sufficient replications needed to sufficiently reduce Monte Carlo sampling Error for BCa method",
  {
    expect_warning(int_pctl(bt_small, stats))
    expect_warning(int_t(bt_small, stats))
    expect_warning(int_bca(bt_small, stats, .fn = get_stats))
  }
)


test_that("bad input", {
  expect_error(int_pctl(bt_small, id))
  expect_error(int_pctl(bt_small, junk))
  expect_error(int_pctl(bt_small, stats, alpha = c(0.05, 0.2)))
  expect_error(int_t(bt_small, stats, alpha = "potato"))
  expect_error(int_bca(bt_small, stats, alpha = 1:2, .fn = get_stats))
  expect_error(int_pctl(vfold_cv(mtcars)))
  expect_error(int_t(vfold_cv(mtcars)))
  expect_error(int_bca(vfold_cv(mtcars)))

  bad_bt_norm <-
    bt_norm %>%
    mutate(stats = purrr::map(stats, ~ .x[, 1:2]))
  expect_error(int_t(bad_bt_norm, stats))

  expect_error(int_bca(bad_bt_norm, stats))

  no_dots <- function(split) {
    dat <- analysis(split)
    x <- dat[[1]]
    tibble(
      term = "mean",
      estimate = mean(x, na.rm = TRUE),
      std.error = sqrt(var(x, na.rm = TRUE) / sum(!is.na(x)))
    )
  }
  expect_error(
    int_bca(bt_norm, stats, .fn = no_dots),
    "must have an argument"
  )

  expect_error(int_pctl(as.data.frame(bt_norm), stats))
  expect_error(int_t(as.data.frame(bt_norm), stats))
  expect_error(int_bca(as.data.frame(bt_norm), stats, .fn = get_stats))

  expect_error(
    int_t(bt_norm %>% dplyr::filter(id != "Apparent"), stats)
  )
  expect_error(
    int_bca(bt_norm %>% dplyr::filter(id != "Apparent"), stats, .fn = get_stats)
  )

  poo <- function(x) {
    x$estimate <- "a"
    x
  }
  badder_bt_norm <-
    bt_norm %>%
    mutate(
      bad_term = purrr::map(stats, ~ .x %>% setNames(c("a", "estimate", "std.err"))),
      bad_est = purrr::map(stats, ~ .x %>% setNames(c("term", "b", "std.err"))),
      bad_err = purrr::map(stats, ~ .x %>% setNames(c("term", "estimate", "c"))),
      bad_num = purrr::map(stats, ~ poo(.x))
    )
  expect_error(int_pctl(badder_bt_norm, bad_term))
  expect_error(int_t(badder_bt_norm, bad_err))
  expect_error(int_bca(badder_bt_norm, bad_est, .fn = get_stats))
  expect_error(int_pctl(badder_bt_norm, bad_num))
})


# ------------------------------------------------------------------------------

test_that("regression intervals", {
  skip_if_not_installed("broom")
  skip_on_cran()

  expect_error(
    {
      set.seed(1)
      int_1 <- reg_intervals(mpg ~ disp + wt, data = mtcars)
    },
    regexp = NA
  )

  expect_equal(
    names(int_1),
    c("term", ".lower", ".estimate", ".upper", ".alpha", ".method")
  )

  expect_snapshot({
    skip_if(new_rng_snapshots)
    set.seed(123)
    int_2 <- reg_intervals(
      mpg ~ disp + wt,
      data = mtcars,
      filter = term == "wt",
      model_fn = "glm",
      keep_reps = TRUE
    )
    int_2
  })

  expect_equal(
    names(int_2),
    c("term", ".lower", ".estimate", ".upper", ".alpha", ".method", ".replicates")
  )
  expect_true(nrow(int_2) == 1)
  expect_true(all(int_2$term == "wt"))


  expect_error(
    reg_intervals(mpg ~ disp + wt, data = mtcars, model_fn = "potato"),
    "`model_fn` must be one of"
  )
  expect_error(
    reg_intervals(mpg ~ disp + wt, data = mtcars, type = "random"),
    "`type` must be one of"
  )
  expect_error(
    reg_intervals(mpg ~ disp + wt, data = mtcars, alpha = "a"),
    "must be a single numeric value"
  )
})

Try the rsample package in your browser

Any scripts or data that you put into this service are public.

rsample documentation built on Aug. 23, 2023, 5:08 p.m.