# varsel() ----------------------------------------------------------------
context("varsel()")
test_that(paste(
  "`object` of class `refmodel` and arguments `method`, `nterms_max`,",
  "`nclusters`, and `nclusters_pred` work"
), {
  skip_if_not(run_vs)
  # Check every pre-computed varsel() result against the expectations derived
  # from the arguments it was created with:
  for (tstsetup in names(vss)) {
    args_vs_i <- args_vs[[tstsetup]]
    # Fall back on the default search method if none was requested:
    meth_exp_crr <- args_vs_i$method %||% "forward"
    vsel_tester(
      vss[[tstsetup]],
      refmod_expected = refmods[[args_vs_i$tstsetup_ref]],
      prd_trms_len_expected = args_vs_i$nterms_max,
      method_expected = meth_exp_crr,
      search_terms_expected = args_vs_i$search_terms,
      # The "empty size" setting is active iff `search_terms` is nonempty and
      # all of its elements contain a `+` (so some model size is skipped):
      search_trms_empty_size =
        length(args_vs_i$search_terms) &&
          all(grepl("\\+", args_vs_i$search_terms)),
      search_control_expected = args_vs_i[c("avoid.increase")],
      info_str = tstsetup
    )
  }
})
test_that("invalid `object` fails", {
  # varsel() is an S3 generic, so a bare numeric vector must fail with R's
  # standard "no applicable method" dispatch error:
  invalid_obj <- rnorm(5)
  expect_error(
    varsel(invalid_obj, verbose = FALSE),
    "no applicable method"
  )
})
test_that("invalid `method` fails", {
  for (tstsetup in names(refmods)) {
    refmod_i <- refmods[[tstsetup]]
    mod_nm_i <- args_ref[[tstsetup]]$mod_nm
    prj_nm_i <- args_ref[[tstsetup]]$prj_nm
    # An unrecognized `method` is always rejected:
    expect_error(
      varsel(refmod_i, method = "k-fold"),
      "^Unexpected value for argument `method`\\.$",
      info = tstsetup
    )
    # L1 search is restricted to GLM-type reference models (no multilevel and
    # no additive terms):
    if (mod_nm_i != "glm") {
      expect_error(
        varsel(refmod_i, method = "L1"),
        paste("^L1 search is only supported for reference models",
              "without multilevel and without additive",
              "\\(\"smoothing\"\\) terms\\.$"),
        info = tstsetup
      )
    }
    # For GLMs, L1 search is still incompatible with the augmented-data
    # projection:
    if (mod_nm_i == "glm" && prj_nm_i == "augdat") {
      expect_error(
        varsel(refmod_i, method = "L1"),
        paste("^Currently, the augmented-data projection may not be",
              "combined with an L1 search\\.$"),
        info = tstsetup
      )
    }
  }
})
test_that("`seed` works (and restores the RNG state afterwards)", {
skip_if_not(run_vs)
# To save time:
tstsetups <- grep("\\.glm\\.gauss\\.", names(vss), value = TRUE)
for (tstsetup in tstsetups) {
args_vs_i <- args_vs[[tstsetup]]
vs_orig <- vss[[tstsetup]]
rand_orig <- runif(1) # Just to advance `.Random.seed[2]`.
# Snapshot the RNG state right before the varsel() re-run:
.Random.seed_repr1 <- .Random.seed
# Re-run varsel() with exactly the same arguments (including `seed`) that
# were used for the pre-computed `vss[[tstsetup]]`:
vs_repr <- do.call(varsel, c(
list(object = refmods[[args_vs_i$tstsetup_ref]]),
excl_nonargs(args_vs_i)
))
.Random.seed_repr2 <- .Random.seed
rand_new <- runif(1) # Just to advance `.Random.seed[2]`.
# Expected equality:
# Same `seed` --> reproducible results; and varsel() must leave the global
# RNG state untouched:
expect_equal(vs_repr, vs_orig, info = tstsetup)
expect_equal(.Random.seed_repr2, .Random.seed_repr1, info = tstsetup)
# Expected inequality:
# The two runif() draws start from different RNG states, so they must differ
# (guards against a degenerate RNG state):
expect_false(isTRUE(all.equal(rand_new, rand_orig)), info = tstsetup)
}
})
## d_test -----------------------------------------------------------------
test_that(paste(
"`d_test` set to the training data gives the same results as its default"
), {
skip_if_not(run_vs)
tstsetups <- names(vss)
for (tstsetup in tstsetups) {
args_vs_i <- args_vs[[tstsetup]]
tstsetup_ref <- args_vs_i$tstsetup_ref
pkg_crr <- args_vs_i$pkg_nm
fam_crr <- args_vs_i$fam_nm
prj_crr <- args_vs_i$prj_nm
# Reconstruct the training-data offsets and observation weights, depending
# on whether the reference model was fitted with them:
if (!all(refmods[[tstsetup_ref]]$offset == 0)) {
offs_crr <- offs_tst
} else {
offs_crr <- rep(0, nobsv)
}
if (!all(refmods[[tstsetup_ref]]$wobs == 1)) {
wobs_crr <- wobs_tst
} else {
wobs_crr <- rep(1, nobsv)
}
formul_fit_crr <- args_fit[[args_vs_i$tstsetup_fit]]$formula
dat_crr <- get_dat_formul(formul_crr = formul_fit_crr,
needs_adj = grepl("\\.spclformul", tstsetup))
# Manually construct a `d_test` object equal to the training data:
d_test_crr <- list(
data = dat,
offset = offs_crr,
weights = wobs_crr,
y = dat_crr[[stdize_lhs(formul_fit_crr)$y_nm]]
)
y_oscale_crr <- d_test_crr$y
# For latent and augmented-data projections, the response on the original
# scale needs to be a factor with the appropriate levels:
if (prj_crr %in% c("latent", "augdat")) {
if (use_fac) {
yunqs <- yunq_chr
} else {
yunqs <- as.character(yunq_num)
}
if (!(prj_crr == "latent" && fam_crr == "brnll")) {
lvls_crr <- args_ref[[args_vs_i$tstsetup_ref]]$augdat_y_unqs
lvls_crr <- lvls_crr %||%
args_ref[[args_vs_i$tstsetup_ref]]$latent_y_unqs
lvls_crr <- lvls_crr %||% yunqs
y_oscale_crr <- factor(as.character(y_oscale_crr), levels = lvls_crr,
ordered = is.ordered(y_oscale_crr))
}
if (prj_crr == "augdat") {
d_test_crr$y <- y_oscale_crr
} else if (prj_crr == "latent") {
# For the latent projection, the test response is the posterior mean of
# the linear predictor:
d_test_crr$y <- colMeans(
unname(posterior_linpred(fits[[args_vs_i$tstsetup_fit]]))
)
}
}
d_test_crr$y_oscale <- y_oscale_crr
# Use suppressWarnings() because test_that() somehow redirects stderr() and
# so throws warnings that projpred wants to capture internally:
vs_repr <- suppressWarnings(do.call(varsel, c(
list(object = refmods[[tstsetup_ref]], d_test = d_test_crr),
excl_nonargs(args_vs_i)
)))
meth_exp_crr <- args_vs_i$method %||% "forward"
vsel_tester(
vs_repr,
refmod_expected = refmods[[tstsetup_ref]],
ywtest_expected = setNames(
as.data.frame(d_test_crr[nms_y_wobs_test(wobs_nm = "weights")]),
nms_y_wobs_test()
),
prd_trms_len_expected = args_vs_i$nterms_max,
method_expected = meth_exp_crr,
search_terms_expected = args_vs_i$search_terms,
search_trms_empty_size =
length(args_vs_i$search_terms) &&
all(grepl("\\+", args_vs_i$search_terms)),
search_control_expected = args_vs_i[c("avoid.increase")],
info_str = tstsetup
)
# Apart from the `d_test`-specific elements, the results must be identical
# to those obtained with the default `d_test`:
expect_equal(vs_repr[setdiff(names(vs_repr),
c("type_test", "y_wobs_test"))],
vss[[tstsetup]][setdiff(names(vss[[tstsetup]]),
c("type_test", "y_wobs_test"))],
info = tstsetup)
y_wobs_test_orig <- vss[[tstsetup]]$y_wobs_test
if (pkg_crr == "brms") {
# brms seems to set argument `contrasts`, but this is not important for
# projpred, so ignore it in the comparison:
attr(y_wobs_test_orig$y, "contrasts") <- NULL
attr(y_wobs_test_orig$y_oscale, "contrasts") <- NULL
}
expect_equal(vs_repr$y_wobs_test, y_wobs_test_orig, info = tstsetup)
}
})
test_that(paste(
"`d_test` set to actual test data gives a `<vsel_object>$summaries$sub`",
"object that can be reproduced by proj_linpred() and a",
"`<vsel_object>$summaries$ref` object that can be reproduced by",
"posterior_epred() and log_lik()"
), {
skip_if_not(run_vs)
# Save the global RNG state so it can be restored at the end of this test:
if (exists(".Random.seed", envir = .GlobalEnv)) {
rng_old <- get(".Random.seed", envir = .GlobalEnv)
}
tstsetups <- names(vss)
### TODO (GAMMs): Currently, the following test setups (can) lead to the error
### ```
### Error in t(as.matrix(b$reTrms$Zt[ii, ])) %*%
### as.matrix(c(as.matrix(ranef[[i]]))) :
### non-conformable arguments
### ```
### thrown by predict.gamm4(). This needs to be fixed. For now, exclude these
### test setups:
tstsetups <- grep("\\.gamm\\.", tstsetups, value = TRUE, invert = TRUE)
###
for (tstsetup in tstsetups) {
args_vs_i <- args_vs[[tstsetup]]
tstsetup_ref <- args_vs_i$tstsetup_ref
pkg_crr <- args_vs_i$pkg_nm
mod_crr <- args_vs_i$mod_nm
fam_crr <- args_vs_i$fam_nm
prj_crr <- args_vs_i$prj_nm
# Reconstruct the test-data offsets and observation weights, depending on
# whether the reference model was fitted with them:
if (!all(refmods[[tstsetup_ref]]$offset == 0)) {
offs_crr <- offs_indep
} else {
offs_crr <- rep(0, nobsv_indep)
}
if (!all(refmods[[tstsetup_ref]]$wobs == 1)) {
wobs_crr <- wobs_indep
} else {
wobs_crr <- rep(1, nobsv_indep)
}
formul_fit_crr <- args_fit[[args_vs_i$tstsetup_fit]]$formula
y_nm_crr <- stdize_lhs(formul_fit_crr)$y_nm
dat_indep_crr <- get_dat_formul(
formul_crr = formul_fit_crr,
needs_adj = grepl("\\.spclformul", tstsetup),
dat_crr = dat_indep
)
# Manually construct a `d_test` object for the independent test data:
d_test_crr <- list(
data = dat_indep,
offset = offs_crr,
weights = wobs_crr,
y = dat_indep_crr[[y_nm_crr]]
)
y_oscale_crr <- d_test_crr$y
if (prj_crr %in% c("latent", "augdat")) {
if (use_fac) {
yunqs <- yunq_chr
} else {
yunqs <- as.character(yunq_num)
}
if (!(prj_crr == "latent" && fam_crr == "brnll")) {
lvls_crr <- args_ref[[args_vs_i$tstsetup_ref]]$augdat_y_unqs
lvls_crr <- lvls_crr %||%
args_ref[[args_vs_i$tstsetup_ref]]$latent_y_unqs
lvls_crr <- lvls_crr %||% yunqs
y_oscale_crr <- factor(as.character(y_oscale_crr), levels = lvls_crr,
ordered = is.ordered(y_oscale_crr))
}
if (prj_crr == "augdat") {
d_test_crr$y <- y_oscale_crr
} else if (prj_crr == "latent") {
# For the latent projection, the test response is the posterior mean of
# the linear predictor at the test data:
if (pkg_crr == "rstanarm") {
post_linpred <- posterior_linpred(fits[[args_vs_i$tstsetup_fit]],
newdata = dat_indep,
offset = d_test_crr$offset)
} else {
post_linpred <- posterior_linpred(fits[[args_vs_i$tstsetup_fit]],
newdata = dat_indep)
}
d_test_crr$y <- colMeans(unname(post_linpred))
}
}
d_test_crr$y_oscale <- y_oscale_crr
# Use suppressWarnings() because test_that() somehow redirects stderr() and
# so throws warnings that projpred wants to capture internally:
vs_indep <- suppressWarnings(do.call(varsel, c(
list(object = refmods[[tstsetup_ref]], d_test = d_test_crr),
excl_nonargs(args_vs_i)
)))
meth_exp_crr <- args_vs_i$method %||% "forward"
vsel_tester(
vs_indep,
refmod_expected = refmods[[tstsetup_ref]],
ywtest_expected = setNames(
as.data.frame(d_test_crr[nms_y_wobs_test(wobs_nm = "weights")]),
nms_y_wobs_test()
),
prd_trms_len_expected = args_vs_i$nterms_max,
method_expected = meth_exp_crr,
search_terms_expected = args_vs_i$search_terms,
search_trms_empty_size =
length(args_vs_i$search_terms) &&
all(grepl("\\+", args_vs_i$search_terms)),
search_control_expected = args_vs_i[c("avoid.increase")],
info_str = tstsetup
)
### Summaries for the submodels -------------------------------------------
if (!(getOption("projpred.mlvl_pred_new", FALSE) &&
mod_crr %in% c("glmm", "gamm") &&
any(grepl("\\|", vs_indep$predictor_ranking)))) {
# In the negation of this case (i.e., multilevel models with option
# `projpred.mlvl_pred_new` being set to `TRUE`), proj_linpred() can't be
# used to calculate the reference model's performance statistics because
# proj_linpred()'s argument `.seed` cannot be set such that the
# .Random.seed from inside proj_linpred() at the place where the new
# group-level effects are drawn coincides with .Random.seed from inside
# varsel() at the place where the new group-level effects are drawn (not
# even `.seed = NA` with an appropriate preparation is possible).
# For getting the correct seed in proj_linpred():
set.seed(args_vs_i$seed)
p_sel_dummy <- get_refdist(refmods[[tstsetup_ref]],
nclusters = vs_indep$nprjdraws_search)
# Use suppressWarnings() because test_that() somehow redirects stderr()
# and so throws warnings that projpred wants to capture internally:
pl_indep <- suppressWarnings(proj_linpred(
vs_indep,
newdata = dat_indep_crr,
offsetnew = d_test_crr$offset,
weightsnew = d_test_crr$weights,
transform = TRUE,
integrated = TRUE,
.seed = NA,
nterms = c(0L, seq_along(vs_indep$predictor_ranking)),
nclusters = args_vs_i$nclusters_pred,
seed = NA
))
# Bring the proj_linpred() output into the shape of
# `<vsel_object>$summaries$sub`:
summ_sub_ch <- lapply(pl_indep, function(pl_indep_k) {
names(pl_indep_k)[names(pl_indep_k) == "pred"] <- "mu"
names(pl_indep_k)[names(pl_indep_k) == "lpd"] <- "lppd"
pl_indep_k$mu <- unname(drop(pl_indep_k$mu))
pl_indep_k$lppd <- drop(pl_indep_k$lppd)
if (!is.null(refmods[[tstsetup_ref]]$family$cats)) {
pl_indep_k$mu <- structure(
as.vector(pl_indep_k$mu),
ndiscrete = length(refmods[[tstsetup_ref]]$family$cats),
class = "augvec"
)
}
return(pl_indep_k)
})
if (prj_crr == "latent") {
# For getting the correct seed in proj_linpred():
set.seed(args_vs_i$seed)
p_sel_dummy <- get_refdist(refmods[[tstsetup_ref]],
nclusters = vs_indep$nprjdraws_search)
dat_indep_crr[[paste0(".", y_nm_crr)]] <- d_test_crr$y
expect_warning(
pl_indep_lat <- proj_linpred(
vs_indep,
newdata = dat_indep_crr,
offsetnew = d_test_crr$offset,
weightsnew = d_test_crr$weights,
transform = FALSE,
integrated = TRUE,
.seed = NA,
nterms = c(0L, seq_along(vs_indep$predictor_ranking)),
nclusters = args_vs_i$nclusters_pred,
seed = NA
),
get_warn_wrhs_orhs(tstsetup, weightsnew = d_test_crr$weights,
offsetnew = d_test_crr$offset),
info = tstsetup
)
summ_sub_ch_lat <- lapply(seq_along(pl_indep_lat), function(k_idx) {
pl_indep_k <- pl_indep_lat[[k_idx]]
names(pl_indep_k)[names(pl_indep_k) == "pred"] <- "mu"
names(pl_indep_k)[names(pl_indep_k) == "lpd"] <- "lppd"
pl_indep_k$mu <- unname(drop(pl_indep_k$mu))
pl_indep_k$lppd <- drop(pl_indep_k$lppd)
return(pl_indep_k)
})
# On latent scale, the original-scale summaries are nested in an
# `oscale` element:
summ_sub_ch <- lapply(seq_along(summ_sub_ch), function(k_idx) {
c(summ_sub_ch_lat[[k_idx]], list("oscale" = summ_sub_ch[[k_idx]]))
})
}
names(summ_sub_ch) <- NULL
expect_equal(vs_indep$summaries$sub, summ_sub_ch,
tolerance = .Machine$double.eps, info = tstsetup)
}
### Summaries for the reference model -------------------------------------
if (getOption("projpred.mlvl_pred_new", FALSE)) {
dat_indep_crr$z.1 <- as.factor(paste0("NEW_", dat_indep_crr$z.1))
}
if (pkg_crr == "rstanarm") {
mu_new <- rstantools::posterior_epred(refmods[[tstsetup_ref]]$fit,
newdata = dat_indep_crr,
offset = d_test_crr$offset)
if (fam_crr == "cumul") {
eta_new <- rstantools::posterior_linpred(refmods[[tstsetup_ref]]$fit,
newdata = dat_indep_crr,
offset = d_test_crr$offset)
# The following shows that in case of an rstanarm::stan_polr() fit,
# rstantools::posterior_epred() returns the linear predictors with a
# threshold of zero, transformed to response scale (which is not really
# helpful):
mu_new_ch <- augdat_ilink_cumul(
array(eta_new, dim = c(dim(eta_new), 1L)),
link = link_str
)
stopifnot(isTRUE(all.equal(unname(mu_new), mu_new_ch[, , 1],
tolerance = .Machine$double.eps)))
# Therefore, `mu_new` has to be adapted to incorporate the correct
# thresholds:
drws <- as.matrix(refmods[[tstsetup_ref]]$fit)
drws_thres <- drws[, grep("\\|", colnames(drws))]
mu_new <- apply(drws_thres, 2, function(thres_vec) {
thres_vec - eta_new
}, simplify = FALSE)
mu_new <- abind::abind(mu_new, rev.along = 0)
mu_new <- augdat_ilink_cumul(mu_new, link = link_str)
}
if (grepl("\\.without_wobs", tstsetup)) {
lppd_new <- rstantools::log_lik(refmods[[tstsetup_ref]]$fit,
newdata = dat_indep_crr,
offset = d_test_crr$offset)
} else {
# Currently, rstanarm issue #567 causes an error to be thrown when
# calling log_lik(). Therefore, use the following dummy which guarantees
# test success:
lppd_new <- matrix(vs_indep$summaries$ref$lppd,
nrow = nrefdraws, ncol = nobsv_indep, byrow = TRUE)
}
if (prj_crr == "latent") {
mu_new_lat <- rstantools::posterior_linpred(refmods[[tstsetup_ref]]$fit,
newdata = dat_indep_crr,
offset = d_test_crr$offset)
}
} else if (pkg_crr == "brms") {
# brms draws new group-level effects internally, so the RNG state must be
# prepared identically before each prediction call (mirroring what
# projpred does internally):
expr_seed <- expression({
set.seed(seed2_tst)
kfold_seed_dummy <- sample.int(.Machine$integer.max, 1)
refprd_seed_dummy <- sample.int(.Machine$integer.max, 1)
set.seed(refprd_seed_dummy)
})
eval(expr_seed)
mu_new <- rstantools::posterior_epred(refmods[[tstsetup_ref]]$fit,
newdata = dat_indep_crr,
allow_new_levels = TRUE,
sample_new_levels = "gaussian")
if (fam_crr == "binom") {
# Compared to rstanarm, brms uses a different convention for the
# binomial family: The values returned by posterior_epred() are not
# probabilities, but the expected values on the scale of the response
# (so the probabilities multiplied by the number of trials). Thus, we
# have to revert this here:
mu_new <- mu_new / matrix(wobs_indep, nrow = nrow(mu_new),
ncol = ncol(mu_new), byrow = TRUE)
}
eval(expr_seed)
lppd_new <- rstantools::log_lik(refmods[[tstsetup_ref]]$fit,
newdata = dat_indep_crr,
allow_new_levels = TRUE,
sample_new_levels = "gaussian")
if (prj_crr == "latent") {
eval(expr_seed)
mu_new_lat <- rstantools::posterior_linpred(
refmods[[tstsetup_ref]]$fit,
newdata = dat_indep_crr,
allow_new_levels = TRUE,
sample_new_levels = "gaussian"
)
}
}
if (length(dim(mu_new)) == 2) {
mu_new <- colMeans(mu_new)
} else if (length(dim(mu_new)) == 3) {
# In fact, we have `identical(colMeans(mu_new), apply(mu_new, c(2, 3),
# mean))` giving `TRUE`, but it's better to be explicit:
ncat_crr <- dim(mu_new)[3]
mu_new <- apply(mu_new, c(2, 3), mean)
mu_new <- structure(as.vector(mu_new), ndiscrete = ncat_crr,
class = "augvec")
} else {
stop("Unexpected number of margins for `mu_new`.")
}
summ_ref_ch <- list(
mu = unname(mu_new),
lppd = unname(apply(lppd_new, 2, log_sum_exp) - log(nrefdraws))
)
if (prj_crr == "augdat" && fam_crr %in% c("brnll", "binom")) {
summ_ref_ch$mu <- structure(c(1 - summ_ref_ch$mu, summ_ref_ch$mu),
ndiscrete = 2, class = "augvec")
}
if (prj_crr == "latent") {
y_lat_mat <- matrix(d_test_crr$y, nrow = nrefdraws, ncol = nobsv_indep,
byrow = TRUE)
lppd_new_lat <- dnorm(y_lat_mat, mean = mu_new_lat,
sd = refmods[[tstsetup_ref]]$dis, log = TRUE)
summ_ref_ch_lat <- list(
mu = unname(colMeans(mu_new_lat)),
lppd = unname(apply(lppd_new_lat, 2, log_sum_exp) - log(nrefdraws))
)
summ_ref_ch <- c(summ_ref_ch_lat, list("oscale" = summ_ref_ch))
}
expect_equal(vs_indep$summaries$ref, summ_ref_ch,
tolerance = 1e3 * .Machine$double.eps, info = tstsetup)
lppd_ref_ch2 <- unname(loo::elpd(lppd_new)$pointwise[, "elpd"])
if (prj_crr == "latent") {
lppd_ref_ch2_oscale <- lppd_ref_ch2
expect_equal(vs_indep$summaries$ref$oscale$lppd, lppd_ref_ch2_oscale,
tolerance = 1e1 * .Machine$double.eps, info = tstsetup)
lppd_ref_ch2 <- loo::elpd(lppd_new_lat)$pointwise[, "elpd"]
}
expect_equal(vs_indep$summaries$ref$lppd, lppd_ref_ch2,
tolerance = 1e2 * .Machine$double.eps, info = tstsetup)
}
if (exists("rng_old")) assign(".Random.seed", rng_old, envir = .GlobalEnv)
})
## refit_prj --------------------------------------------------------------
test_that("`refit_prj` works", {
  skip_if_not(run_vs)
  if (run_more) {
    tstsetups <- names(vss)
  } else {
    # To save time, only check a single GLM setup:
    tstsetups <- head(grep("\\.glm\\.", names(vss), value = TRUE), 1)
  }
  for (tstsetup in tstsetups) {
    args_vs_i <- args_vs[[tstsetup]]
    # Re-run varsel() with `refit_prj = FALSE` so that the submodel fits from
    # the search are re-used for the performance evaluation:
    args_vs_i$refit_prj <- FALSE
    # Use suppressWarnings() because test_that() somehow redirects stderr() and
    # so throws warnings that projpred wants to capture internally:
    vs_reuse <- suppressWarnings(do.call(varsel, c(
      list(object = refmods[[args_vs_i$tstsetup_ref]]),
      excl_nonargs(args_vs_i)
    )))
    meth_exp_crr <- args_vs_i$method %||% "forward"
    extra_tol_crr <- 1.1
    if (meth_exp_crr == "L1" &&
        any(grepl(":", ranking(vs_reuse)[["fulldata"]]))) {
      ### Testing for non-increasing element `ce` (for increasing model size)
      ### doesn't make sense if the ranking of predictors involved in
      ### interactions has been changed, so we choose a higher `extra_tol`:
      extra_tol_crr <- 1.2
      ###
    }
    vsel_tester(
      vs_reuse,
      refmod_expected = refmods[[args_vs_i$tstsetup_ref]],
      prd_trms_len_expected = args_vs_i$nterms_max,
      method_expected = meth_exp_crr,
      refit_prj_expected = FALSE,
      search_terms_expected = args_vs_i$search_terms,
      search_trms_empty_size =
        length(args_vs_i$search_terms) &&
          all(grepl("\\+", args_vs_i$search_terms)),
      search_control_expected = args_vs_i[c("avoid.increase")],
      extra_tol = extra_tol_crr,
      info_str = tstsetup
    )
  }
})
## Warning for full Gaussian multilevel models ----------------------------
if (run_more) {
test_that(paste(
"the warning for full Gaussian multilevel models is thrown correctly"
), {
skip_if_not(run_vs)
# Reset the option to its default (`NULL`) and keep the old value so it can
# be restored at the end of this test:
warn_instable_orig <- options(projpred.warn_instable_projections = NULL)
tstsetups <- names(vss)
# Only (latent) Gaussian setups with default `search_terms` are relevant:
pick_these <- sapply(tstsetups, function(tstsetup) {
refmod_i <- refmods[[args_vs[[tstsetup]]$tstsetup_ref]]
return(
(refmod_i$family$family == "gaussian" || refmod_i$family$for_latent) &&
is.null(args_vs[[tstsetup]]$search_terms)
)
})
tstsetups <- tstsetups[pick_these]
skip_if_not(length(tstsetups) > 0)
for (tstsetup in tstsetups) {
args_vs_i <- args_vs[[tstsetup]]
# The warning is only expected if the search reaches the full model (i.e.,
# `nterms_max` is at its maximum for this model class) and the formula
# contains group-level (`|`) terms:
if (identical(args_vs_i$nterms_max, unname(ntermss[args_vs_i$mod_nm])) &&
any(grepl("\\|", labels(terms(
args_fit[[args_vs_i$tstsetup_fit]]$formula
))))) {
warn_expected <- "the projection onto the full model can be instable"
} else {
# `NA` tells expect_warning() that no warning is expected:
warn_expected <- NA
}
args_vs_i$refit_prj <- FALSE
expect_warning(
vs_tmp <- do.call(varsel, c(
list(object = refmods[[args_vs_i$tstsetup_ref]]),
excl_nonargs(args_vs_i)
)),
warn_expected,
info = tstsetup
)
}
# Restore the original option value:
options(warn_instable_orig)
})
}
## Message when cutting off the search ------------------------------------
if (run_more) {
test_that("the message when cutting off the search is thrown correctly", {
skip_if_not(run_vs)
skip_if_not_installed("rstanarm")
# Reset the option to its default (`NULL`) and keep the old value so it can
# be restored at the end of this test:
mssg_cut_search_orig <- options(projpred.mssg_cut_search = NULL)
dat_gauss <- data.frame(y = df_gaussian$y, df_gaussian$x)
# The cutoff message requires at least 20 predictors, so make sure the data
# actually provides them:
stopifnot(sum(grepl("^X", names(dat_gauss))) > 19)
# A fit with more than 19 predictors (search gets cut off by default) ...
fit_exceed <- suppressWarnings(rstanarm::stan_glm(
y ~ ., family = gaussian(), data = dat_gauss, QR = TRUE, chains = 1,
iter = 500, refresh = 0, seed = 9876
))
# ... and one with exactly 19 predictors (no cutoff):
fit_not_exceed <- suppressWarnings(rstanarm::stan_glm(
y ~ . - X20, family = gaussian(), data = dat_gauss, QR = TRUE, chains = 1,
iter = 500, refresh = 0, seed = 9876
))
for (nterms_max_i in list(NULL, 0, 20)) {
# The message is only expected when `nterms_max` is left at its default
# (`NULL`), i.e., when projpred itself decides to cut off the search:
if (is.null(nterms_max_i)) {
mssg_expected <- "Cutting off the search at size 19\\."
} else {
# `NA` tells expect_message() that no message is expected:
mssg_expected <- NA
}
expect_message(
vs_tmp <- varsel(
fit_exceed, method = "forward", nterms_max = nterms_max_i,
nclusters = 1, refit_prj = FALSE, seed = 5555, verbose = FALSE
),
mssg_expected,
info = paste("fit_exceed, nterms_max =", nterms_max_i %||% "NULL")
)
# The fit that does not exceed the cutoff must never trigger the message:
expect_message(
vs_tmp <- varsel(
fit_not_exceed, method = "forward", nterms_max = nterms_max_i,
nclusters = 1, refit_prj = FALSE, seed = 5555, verbose = FALSE
),
NA,
info = paste("fit_not_exceed, nterms_max =", nterms_max_i %||% "NULL")
)
}
# Restore the original option value:
options(mssg_cut_search_orig)
})
}
## Regularization ---------------------------------------------------------
# In fact, `regul` is already checked in `test_project.R`, so the `regul` tests
# could be omitted here since varsel() and cv_varsel() also pass `regul` to
# proj_to_submodl() (usually via perf_eval(), just like project()). This doesn't
# hold for L1 search, though. So for L1 search, the `regul` tests are still
# needed.
test_that(paste(
"for GLMs with L1 search, `regul` only has an effect on prediction, not on",
"selection"
), {
skip_if_not(run_vs)
# Increasing sequence of `regul` values, starting at the default:
regul_tst <- c(regul_default, 1e-1, 1e2)
stopifnot(regul_tst[1] == regul_default)
stopifnot(all(diff(regul_tst) > 0))
# Output elements of `vsel` objects that may be influenced by `regul`:
vsel_nms_regul <- c("summaries", "ce")
vsel_nms_regul_opt <- character()
# The setups that should be tested:
tstsetups <- grep("\\.glm\\..*\\.L1\\.", names(vss), value = TRUE)
for (tstsetup in tstsetups) {
args_vs_i <- args_vs[[tstsetup]]
# Number of model sizes, including the intercept-only model:
m_max <- args_vs_i$nterms_max + 1L
# `ssq_regul_prd[j, m]`: variance of the (offset-corrected) linear
# predictor for `regul` value `j` and model size `m`:
ssq_regul_prd <- array(dim = c(length(regul_tst), m_max))
for (j in seq_along(regul_tst)) {
if (regul_tst[j] == regul_default) {
# The default-`regul` run is already available as `vss[[tstsetup]]`:
vs_regul <- vss[[tstsetup]]
} else {
vs_regul <- do.call(varsel, c(
list(object = refmods[[args_vs_i$tstsetup_ref]],
regul = regul_tst[j]),
excl_nonargs(args_vs_i)
))
vsel_tester(
vs_regul,
refmod_expected = refmods[[args_vs_i$tstsetup_ref]],
prd_trms_len_expected = args_vs_i$nterms_max,
method_expected = "L1",
info_str = tstsetup
)
# Expect equality for all components not related to `regul`:
expect_equal(vs_regul[setdiff(vsel_nms, vsel_nms_regul)],
vss[[tstsetup]][setdiff(vsel_nms, vsel_nms_regul)],
info = paste(tstsetup, j, sep = "__"))
# Expect inequality for the elements related to `regul` (the elements
# from `vsel_nms_regul_opt` can be, but don't need to be differing):
for (vsel_nm in setdiff(vsel_nms_regul, vsel_nms_regul_opt)) {
expect_false(isTRUE(all.equal(vs_regul[[vsel_nm]],
vss[[tstsetup]][[vsel_nm]])),
info = paste(tstsetup, j, vsel_nm, sep = "__"))
}
}
# Check the inequality of the prediction components in detail: Expect a
# reduction of the sum of the squared coefficients (excluding the
# intercept) for increasing `regul`:
for (m in seq_len(m_max)) {
# Since varsel() doesn't output object `p_sub`, use the linear predictor
# here (instead of the coefficients themselves, which would only be
# accessible from `p_sub`):
mu_jm_regul <- vs_regul$refmodel$family$linkfun(
vs_regul$summaries$sub[[m]]$mu
)
if (grepl("\\.with_offs", tstsetup)) {
mu_jm_regul <- mu_jm_regul - offs_tst
}
# In fact, `sum((mu - offset - intercept)^2)` would make more sense than
# `var(mu - offset) = sum((mu - offset - mean(mu - offset))^2)` but
# since varsel() doesn't output object `p_sub`, the intercept from the
# prediction is not accessible here.
ssq_regul_prd[j, m] <- var(mu_jm_regul)
}
}
# For the intercept-only model, the linear predictor consists only
# of the intercept, so we expect no variation in `mu_jm_regul`:
expect_true(all(ssq_regul_prd[, 1] <= 1e-5), info = tstsetup)
# All other (i.e., not intercept-only) models:
# Stronger regularization must strictly shrink the predictor variance:
for (j in seq_len(dim(ssq_regul_prd)[1])[-1]) {
for (m in seq_len(dim(ssq_regul_prd)[2])[-1]) {
expect_lt(ssq_regul_prd[!!j, !!m], ssq_regul_prd[j - 1, m])
}
}
}
})
test_that(paste(
"for GLMs with forward search, `regul` has an expected effect on selection",
"as well as on prediction"
), {
skip_if_not(run_vs)
# Increasing sequence of `regul` values, starting at the default:
regul_tst <- c(regul_default, 1e-1, 1e2)
stopifnot(regul_tst[1] == regul_default)
stopifnot(all(diff(regul_tst) > 0))
# GLM setups with forward search (i.e., excluding L1 search) and excluding
# augmented-data families:
tstsetups <- setdiff(grep("\\.glm\\.", names(vss), value = TRUE),
grep("\\.glm\\..*\\.L1\\.", names(vss), value = TRUE))
tstsetups <- grep(fam_nms_aug_regex, tstsetups, value = TRUE, invert = TRUE)
for (tstsetup in tstsetups) {
args_vs_i <- args_vs[[tstsetup]]
# Number of model sizes, including the intercept-only model:
m_max <- args_vs_i$nterms_max + 1L
if (length(args_vs_i$search_terms) &&
all(grepl("\\+", args_vs_i$search_terms))) {
# This is the "empty_size" setting, so we have to subtract the skipped
# model size (see issue #307):
m_max <- m_max - 1L
}
ncl_crr <- args_vs_i$nclusters
# Squared intercepts and summed squared coefficients from the search, per
# `regul` value `j`, model size `m`, and projected draw (cluster) `nn`:
ssq_regul_sel_alpha <- array(dim = c(length(regul_tst), m_max, ncl_crr))
ssq_regul_sel_beta <- array(dim = c(length(regul_tst), m_max, ncl_crr))
# Variance of the (offset-corrected) linear predictor per `j` and `m`:
ssq_regul_prd <- array(dim = c(length(regul_tst), m_max))
for (j in seq_along(regul_tst)) {
if (regul_tst[j] == regul_default) {
# The default-`regul` run is already available as `vss[[tstsetup]]`:
vs_regul <- vss[[tstsetup]]
} else {
vs_regul <- do.call(varsel, c(
list(object = refmods[[args_vs_i$tstsetup_ref]],
regul = regul_tst[j]),
excl_nonargs(args_vs_i)
))
vsel_tester(
vs_regul,
refmod_expected = refmods[[args_vs_i$tstsetup_ref]],
prd_trms_len_expected = args_vs_i$nterms_max,
method_expected = "forward",
search_terms_expected = args_vs_i$search_terms,
search_trms_empty_size =
length(args_vs_i$search_terms) &&
all(grepl("\\+", args_vs_i$search_terms)),
search_control_expected = c(args_vs_i[c("avoid.increase")],
list(regul = regul_tst[j])),
info_str = tstsetup
)
}
for (m in seq_len(m_max)) {
# Selection:
# Extract the fitted submodel(s) of size `m` from the search path:
outdmin_jm_regul <- vs_regul$search_path$outdmins[[m]]
if (ncl_crr == 1) {
# With a single cluster, the submodel fit is not wrapped in a list:
outdmin_jm_regul <- list(outdmin_jm_regul)
} else {
stopifnot(identical(ncl_crr, length(outdmin_jm_regul)))
}
for (nn in seq_len(ncl_crr)) {
stopifnot(length(outdmin_jm_regul[[nn]]$alpha) == 1)
ssq_regul_sel_alpha[j, m, nn] <- outdmin_jm_regul[[nn]]$alpha^2
if (length(outdmin_jm_regul[[nn]]$beta) > 0) {
ssq_regul_sel_beta[j, m, nn] <- sum(outdmin_jm_regul[[nn]]$beta^2)
}
}
# Prediction:
# Since varsel() doesn't output object `p_sub`, use the linear predictor
# here (instead of the coefficients themselves, which would only be
# accessible from `p_sub`):
mu_jm_regul <- vs_regul$summaries$sub[[m]]$mu
if (args_vs_i$prj_nm == "augdat") {
mu_jm_regul <- augvec2augmat(mu_jm_regul)
}
mu_jm_regul <- vs_regul$refmodel$family$linkfun(mu_jm_regul)
if (grepl("\\.with_offs", tstsetup)) {
mu_jm_regul <- mu_jm_regul - offs_tst
}
# In fact, `sum((mu - offset - intercept)^2)` would make more sense than
# `var(mu - offset) = sum((mu - offset - mean(mu - offset))^2)` but
# since varsel() doesn't output object `p_sub`, the intercept from the
# prediction is not accessible here.
if (args_vs_i$prj_nm == "augdat") {
# Take the maximum variance across the response categories (i.e., the
# worst-case scenario):
mu_jm_regul <- augmat2arr(mu_jm_regul)
mu_jm_regul <- matrix(mu_jm_regul,
nrow = dim(mu_jm_regul)[1],
ncol = dim(mu_jm_regul)[2])
var_jm_regul <- max(apply(mu_jm_regul, 2, var))
} else {
var_jm_regul <- var(mu_jm_regul)
}
ssq_regul_prd[j, m] <- var_jm_regul
}
}
# Selection:
# For the intercept-only model:
# The intercept is unaffected by `regul` and there are no coefficients:
for (nn in seq_len(dim(ssq_regul_sel_alpha)[3])) {
expect_length(unique(ssq_regul_sel_alpha[, 1, !!nn]), 1)
}
expect_true(all(is.na(ssq_regul_sel_beta[, 1, ])), info = tstsetup)
# All other (i.e., not intercept-only) models (note: as discussed at issue
# #169, the intercept is not tested here to stay the same):
# For each pair of consecutive `regul` values, the summed squared
# coefficients must strictly decrease:
ssq_regul_sel_beta_cond <- array(
dim = dim(ssq_regul_sel_beta) + c(-1L, -1L, 0L)
)
for (j in seq_len(dim(ssq_regul_sel_beta)[1])[-1]) {
for (m in seq_len(dim(ssq_regul_sel_beta)[2])[-1]) {
for (nn in seq_len(dim(ssq_regul_sel_beta)[3])) {
ssq_regul_sel_beta_cond[j - 1, m - 1, nn] <-
ssq_regul_sel_beta[j, m, nn] < ssq_regul_sel_beta[j - 1, m, nn]
}
}
}
expect_true(all(ssq_regul_sel_beta_cond), info = tstsetup)
# Prediction:
# For the intercept-only model, the linear predictor consists only
# of the intercept, so we expect no variation in `mu_jm_regul`:
expect_true(all(ssq_regul_prd[, 1] <= 1e-12), info = tstsetup)
# All other (i.e., not intercept-only) models:
# Stronger regularization must strictly shrink the predictor variance:
for (j in seq_len(dim(ssq_regul_prd)[1])[-1]) {
for (m in seq_len(dim(ssq_regul_prd)[2])[-1]) {
expect_lt(ssq_regul_prd[!!j, !!m], ssq_regul_prd[j - 1, m])
}
}
}
})
## Penalty ----------------------------------------------------------------
test_that("`penalty` of invalid length fails", {
  skip_if_not(run_vs)
  tstsetups <- grep("\\.L1\\.", names(vss), value = TRUE)
  for (tstsetup in tstsetups) {
    args_vs_i <- args_vs[[tstsetup]]
    formul_crr <- get_formul_from_fit(fits[[args_vs_i$tstsetup_fit]])
    formul_crr <- rm_addresp(formul_crr)
    penal_possbl <- get_penal_possbl(formul_crr)
    len_penal <- length(penal_possbl)
    # The `penalty` objects to be tested (one too long, one too short):
    penal_tst <- list(rep(1, len_penal + 1), rep(1, len_penal - 1))
    # Iterate by index instead of re-deriving the index afterwards via
    # `which(sapply(penal_tst, identical, penal_crr))` (which recomputes the
    # position that is already known here):
    for (penal_idx in seq_along(penal_tst)) {
      penal_crr <- penal_tst[[penal_idx]]
      expect_error(
        do.call(varsel, c(
          list(object = refmods[[args_vs_i$tstsetup_ref]],
               penalty = penal_crr),
          excl_nonargs(args_vs_i)
        )),
        paste0("^Incorrect length of penalty vector \\(should be ",
               len_penal, "\\)\\.$"),
        info = paste(tstsetup, penal_idx, sep = "__")
      )
    }
  }
})
test_that("for forward search, `penalty` has no effect", {
  skip_if_not(run_vs)
  penal_tst <- 2
  # All non-L1 (i.e., forward-search) test setups:
  tstsetups <- grep("\\.L1\\.", names(vss), value = TRUE, invert = TRUE)
  # To save time:
  if (!run_more) {
    tstsetups <- head(tstsetups, 1)
  }
  for (tstsetup in tstsetups) {
    args_vs_i <- args_vs[[tstsetup]]
    varsel_args <- c(
      list(object = refmods[[args_vs_i$tstsetup_ref]], penalty = penal_tst),
      excl_nonargs(args_vs_i)
    )
    # Use suppressWarnings() because test_that() somehow redirects stderr() and
    # so throws warnings that projpred wants to capture internally:
    vs_penal <- suppressWarnings(do.call(varsel, varsel_args))
    # Forward search ignores `penalty`, so after blanking out the stored
    # argument, the result must be identical to the pre-computed one:
    vs_penal$args_search["penalty"] <- list(NULL)
    expect_equal(vs_penal, vss[[tstsetup]], info = tstsetup)
  }
})
test_that("for L1 search, `penalty` has an expected effect", {
skip_if_not(run_vs)
tstsetups <- grep("\\.L1\\.", names(vss), value = TRUE)
for (tstsetup in tstsetups) {
args_vs_i <- args_vs[[tstsetup]]
formul_crr <- get_formul_from_fit(fits[[args_vs_i$tstsetup_fit]])
formul_crr <- rm_addresp(formul_crr)
# All terms which can receive a penalty, and a baseline penalty of 1 each:
penal_possbl <- get_penal_possbl(formul_crr)
len_penal <- length(penal_possbl)
penal_crr <- rep(1, len_penal)
stopifnot(len_penal >= 3)
# TODO: This test should be extended to also test the case where a
# categorical predictor (more precisely, one of its dummy variables) or a
# poly() term (more precisely, one of its lower-order terms resulting from
# the expansion of the poly() term) gets zero or infinite penalty. For now,
# the following code ensures that no categorical predictors and no poly()
# terms get zero or infinite penalty.
idx_cat <- grep("xca\\.", penal_possbl)
idx_poly <- grep("poly[m]*\\(", penal_possbl)
# Two predictors without cost:
idx_penal_0 <- head(setdiff(seq_along(penal_crr),
c(idx_cat, idx_poly)),
2)
stopifnot(length(idx_penal_0) == 2)
# One predictor with infinite penalty:
idx_penal_Inf <- head(setdiff(seq_along(penal_crr),
c(idx_penal_0, idx_cat, idx_poly)),
1)
stopifnot(length(idx_penal_Inf) == 1)
penal_crr[idx_penal_0] <- 0
penal_crr[idx_penal_Inf] <- Inf
# Exclude `nterms_max` so the search runs up to the full model (the
# infinite-penalty predictor can only be selected last then):
vs_penal <- do.call(varsel, c(
list(object = refmods[[args_vs_i$tstsetup_ref]],
penalty = penal_crr),
excl_nonargs(args_vs_i, nms_excl_add = "nterms_max")
))
nterms_max_crr <- count_terms_in_formula(formul_crr) - 1L
vsel_tester(
vs_penal,
refmod_expected = refmods[[args_vs_i$tstsetup_ref]],
prd_trms_len_expected = nterms_max_crr,
method_expected = "L1",
penalty_expected = penal_crr,
info_str = tstsetup
)
# Check that the variables with no cost are selected first and the ones
# with infinite penalty last:
prd_trms_penal <- vs_penal$predictor_ranking
# Normalize `I(...as.logical(...))` terms so they match the naming used in
# `penal_possbl`:
prd_trms_penal <- sub("(I\\(.*as\\.logical\\(.*\\)\\))", "\\1TRUE",
prd_trms_penal)
expect_identical(prd_trms_penal[seq_along(idx_penal_0)],
penal_possbl[idx_penal_0],
info = tstsetup)
expect_identical(rev(prd_trms_penal)[seq_along(idx_penal_Inf)],
rev(penal_possbl[idx_penal_Inf]),
info = tstsetup)
}
})
## L1 search and interactions ---------------------------------------------
test_that("L1 search handles three-way (second-order) interactions correctly", {
  skip_if_not(run_vs)
  skip_if_not_installed("rstanarm")
  # Activate the warning for interactions selected too early in the L1 search
  # (the original option value is restored at the end of this test):
  warn_L1_ia_orig <- options(projpred.warn_L1_interactions = TRUE)
  main_terms_in_ia <- c("xca.2", "xco.3", "xco.1")
  # All interaction terms (of all orders, in all permutations) that can be
  # built from `main_terms_in_ia`:
  all_ias_split <- lapply(seq_along(main_terms_in_ia), combn,
                          x = main_terms_in_ia, simplify = FALSE)
  all_ias <- unlist(lapply(all_ias_split, function(ia_split) {
    lapply(ia_split, all_ia_perms, is_split = TRUE)
  }))
  # Temporarily extend the global universe of terms; the backup is restored at
  # the end of this test:
  trms_universe_split_bu <- trms_universe_split
  trms_universe_split <<- union(trms_universe_split, all_ias)
  tstsetup <- head(grep("^rstanarm\\.glm\\.", names(fits), value = TRUE), 1)
  args_fit_i <- args_fit[[tstsetup]]
  stopifnot(!(args_fit_i$pkg_nm == "rstanarm" && args_fit_i$fam_nm == "cumul"))
  fit_fun_nm <- get_fit_fun_nm(args_fit_i)
  # Refit the reference model with the three-way interaction (and all implied
  # lower-order terms) added:
  args_fit_i$formula <- update(args_fit_i$formula,
                               . ~ . + xca.2 * xco.3 * xco.1)
  fit <- suppressWarnings(do.call(
    get(fit_fun_nm, asNamespace(args_fit_i$pkg_nm)),
    excl_nonargs(args_fit_i)
  ))
  args_ref_i <- args_ref[[paste0(tstsetup, ".trad")]]
  refmod <- do.call(get_refmodel, c(
    list(object = fit),
    excl_nonargs(args_ref_i)
  ))
  args_vs_i <- args_vs[[paste0(tstsetup, ".trad.L1.default_search_trms")]]
  args_vs_i$refit_prj <- FALSE
  args_vs_i$nterms_max <- NULL
  # The L1 search is expected to warn that an interaction term was selected
  # before all of its lower-order terms:
  expect_warning(
    vs <- do.call(varsel, c(
      list(object = refmod),
      excl_nonargs(args_vs_i)
    )),
    "was selected before all.+lower-order interaction terms have been selected",
    info = tstsetup
  )
  vsel_tester(
    vs,
    refmod_expected = refmod,
    prd_trms_len_expected = count_terms_in_formula(refmod$formula) - 1L,
    method_expected = "L1",
    refit_prj_expected = FALSE,
    ### Testing for non-increasing element `ce` (for increasing model size)
    ### doesn't make sense if the ranking of predictors involved in interactions
    ### has been changed, so we choose a higher `extra_tol` than by default:
    extra_tol = 1.2,
    ###
    info_str = tstsetup
  )
  # In the final ranking, every interaction term must come after all of its
  # lower-order terms:
  rk <- ranking(vs)[["fulldata"]]
  expect_true(
    all(sapply(grep(":", rk), function(ia_idx) {
      main_terms_in_ia <- strsplit(rk[ia_idx], ":")[[1]]
      all_ias_split <- lapply(seq_len(length(main_terms_in_ia) - 1L), combn,
                              x = main_terms_in_ia, simplify = FALSE)
      ias_lower <- unlist(lapply(all_ias_split, function(ia_split) {
        lapply(ia_split, all_ia_perms, is_split = TRUE)
      }))
      return(all(which(rk %in% ias_lower) < ia_idx))
    })),
    info = tstsetup
  )
  # Restore the global state modified above:
  trms_universe_split <<- trms_universe_split_bu
  options(warn_L1_ia_orig)
})
## search_terms -----------------------------------------------------------
test_that(paste(
  "including all terms in `search_terms` gives the same results as the default",
  "`search_terms`"
), {
  skip_if_not(run_vs)
  # Compare each "all terms" setup against its default-`search_terms` twin
  # (skipping setups without such a twin):
  for (tstsetup in grep("\\.alltrms", names(vss), value = TRUE)) {
    tstsetup_default <- sub("\\.alltrms", "\\.default_search_trms", tstsetup)
    if (!tstsetup_default %in% names(vss)) next
    vs_alltrms <- vss[[tstsetup]]
    # Set element `search_terms` to NULL without dropping it, so that this
    # deliberate difference is ignored in the comparison:
    vs_alltrms$args_search["search_terms"] <- list(NULL)
    expect_identical(vs_alltrms, vss[[tstsetup_default]], info = tstsetup)
  }
})
test_that(paste(
  "forcing the inclusion of a term in the candidate models via `search_terms`",
  "works as expected"
), {
  skip_if_not(run_vs)
  tstsetups_fixed <- grep("\\.fixed", names(vss), value = TRUE)
  for (tstsetup in tstsetups_fixed) {
    # In principle, `search_trms_tst$fixed$search_terms[1]` could be used
    # instead of `"xco.1"`, but that would seem like the forced term always has
    # to come first in `search_terms` (which is not the case):
    rk_first <- vss[[tstsetup]]$predictor_ranking[1]
    expect_identical(rk_first, "xco.1", info = tstsetup)
  }
})
test_that(paste(
  "forcing the exclusion of a term in the candidate models via `search_terms`",
  "works as expected"
), {
  skip_if_not(run_vs)
  for (tstsetup in grep("\\.excluded", names(vss), value = TRUE)) {
    # The excluded term must never show up in the predictor ranking:
    rk_crr <- vss[[tstsetup]]$predictor_ranking
    expect_false(is.element("xco.1", rk_crr), info = tstsetup)
  }
})
test_that(paste(
  "forcing the skipping of a model size via `search_terms` works as expected"
), {
  skip_if_not(run_vs)
  for (tstsetup in grep("\\.empty_size", names(vss), value = TRUE)) {
    rk_fulldata_out <- vss[[tstsetup]]$predictor_ranking
    # Exactly the first ranked entry should be a combination of terms (i.e.,
    # contain a "+"), which skips one model size:
    has_plus <- grepl("\\+", rk_fulldata_out)
    expect_true(has_plus[1] && !any(has_plus[-1]), info = tstsetup)
  }
})
## varsel.vsel() ----------------------------------------------------------
test_that("varsel.vsel() works for `vsel` objects from varsel()", {
  skip_if_not(run_vs)
  # In quick runs, a single setup suffices:
  tstsetups <- if (run_more) names(vss) else head(names(vss), 1)
  for (tstsetup in tstsetups) {
    args_vs_i <- args_vs[[tstsetup]]
    vs_eval <- varsel(vss[[tstsetup]], refit_prj = FALSE, verbose = FALSE,
                      seed = seed2_tst)
    meth_exp_crr <- args_vs_i$method %||% "forward"
    # Testing for non-increasing element `ce` (for increasing model size)
    # doesn't make sense if the ranking of predictors involved in interactions
    # has been changed, so a higher `extra_tol` than by default is chosen in
    # that case:
    rk_has_ia <- any(grepl(":", ranking(vs_eval)[["fulldata"]]))
    extra_tol_crr <- if (meth_exp_crr == "L1" && rk_has_ia) 1.2 else 1.1
    vsel_tester(
      vs_eval,
      refmod_expected = refmods[[args_vs_i$tstsetup_ref]],
      prd_trms_len_expected = args_vs_i$nterms_max,
      method_expected = meth_exp_crr,
      refit_prj_expected = FALSE,
      search_terms_expected = args_vs_i$search_terms,
      search_trms_empty_size =
        length(args_vs_i$search_terms) &&
        all(grepl("\\+", args_vs_i$search_terms)),
      search_control_expected = args_vs_i[c("avoid.increase")],
      extra_tol = extra_tol_crr,
      info_str = tstsetup
    )
  }
})
test_that("varsel.vsel() works for `vsel` objects from cv_varsel()", {
  skip_if_not(run_vs)
  skip_if_not(run_cvvs)
  # Only the first setup uses `refit_prj = FALSE`; with `run_more = TRUE`, the
  # remaining setups are run with `refit_prj = TRUE` (and a decremented
  # `nclusters_pred`); otherwise, they are skipped:
  tstsetup_counter <- 0L
  for (tstsetup in names(cvvss)) {
    if (!run_more && tstsetup_counter > 0L) {
      next
    } else if (run_more && tstsetup_counter > 0L) {
      refit_prj_crr <- TRUE
      nclusters_pred_crr <- args_cvvs[[tstsetup]]$nclusters_pred - 1L
    } else {
      refit_prj_crr <- FALSE
      nclusters_pred_crr <- args_cvvs[[tstsetup]]$nclusters_pred
    }
    fam_crr <- args_cvvs[[tstsetup]]$fam_nm
    prj_crr <- args_cvvs[[tstsetup]]$prj_nm
    # Use suppressWarnings() because test_that() somehow redirects stderr() and
    # so throws warnings that projpred wants to capture internally:
    vs_eval <- suppressWarnings(varsel(
      cvvss[[tstsetup]], refit_prj = refit_prj_crr,
      nclusters_pred = nclusters_pred_crr, verbose = FALSE, seed = seed2_tst
    ))
    tstsetup_ref <- args_cvvs[[tstsetup]]$tstsetup_ref
    meth_exp_crr <- args_cvvs[[tstsetup]]$method %||% "forward"
    vsel_tester(
      vs_eval,
      refmod_expected = refmods[[tstsetup_ref]],
      prd_trms_len_expected = args_cvvs[[tstsetup]]$nterms_max,
      method_expected = meth_exp_crr,
      refit_prj_expected = refit_prj_crr,
      # Expected number of projected draws used for the performance evaluation
      # (a single draw for an L1 search without refitting):
      nprjdraws_eval_expected = if (!refit_prj_crr && meth_exp_crr == "L1") {
        1L
      } else if (!refit_prj_crr) {
        nclusters_tst
      } else {
        nclusters_pred_crr
      },
      search_terms_expected = args_cvvs[[tstsetup]]$search_terms,
      search_trms_empty_size =
        length(args_cvvs[[tstsetup]]$search_terms) &&
        all(grepl("\\+", args_cvvs[[tstsetup]]$search_terms)),
      search_control_expected = args_cvvs[[tstsetup]][c("avoid.increase")],
      info_str = tstsetup
    )
    tstsetup_counter <- tstsetup_counter + 1L
  }
})
# cv_varsel() -------------------------------------------------------------
context("cv_varsel()")
test_that(paste(
  "`object` of class `refmodel` and arguments `method`, `cv_method`,",
  "`nterms_max`, `nclusters`, and `nclusters_pred` work"
), {
  skip_if_not(run_cvvs)
  # Check all pre-computed cv_varsel() results (`cvvss`) against the arguments
  # (`args_cvvs`) they were created with:
  for (tstsetup in names(cvvss)) {
    mod_crr <- args_cvvs[[tstsetup]]$mod_nm
    fam_crr <- args_cvvs[[tstsetup]]$fam_nm
    prj_crr <- args_cvvs[[tstsetup]]$prj_nm
    meth_exp_crr <- args_cvvs[[tstsetup]]$method %||% "forward"
    vsel_tester(
      cvvss[[tstsetup]],
      with_cv = TRUE,
      refmod_expected = refmods[[args_cvvs[[tstsetup]]$tstsetup_ref]],
      # For K-fold CV, the pre-computed K refits are expected; otherwise, the
      # `cvfits` element of the `refmodel` object:
      cvfits_expected = if (identical(args_cvvs[[tstsetup]]$cv_method,
                                      "kfold")) {
        cvfitss[[args_cvvs[[tstsetup]]$tstsetup_ref]]
      } else {
        refmods[[args_cvvs[[tstsetup]]$tstsetup_ref]]$cvfits
      },
      prd_trms_len_expected = args_cvvs[[tstsetup]]$nterms_max,
      method_expected = meth_exp_crr,
      cv_method_expected = args_cvvs[[tstsetup]]$cv_method,
      valsearch_expected = args_cvvs[[tstsetup]]$validate_search,
      search_terms_expected = args_cvvs[[tstsetup]]$search_terms,
      # `TRUE` if `search_terms` forces the skipping of a model size (i.e., all
      # of its elements contain a "+"):
      search_trms_empty_size =
        length(args_cvvs[[tstsetup]]$search_terms) &&
        all(grepl("\\+", args_cvvs[[tstsetup]]$search_terms)),
      search_control_expected = args_cvvs[[tstsetup]][c("avoid.increase")],
      info_str = tstsetup
    )
  }
})
test_that("invalid `object` fails", {
  # A plain numeric vector has no cv_varsel() method:
  bad_object <- rnorm(5)
  expect_error(cv_varsel(bad_object), "^no applicable method for")
})
test_that("invalid `method` fails", {
  for (tstsetup in names(refmods)) {
    refmod_crr <- refmods[[tstsetup]]
    mod_nm_crr <- args_ref[[tstsetup]]$mod_nm
    # An unknown `method` must always be rejected:
    expect_error(cv_varsel(refmod_crr, method = "k-fold"),
                 "^Unexpected value for argument `method`\\.$",
                 info = tstsetup)
    # L1 search is rejected for non-GLM reference models:
    if (mod_nm_crr != "glm") {
      expect_error(cv_varsel(refmod_crr, method = "L1"),
                   paste("^L1 search is only supported for reference models",
                         "without multilevel and without additive",
                         "\\(\"smoothing\"\\) terms\\.$"),
                   info = tstsetup)
    }
    # L1 search is also rejected for the augmented-data projection:
    if (mod_nm_crr == "glm" && args_ref[[tstsetup]]$prj_nm == "augdat") {
      expect_error(cv_varsel(refmod_crr, method = "L1"),
                   paste("^Currently, the augmented-data projection may not be",
                         "combined with an L1 search\\.$"),
                   info = tstsetup)
    }
  }
})
test_that("invalid `cv_method` fails", {
  # An unknown CV method must be rejected for every reference model:
  for (tstsetup in names(refmods)) {
    refmod_crr <- refmods[[tstsetup]]
    expect_error(cv_varsel(refmod_crr, cv_method = "k-fold"),
                 "^Unknown `cv_method`\\.$",
                 info = tstsetup)
  }
})
test_that("`seed` works (and restores the RNG state afterwards)", {
  skip_if_not(run_cvvs)
  # To save time:
  tstsetups <- union(
    grep("\\.glm\\.gauss.*\\.default_search_trms", names(cvvss), value = TRUE),
    # Important for testing get_refmodel.brmsfit()'s internal `kfold_seed` (and
    # also `refprd_seed` if we are lucky and get a fold which separates out at
    # least one group):
    grep("^brms\\.(glmm|gamm)\\..*\\.kfold", names(cvvss), value = TRUE)
  )
  for (tstsetup in tstsetups) {
    args_cvvs_i <- args_cvvs[[tstsetup]]
    cvvs_orig <- cvvss[[tstsetup]]
    rand_orig <- runif(1) # Just to advance `.Random.seed[2]`.
    # Snapshot of the RNG state before re-running cv_varsel():
    .Random.seed_repr1 <- .Random.seed
    # Use suppressWarnings() because test_that() somehow redirects stderr() and
    # so throws warnings that projpred wants to capture internally:
    cvvs_repr <- suppressWarnings(do.call(cv_varsel, c(
      list(object = refmods[[args_cvvs_i$tstsetup_ref]],
           cvfits = if (identical(args_cvvs_i$cv_method, "kfold")) {
             cvfitss[[args_cvvs_i$tstsetup_ref]]
           } else {
             refmods[[args_cvvs_i$tstsetup_ref]]$cvfits # should be `NULL`
           }),
      excl_nonargs(args_cvvs_i)
    )))
    # Snapshot of the RNG state after re-running cv_varsel():
    .Random.seed_repr2 <- .Random.seed
    rand_new <- runif(1) # Just to advance `.Random.seed[2]`.
    # Expected equality: re-running with the same `seed` must reproduce the
    # original results and leave the RNG state untouched:
    expect_equal(cvvs_repr, cvvs_orig, info = tstsetup)
    expect_equal(.Random.seed_repr2, .Random.seed_repr1, info = tstsetup)
    # Expected inequality: the global RNG must still advance between the two
    # runif() calls (i.e., it is not frozen):
    expect_false(isTRUE(all.equal(rand_new, rand_orig)), info = tstsetup)
  }
})
## refit_prj --------------------------------------------------------------
test_that("`refit_prj` works", {
  skip_if_not(run_cvvs)
  # In quick runs, a single GLM setup suffices:
  if (run_more) {
    tstsetups <- names(cvvss)
  } else {
    tstsetups <- head(grep("\\.glm\\.", names(cvvss), value = TRUE), 1)
  }
  for (tstsetup in tstsetups) {
    args_cvvs_i <- args_cvvs[[tstsetup]]
    if (identical(args_cvvs_i$cv_method, "kfold") &&
        isFALSE(args_cvvs_i$validate_search)) {
      # K-fold CV with `validate_search = FALSE` does not allow to specify
      # `refit_prj = FALSE`:
      next
    }
    # Re-run cv_varsel() with `refit_prj = FALSE` (everything else unchanged):
    args_cvvs_i$refit_prj <- FALSE
    # Use suppressWarnings() because test_that() somehow redirects stderr() and
    # so throws warnings that projpred wants to capture internally:
    cvvs_reuse <- suppressWarnings(do.call(cv_varsel, c(
      list(object = refmods[[args_cvvs_i$tstsetup_ref]],
           cvfits = if (identical(args_cvvs_i$cv_method, "kfold")) {
             cvfitss[[args_cvvs_i$tstsetup_ref]]
           } else {
             refmods[[args_cvvs_i$tstsetup_ref]]$cvfits # should be `NULL`
           }),
      excl_nonargs(args_cvvs_i)
    )))
    mod_crr <- args_cvvs_i$mod_nm
    fam_crr <- args_cvvs_i$fam_nm
    prj_crr <- args_cvvs_i$prj_nm
    meth_exp_crr <- args_cvvs_i$method %||% "forward"
    vsel_tester(
      cvvs_reuse,
      with_cv = TRUE,
      refmod_expected = refmods[[args_cvvs_i$tstsetup_ref]],
      cvfits_expected = if (identical(args_cvvs_i$cv_method, "kfold")) {
        cvfitss[[args_cvvs_i$tstsetup_ref]]
      } else {
        refmods[[args_cvvs_i$tstsetup_ref]]$cvfits
      },
      prd_trms_len_expected = args_cvvs_i$nterms_max,
      method_expected = meth_exp_crr,
      refit_prj_expected = FALSE,
      cv_method_expected = args_cvvs_i$cv_method,
      valsearch_expected = args_cvvs_i$validate_search,
      search_terms_expected = args_cvvs_i$search_terms,
      search_trms_empty_size =
        length(args_cvvs_i$search_terms) &&
        all(grepl("\\+", args_cvvs_i$search_terms)),
      search_control_expected = args_cvvs_i[c("avoid.increase")],
      info_str = tstsetup
    )
  }
})
## Message when cutting off the search ------------------------------------
if (run_more) {
  test_that("the message when cutting off the search is thrown correctly", {
    skip_if_not(run_vs)
    skip_if_not_installed("rstanarm")
    # Reset the option controlling the cutoff message to its default; restored
    # at the end of this test:
    mssg_cut_search_orig <- options(projpred.mssg_cut_search = NULL)
    dat_gauss <- data.frame(y = df_gaussian$y, df_gaussian$x)
    # More than 19 predictors are needed so that the cutoff at size 19 (see the
    # expected message below) can actually be exceeded:
    stopifnot(sum(grepl("^X", names(dat_gauss))) > 19)
    # A fit whose number of predictors exceeds the cutoff:
    fit_exceed <- suppressWarnings(rstanarm::stan_glm(
      y ~ ., family = gaussian(), data = dat_gauss, QR = TRUE, chains = 1,
      iter = 500, refresh = 0, seed = 9876
    ))
    # A fit which does not exceed the cutoff (one predictor dropped):
    fit_not_exceed <- suppressWarnings(rstanarm::stan_glm(
      y ~ . - X20, family = gaussian(), data = dat_gauss, QR = TRUE, chains = 1,
      iter = 500, refresh = 0, seed = 9876
    ))
    for (nterms_max_i in list(NULL, 0, 20)) {
      if (is.null(nterms_max_i)) {
        # Only with `nterms_max = NULL` (the default), the cutoff message is
        # expected:
        mssg_expected <- "Cutting off the search at size 19\\."
      } else {
        # `NA` means that no message at all is expected (see ?expect_message):
        mssg_expected <- NA
      }
      expect_message(
        vs_tmp <- suppressWarnings(cv_varsel(
          fit_exceed, validate_search = FALSE, method = "forward",
          nterms_max = nterms_max_i, nclusters = 2, refit_prj = FALSE,
          seed = 5555, verbose = FALSE
        )),
        mssg_expected,
        info = paste("fit_exceed, nterms_max =", nterms_max_i %||% "NULL")
      )
      # The fit which does not exceed the cutoff must never trigger the
      # message:
      expect_message(
        vs_tmp <- suppressWarnings(cv_varsel(
          fit_not_exceed, validate_search = FALSE, method = "forward",
          nterms_max = nterms_max_i, nclusters = 2, refit_prj = FALSE,
          seed = 5555, verbose = FALSE
        )),
        NA,
        info = paste("fit_not_exceed, nterms_max =", nterms_max_i %||% "NULL")
      )
    }
    options(mssg_cut_search_orig)
  })
}
## nloo -------------------------------------------------------------------
test_that("invalid `nloo` fails", {
  skip_if_not(run_cvvs)
  # `nloo` only applies to LOO CV, so exclude the K-fold setups:
  tstsetups_loo <- grep("\\.kfold", names(cvvss), value = TRUE,
                        invert = TRUE)
  # One setup suffices for this input check:
  for (tstsetup in head(tstsetups_loo, 1)) {
    args_cvvs_i <- args_cvvs[[tstsetup]]
    cv_varsel_args <- c(
      list(object = refmods[[args_cvvs_i$tstsetup_ref]], nloo = -1),
      excl_nonargs(args_cvvs_i)
    )
    # Use suppressWarnings() because test_that() somehow redirects stderr() and
    # so throws warnings that projpred wants to capture internally:
    expect_error(
      suppressWarnings(do.call(cv_varsel, cv_varsel_args)),
      "^nloo must be at least 1$",
      info = tstsetup
    )
  }
})
test_that(paste(
  "setting `nloo` at least as large as the number of observations doesn't",
  "change results"
), {
  skip_if_not(run_cvvs)
  # Deliberately exceed the number of observations by one:
  nloo_big <- nobsv + 1L
  tstsetups <- grep(
    "\\.glm\\.gauss\\..*\\.default_cvmeth\\.default_search_trms",
    names(cvvss), value = TRUE
  )
  for (tstsetup in tstsetups) {
    args_cvvs_i <- args_cvvs[[tstsetup]]
    cv_varsel_args <- c(
      list(object = refmods[[args_cvvs_i$tstsetup_ref]], nloo = nloo_big),
      excl_nonargs(args_cvvs_i)
    )
    # Use suppressWarnings() because test_that() somehow redirects stderr() and
    # so throws warnings that projpred wants to capture internally:
    cvvs_nloo <- suppressWarnings(do.call(cv_varsel, cv_varsel_args))
    # Align output element `nloo` before comparing against the pre-computed
    # result:
    cvvs_nloo[["nloo"]] <- nobsv
    expect_equal(cvvs_nloo, cvvss[[tstsetup]], info = tstsetup)
  }
})
test_that("setting `nloo` smaller than the number of observations works", {
  skip_if_not(run_cvvs)
  # Subsampled LOO CV with about a fifth of the observations:
  nloo_tst <- nobsv %/% 5L
  # Output elements of `vsel` objects that may be influenced by `nloo`:
  vsel_nms_nloo <- c("summaries", "predictor_ranking_cv", "nloo", "ce")
  # In general, element `ce` is affected as well (because the PRNG state when
  # doing the clustering for the performance evaluation is different when `nloo`
  # is smaller than the number of observations compared to when `nloo` is equal
  # to the number of observations), but the changes in `ce` may be so small that
  # they are not detected by all.equal():
  vsel_nms_nloo_opt <- c("ce")
  # The setups that should be tested:
  tstsetups <- grep(
    "\\.glm\\.gauss\\..*\\.default_cvmeth\\.default_search_trms",
    names(cvvss), value = TRUE
  )
  for (tstsetup in tstsetups) {
    args_cvvs_i <- args_cvvs[[tstsetup]]
    tstsetup_ref <- args_cvvs_i$tstsetup_ref
    mod_crr <- args_cvvs_i$mod_nm
    fam_crr <- args_cvvs_i$fam_nm
    prj_crr <- args_cvvs_i$prj_nm
    meth_exp_crr <- args_cvvs_i$method %||% "forward"
    # Use suppressWarnings() because test_that() somehow redirects stderr() and
    # so throws warnings that projpred wants to capture internally:
    cvvs_nloo <- suppressWarnings(do.call(cv_varsel, c(
      list(object = refmods[[args_cvvs_i$tstsetup_ref]],
           nloo = nloo_tst),
      excl_nonargs(args_cvvs_i)
    )))
    vsel_tester(
      cvvs_nloo,
      with_cv = TRUE,
      refmod_expected = refmods[[tstsetup_ref]],
      prd_trms_len_expected = args_cvvs_i$nterms_max,
      method_expected = meth_exp_crr,
      cv_method_expected = "LOO",
      valsearch_expected = args_cvvs_i$validate_search,
      nloo_expected = nloo_tst,
      search_terms_expected = args_cvvs_i$search_terms,
      search_trms_empty_size =
        length(args_cvvs_i$search_terms) &&
        all(grepl("\\+", args_cvvs_i$search_terms)),
      search_control_expected = args_cvvs_i[c("avoid.increase")],
      info_str = tstsetup
    )
    # Expected equality for most elements with a few exceptions:
    vsel_nms_nloo_crr <- vsel_nms_nloo
    if (isFALSE(args_cvvs_i$validate_search)) {
      # With `validate_search = FALSE`, `predictor_ranking_cv` is excluded from
      # the elements expected to differ (presumably absent or unaffected in
      # that case -- TODO confirm):
      vsel_nms_nloo_crr <- setdiff(vsel_nms_nloo_crr, "predictor_ranking_cv")
    }
    expect_equal(cvvs_nloo[setdiff(vsel_nms, vsel_nms_nloo_crr)],
                 cvvss[[tstsetup]][setdiff(vsel_nms, vsel_nms_nloo_crr)],
                 info = tstsetup)
    # Expected inequality for the exceptions (the elements from
    # `vsel_nms_nloo_opt` can be, but don't need to be differing):
    for (vsel_nm in setdiff(vsel_nms_nloo_crr, vsel_nms_nloo_opt)) {
      expect_false(isTRUE(all.equal(cvvs_nloo[[vsel_nm]],
                                    cvvss[[tstsetup]][[vsel_nm]])),
                   info = paste(tstsetup, vsel_nm, sep = "__"))
    }
  }
})
## validate_search --------------------------------------------------------
test_that("`validate_search` works", {
  skip_if_not(run_cvvs)
  # Output elements of `vsel` objects that may be influenced by
  # `validate_search`:
  vsel_nms_valsearch <- c("validate_search", "summaries", "ce",
                          "predictor_ranking_cv")
  # The setups that should be tested (all must have a validated search, see the
  # stopifnot() check inside the loop):
  tstsetups <- names(cvvss)
  if (!run_valsearch_always) {
    has_valsearch_true <- sapply(tstsetups, function(tstsetup_cvvs) {
      !isFALSE(args_cvvs[[tstsetup_cvvs]]$validate_search)
    })
    tstsetups <- tstsetups[has_valsearch_true]
  }
  # Collects per setup whether the suggested size without search validation is
  # at least as large as with search validation (`NA` if either suggested size
  # is `NA`):
  suggsize_cond <- setNames(rep(NA, length(tstsetups)), nm = tstsetups)
  for (tstsetup in tstsetups) {
    args_cvvs_i <- args_cvvs[[tstsetup]]
    stopifnot(is.null(args_cvvs_i$validate_search) ||
                isTRUE(args_cvvs_i$validate_search))
    tstsetup_ref <- args_cvvs_i$tstsetup_ref
    mod_crr <- args_cvvs_i$mod_nm
    fam_crr <- args_cvvs_i$fam_nm
    prj_crr <- args_cvvs_i$prj_nm
    meth_exp_crr <- args_cvvs_i$method %||% "forward"
    # Re-run cv_varsel() with `validate_search = FALSE` (everything else
    # unchanged).
    # Use suppressWarnings() because test_that() somehow redirects stderr() and
    # so throws warnings that projpred wants to capture internally:
    cvvs_valsearch <- suppressWarnings(do.call(cv_varsel, c(
      list(object = refmods[[args_cvvs_i$tstsetup_ref]],
           validate_search = FALSE,
           cvfits = if (identical(args_cvvs_i$cv_method, "kfold")) {
             cvfitss[[args_cvvs_i$tstsetup_ref]]
           } else {
             refmods[[args_cvvs_i$tstsetup_ref]]$cvfits # should be `NULL`
           }),
      excl_nonargs(args_cvvs_i, nms_excl_add = "validate_search")
    )))
    vsel_tester(
      cvvs_valsearch,
      with_cv = TRUE,
      refmod_expected = refmods[[tstsetup_ref]],
      cvfits_expected = if (identical(args_cvvs_i$cv_method, "kfold")) {
        cvfitss[[args_cvvs_i$tstsetup_ref]]
      } else {
        refmods[[args_cvvs_i$tstsetup_ref]]$cvfits
      },
      prd_trms_len_expected = args_cvvs_i$nterms_max,
      method_expected = meth_exp_crr,
      cv_method_expected = args_cvvs_i$cv_method,
      valsearch_expected = FALSE,
      search_terms_expected = args_cvvs_i$search_terms,
      search_trms_empty_size =
        length(args_cvvs_i$search_terms) &&
        all(grepl("\\+", args_cvvs_i$search_terms)),
      search_control_expected = args_cvvs_i[c("avoid.increase")],
      info_str = tstsetup
    )
    # Expected equality for most elements with a few exceptions:
    vsel_nms_valsearch_crr <- vsel_nms_valsearch
    if (identical(args_cvvs_i$cv_method, "kfold")) {
      # For K-fold CV, element `ce` is excluded from the elements expected to
      # differ (presumably unaffected there -- TODO confirm):
      vsel_nms_valsearch_crr <- setdiff(vsel_nms_valsearch_crr, "ce")
    }
    expect_equal(cvvs_valsearch[setdiff(vsel_nms, vsel_nms_valsearch_crr)],
                 cvvss[[tstsetup]][setdiff(vsel_nms, vsel_nms_valsearch_crr)],
                 info = tstsetup)
    # The reference model's performance summaries must not depend on
    # `validate_search`:
    expect_identical(cvvs_valsearch$summaries$ref,
                     cvvss[[tstsetup]]$summaries$ref,
                     info = tstsetup)
    # Expected inequality for the exceptions:
    for (vsel_nm in vsel_nms_valsearch_crr) {
      expect_false(isTRUE(all.equal(cvvs_valsearch[[vsel_nm]],
                                    cvvss[[tstsetup]][[vsel_nm]])),
                   info = paste(tstsetup, vsel_nm, sep = "__"))
    }
    # Check the expected inequalities more specifically:
    # Without a validated search, we expect increased LPPDs (and consequently
    # also an increased ELPD) in the submodels (due to overfitting):
    tol_crr <- 2e-1
    # Allow for just a small proportion of extreme differences:
    prop_as_expected <- if (identical(args_cvvs_i$cv_method, "kfold")) {
      0.8
    } else {
      0.9
    }
    for (j in seq_along(cvvs_valsearch$summaries$sub)) {
      expect_true(mean(cvvs_valsearch$summaries$sub[[j]]$lppd >=
                         cvvss[[tstsetup]]$summaries$sub[[j]]$lppd - tol_crr) >=
                    prop_as_expected,
                  info = paste(tstsetup, j, sep = "__"))
    }
    # Again allow for just a small proportion of extreme differences:
    prop_sizes_as_expected <- if (identical(args_cvvs_i$cv_method, "kfold")) {
      0.5
    } else {
      1
    }
    expect_true(
      mean(
        summary(cvvs_valsearch)$perf_sub$elpd >=
          summary(cvvss[[tstsetup]])$perf_sub$elpd
      ) >= prop_sizes_as_expected,
      info = tstsetup
    )
    # Without a validated search, we expect overfitting in the suggested size:
    sgg_size_valsearch <- suggest_size(cvvs_valsearch, warnings = FALSE)
    sgg_size <- suggest_size(cvvss[[tstsetup]], warnings = FALSE)
    if (!is.na(sgg_size_valsearch) & !is.na(sgg_size)) {
      suggsize_cond[tstsetup] <- sgg_size_valsearch >= sgg_size
    }
  }
  # At most a quarter of the (non-`NA`) setups may violate the suggested-size
  # expectation:
  expect_true(mean(!suggsize_cond, na.rm = TRUE) <= 0.25)
})
## Arguments specific to K-fold CV ----------------------------------------
test_that("invalid `K` fails", {
  skip_if_not(length(fits) > 0)
  refmod_crr <- refmods[[1]]
  # `K` below the minimum of 2:
  expect_error(cv_varsel(refmod_crr, cv_method = "kfold", K = 1),
               "^`K` must be at least 2\\.$")
  # `K` larger than the number of observations:
  expect_error(cv_varsel(refmod_crr, cv_method = "kfold", K = 1000),
               "^`K` cannot exceed the number of observations\\.$")
  # Non-scalar or non-numeric `K`:
  for (K_bad in list(c(4, 9), "a", dat)) {
    expect_error(cv_varsel(refmod_crr, cv_method = "kfold", K = K_bad),
                 "^`K` must be a single integer value\\.$")
  }
})
test_that(paste(
  "`cvfits` included in the `refmodel` object works for rstanarm reference",
  "models"
), {
  skip_if_not(run_cvvs)
  tstsetups <- grep("^rstanarm\\..*\\.kfold", names(cvvss), value = TRUE)
  if (!run_cvfits_all) {
    # To save time, pick a single setup (preferably a multilevel one, see the
    # grouped-CV edge case below):
    tstsetups_tmp <- head(grep("\\.glmm\\.", tstsetups, value = TRUE), 1)
    if (length(tstsetups_tmp) == 0) {
      tstsetups_tmp <- head(tstsetups, 1)
    }
    tstsetups <- tstsetups_tmp
  }
  for (tstsetup in tstsetups) {
    args_cvvs_i <- args_cvvs[[tstsetup]]
    tstsetup_fit <- args_cvvs_i$tstsetup_fit
    mod_crr <- args_cvvs_i$mod_nm
    fam_crr <- args_cvvs_i$fam_nm
    prj_crr <- args_cvvs_i$prj_nm
    meth_exp_crr <- args_cvvs_i$method %||% "forward"
    fit_crr <- fits[[tstsetup_fit]]
    K_crr <- K_tst
    # Refit `K_crr` times (note: below, the seed for constructing `folds_vec`
    # had to be changed in some cases to avoid unfavorable PRNG situations,
    # leading to technical issues such as nonconvergence of the submodel fitter;
    # this is also tied to the value of `seed_tst`):
    if (grepl("\\.glmm\\.", tstsetup)) {
      # Perform a grouped K-fold CV to test an edge case where all observations
      # belonging to the same level of a variable with group-level effects are
      # in the same fold, so prediction is performed for new levels (see, e.g.,
      # brms's GitHub issue #1286):
      if (exists(".Random.seed", envir = .GlobalEnv)) {
        rng_old <- get(".Random.seed", envir = .GlobalEnv)
      }
      # Make the construction of the CV folds reproducible:
      set.seed(seed2_tst * 3L)
      folds_vec <- loo::kfold_split_grouped(K = K_crr, x = dat$z.1)
      # Restore the RNG state so that downstream PRNG-dependent code is not
      # affected by the set.seed() call above:
      if (exists("rng_old")) assign(".Random.seed", rng_old, envir = .GlobalEnv)
    } else {
      folds_vec <- cv_folds(nobsv, K = K_crr, seed = seed2_tst)
    }
    # Additionally to suppressWarnings(), suppressMessages() could be used here
    # (but is not necessary since messages seem to be suppressed within
    # test_that()'s `code`); furthermore, try() is used because rstanarm
    # sometimes fails to refit:
    kfold_obj <- try(suppressWarnings(kfold(fit_crr,
                                            K = K_crr,
                                            folds = folds_vec,
                                            save_fits = TRUE,
                                            cores = 1)),
                     silent = TRUE)
    if (inherits(kfold_obj, "try-error")) {
      # Skip this setup (with a log entry) if the K-fold refits failed:
      cat("Could not test `tstsetup = \"", tstsetup, "\"` in the rstanarm ",
          "`cvfits` test. Error message: \"",
          attr(kfold_obj, "condition")$message, "\"\n", sep = "")
      next
    }
    # Reduce the kfold object to the list of K fits, with the fold assignment
    # attached as attribute `folds` (this object is passed to `cvfits` below):
    kfold_obj <- structure(kfold_obj$fits[, "fit"], folds = folds_vec)
    # Create `refmodel` object with `cvfits`:
    refmod_crr <- do.call(get_refmodel, c(
      list(object = fit_crr, cvfits = kfold_obj),
      excl_nonargs(args_ref[[args_cvvs_i$tstsetup_ref]])
    ))
    # Run cv_varsel():
    # Use suppressWarnings() because test_that() somehow redirects stderr() and
    # so throws warnings that projpred wants to capture internally:
    cvvs_cvfits <- suppressWarnings(do.call(cv_varsel, c(
      list(object = refmod_crr),
      excl_nonargs(args_cvvs_i, nms_excl_add = "K")
    )))
    # Checks:
    vsel_tester(
      cvvs_cvfits,
      with_cv = TRUE,
      refmod_expected = refmod_crr,
      cvfits_expected = kfold_obj,
      prd_trms_len_expected = args_cvvs_i$nterms_max,
      method_expected = meth_exp_crr,
      cv_method_expected = "kfold",
      valsearch_expected = args_cvvs_i$validate_search,
      search_terms_expected = args_cvvs_i$search_terms,
      search_trms_empty_size =
        length(args_cvvs_i$search_terms) &&
        all(grepl("\\+", args_cvvs_i$search_terms)),
      info_str = tstsetup
    )
    # Expected equality for most elements with a few exceptions:
    # TODO: Currently, `check.environment = FALSE` is needed. The reason is
    # probably that in the divergence minimizers, the projpred-extended family
    # is passed to argument `family` of the external model fitting functions
    # like lme4::glmer(). This should be fixed and then `check.environment =
    # FALSE` should be removed.
    expect_equal(cvvs_cvfits[setdiff(vsel_nms, vsel_nms_cvfits)],
                 cvvss[[tstsetup]][setdiff(vsel_nms, vsel_nms_cvfits)],
                 check.environment = FALSE,
                 info = tstsetup)
    # Expected inequality for the exceptions (the elements from
    # `vsel_nms_cvfits_opt` can be, but don't need to be differing):
    for (vsel_nm in setdiff(vsel_nms_cvfits, vsel_nms_cvfits_opt)) {
      # Suppress warning
      # "longer object length is not a multiple of shorter object length":
      suppressWarnings(
        expect_false(isTRUE(all.equal(cvvs_cvfits[[vsel_nm]],
                                      cvvss[[tstsetup]][[vsel_nm]])),
                     info = paste(tstsetup, vsel_nm, sep = "__"))
      )
    }
  }
})
test_that(paste(
  "`cvfits` included in the `refmodel` object works for brms reference models"
), {
  skip_if_not(run_cvvs)
  # A sufficiently recent brms version is required (presumably for the kfold()
  # features used below -- TODO confirm):
  skip_if_not(packageVersion("brms") >= "2.16.4")
  tstsetups <- grep("^brms\\..*\\.kfold", names(cvvss), value = TRUE)
  if (!run_cvfits_all) {
    # To save time, pick a single setup (preferably a multilevel one, see the
    # grouped-CV edge case below):
    tstsetups_tmp <- head(grep("\\.glmm\\.", tstsetups, value = TRUE), 1)
    if (length(tstsetups_tmp) == 0) {
      tstsetups_tmp <- head(tstsetups, 1)
    }
    tstsetups <- tstsetups_tmp
  }
  for (tstsetup in tstsetups) {
    args_cvvs_i <- args_cvvs[[tstsetup]]
    tstsetup_fit <- args_cvvs_i$tstsetup_fit
    mod_crr <- args_cvvs_i$mod_nm
    fam_crr <- args_cvvs_i$fam_nm
    prj_crr <- args_cvvs_i$prj_nm
    meth_exp_crr <- args_cvvs_i$method %||% "forward"
    fit_crr <- fits[[tstsetup_fit]]
    K_crr <- K_tst
    # Refit `K_crr` times (note: below, the seed for constructing `folds_vec`
    # had to be changed in some cases to avoid unfavorable PRNG situations,
    # leading to technical issues such as nonconvergence of the submodel fitter;
    # this is also tied to the value of `seed_tst`):
    if (grepl("\\.glmm\\.", tstsetup)) {
      # Perform a grouped K-fold CV to test an edge case where all observations
      # belonging to the same level of a variable with group-level effects are
      # in the same fold, so prediction is performed for new levels (see, e.g.,
      # brms's GitHub issue #1286):
      if (exists(".Random.seed", envir = .GlobalEnv)) {
        rng_old <- get(".Random.seed", envir = .GlobalEnv)
      }
      # Make the construction of the CV folds reproducible:
      set.seed(seed2_tst + 10L)
      folds_vec <- loo::kfold_split_grouped(K = K_crr, x = dat$z.1)
      # Restore the RNG state so that downstream PRNG-dependent code is not
      # affected by the set.seed() call above:
      if (exists("rng_old")) assign(".Random.seed", rng_old, envir = .GlobalEnv)
    } else if (grepl("\\.gam\\.", tstsetup)) {
      folds_vec <- cv_folds(nobsv, K = K_crr, seed = seed2_tst + 10L)
    } else {
      folds_vec <- cv_folds(nobsv, K = K_crr, seed = seed2_tst)
    }
    kfold_obj <- kfold(fit_crr,
                       K = K_crr,
                       folds = folds_vec,
                       save_fits = TRUE,
                       seed = seed_fit)
    # Reduce the kfold object to the list of K fits, with the fold assignment
    # attached as attribute `folds` (this object is passed to `cvfits` below):
    kfold_obj <- structure(kfold_obj$fits[, "fit"], folds = folds_vec)
    # Create `refmodel` object with `cvfits`:
    refmod_crr <- do.call(get_refmodel, c(
      list(object = fit_crr, cvfits = kfold_obj),
      excl_nonargs(args_ref[[args_cvvs_i$tstsetup_ref]])
    ))
    # Run cv_varsel():
    cvvs_cvfits <- try(
      do.call(cv_varsel, c(
        list(object = refmod_crr),
        excl_nonargs(args_cvvs_i, nms_excl_add = "K")
      )),
      silent = TRUE
    )
    if (inherits(cvvs_cvfits, "try-error")) {
      # Log the failure before checking that it is of the known, tolerable
      # kind:
      cat("Failure for `tstsetup = \"", tstsetup, "\"` in the brms ",
          "`cvfits` test. Error message: \"",
          attr(cvvs_cvfits, "condition")$message, "\"\n", sep = "")
      # Check that this is a "pwrssUpdate" failure in lme4, so for solving this,
      # we would either need to tweak the lme4 tuning parameters manually (via
      # `...`) or change the data-generating mechanism here in the tests (to
      # obtain less extreme or more data):
      expect_true(grepl("pwrssUpdate", attr(cvvs_cvfits, "condition")$message),
                  info = tstsetup)
      # Furthermore, this should only occur in the `run_more = TRUE` case, so it
      # can be skipped (because there are enough other `tstsetups` for which
      # this works):
      expect_true(run_more, info = tstsetup)
      next
    }
    # Test the reproducibility of ref_predfun() when applied to new observations
    # (should be ensured by get_refmodel.brmsfit()'s internal `refprd_seed`):
    runif(1) # Just to advance the global RNG state before the second run.
    cvvs_cvfits_repr <- do.call(cv_varsel, c(
      list(object = refmod_crr),
      excl_nonargs(args_cvvs_i, nms_excl_add = "K")
    ))
    # Checks:
    expect_equal(cvvs_cvfits, cvvs_cvfits_repr, info = tstsetup)
    vsel_tester(
      cvvs_cvfits,
      with_cv = TRUE,
      refmod_expected = refmod_crr,
      cvfits_expected = kfold_obj,
      prd_trms_len_expected = args_cvvs_i$nterms_max,
      method_expected = meth_exp_crr,
      cv_method_expected = "kfold",
      valsearch_expected = args_cvvs_i$validate_search,
      search_terms_expected = args_cvvs_i$search_terms,
      search_trms_empty_size =
        length(args_cvvs_i$search_terms) &&
        all(grepl("\\+", args_cvvs_i$search_terms)),
      search_control_expected = args_cvvs_i[c("avoid.increase")],
      info_str = tstsetup
    )
    # Expected equality for most elements with a few exceptions:
    # TODO: Currently, `check.environment = FALSE` is needed. The reason is
    # probably that in the divergence minimizers, the projpred-extended family
    # is passed to argument `family` of the external model fitting functions
    # like lme4::glmer(). This should be fixed and then `check.environment =
    # FALSE` should be removed.
    expect_equal(cvvs_cvfits[setdiff(vsel_nms, vsel_nms_cvfits)],
                 cvvss[[tstsetup]][setdiff(vsel_nms, vsel_nms_cvfits)],
                 check.environment = FALSE,
                 info = tstsetup)
    # Expected inequality for the exceptions (the elements from
    # `vsel_nms_cvfits_opt` can be, but don't need to be differing):
    for (vsel_nm in setdiff(vsel_nms_cvfits, vsel_nms_cvfits_opt)) {
      # Suppress warning
      # "longer object length is not a multiple of shorter object length":
      suppressWarnings(
        expect_false(isTRUE(all.equal(cvvs_cvfits[[vsel_nm]],
                                      cvvss[[tstsetup]][[vsel_nm]])),
                     info = paste(tstsetup, vsel_nm, sep = "__"))
      )
    }
  }
})
test_that("`cvfun` included in the `refmodel` object works", {
  skip_if_not(run_cvvs)
  # One K-fold setup per reference-model-fitting package suffices:
  tstsetups <- c(
    head(grep("^rstanarm\\..*\\.kfold", names(cvvss), value = TRUE), 1),
    head(grep("^brms\\..*\\.kfold", names(cvvss), value = TRUE), 1)
  )
  for (tstsetup in tstsetups) {
    args_cvvs_i <- args_cvvs[[tstsetup]]
    refmod_i <- refmods[[args_cvvs_i$tstsetup_ref]]
    # Run cv_varsel() without supplying `cvfits`, so the K refits have to be
    # created internally (via the `refmodel` object's `cvfun`):
    cvvs_cvfun <- do.call(cv_varsel, c(
      list(object = refmod_i, K = K_tst),
      excl_nonargs(args_cvvs_i)
    ))
    vsel_tester(
      cvvs_cvfun,
      with_cv = TRUE,
      refmod_expected = refmod_i,
      cvfits_expected = NULL,
      prd_trms_len_expected = args_cvvs_i$nterms_max,
      method_expected = args_cvvs_i$method %||% "forward",
      cv_method_expected = "kfold",
      valsearch_expected = args_cvvs_i$validate_search,
      search_terms_expected = args_cvvs_i$search_terms,
      search_trms_empty_size =
        length(args_cvvs_i$search_terms) &&
        all(grepl("\\+", args_cvvs_i$search_terms)),
      search_control_expected = args_cvvs_i[c("avoid.increase")],
      info_str = tstsetup
    )
  }
})
## cv_varsel.vsel() -------------------------------------------------------
test_that(paste(
  "cv_varsel.vsel() (in particular, re-using `cv_method` and",
  "`validate_search`) works for `vsel` objects from cv_varsel()"
), {
  # Calling cv_varsel() on a `vsel` object that itself came from cv_varsel()
  # should re-use that object's `cv_method` and `validate_search` settings
  # without them being passed again.
  skip_if_not(run_cvvs)
  tstsetups <- names(cvvss)
  if (!run_more) {
    tstsetups <- head(tstsetups, 1)
  }
  for (tstsetup in tstsetups) {
    # `refit_prj_crr` is TRUE unless the setup is PSIS-LOO with
    # `validate_search = FALSE` (i.e., re-projection is skipped only there):
    refit_prj_crr <- !identical(args_cvvs[[tstsetup]]$validate_search, FALSE) ||
      identical(args_cvvs[[tstsetup]]$cv_method, "kfold")
    # When re-projecting, use one cluster fewer than the original run so the
    # new run is distinguishable from the stored one:
    nclusters_pred_crr <- nclusters_pred_tst - if (refit_prj_crr) 1L else 0L
    # Use suppressWarnings() because test_that() somehow redirects stderr() and
    # so throws warnings that projpred wants to capture internally:
    cvvs_eval <- suppressWarnings(cv_varsel(
      cvvss[[tstsetup]], refit_prj = refit_prj_crr,
      nclusters_pred = nclusters_pred_crr, verbose = FALSE, seed = seed2_tst
    ))
    tstsetup_ref <- args_cvvs[[tstsetup]]$tstsetup_ref
    meth_exp_crr <- args_cvvs[[tstsetup]]$method %||% "forward"
    vsel_tester(
      cvvs_eval,
      with_cv = TRUE,
      refmod_expected = refmods[[tstsetup_ref]],
      # K-fold setups carry the pre-computed fold fits; LOO setups fall back
      # to whatever `cvfits` the reference model itself stores:
      cvfits_expected = if (identical(args_cvvs[[tstsetup]]$cv_method,
                                      "kfold")) {
        cvfitss[[tstsetup_ref]]
      } else {
        refmods[[tstsetup_ref]]$cvfits
      },
      prd_trms_len_expected = args_cvvs[[tstsetup]]$nterms_max,
      method_expected = meth_exp_crr,
      cv_method_expected = args_cvvs[[tstsetup]]$cv_method,
      valsearch_expected = args_cvvs[[tstsetup]]$validate_search,
      refit_prj_expected = refit_prj_crr,
      # Without re-projection, the evaluation re-uses the search's projections
      # (a single draw after an L1 search, `nclusters_tst` clusters otherwise):
      nprjdraws_eval_expected = if (!refit_prj_crr && meth_exp_crr == "L1") {
        1L
      } else if (!refit_prj_crr) {
        nclusters_tst
      } else {
        nclusters_pred_crr
      },
      search_terms_expected = args_cvvs[[tstsetup]]$search_terms,
      search_trms_empty_size =
        length(args_cvvs[[tstsetup]]$search_terms) &&
        all(grepl("\\+", args_cvvs[[tstsetup]]$search_terms)),
      search_control_expected = args_cvvs[[tstsetup]][c("avoid.increase")],
      info_str = tstsetup
    )
  }
})
test_that(paste(
  "cv_varsel.vsel() with PSIS-LOO CV and `validate_search = FALSE` works for",
  "`vsel` objects from varsel()"
), {
  # A `vsel` object from varsel() (no CV at all) should be usable as input to
  # cv_varsel() for a PSIS-LOO evaluation without re-running the search.
  skip_if_not(run_vs)
  skip_if_not(run_cvvs)
  # Only the first setup runs unless `run_more = TRUE`; the first setup uses
  # `refit_prj = FALSE`, later ones re-project with one cluster fewer:
  tstsetup_counter <- 0L
  for (tstsetup in names(vss)) {
    if (!run_more && tstsetup_counter > 0L) {
      next
    } else if (run_more && tstsetup_counter > 0L) {
      refit_prj_crr <- TRUE
      nclusters_pred_crr <- args_vs[[tstsetup]]$nclusters_pred - 1L
    } else {
      refit_prj_crr <- FALSE
      nclusters_pred_crr <- args_vs[[tstsetup]]$nclusters_pred
    }
    # Use suppressWarnings() because test_that() somehow redirects stderr() and
    # so throws warnings that projpred wants to capture internally:
    cvvs_eval <- suppressWarnings(cv_varsel(
      vss[[tstsetup]], validate_search = FALSE, refit_prj = refit_prj_crr,
      nclusters_pred = nclusters_pred_crr, verbose = FALSE, seed = seed2_tst
    ))
    tstsetup_ref <- args_vs[[tstsetup]]$tstsetup_ref
    meth_exp_crr <- args_vs[[tstsetup]]$method %||% "forward"
    vsel_tester(
      cvvs_eval,
      with_cv = TRUE,
      refmod_expected = refmods[[tstsetup_ref]],
      prd_trms_len_expected = args_vs[[tstsetup]]$nterms_max,
      method_expected = meth_exp_crr,
      cv_method_expected = "LOO",
      valsearch_expected = FALSE,
      refit_prj_expected = refit_prj_crr,
      # Without re-projection, the evaluation re-uses the search's projections
      # (a single draw after an L1 search, `nclusters_tst` clusters otherwise):
      nprjdraws_eval_expected = if (!refit_prj_crr && meth_exp_crr == "L1") {
        1L
      } else if (!refit_prj_crr) {
        nclusters_tst
      } else {
        nclusters_pred_crr
      },
      search_terms_expected = args_vs[[tstsetup]]$search_terms,
      search_trms_empty_size =
        length(args_vs[[tstsetup]]$search_terms) &&
        all(grepl("\\+", args_vs[[tstsetup]]$search_terms)),
      search_control_expected = args_vs[[tstsetup]][c("avoid.increase")],
      info_str = tstsetup
    )
    tstsetup_counter <- tstsetup_counter + 1L
  }
})
test_that(paste(
  "cv_varsel.vsel() with K-fold CV and `validate_search = FALSE` works for",
  "`vsel` objects from varsel()"
), {
  # A `vsel` object from varsel() should also work as input to cv_varsel()
  # with K-fold CV (using pre-computed `cvfits`), search not re-validated.
  skip_if_not(run_vs)
  skip_if_not(run_cvvs)
  tstsetup_counter <- 0L
  for (tstsetup in names(vss)) {
    tstsetup_ref <- args_vs[[tstsetup]]$tstsetup_ref
    if (grepl("\\.with_wobs", tstsetup_ref) &&
        args_vs[[tstsetup]]$pkg_nm == "rstanarm") {
      # Currently, rstanarm:::kfold.stanreg() doesn't support observation
      # weights:
      next
    } else if (args_vs[[tstsetup]]$fam_nm == "cumul" &&
               args_vs[[tstsetup]]$pkg_nm == "rstanarm") {
      # Currently, rstanarm:::kfold.stanreg() doesn't work for
      # rstanarm::stan_polr() fits, see rstanarm issue stan-dev/rstanarm#564:
      next
    } else if (!tstsetup_ref %in% names(cvfitss)) {
      # We would need to call run_cvfun() for this reference model object:
      next
    }
    # Only the first eligible setup runs unless `run_more = TRUE`; later ones
    # use one cluster fewer to distinguish the runs:
    if (!run_more && tstsetup_counter > 0L) {
      next
    } else if (run_more && tstsetup_counter > 0L) {
      nclusters_pred_crr <- args_vs[[tstsetup]]$nclusters_pred - 1L
    } else {
      nclusters_pred_crr <- args_vs[[tstsetup]]$nclusters_pred
    }
    # Use suppressWarnings() because test_that() somehow redirects stderr() and
    # so throws warnings that projpred wants to capture internally:
    cvvs_eval <- suppressWarnings(cv_varsel(
      vss[[tstsetup]], cv_method = "kfold", cvfits = cvfitss[[tstsetup_ref]],
      validate_search = FALSE, nclusters_pred = nclusters_pred_crr,
      verbose = FALSE, seed = seed2_tst
    ))
    meth_exp_crr <- args_vs[[tstsetup]]$method %||% "forward"
    vsel_tester(
      cvvs_eval,
      with_cv = TRUE,
      refmod_expected = refmods[[tstsetup_ref]],
      cvfits_expected = cvfitss[[tstsetup_ref]],
      prd_trms_len_expected = args_vs[[tstsetup]]$nterms_max,
      method_expected = meth_exp_crr,
      cv_method_expected = "kfold",
      # NOTE(review): `nloo` presumably does not apply to K-fold CV, hence
      # `NULL` here -- confirm against vsel_tester()'s definition:
      nloo_expected = NULL,
      valsearch_expected = FALSE,
      nprjdraws_eval_expected = nclusters_pred_crr,
      search_terms_expected = args_vs[[tstsetup]]$search_terms,
      search_trms_empty_size =
        length(args_vs[[tstsetup]]$search_terms) &&
        all(grepl("\\+", args_vs[[tstsetup]]$search_terms)),
      search_control_expected = args_vs[[tstsetup]][c("avoid.increase")],
      info_str = tstsetup
    )
    tstsetup_counter <- tstsetup_counter + 1L
  }
})
test_that(paste(
  "cv_varsel.vsel() with PSIS-LOO CV and `validate_search = FALSE` works for",
  "`vsel` objects from cv_varsel() created with `validate_search = TRUE`"
), {
  # Downgrading `validate_search` from TRUE to FALSE on an existing
  # cv_varsel() output should work (with `refit_prj = FALSE`, so only the
  # evaluation changes).
  skip_if_not(run_cvvs)
  tstsetups <- names(cvvss)
  if (!run_more) {
    tstsetups <- head(tstsetups, 1)
  }
  for (tstsetup in tstsetups) {
    if (isFALSE(args_cvvs[[tstsetup]]$validate_search)) {
      next
    }
    # Use suppressWarnings() because test_that() somehow redirects stderr() and
    # so throws warnings that projpred wants to capture internally:
    if (identical(args_cvvs[[tstsetup]]$cv_method, "kfold")) {
      # For originally K-fold objects, `cv_method = "LOO"` must be given
      # explicitly to override the re-used setting:
      cvvs_eval <- suppressWarnings(cv_varsel(
        cvvss[[tstsetup]], cv_method = "LOO", validate_search = FALSE,
        refit_prj = FALSE, verbose = FALSE, seed = seed2_tst
      ))
    } else {
      cvvs_eval <- suppressWarnings(cv_varsel(
        cvvss[[tstsetup]], validate_search = FALSE, refit_prj = FALSE,
        verbose = FALSE, seed = seed2_tst
      ))
    }
    tstsetup_ref <- args_cvvs[[tstsetup]]$tstsetup_ref
    meth_exp_crr <- args_cvvs[[tstsetup]]$method %||% "forward"
    extra_tol_crr <- 1.1
    if (meth_exp_crr == "L1" &&
        any(grepl(":", ranking(cvvs_eval)[["fulldata"]]))) {
      ### Testing for non-increasing element `ce` (for increasing model size)
      ### doesn't make sense if the ranking of predictors involved in
      ### interactions has been changed, so we choose a higher `extra_tol`:
      extra_tol_crr <- 1.2
      ###
    }
    vsel_tester(
      cvvs_eval,
      with_cv = TRUE,
      refmod_expected = refmods[[tstsetup_ref]],
      cvfits_expected = if (identical(args_cvvs[[tstsetup]]$cv_method,
                                      "kfold")) {
        cvfitss[[tstsetup_ref]]
      } else {
        refmods[[tstsetup_ref]]$cvfits
      },
      prd_trms_len_expected = args_cvvs[[tstsetup]]$nterms_max,
      method_expected = meth_exp_crr,
      cv_method_expected = "LOO",
      valsearch_expected = FALSE,
      refit_prj_expected = FALSE,
      # Originally K-fold objects retain their `K`; LOO objects have none:
      K_expected = if (identical(args_cvvs[[tstsetup]]$cv_method, "kfold")) {
        K_tst
      } else {
        NULL
      },
      search_terms_expected = args_cvvs[[tstsetup]]$search_terms,
      search_trms_empty_size =
        length(args_cvvs[[tstsetup]]$search_terms) &&
        all(grepl("\\+", args_cvvs[[tstsetup]]$search_terms)),
      search_control_expected = args_cvvs[[tstsetup]][c("avoid.increase")],
      extra_tol = extra_tol_crr,
      info_str = tstsetup
    )
  }
})
test_that(paste(
  "cv_varsel.vsel() with K-fold CV and `validate_search = FALSE` works for",
  "`vsel` objects from cv_varsel() created with `validate_search = TRUE`"
), {
  # Downgrading `validate_search` from TRUE to FALSE with a K-fold
  # evaluation: originally-LOO objects need the K-fold arguments supplied
  # explicitly; originally-K-fold objects re-use theirs.
  skip_if_not(run_cvvs)
  tstsetups <- names(cvvss)
  if (!run_more) {
    tstsetups <- head(tstsetups, 8)
  }
  for (tstsetup in tstsetups) {
    tstsetup_ref <- args_cvvs[[tstsetup]]$tstsetup_ref
    if (isFALSE(args_cvvs[[tstsetup]]$validate_search)) {
      next
    } else if (grepl("\\.with_wobs", tstsetup_ref) &&
               args_cvvs[[tstsetup]]$pkg_nm == "rstanarm") {
      # Currently, rstanarm:::kfold.stanreg() doesn't support observation
      # weights:
      next
    } else if (args_cvvs[[tstsetup]]$fam_nm == "cumul" &&
               args_cvvs[[tstsetup]]$pkg_nm == "rstanarm") {
      # Currently, rstanarm:::kfold.stanreg() doesn't work for
      # rstanarm::stan_polr() fits, see rstanarm issue stan-dev/rstanarm#564:
      next
    }
    # One cluster fewer than the original run, to distinguish the two runs:
    nclusters_pred_crr <- args_cvvs[[tstsetup]]$nclusters_pred - 1L
    # Use suppressWarnings() because test_that() somehow redirects stderr() and
    # so throws warnings that projpred wants to capture internally:
    if (identical(args_cvvs[[tstsetup]]$cv_method, "kfold")) {
      cvvs_eval <- suppressWarnings(cv_varsel(
        cvvss[[tstsetup]], validate_search = FALSE,
        nclusters_pred = nclusters_pred_crr, verbose = FALSE, seed = seed2_tst
      ))
    } else {
      cvvs_eval <- suppressWarnings(cv_varsel(
        cvvss[[tstsetup]], cv_method = "kfold", K = K_tst,
        cvfits = cvfitss[[tstsetup_ref]], validate_search = FALSE,
        nclusters_pred = nclusters_pred_crr, verbose = FALSE, seed = seed2_tst
      ))
    }
    meth_exp_crr <- args_cvvs[[tstsetup]]$method %||% "forward"
    vsel_tester(
      cvvs_eval,
      with_cv = TRUE,
      refmod_expected = refmods[[tstsetup_ref]],
      cvfits_expected = cvfitss[[tstsetup_ref]],
      prd_trms_len_expected = args_cvvs[[tstsetup]]$nterms_max,
      method_expected = meth_exp_crr,
      cv_method_expected = "kfold",
      valsearch_expected = FALSE,
      nprjdraws_eval_expected = nclusters_pred_crr,
      search_terms_expected = args_cvvs[[tstsetup]]$search_terms,
      search_trms_empty_size =
        length(args_cvvs[[tstsetup]]$search_terms) &&
        all(grepl("\\+", args_cvvs[[tstsetup]]$search_terms)),
      search_control_expected = args_cvvs[[tstsetup]][c("avoid.increase")],
      info_str = tstsetup
    )
  }
})
test_that(paste(
  "switching the CV method in cv_varsel.vsel() works for `vsel` objects from",
  "cv_varsel() created with `validate_search = FALSE`"
), {
  # Switching the CV method (K-fold -> LOO or LOO -> K-fold) on objects that
  # were created with `validate_search = FALSE`.
  skip_if_not(run_cvvs)
  tstsetups <- names(cvvss)
  if (!run_more) {
    tstsetups <- head(tstsetups, 2)
  }
  for (tstsetup in tstsetups) {
    tstsetup_ref <- args_cvvs[[tstsetup]]$tstsetup_ref
    if (!isFALSE(args_cvvs[[tstsetup]]$validate_search)) {
      next
    } else if (grepl("\\.with_wobs", tstsetup_ref) &&
               args_cvvs[[tstsetup]]$pkg_nm == "rstanarm") {
      # Currently, rstanarm:::kfold.stanreg() doesn't support observation
      # weights:
      next
    } else if (args_cvvs[[tstsetup]]$fam_nm == "cumul" &&
               args_cvvs[[tstsetup]]$pkg_nm == "rstanarm") {
      # Currently, rstanarm:::kfold.stanreg() doesn't work for
      # rstanarm::stan_polr() fits, see rstanarm issue stan-dev/rstanarm#564:
      next
    }
    # One cluster fewer than the original run, to distinguish the two runs:
    nclusters_pred_crr <- args_cvvs[[tstsetup]]$nclusters_pred - 1L
    # Use suppressWarnings() because test_that() somehow redirects stderr() and
    # so throws warnings that projpred wants to capture internally:
    if (identical(args_cvvs[[tstsetup]]$cv_method, "kfold")) {
      # Flip the CV method relative to the original object:
      cv_meth_crr <- "LOO"
      cvvs_eval <- suppressWarnings(cv_varsel(
        cvvss[[tstsetup]], cv_method = cv_meth_crr,
        nclusters_pred = nclusters_pred_crr, verbose = FALSE, seed = seed2_tst
      ))
    } else {
      cv_meth_crr <- "kfold"
      cvvs_eval <- suppressWarnings(cv_varsel(
        cvvss[[tstsetup]], cv_method = cv_meth_crr, K = K_tst,
        cvfits = cvfitss[[tstsetup_ref]], nclusters_pred = nclusters_pred_crr,
        verbose = FALSE, seed = seed2_tst
      ))
    }
    meth_exp_crr <- args_cvvs[[tstsetup]]$method %||% "forward"
    vsel_tester(
      cvvs_eval,
      with_cv = TRUE,
      refmod_expected = refmods[[tstsetup_ref]],
      # NOTE(review): `cvfits_expected` and `K_expected` are set
      # unconditionally, even when switching to LOO -- presumably the K-fold
      # fits/`K` are retained in the object; confirm in vsel_tester():
      cvfits_expected = cvfitss[[tstsetup_ref]],
      prd_trms_len_expected = args_cvvs[[tstsetup]]$nterms_max,
      method_expected = meth_exp_crr,
      cv_method_expected = cv_meth_crr,
      valsearch_expected = FALSE,
      K_expected = K_tst,
      nprjdraws_eval_expected = nclusters_pred_crr,
      search_terms_expected = args_cvvs[[tstsetup]]$search_terms,
      search_trms_empty_size =
        length(args_cvvs[[tstsetup]]$search_terms) &&
        all(grepl("\\+", args_cvvs[[tstsetup]]$search_terms)),
      search_control_expected = args_cvvs[[tstsetup]][c("avoid.increase")],
      info_str = tstsetup
    )
  }
})
test_that(paste(
  "switching the CV method in cv_varsel.vsel() works for `vsel` objects from",
  "cv_varsel() created with `validate_search = TRUE`"
), {
  # Same CV-method switch as the preceding test, but for objects created with
  # `validate_search = TRUE`.
  skip_if_not(run_cvvs)
  tstsetups <- names(cvvss)
  if (!run_more) {
    tstsetups <- head(tstsetups, 8)
  }
  for (tstsetup in tstsetups) {
    tstsetup_ref <- args_cvvs[[tstsetup]]$tstsetup_ref
    if (isFALSE(args_cvvs[[tstsetup]]$validate_search)) {
      next
    } else if (grepl("\\.with_wobs", tstsetup_ref) &&
               args_cvvs[[tstsetup]]$pkg_nm == "rstanarm") {
      # Currently, rstanarm:::kfold.stanreg() doesn't support observation
      # weights:
      next
    } else if (args_cvvs[[tstsetup]]$fam_nm == "cumul" &&
               args_cvvs[[tstsetup]]$pkg_nm == "rstanarm") {
      # Currently, rstanarm:::kfold.stanreg() doesn't work for
      # rstanarm::stan_polr() fits, see rstanarm issue stan-dev/rstanarm#564:
      next
    }
    # One cluster fewer than the original run, to distinguish the two runs:
    nclusters_pred_crr <- args_cvvs[[tstsetup]]$nclusters_pred - 1L
    # Use suppressWarnings() because test_that() somehow redirects stderr() and
    # so throws warnings that projpred wants to capture internally:
    if (identical(args_cvvs[[tstsetup]]$cv_method, "kfold")) {
      # Flip the CV method relative to the original object:
      cv_meth_crr <- "LOO"
      cvvs_eval <- suppressWarnings(cv_varsel(
        cvvss[[tstsetup]], cv_method = cv_meth_crr,
        nclusters_pred = nclusters_pred_crr, verbose = FALSE, seed = seed2_tst
      ))
    } else {
      cv_meth_crr <- "kfold"
      cvvs_eval <- suppressWarnings(cv_varsel(
        cvvss[[tstsetup]], cv_method = cv_meth_crr, K = K_tst,
        cvfits = cvfitss[[tstsetup_ref]], nclusters_pred = nclusters_pred_crr,
        verbose = FALSE, seed = seed2_tst
      ))
    }
    meth_exp_crr <- args_cvvs[[tstsetup]]$method %||% "forward"
    vsel_tester(
      cvvs_eval,
      with_cv = TRUE,
      refmod_expected = refmods[[tstsetup_ref]],
      # NOTE(review): `cvfits_expected` and `K_expected` are set
      # unconditionally, even when switching to LOO -- presumably the K-fold
      # fits/`K` are retained in the object; confirm in vsel_tester():
      cvfits_expected = cvfitss[[tstsetup_ref]],
      prd_trms_len_expected = args_cvvs[[tstsetup]]$nterms_max,
      method_expected = meth_exp_crr,
      cv_method_expected = cv_meth_crr,
      valsearch_expected = TRUE,
      K_expected = K_tst,
      nprjdraws_eval_expected = nclusters_pred_crr,
      search_terms_expected = args_cvvs[[tstsetup]]$search_terms,
      search_trms_empty_size =
        length(args_cvvs[[tstsetup]]$search_terms) &&
        all(grepl("\\+", args_cvvs[[tstsetup]]$search_terms)),
      search_control_expected = args_cvvs[[tstsetup]][c("avoid.increase")],
      info_str = tstsetup
    )
  }
})
test_that(paste(
  "cv_varsel.vsel() with PSIS-LOO CV and `validate_search = TRUE` works for",
  "`vsel` objects from varsel()"
), {
  # Full PSIS-LOO CV (search re-run in each fold) starting from a plain
  # varsel() output. lme4-based fits may fail numerically; such failures are
  # tolerated (see the "pwrssUpdate" handling below).
  skip_if_not(run_vs)
  skip_if_not(run_cvvs)
  tstsetup_counter <- 0L
  for (tstsetup in names(vss)) {
    # With `run_more = TRUE`, restrict to glm/gam models to save time; without
    # it, run only the first setup:
    if ((!run_more && tstsetup_counter > 0L) ||
        (run_more && !args_vs[[tstsetup]]$mod_nm %in% c("glm", "gam"))) {
      next
    } else if (run_more && tstsetup_counter > 0L) {
      nclusters_pred_crr <- args_vs[[tstsetup]]$nclusters_pred - 1L
    } else {
      nclusters_pred_crr <- args_vs[[tstsetup]]$nclusters_pred
    }
    # Use suppressWarnings() because test_that() somehow redirects stderr() and
    # so throws warnings that projpred wants to capture internally:
    cvvs_eval <- try(
      suppressWarnings(cv_varsel(
        vss[[tstsetup]], nclusters_pred = nclusters_pred_crr, verbose = FALSE,
        seed = seed2_tst
      )),
      silent = TRUE
    )
    if (inherits(cvvs_eval, "try-error")) {
      cat("Failure for `tstsetup = \"", tstsetup, "\"` in a cv_varsel.vsel() ",
          "test. Error message: \"",
          attr(cvvs_eval, "condition")$message, "\"\n", sep = "")
      # Check that this is a "pwrssUpdate" failure in lme4, so for solving this,
      # we would either need to tweak the lme4 tuning parameters manually (via
      # `...`) or change the data-generating mechanism here in the tests (to
      # obtain less extreme or more data):
      expect_true(grepl("pwrssUpdate", attr(cvvs_eval, "condition")$message),
                  info = tstsetup)
      # Furthermore, this should only occur in the `run_more = TRUE` case, so it
      # can be skipped (because there are enough other `tstsetups` for which
      # this works):
      expect_true(run_more, info = tstsetup)
      next
    }
    tstsetup_ref <- args_vs[[tstsetup]]$tstsetup_ref
    meth_exp_crr <- args_vs[[tstsetup]]$method %||% "forward"
    vsel_tester(
      cvvs_eval,
      with_cv = TRUE,
      refmod_expected = refmods[[tstsetup_ref]],
      prd_trms_len_expected = args_vs[[tstsetup]]$nterms_max,
      method_expected = meth_exp_crr,
      cv_method_expected = "LOO",
      valsearch_expected = TRUE,
      nprjdraws_eval_expected = nclusters_pred_crr,
      search_terms_expected = args_vs[[tstsetup]]$search_terms,
      search_trms_empty_size =
        length(args_vs[[tstsetup]]$search_terms) &&
        all(grepl("\\+", args_vs[[tstsetup]]$search_terms)),
      search_control_expected = args_vs[[tstsetup]][c("avoid.increase")],
      info_str = tstsetup
    )
    tstsetup_counter <- tstsetup_counter + 1L
  }
})
test_that(paste(
  "cv_varsel.vsel() with K-fold CV and `validate_search = TRUE` works for",
  "`vsel` objects from varsel()"
), {
  # Full K-fold CV (search re-run in each fold) starting from a plain
  # varsel() output, with pre-computed `cvfits`. lme4 "pwrssUpdate" failures
  # are tolerated (see below).
  skip_if_not(run_vs)
  skip_if_not(run_cvvs)
  tstsetup_counter <- 0L
  for (tstsetup in names(vss)) {
    tstsetup_ref <- args_vs[[tstsetup]]$tstsetup_ref
    if (grepl("\\.with_wobs", tstsetup_ref) &&
        args_vs[[tstsetup]]$pkg_nm == "rstanarm") {
      # Currently, rstanarm:::kfold.stanreg() doesn't support observation
      # weights:
      next
    } else if (args_vs[[tstsetup]]$fam_nm == "cumul" &&
               args_vs[[tstsetup]]$pkg_nm == "rstanarm") {
      # Currently, rstanarm:::kfold.stanreg() doesn't work for
      # rstanarm::stan_polr() fits, see rstanarm issue stan-dev/rstanarm#564:
      next
    } else if (!tstsetup_ref %in% names(cvfitss)) {
      # We would need to call run_cvfun() for this reference model object:
      next
    }
    # Only the first eligible setup runs unless `run_more = TRUE`; later ones
    # use one cluster fewer to distinguish the runs:
    if (!run_more && tstsetup_counter > 0L) {
      next
    } else if (run_more && tstsetup_counter > 0L) {
      nclusters_pred_crr <- args_vs[[tstsetup]]$nclusters_pred - 1L
    } else {
      nclusters_pred_crr <- args_vs[[tstsetup]]$nclusters_pred
    }
    # Use suppressWarnings() because test_that() somehow redirects stderr() and
    # so throws warnings that projpred wants to capture internally:
    cvvs_eval <- try(
      suppressWarnings(cv_varsel(
        vss[[tstsetup]], cv_method = "kfold", cvfits = cvfitss[[tstsetup_ref]],
        nclusters_pred = nclusters_pred_crr, verbose = FALSE, seed = seed2_tst
      )),
      silent = TRUE
    )
    if (inherits(cvvs_eval, "try-error")) {
      cat("Failure for `tstsetup = \"", tstsetup, "\"` in a cv_varsel.vsel() ",
          "test. Error message: \"",
          attr(cvvs_eval, "condition")$message, "\"\n", sep = "")
      # Check that this is a "pwrssUpdate" failure in lme4, so for solving this,
      # we would either need to tweak the lme4 tuning parameters manually (via
      # `...`) or change the data-generating mechanism here in the tests (to
      # obtain less extreme or more data):
      expect_true(grepl("pwrssUpdate", attr(cvvs_eval, "condition")$message),
                  info = tstsetup)
      # Furthermore, this should only occur in the `run_more = TRUE` case, so it
      # can be skipped (because there are enough other `tstsetups` for which
      # this works):
      expect_true(run_more, info = tstsetup)
      next
    }
    meth_exp_crr <- args_vs[[tstsetup]]$method %||% "forward"
    vsel_tester(
      cvvs_eval,
      with_cv = TRUE,
      refmod_expected = refmods[[tstsetup_ref]],
      cvfits_expected = cvfitss[[tstsetup_ref]],
      prd_trms_len_expected = args_vs[[tstsetup]]$nterms_max,
      method_expected = meth_exp_crr,
      cv_method_expected = "kfold",
      # NOTE(review): `nloo` presumably does not apply to K-fold CV, hence
      # `NULL` here -- confirm against vsel_tester()'s definition:
      nloo_expected = NULL,
      valsearch_expected = TRUE,
      nprjdraws_eval_expected = nclusters_pred_crr,
      search_terms_expected = args_vs[[tstsetup]]$search_terms,
      search_trms_empty_size =
        length(args_vs[[tstsetup]]$search_terms) &&
        all(grepl("\\+", args_vs[[tstsetup]]$search_terms)),
      search_control_expected = args_vs[[tstsetup]][c("avoid.increase")],
      info_str = tstsetup
    )
    tstsetup_counter <- tstsetup_counter + 1L
  }
})
test_that(paste(
  "cv_varsel.vsel() with PSIS-LOO CV and `validate_search = TRUE` works for",
  "`vsel` objects from cv_varsel() created with `validate_search = FALSE`"
), {
  # Upgrading `validate_search` from FALSE to TRUE with a PSIS-LOO
  # evaluation; the search must then be re-run in each fold. lme4
  # "pwrssUpdate" failures are tolerated (see below).
  skip_if_not(run_cvvs)
  tstsetups <- names(cvvss)
  if (!run_more) {
    tstsetups <- head(tstsetups, 2)
  }
  for (tstsetup in tstsetups) {
    if (run_more && !args_cvvs[[tstsetup]]$mod_nm %in% c("glm", "gam")) {
      # Save some time:
      next
    }
    if (!isFALSE(args_cvvs[[tstsetup]]$validate_search)) {
      next
    }
    # One cluster fewer than the original run, to distinguish the two runs:
    nclusters_pred_crr <- args_cvvs[[tstsetup]]$nclusters_pred - 1L
    # Use suppressWarnings() because test_that() somehow redirects stderr() and
    # so throws warnings that projpred wants to capture internally:
    if (identical(args_cvvs[[tstsetup]]$cv_method, "kfold")) {
      # For originally K-fold objects, `cv_method = "LOO"` must be given
      # explicitly to override the re-used setting:
      cvvs_eval <- try(
        suppressWarnings(cv_varsel(
          cvvss[[tstsetup]], cv_method = "LOO", validate_search = TRUE,
          nclusters_pred = nclusters_pred_crr, verbose = FALSE, seed = seed2_tst
        )),
        silent = TRUE
      )
    } else {
      cvvs_eval <- try(
        suppressWarnings(cv_varsel(
          cvvss[[tstsetup]], validate_search = TRUE,
          nclusters_pred = nclusters_pred_crr, verbose = FALSE, seed = seed2_tst
        )),
        silent = TRUE
      )
    }
    if (inherits(cvvs_eval, "try-error")) {
      cat("Failure for `tstsetup = \"", tstsetup, "\"` in a cv_varsel.vsel() ",
          "test. Error message: \"",
          attr(cvvs_eval, "condition")$message, "\"\n", sep = "")
      # Check that this is a "pwrssUpdate" failure in lme4, so for solving this,
      # we would either need to tweak the lme4 tuning parameters manually (via
      # `...`) or change the data-generating mechanism here in the tests (to
      # obtain less extreme or more data):
      expect_true(grepl("pwrssUpdate", attr(cvvs_eval, "condition")$message),
                  info = tstsetup)
      # Furthermore, this should only occur in the `run_more = TRUE` case, so it
      # can be skipped (because there are enough other `tstsetups` for which
      # this works):
      expect_true(run_more, info = tstsetup)
      next
    }
    tstsetup_ref <- args_cvvs[[tstsetup]]$tstsetup_ref
    meth_exp_crr <- args_cvvs[[tstsetup]]$method %||% "forward"
    extra_tol_crr <- 1.1
    if (meth_exp_crr == "L1" &&
        any(grepl(":", ranking(cvvs_eval)[["fulldata"]]))) {
      ### Testing for non-increasing element `ce` (for increasing model size)
      ### doesn't make sense if the ranking of predictors involved in
      ### interactions has been changed, so we choose a higher `extra_tol`:
      extra_tol_crr <- 1.2
      ###
    }
    vsel_tester(
      cvvs_eval,
      with_cv = TRUE,
      refmod_expected = refmods[[tstsetup_ref]],
      # K-fold setups carry the pre-computed fold fits; LOO setups fall back
      # to whatever `cvfits` the reference model itself stores:
      cvfits_expected = if (identical(args_cvvs[[tstsetup]]$cv_method,
                                      "kfold")) {
        cvfitss[[tstsetup_ref]]
      } else {
        refmods[[tstsetup_ref]]$cvfits
      },
      prd_trms_len_expected = args_cvvs[[tstsetup]]$nterms_max,
      method_expected = meth_exp_crr,
      cv_method_expected = "LOO",
      valsearch_expected = TRUE,
      nprjdraws_eval_expected = nclusters_pred_crr,
      # Originally K-fold objects retain their `K`; LOO objects have none:
      K_expected = if (identical(args_cvvs[[tstsetup]]$cv_method, "kfold")) {
        K_tst
      } else {
        NULL
      },
      search_terms_expected = args_cvvs[[tstsetup]]$search_terms,
      search_trms_empty_size =
        length(args_cvvs[[tstsetup]]$search_terms) &&
        all(grepl("\\+", args_cvvs[[tstsetup]]$search_terms)),
      search_control_expected = args_cvvs[[tstsetup]][c("avoid.increase")],
      extra_tol = extra_tol_crr,
      info_str = tstsetup
    )
  }
})
test_that(paste(
  "cv_varsel.vsel() with K-fold CV and `validate_search = TRUE` works for",
  "`vsel` objects from cv_varsel() created with `validate_search = FALSE`"
), {
  # Upgrading `validate_search` from FALSE to TRUE with a K-fold evaluation.
  # lme4 "pwrssUpdate" failures are tolerated (see below).
  skip_if_not(run_cvvs)
  tstsetups <- names(cvvss)
  if (!run_more) {
    tstsetups <- head(tstsetups, 2)
  }
  for (tstsetup in tstsetups) {
    tstsetup_ref <- args_cvvs[[tstsetup]]$tstsetup_ref
    if (!isFALSE(args_cvvs[[tstsetup]]$validate_search)) {
      next
    } else if (grepl("\\.with_wobs", tstsetup_ref) &&
               args_cvvs[[tstsetup]]$pkg_nm == "rstanarm") {
      # Currently, rstanarm:::kfold.stanreg() doesn't support observation
      # weights:
      next
    } else if (args_cvvs[[tstsetup]]$fam_nm == "cumul" &&
               args_cvvs[[tstsetup]]$pkg_nm == "rstanarm") {
      # Currently, rstanarm:::kfold.stanreg() doesn't work for
      # rstanarm::stan_polr() fits, see rstanarm issue stan-dev/rstanarm#564:
      next
    }
    # One cluster fewer than the original run, to distinguish the two runs:
    nclusters_pred_crr <- args_cvvs[[tstsetup]]$nclusters_pred - 1L
    # Use suppressWarnings() because test_that() somehow redirects stderr() and
    # so throws warnings that projpred wants to capture internally:
    if (identical(args_cvvs[[tstsetup]]$cv_method, "kfold")) {
      cvvs_eval <- try(
        suppressWarnings(cv_varsel(
          cvvss[[tstsetup]], validate_search = TRUE,
          nclusters_pred = nclusters_pred_crr, verbose = FALSE, seed = seed2_tst
        )),
        silent = TRUE
      )
    } else {
      # For originally LOO objects, the K-fold arguments must be supplied
      # explicitly:
      cvvs_eval <- try(
        suppressWarnings(cv_varsel(
          cvvss[[tstsetup]], cv_method = "kfold", K = K_tst,
          cvfits = cvfitss[[tstsetup_ref]], validate_search = TRUE,
          nclusters_pred = nclusters_pred_crr, verbose = FALSE, seed = seed2_tst
        )),
        silent = TRUE
      )
    }
    if (inherits(cvvs_eval, "try-error")) {
      cat("Failure for `tstsetup = \"", tstsetup, "\"` in a cv_varsel.vsel() ",
          "test. Error message: \"",
          attr(cvvs_eval, "condition")$message, "\"\n", sep = "")
      # Check that this is a "pwrssUpdate" failure in lme4, so for solving this,
      # we would either need to tweak the lme4 tuning parameters manually (via
      # `...`) or change the data-generating mechanism here in the tests (to
      # obtain less extreme or more data):
      expect_true(grepl("pwrssUpdate", attr(cvvs_eval, "condition")$message),
                  info = tstsetup)
      # Furthermore, this should only occur in the `run_more = TRUE` case, so it
      # can be skipped (because there are enough other `tstsetups` for which
      # this works):
      expect_true(run_more, info = tstsetup)
      next
    }
    meth_exp_crr <- args_cvvs[[tstsetup]]$method %||% "forward"
    vsel_tester(
      cvvs_eval,
      with_cv = TRUE,
      refmod_expected = refmods[[tstsetup_ref]],
      cvfits_expected = cvfitss[[tstsetup_ref]],
      prd_trms_len_expected = args_cvvs[[tstsetup]]$nterms_max,
      method_expected = meth_exp_crr,
      cv_method_expected = "kfold",
      valsearch_expected = TRUE,
      nprjdraws_eval_expected = nclusters_pred_crr,
      search_terms_expected = args_cvvs[[tstsetup]]$search_terms,
      search_trms_empty_size =
        length(args_cvvs[[tstsetup]]$search_terms) &&
        all(grepl("\\+", args_cvvs[[tstsetup]]$search_terms)),
      search_control_expected = args_cvvs[[tstsetup]][c("avoid.increase")],
      info_str = tstsetup
    )
  }
})
test_that("cv_varsel.vsel(): `nloo` works for `vsel` objects from varsel()", {
  # `nloo` (PSIS-LOO subsampling) is tested here with both
  # `validate_search = FALSE` and `validate_search = TRUE`.
  skip_if_not(run_vs)
  skip_if_not(run_cvvs)
  # Subsample a fifth of the observations for the LOO evaluation:
  nloo_tst <- nobsv %/% 5L
  tstsetup_counter <- 0L
  for (tstsetup in names(vss)) {
    # Without `run_more`, only the first setup runs (with
    # `refit_prj = FALSE`); with `run_more`, roughly a sixth of the setups
    # run, all with re-projection and one cluster fewer:
    if (!run_more && tstsetup_counter > 0L) {
      next
    } else if (run_more && tstsetup_counter > length(vss) %/% 6) {
      next
    } else if (run_more) {
      refit_prj_crr <- TRUE
      nclusters_pred_crr <- args_vs[[tstsetup]]$nclusters_pred - 1L
    } else {
      refit_prj_crr <- FALSE
      nclusters_pred_crr <- args_vs[[tstsetup]]$nclusters_pred
    }
    # Use suppressWarnings() because test_that() somehow redirects stderr() and
    # so throws warnings that projpred wants to capture internally:
    cvvs_eval_valF <- suppressWarnings(cv_varsel(
      vss[[tstsetup]], nloo = nloo_tst, validate_search = FALSE,
      refit_prj = refit_prj_crr, nclusters_pred = nclusters_pred_crr,
      verbose = FALSE, seed = seed2_tst
    ))
    cvvs_eval_valT <- suppressWarnings(cv_varsel(
      vss[[tstsetup]], nloo = nloo_tst, nclusters_pred = nclusters_pred_crr,
      verbose = FALSE, seed = seed2_tst
    ))
    tstsetup_ref <- args_vs[[tstsetup]]$tstsetup_ref
    meth_exp_crr <- args_vs[[tstsetup]]$method %||% "forward"
    vsel_tester(
      cvvs_eval_valF,
      with_cv = TRUE,
      refmod_expected = refmods[[tstsetup_ref]],
      prd_trms_len_expected = args_vs[[tstsetup]]$nterms_max,
      method_expected = meth_exp_crr,
      cv_method_expected = "LOO",
      nloo_expected = nloo_tst,
      valsearch_expected = FALSE,
      refit_prj_expected = refit_prj_crr,
      # Without re-projection, the evaluation re-uses the search's projections
      # (a single draw after an L1 search, `nclusters_tst` clusters otherwise):
      nprjdraws_eval_expected = if (!refit_prj_crr && meth_exp_crr == "L1") {
        1L
      } else if (!refit_prj_crr) {
        nclusters_tst
      } else {
        nclusters_pred_crr
      },
      search_terms_expected = args_vs[[tstsetup]]$search_terms,
      search_trms_empty_size =
        length(args_vs[[tstsetup]]$search_terms) &&
        all(grepl("\\+", args_vs[[tstsetup]]$search_terms)),
      search_control_expected = args_vs[[tstsetup]][c("avoid.increase")],
      info_str = tstsetup
    )
    vsel_tester(
      cvvs_eval_valT,
      with_cv = TRUE,
      refmod_expected = refmods[[tstsetup_ref]],
      prd_trms_len_expected = args_vs[[tstsetup]]$nterms_max,
      method_expected = meth_exp_crr,
      cv_method_expected = "LOO",
      nloo_expected = nloo_tst,
      valsearch_expected = TRUE,
      nprjdraws_eval_expected = nclusters_pred_crr,
      search_terms_expected = args_vs[[tstsetup]]$search_terms,
      search_trms_empty_size =
        length(args_vs[[tstsetup]]$search_terms) &&
        all(grepl("\\+", args_vs[[tstsetup]]$search_terms)),
      search_control_expected = args_vs[[tstsetup]][c("avoid.increase")],
      info_str = tstsetup
    )
    tstsetup_counter <- tstsetup_counter + 1L
  }
})
test_that(paste(
  "cv_varsel.vsel(): `nloo` works for `vsel` objects from cv_varsel()"
), {
  # Like the preceding test, but starting from cv_varsel() outputs. Both
  # `validate_search` variants are evaluated with `refit_prj = FALSE`.
  skip_if_not(run_cvvs)
  # Subsample a fifth of the observations for the LOO evaluation:
  nloo_tst <- nobsv %/% 5L
  tstsetups <- names(cvvss)
  if (!run_more) {
    tstsetups <- head(tstsetups, 1)
  } else {
    # Restrict to glm/gam setups (roughly a sixth of them) to save time:
    tstsetups <- head(grep("\\.glm\\.|\\.gam\\.", tstsetups, value = TRUE),
                      length(tstsetups) %/% 6)
    # Make sure that in the test setups, we have `validate_search = TRUE` as
    # well as `validate_search = FALSE`:
    valsearches <- !unlist(lapply(
      lapply(args_cvvs[tstsetups], "[[", "validate_search"),
      isFALSE
    ))
    stopifnot(any(valsearches), any(!valsearches))
  }
  for (tstsetup in tstsetups) {
    # Use suppressWarnings() because test_that() somehow redirects stderr() and
    # so throws warnings that projpred wants to capture internally:
    if (identical(args_cvvs[[tstsetup]]$cv_method, "kfold")) {
      # For originally K-fold objects, `cv_method = "LOO"` must be given
      # explicitly to override the re-used setting:
      cvvs_eval_valF <- suppressWarnings(cv_varsel(
        cvvss[[tstsetup]], cv_method = "LOO", validate_search = FALSE,
        nloo = nloo_tst, refit_prj = FALSE, verbose = FALSE, seed = seed2_tst
      ))
      cvvs_eval_valT <- suppressWarnings(cv_varsel(
        cvvss[[tstsetup]], cv_method = "LOO", validate_search = TRUE,
        nloo = nloo_tst, refit_prj = FALSE, verbose = FALSE, seed = seed2_tst
      ))
    } else {
      cvvs_eval_valF <- suppressWarnings(cv_varsel(
        cvvss[[tstsetup]], nloo = nloo_tst, validate_search = FALSE,
        refit_prj = FALSE, verbose = FALSE, seed = seed2_tst
      ))
      cvvs_eval_valT <- suppressWarnings(cv_varsel(
        cvvss[[tstsetup]], nloo = nloo_tst, validate_search = TRUE,
        refit_prj = FALSE, verbose = FALSE, seed = seed2_tst
      ))
    }
    meth_exp_crr <- args_cvvs[[tstsetup]]$method %||% "forward"
    extra_tol_crr <- 1.1
    if (meth_exp_crr == "L1" &&
        any(grepl(":", ranking(cvvs_eval_valF)[["fulldata"]]))) {
      ### Testing for non-increasing element `ce` (for increasing model size)
      ### doesn't make sense if the ranking of predictors involved in
      ### interactions has been changed, so we choose a higher `extra_tol`:
      extra_tol_crr <- 1.2
      ###
    }
    vsel_tester(
      cvvs_eval_valF,
      with_cv = TRUE,
      refmod_expected = refmods[[args_cvvs[[tstsetup]]$tstsetup_ref]],
      cvfits_expected = if (identical(args_cvvs[[tstsetup]]$cv_method,
                                      "kfold")) {
        cvfitss[[args_cvvs[[tstsetup]]$tstsetup_ref]]
      } else {
        refmods[[args_cvvs[[tstsetup]]$tstsetup_ref]]$cvfits
      },
      prd_trms_len_expected = args_cvvs[[tstsetup]]$nterms_max,
      method_expected = meth_exp_crr,
      cv_method_expected = "LOO",
      nloo_expected = nloo_tst,
      valsearch_expected = FALSE,
      refit_prj_expected = FALSE,
      # Without re-projection, the evaluation re-uses the search's projections
      # (a single draw after an L1 search, `nclusters_tst` clusters otherwise):
      nprjdraws_eval_expected = if (meth_exp_crr == "L1") {
        1L
      } else {
        nclusters_tst
      },
      K_expected = if (identical(args_cvvs[[tstsetup]]$cv_method, "kfold")) {
        K_tst
      } else {
        NULL
      },
      search_terms_expected = args_cvvs[[tstsetup]]$search_terms,
      search_trms_empty_size =
        length(args_cvvs[[tstsetup]]$search_terms) &&
        all(grepl("\\+", args_cvvs[[tstsetup]]$search_terms)),
      search_control_expected = args_cvvs[[tstsetup]][c("avoid.increase")],
      extra_tol = extra_tol_crr,
      info_str = tstsetup
    )
    vsel_tester(
      cvvs_eval_valT,
      with_cv = TRUE,
      refmod_expected = refmods[[args_cvvs[[tstsetup]]$tstsetup_ref]],
      cvfits_expected = if (identical(args_cvvs[[tstsetup]]$cv_method,
                                      "kfold")) {
        cvfitss[[args_cvvs[[tstsetup]]$tstsetup_ref]]
      } else {
        refmods[[args_cvvs[[tstsetup]]$tstsetup_ref]]$cvfits
      },
      prd_trms_len_expected = args_cvvs[[tstsetup]]$nterms_max,
      method_expected = meth_exp_crr,
      cv_method_expected = "LOO",
      nloo_expected = nloo_tst,
      valsearch_expected = TRUE,
      refit_prj_expected = FALSE,
      nprjdraws_eval_expected = if (meth_exp_crr == "L1") {
        1L
      } else {
        nclusters_tst
      },
      K_expected = if (identical(args_cvvs[[tstsetup]]$cv_method, "kfold")) {
        K_tst
      } else {
        NULL
      },
      search_terms_expected = args_cvvs[[tstsetup]]$search_terms,
      search_trms_empty_size =
        length(args_cvvs[[tstsetup]]$search_terms) &&
        all(grepl("\\+", args_cvvs[[tstsetup]]$search_terms)),
      search_control_expected = args_cvvs[[tstsetup]][c("avoid.increase")],
      info_str = tstsetup
    )
  }
})
# run_cvfun() -------------------------------------------------------------
test_that("argument `folds` of run_cvfun() works", {
  skip_if_not(run_cvvs)
  tstsetups <- names(cvfitss)
  if (!run_more) {
    tstsetups <- head(tstsetups, 1)
  }
  # Save the RNG state so this test does not alter it for subsequent tests:
  if (exists(".Random.seed", envir = .GlobalEnv)) {
    rng_state_old <- get(".Random.seed", envir = .GlobalEnv)
  }
  for (tstsetup in tstsetups) {
    # Passing explicitly generated folds (same seed as the pre-computed
    # `cvfitss` runs) must reproduce those pre-computed fold fits:
    set.seed(seed3_tst)
    folds_crr <- cv_folds(nobsv, K = K_tst)
    cvfits_crr <- run_cvfun(object = refmods[[tstsetup]], folds = folds_crr)
    expect_identical(lapply(cvfits_crr, as.matrix),
                     lapply(cvfitss[[tstsetup]], as.matrix),
                     info = tstsetup)
  }
  # Restore the RNG state (only if it existed before):
  if (exists("rng_state_old")) {
    assign(".Random.seed", rng_state_old, envir = .GlobalEnv)
  }
})
# NOTE(review): The two prose lines previously here ("Add the following code
# to your website." / "For more information on customizing the embed code,
# read Embedding Snippets.") were a web-scraper artifact (an HTML footer, not
# R code) and would have broken parsing of this file; they are commented out.