projpred: Projection Predictive Feature Selection

#_______________________________________________________________________________
# Setup for the unit tests
#_______________________________________________________________________________

# General setup -----------------------------------------------------------

options(warn = 1)

# These switches may be set to `FALSE` to save time (e.g., when debugging
# interactively):
# Run more tests, at the downside of increased runtime?:
run_more <- identical(Sys.getenv("RUN_MORE"), "true")
# Run project()?:
run_prj <- identical(Sys.getenv("NOT_CRAN"), "true")
# Run varsel()?:
run_vs <- identical(Sys.getenv("NOT_CRAN"), "true")
# Run cv_varsel()?:
run_cvvs <- run_vs
# Run cv_varsel() with `validate_search = TRUE` always (`TRUE`) or just for L1
# search (`FALSE`)?:
run_valsearch_always <- FALSE
# Run cv_varsel() with `validate_search = TRUE` also for the latent and the
# augmented-data projection (and the corresponding traditional projection
# settings which are used for comparison)? (Only relevant if
# `run_valsearch_always = FALSE`.):
run_valsearch_aug_lat <- FALSE
# Run the `cvfits` test for all possible test setups (`TRUE`) or just for the
# first one among the GLMMs (`FALSE`; note that if there is no GLMM available in
# that test, the first test setup among those for K-fold CV is used)?:
run_cvfits_all <- run_more
# Run tests for `brmsfit`s?:
run_brms <- identical(Sys.getenv("NOT_CRAN"), "true")
# Run snapshot tests?:
# Notes:
#   * For general information about snapshot tests, see, e.g.,
#   `?expect_snapshot` and `vignette("snapshotting", package = "testthat")`.
#   * The snapshot tests are at least OS-dependent (perhaps even
#   machine-dependent), so they only make sense locally. Therefore, we don't run
#   the snapshot tests on CRAN or continuous integration (CI) systems. The
#   detection of a CI system by the help of environment variable `CI` needs
#   special care, see <https://github.com/r-lib/testthat/issues/825> and the
#   source code of `testthat:::on_ci()`.
#   * The last of the following conditions avoids that the snapshot tests are
#   run by a local `R CMD check` (at least in RStudio). The reason for avoiding
#   this is that in `R CMD check`, the previous snapshots are not available (at
#   least as long as they are listed in the `.Rbuildignore` file), so they would
#   be re-created, which would throw a lot of test warnings (which could obscure
#   potentially important warnings).
run_snaps <- Sys.getenv("RUN_SNAPS")
if (identical(run_snaps, "")) {
  run_snaps <- identical(Sys.getenv("NOT_CRAN"), "true") &&
    !identical(toupper(Sys.getenv("CI")), "TRUE") &&
    identical(Sys.getenv("_R_CHECK_FORCE_SUGGESTS_"), "")
} else {
  run_snaps <- as.logical(toupper(run_snaps))
  stopifnot(isTRUE(run_snaps) || isFALSE(run_snaps))
}
if (run_snaps && !requireNamespace("rlang", quietly = TRUE)) {
  warning("Package 'rlang' is needed for snapshot testing, but could not be ",
          "found. Deactivating snapshot testing now.")
  run_snaps <- FALSE
}
if (run_snaps && !requireNamespace("vdiffr", quietly = TRUE)) {
  warning("Package 'vdiffr' is needed for snapshot testing, but could not be ",
          "found. Deactivating snapshot testing now.")
  run_snaps <- FALSE
}
if (run_snaps) {
  testthat_ed_max2 <- edition_get() <= 2
}
# Run tests for the parallelization of the projection?:
# Notes:
#   * Throughout the tests, the terms "parallelization" and "parallel" refer to
#   the parallelization of the projection ("projection parallelization"), not
#   the parallelization of the CV ("CV parallelization").
#   * We don't run the parallel tests on CRAN or continuous integration (CI)
#   systems because parallelization might require special care there.
#   * Currently, parallelization on Windows takes longer than running
#   sequentially. This makes parallelization impractical on Windows, so we
#   don't run the tests on Windows by default.
#   * Currently, parallelization only works reliably for GLMs (because of
#   memory issues for more complex models like GLMMs, GAMs and GAMMs).
#   Therefore, we will only test GLMs here.
run_prll <- identical(Sys.getenv("NOT_CRAN"), "true") &&
  !identical(toupper(Sys.getenv("CI")), "TRUE") &&
  !identical(.Platform$OS.type, "windows")
if (run_prll) {
  ncores <- parallel::detectCores(logical = FALSE)
  if (ncores == 1) {
    warning("Deactivating the parallel tests because only a single worker ",
            "could be detected.")
    run_prll <- FALSE
  }
  # Do not run on more than 2 cores if requested so:
  if (identical(Sys.getenv("_R_CHECK_LIMIT_CORES_"), "TRUE")) {
    ncores <- min(ncores, 2L)
  }
  # Use the 'doParallel' package on all platforms except Windows. For Windows,
  # the 'doFuture' package provides a faster alternative via the 'future.callr'
  # package (which is still slower than a sequential run, though):
  if (!identical(.Platform$OS.type, "windows")) {
    if (!requireNamespace("doParallel", quietly = TRUE)) {
      warning("Package 'doParallel' is needed for the parallel tests, but ",
              "could not be found. Deactivating the parallel tests now.")
      run_prll <- FALSE
    } else {
      dopar_backend <- "doParallel"
    }
  } else {
    # This case (which should not be possible by default) is only included
    # here to demonstrate how parallelization should be used on Windows (but
    # currently, this makes no sense, as explained above).
    if (!requireNamespace("doFuture", quietly = TRUE)) {
      warning("Package 'doFuture' is needed for the parallel tests, but ",
              "could not be found. Deactivating the parallel tests now.")
      run_prll <- FALSE
    } else {
      dopar_backend <- "doFuture"
    }
    if (!requireNamespace("future", quietly = TRUE)) {
      warning("Package 'future' is needed for the parallel tests, but ",
              "could not be found. Deactivating the parallel tests now.")
      run_prll <- FALSE
    }
    if (!identical(.Platform$OS.type, "windows")) {
      # This case (which should not be possible by default) is only included
      # here to demonstrate how other systems should be used with the 'doFuture'
      # package.
      future_plan <- "multicore"
    } else {
      ### Not used in this case because the 'future.callr' package provides a
      ### faster alternative on Windows (which is still slower than a sequential
      ### run, though):
      # future_plan <- "multisession"
      ###
      if (!requireNamespace("future.callr", quietly = TRUE)) {
        warning("Package 'future.callr' is needed for the parallel tests, but ",
                "could not be found. Deactivating the parallel tests now.")
        run_prll <- FALSE
      } else {
        future_plan <- "callr"
      }
    }
  }
}
# Run all test scripts (following this setup script) in a completely random RNG
# state? (The tests should still pass then, because in all situations where RNG
# is used, a specific seed is supposed to be set.):
run_randRNG <- identical(Sys.getenv("NOT_CRAN"), "true")
# Run tests for additive models (GAMs and GAMMs)?:
run_additive <- TRUE

# Use a factor or an integer response for ordinal and categorical families?:
use_fac <- TRUE

# Use polym() instead of poly()?:
use_polym <- FALSE

source(testthat::test_path("helpers", "unlist_cust.R"), local = TRUE)
source(testthat::test_path("helpers", "testers.R"), local = TRUE)
source(testthat::test_path("helpers", "args.R"), local = TRUE)
source(testthat::test_path("helpers", "getters.R"), local = TRUE)
source(testthat::test_path("helpers", "formul_handlers.R"), local = TRUE)
source(testthat::test_path("helpers", "predictor_handlers.R"), local = TRUE)
source(testthat::test_path("helpers", "dummies.R"), local = TRUE)
source(testthat::test_path("helpers", "creators.R"), local = TRUE)

# Note: The following `mod_nms` refer to *generalized* (linear/additive,
# multilevel) models. This is due to history (when these tests were written,
# only such *generalized* models were supported by projpred). Now that more
# models are supported (even non-generalized ones), these model names are not
# really correct anymore. However, we keep them for simplicity.
mod_nms <- c("glm", "glmm", "gam", "gamm")
if (run_additive) {
  # Suppress the warning for additive models (GAMs and GAMMs) stating that their
  # implementation is currently only experimental:
  options(projpred.warn_additive_experimental = FALSE)
} else {
  mod_nms <- setdiff(mod_nms, c("gam", "gamm"))
}
mod_nms <- setNames(nm = mod_nms)

fam_nms_trad <- c("gauss", "brnll", "binom", "poiss")
fam_nms_ordin <- c("cumul", "srtio", "crtio", "adcat")
fam_nms_categ <- "categ"
fam_nms_aug <- c(fam_nms_ordin, fam_nms_categ)
fam_nms <- c(fam_nms_trad, fam_nms_aug)
fam_nms_unsupp <- setdiff(fam_nms_ordin, "cumul")
fam_nms_brms_only <- setdiff(fam_nms_aug, "cumul")
if (!run_brms) {
  fam_nms <- setdiff(fam_nms, fam_nms_brms_only)
}
fam_nms_trad <- setNames(nm = fam_nms_trad)
fam_nms_ordin <- setNames(nm = fam_nms_ordin)
fam_nms_categ <- setNames(nm = fam_nms_categ)
fam_nms_aug <- setNames(nm = fam_nms_aug)
fam_nms <- setNames(nm = fam_nms)
fam_nms_unsupp <- setNames(nm = fam_nms_unsupp)
fam_nms_brms_only <- setNames(nm = fam_nms_brms_only)
# Long names:
fam_nms_aug_long <- c(sapply(fam_nms_aug, get_fam_long),
                      cumul = "cumulative_rstanarm")
fam_nms_ordin_long <- c(sapply(fam_nms_ordin, get_fam_long),
                        cumul = "cumulative_rstanarm")
fam_nms_long <- c(sapply(fam_nms, get_fam_long_full),
                  cumul = "cumulative_rstanarm")
# Regular expressions:
fam_nms_aug_regex <- paste0("\\.(", paste(fam_nms_aug, collapse = "|"), ")\\.")
fam_nms_unsupp_regex <- paste0("\\.(", paste(fam_nms_unsupp, collapse = "|"),
                               ")\\.")

# Data --------------------------------------------------------------------

## Setup ------------------------------------------------------------------

# Number of observations:
nobsv <- 39L
# Values for testing:
nobsv_tst <- c(1L, nobsv %/% 3L)

# For ordinal models (but also used for categorical models):
nthres <- 2L
ncat <- nthres + 1L
yunq_num <- seq_len(ncat)
yunq_chr <- paste0("y", yunq_num)
# The intercepts at centered predictors, also known as thresholds:
thres <- qlogis(seq_len(nthres) / ncat)
link_str <- "logit"

# Seed:
seed_dat <- 8541351
set.seed(seed_dat)

## GLMs --------------------------------------------------------------------
## Add population-level effects to the intercept-(and-offset-)only model

nterms_cont <- 3L
x_cont <- matrix(rnorm(nobsv * nterms_cont), nobsv, nterms_cont)
b_cont <- runif(nterms_cont, min = -0.5, max = 0.5)

nlvl_fix <- c(3L, 2L)
nlvl_fix <- setNames(nlvl_fix, seq_along(nlvl_fix))
if (length(nlvl_fix) <= 1) {
  names(nlvl_fix) <- paste0("z.", names(nlvl_fix))
}
nterms_cate <- length(nlvl_fix)
x_cate_list <- lapply(nlvl_fix, function(nlvl_fix_i) {
  x_cate <- gl(n = nlvl_fix_i, k = floor(nobsv / nlvl_fix_i), length = nobsv,
               labels = paste0("lvl", seq_len(nlvl_fix_i)))
  b_cate <- runif(nlvl_fix_i, min = -0.5, max = 0.5)
  ### Using a model.matrix() approach:
  # x_cate_mat <- model.matrix(~ 0 + x_cate)
  # eta_cate <- x_cate_mat %*% b_cate
  ###
  ### Using an indexing approach:
  eta_cate <- b_cate[x_cate]
  ###
  return(nlist(x_cate, eta_cate, b_cate))
})

# Intercept and offsets (offsets are added later):
icpt <- -0.42
offs_tst <- rnorm(nobsv)

eta_glm <- icpt +
  x_cont %*% b_cont +
  do.call("+", lapply(x_cate_list, "[[", "eta_cate"))

nterms_glm <- nterms_cont + nterms_cate

## GLMMs ------------------------------------------------------------------
## Add group-level effects to the GLMs, yielding GLMMs

nlvl_ran <- c(6L)
nlvl_ran <- setNames(nlvl_ran, seq_along(nlvl_ran))
if (length(nlvl_ran) <= 1) {
  names(nlvl_ran) <- paste0("z.", names(nlvl_ran))
}
# Multiply by 2 because of the random intercept as well as the random slope (the
# latter for `x_cont[, 1]`):
nterms_z <- length(nlvl_ran) * 2L
z_list <- lapply(nlvl_ran, function(nlvl_ran_i) {
  z <- gl(n = nlvl_ran_i, k = floor(nobsv / nlvl_ran_i), length = nobsv,
          labels = paste0("lvl", seq_len(nlvl_ran_i)))
  r_icpts <- rnorm(nlvl_ran_i, sd = 0.4)
  r_xco1 <- rnorm(nlvl_ran_i, sd = 0.4)
  eta_z <- r_icpts[z] + r_xco1[z] * x_cont[, 1]
  return(nlist(z, eta_z, r_icpts, r_xco1))
})
eta_glmm <- eta_glm + do.call("+", lapply(z_list, "[[", "eta_z"))

nterms_glmm <- nterms_glm + nterms_z

## GAMs -------------------------------------------------------------------
## Add nonlinear (smoothed) effects to the GLMs

# For simplicity, always use the same nonlinear function (could be extended in
# the future):
s_mat <- apply(x_cont[, 1, drop = FALSE], 2, function(x, b = 2, c = - pi / 4) {
  b * sin(x - c)
})
s_sum <- rowSums(s_mat)
nterms_s <- ncol(s_mat)
eta_gam <- eta_glm + s_sum

# Multiply by 2 because of the baseline linear term as well as the standard
# deviation for the wiggliness around it:
nterms_gam <- nterms_glm + 2L * nterms_s

## GAMMs ------------------------------------------------------------------
## Add nonlinear (smoothed) effects to the GLMMs

eta_gamm <- eta_glmm + s_sum

nterms_gamm <- nterms_glmm + 2L * nterms_s

## Combined dataset -------------------------------------------------------

f_gauss <- gaussian()
f_binom <- f_brnll <- binomial()
f_poiss <- poisson()
dis_tst <- runif(1L, 1, 2)
wobs_tst <- sample(1:4, nobsv, replace = TRUE)
# For the "brnll" `fam_nm`, offsets are simply not added to have some
# scenarios without offsets.
# For GAMs, offsets are not added because of rstanarm issue #546 (see
# also further below).
# For GAMMs, offsets are not added because of rstanarm issue #253 (see
# also further below).
# (The brms "gam" and "gamm" cases are handled in the same way as the rstanarm
# "gam" and "gamm" cases to avoid too many special cases.)
# For the "categ" `fam_nm`, offsets are not added because they are currently not
# supported for it.
offs_expr <- expression(!(fam_nm %in% c("brnll", "categ") ||
                            mod_nm %in% c("gam", "gamm")))
cre_dat <- function(idxs_crr, offs_crr, wobs_crr, dis_crr) {
  nobsv_crr <- length(idxs_crr)
  dat_crr <- lapply(mod_nms, function(mod_nm) {
    lapply(fam_nms, function(fam_nm) {
      pred_link <- get(paste0("eta_", mod_nm))
      pred_link <- pred_link[idxs_crr, , drop = FALSE]
      if (eval(offs_expr)) {
        pred_link <- pred_link + offs_crr
      }
      if (fam_nm %in% fam_nms_ordin) {
        pred_link <- pred_link - icpt
        if (eval(offs_expr)) {
          # The equal-probability thresholds defined above refer to the state
          # before offsets are added, so we need to subtract them here in the
          # data-generating model:
          pred_link <- pred_link - offs_crr
        }
        thres_eta <- sapply(thres, function(thres_k) {
          thres_k - pred_link
        })
      } else if (fam_nm %in% fam_nms_categ) {
        pred_link <- sapply(thres, function(thres_k) {
          thres_k + pred_link
        })
      }
      if (fam_nm == "cumul") {
        pred_resp <- augdat_ilink_cumul(thres_eta, link = link_str)
      } else if (fam_nm %in% fam_nms_ordin) {
        if (fam_nm %in% c("crtio", "adcat")) {
          thres_eta <- -thres_eta
        }
        ilink_crr <- get(paste0("inv_link_", get_fam_long(fam_nm)),
                         asNamespace("brms"), mode = "function",
                         inherits = FALSE)
        pred_resp <- ilink_crr(thres_eta, link = link_str)
      } else if (fam_nm %in% fam_nms_categ) {
        ilink_crr <- get(paste0("inv_link_", get_fam_long(fam_nm)),
                         asNamespace("brms"), mode = "function",
                         inherits = FALSE)
        pred_resp <- ilink_crr(pred_link)
      } else {
        pred_resp <- get(paste0("f_", fam_nm))$linkinv(pred_link)
      }
      if (fam_nm == "gauss") {
        return(rnorm(nobsv_crr, mean = pred_resp, sd = dis_crr))
      } else if (fam_nm == "brnll") {
        return(rbinom(nobsv_crr, 1, pred_resp))
      } else if (fam_nm == "binom") {
        return(rbinom(nobsv_crr, wobs_crr, pred_resp))
      } else if (fam_nm == "poiss") {
        return(rpois(nobsv_crr, pred_resp))
      } else if (fam_nm %in% fam_nms_aug) {
        ryunq <- sapply(seq_len(nobsv_crr), function(i_obs) {
          sample(yunq_num, size = 1L, prob = pred_resp[i_obs, ])
        })
        if (use_fac) {
          ryunq <- factor(ryunq, levels = yunq_num, labels = yunq_chr,
                          ordered = fam_nm %in% fam_nms_ordin)
        }
        return(ryunq)
      } else {
        stop("Unknown `fam_nm`.")
      }
    })
  })
  dat_crr <- unlist(dat_crr, recursive = FALSE)
  names(dat_crr) <- paste("y", gsub("\\.", "_", names(dat_crr)), sep = "_")
  return(dat_crr)
}
dat <- cre_dat(idxs_crr = seq_len(nobsv), offs_crr = offs_tst,
               wobs_crr = wobs_tst, dis_crr = dis_tst)
dat <- data.frame(
  dat,
  xco = x_cont, xca = lapply(x_cate_list, "[[", "x_cate"),
  z = lapply(z_list, "[[", "z"),
  s = s_mat,
  wobs_col = wobs_tst, offs_col = offs_tst,
  check.names = FALSE
)
if (ncol(s_mat) == 1) {
  names(dat)[names(dat) == "s"] <- "s.1"
}

## Modified datasets ------------------------------------------------------

dat_wobs_ones <- within(dat, {
  wobs_col <- NULL
  wobs_col_ones <- rep_len(1, length.out = nobsv)
})
dat_wobs_new <- within(dat, {
  wobs_col <- NULL
  wobs_col_new <- rep_len(2:5, length.out = nobsv)
})

dat_offs_zeros <- within(dat, {
  offs_col <- NULL
  offs_col_zeros <- rep_len(0, length.out = nobsv)
})
dat_offs_new <- within(dat, {
  offs_col <- NULL
  offs_col_new <- seq(-2, 2, length.out = nobsv)
})

nobsv_indep <- tail(nobsv_tst, 1)
dis_indep <- runif(1L, 1, 2)
offs_indep <- rnorm(nobsv_indep)
wobs_indep <- sample(1:4, nobsv_indep, replace = TRUE)
idxs_indep <- sample.int(nobsv, size = nobsv_indep, replace = TRUE)
dat_indep <- cre_dat(idxs_crr = idxs_indep, offs_crr = offs_indep,
                     wobs_crr = wobs_indep, dis_crr = dis_indep)
dat_indep <- cbind(
  as.data.frame(dat_indep),
  dat[idxs_indep,
      grep("^y_", names(dat), value = TRUE, invert = TRUE),
      drop = FALSE]
)
dat_indep$wobs_col <- wobs_indep
dat_indep$offs_col <- offs_indep

# Fits --------------------------------------------------------------------

## Setup ------------------------------------------------------------------

if (!requireNamespace("rstanarm", quietly = TRUE) ||
    (!identical(Sys.getenv("NOT_CRAN"), "true") &&
     packageVersion("Matrix") >= "1.6-4")) {
  if (!requireNamespace("rstanarm", quietly = TRUE)) {
    txt_start_warn_rstanarm <- paste0(
      "Package 'rstanarm' is needed for the rstanarm tests, but could not be ",
      "found. "
    )
  } else if (!identical(Sys.getenv("NOT_CRAN"), "true") &&
             packageVersion("Matrix") >= "1.6-4") {
    # For 'Matrix' version >= 1.6-4, we cannot be sure that there are no
    # incompatibility issues with lme4 (binaries), so don't run the 'rstanarm'
    # tests on CRAN:
    txt_start_warn_rstanarm <- paste0(
      "Currently, version < 1.6-4 of package 'Matrix' is needed for the ",
      "rstanarm tests on CRAN (see rstanarm issue stan-dev/rstanarm#610). "
    )
  } else {
    txt_start_warn_rstanarm <- "THIS CASE SHOULD NOT OCCUR. "
  }
  warning(txt_start_warn_rstanarm, "Deactivating the rstanarm tests now. ",
          "Furthermore, in this case, `run_prj`, `run_vs`, and `run_cvvs` are ",
          "currently set to `FALSE`."
  )
  pkg_nms <- character()
  # TODO: Adapt the tests to avoid the following line, at least if `run_brms` is
  # `TRUE` (better: avoid it in any case, no matter whether `run_brms` is `TRUE`
  # or `FALSE`).
  run_prj <- run_vs <- run_cvvs <- FALSE
} else {
  pkg_nms <- "rstanarm"
}

if (run_brms && !requireNamespace("brms", quietly = TRUE)) {
  warning("Package 'brms' is needed for the brms tests, but could not be ",
          "found. Deactivating the brms tests now.")
  run_brms <- FALSE
}
if (run_brms) {
  pkg_nms <- c(pkg_nms, "brms")
  # For storing `brmsfit`s locally:
  file_pth <- testthat::test_path("bfits")
  if (!dir.exists(file_pth)) dir.create(file_pth)
  # Backend:
  if (identical(Sys.getenv("TESTS_BRMS_BACKEND"), "cmdstanr") &&
      requireNamespace("cmdstanr", quietly = TRUE) &&
      # Relative file paths for cmdstanr's global option
      # `cmdstanr_write_stan_file_dir` didn't work before cmdstanr PR #665.
      # Using the workaround `file.path(getwd(), file_pth)` instead of only
      # `file_pth` also doesn't work in `R CMD check` (it doesn't throw any
      # exceptions, but recompilations take place, causing a huge increase in
      # runtime). At the time of cmdstanr's PR #665, the (development) version
      # number of cmdstanr was 0.5.2.1, so requiring >= 0.5.3 guarantees that
      # the fix is included:
      packageVersion("cmdstanr") >= "0.5.3" &&
      !is.null(cmdstanr::cmdstan_version(error_on_NA = FALSE))) {
    options(brms.backend = "cmdstanr")
    options(cmdstanr_write_stan_file_dir = file_pth)
  }
}
pkg_nms <- setNames(nm = pkg_nms)

chains_tst <- 2L
iter_tst <- 500L
nrefdraws <- chains_tst * iter_tst %/% 2L
seed_fit <- 74345

### Formula ---------------------------------------------------------------

# Notes:
#   * Argument `offset` is not supported by rstanarm::stan_gamm4(). Instead, use
#     offset() in the formula (like for all other models). However, because of
#     rstanarm issue #546 and rstanarm issue #253, omit the offsets in GAMs and
#     GAMMs.
#   * In rstanarm::stan_gamm4(), multilevel terms are specified via argument
#     `random`.

trms_common <- c("xco.1", "xco.2", "xco.3", "xca.1", "xca.2",
                 "offset(offs_col)")
trms_grp <- c("(xco.1 | z.1)")
trms_add <- c("s(s.1)") # , "s(s.2)", "s(s.3)"
if (use_polym) {
  trm_poly <- "polym(xco.1, degree = 2, raw = TRUE)"
} else {
  trm_poly <- "poly(xco.1, 2, raw = TRUE)"
}
trms_common_spcl <- c(trm_poly,
                      "sqrt(abs(xco.3)^2) * I(!as.logical(xco.3 > 0))", "xca.1",
                      "xca.2", "offset(offs_col)")
trms_universe <- unique(c(trms_common, trms_grp, trms_add, trms_common_spcl))
trms_universe_split <- setdiff(trms_universe, "offset(offs_col)")
# Handle interaction terms:
stopifnot(!any(grepl(":", trms_universe_split)))
trms_universe_split_IA <- grep("\\*", trms_universe_split, value = TRUE)
if (length(trms_universe_split_IA)) {
  trms_universe_split_noIA <- setdiff(trms_universe_split,
                                      trms_universe_split_IA)
  # Replace " * " in interaction terms by ":":
  trms_universe_split_IA <- gsub(" \\* ", ":", trms_universe_split_IA)
  # Add main effects from interaction terms:
  trms_universe_split_noIA <- union(
    trms_universe_split_noIA,
    unlist(strsplit(trms_universe_split_IA, split = ":"))
  )
  trms_universe_split <- c(trms_universe_split_noIA, trms_universe_split_IA)
}
# Add lower-order group-level terms:
if ("(xco.1 | z.1)" %in% trms_universe_split) {
  trms_universe_split <- union(trms_universe_split, "(1 | z.1)")
}
# Ensure that for all terms on the left-hand side of the `|` character in
# group-level terms, corresponding population-level terms exist:
if ("(xco.1 | z.1)" %in% trms_universe_split) {
  trms_universe_split <- union(trms_universe_split, "xco.1")
}

# Predictor terms for project()-ing from `refmodel`s:
prd_trms_x <- c("xco.1", "xca.1")
prd_trms_z <- c("(1 | z.1)", "(xco.1 | z.1)") # removing one of them later
prd_trms_s <- c("s(s.1)") # , "s(s.2)"
prd_trms_spcl <- c("xca.1", trm_poly,
                   "sqrt(abs(xco.3)^2)", "I(!as.logical(xco.3 > 0))",
                   "sqrt(abs(xco.3)^2):I(!as.logical(xco.3 > 0))")

### Weights (observations) ------------------------------------------------

# Argument `weights` is not needed when using the cbind() syntax (for the
# binomial family with > 1 trials). Furthermore, rstanarm:::kfold.stanreg() does
# not support weights. Thus, we have two possible options for the `weights`
# argument:
wobss_tst <- list(with_wobs = list(weights = wobs_tst),
                  without_wobs = list())

### Offsets ---------------------------------------------------------------

### See the notes above: Due to rstanarm issue #541, the fact that rstanarm
### doesn't support argument `offset` for GAMs and GAMMs, and the fact that
### brms::brm() has no argument `offset`, the easiest way to use offsets is to
### always specify them in the formula (or not at all, see the definition of
### `args_fit` below). Therefore, the following object which could be used to
### use *argument* `offset` is just a dummy (but used nevertheless, namely to
### construct the names for the argument lists):
offss_tst <- list(with_offs = list(), # with_offs = list(offset = offs_tst),
                  without_offs = list())
###

### Argument list ---------------------------------------------------------

# For some arguments, if they are specified via objects,
# rstanarm:::kfold.stanreg() seems to assume these objects to lie in the
# global environment. Since `testthat` uses a new environment for running
# the tests (see `?testthat::test_env`), we need the following code to be
# able to run devtools::test():
for (obj_symb_chr in c(paste0("f_", fam_nms_trad))) {
  if (!exists(obj_symb_chr, envir = .GlobalEnv)) {
    assign(obj_symb_chr, get(obj_symb_chr), envir = .GlobalEnv)
  }
}

args_fit <- lapply(pkg_nms, function(pkg_nm) {
  # Depending on the package used for fitting the reference model, `mod_nms`
  # could be restricted here.
  mod_nms <- setNames(nm = mod_nms)
  lapply(mod_nms, function(mod_nm) {
    if (pkg_nm == "rstanarm") {
      fam_nms <- setdiff(fam_nms, fam_nms_brms_only)
      if (mod_nm != "glm" || !use_fac) {
        # rstanarm::stan_polr() does not support multilevel or additive terms
        # and it also does not support a numeric response:
        fam_nms <- setdiff(fam_nms, "cumul")
      }

      if (mod_nm != "gamm") {
        random_arg <- list()
      } else {
        random_arg <- list(random = as.formula(paste("~", trms_grp)))
      }
    }

    if (mod_nm != "glm") {
      if (mod_nm %in% c("gam", "gamm")) {
        # Additive models are currently not supported by the augmented-data
        # projection:
        fam_nms <- setdiff(fam_nms, fam_nms_aug)
      }
      if (pkg_nm == "brms") {
        # For speed reasons, do not test all families:
        if (mod_nm == "glmm") {
          fam_nms <- intersect(fam_nms, c("brnll", "cumul", "categ"))
        } else {
          fam_nms <- intersect(fam_nms, "binom")
        }
      }
      # Because of issue #207:
      fam_nms <- setdiff(fam_nms, "poiss")
    }

    if (pkg_nm == "rstanarm" && mod_nm == "gamm") {
      # Exclude "binom" from `fam_nms` since there seems to be an issue with
      # get_refmodel.stanreg() in this case (probably issue #148):
      fam_nms <- setdiff(fam_nms, "binom")
      # TODO (GAMMs): Fix this. This exclusion also has the downside that K-fold
      # CV cannot be tested in that case.
    }

    fam_nms <- setNames(nm = fam_nms)
    lapply(fam_nms, function(fam_nm) {
      y_chr <- paste("y", mod_nm, fam_nm, sep = "_")

      if (fam_nm == "gauss" && !(pkg_nm == "rstanarm" && mod_nm == "gamm")) {
        # Here, we also test a special formula (the rstanarm "gamm" case is
        # excluded because of rstanarm issue #545):
        formul_nms <- c("stdformul", "spclformul")
      } else {
        formul_nms <- "stdformul"
      }

      fam_nm_long <- get_fam_long(fam_nm)
      if (pkg_nm == "brms" && !is.na(fam_nm_long)) {
        family_crr <- substitute(
          get(fam_nm_long_subst, envir = asNamespace("brms"))(),
          list(fam_nm_long_subst = fam_nm_long)
        )
      } else {
        family_crr <- as.name(paste0("f_", fam_nm))
      }

      if (eval(offs_expr)) {
        offss_nms <- "with_offs"
      } else {
        offss_nms <- "without_offs"
      }

      formul_nms <- setNames(nm = formul_nms)
      lapply(formul_nms, function(formul_nm) {
        if (formul_nm == "spclformul") {
          trms_common <- trms_common_spcl
          if (fam_nm != "gauss") {
            stop("`y_chr` needs to be adapted for families other than ",
                 "`\"gauss\"`.")
          }
          y_chr <- paste0("log(abs(", y_chr, "))")
        }
        if (fam_nm == "binom") {
          if (pkg_nm == "rstanarm") {
            y_chr <- paste0("cbind(", y_chr, ", wobs_col - ", y_chr, ")")
          } else if (pkg_nm == "brms") {
            y_chr <- paste(y_chr, "| trials(wobs_col)")
          }
        }
        trms <- switch(mod_nm,
                       "glm" = trms_common,
                       "glmm" = c(trms_common, trms_grp),
                       "gam" = c(trms_common, trms_add),
                       "gamm" = switch(
                         pkg_nm,
                         "rstanarm" = c(trms_common, trms_add),
                         "brms" = c(trms_common, trms_add, trms_grp),
                         stop("Unknown `pkg_nm`.")
                       ),
                       stop("Unknown `mod_nm`."))

        if (fam_nm %in% c("brnll", "binom", fam_nms_aug)) {
          # In this case, observation weights are not supported by projpred:
          wobss_nms <- "without_wobs"
        } else {
          wobss_nms <- "with_wobs"
        }

        wobss_nms <- setNames(nm = wobss_nms)
        lapply(wobss_nms, function(wobss_nm) {
          if (pkg_nm == "brms" && wobss_nm == "with_wobs") {
            if (fam_nm == "binom") {
              stop("Because of `\"| trials(wobs_col)\"` above, the code here ",
                   "(for pasting `\"| weights(wobs_col)\"`) needs to be ",
                   "adapted.")
            }
            y_chr <- paste(y_chr, "| weights(wobs_col)")
          }

          offss_nms <- setNames(nm = offss_nms)
          lapply(offss_nms, function(offss_nm) {
            if (offss_nm == "without_offs") {
              trms <- setdiff(trms, "offset(offs_col)")
            }
            formul_crr <- as.formula(paste(
              y_chr, "~", paste(trms, collapse = " + ")
            ))

            if (pkg_nm == "rstanarm") {
              pkg_args <- c(list(QR = TRUE),
                            wobss_tst[[wobss_nm]],
                            offss_tst[[offss_nm]],
                            random_arg)
            } else if (pkg_nm == "brms") {
              pkg_args <- list(file = file_pth,
                               file_refit = "on_change",
                               silent = 2)
              if (identical(getOption("brms.backend", "rstan"), "cmdstanr")) {
                pkg_args <- c(pkg_args, list(diagnostics = NULL))
              }
            }

            return(c(
              nlist(mod_nm, fam_nm, pkg_nm, formula = formul_crr,
                    family = family_crr, data = quote(dat),
                    chains = chains_tst, iter = iter_tst, seed = seed_fit,
                    refresh = 0),
              pkg_args
            ))
          })
        })
      })
    })
  })
})
args_fit <- unlist_cust(args_fit)
stopifnot(length(unique(names(args_fit))) == length(args_fit))
# For `brmsfit`s, set a unique file name (done here because during the creation
# of `args_fit`, these unique names are not easily accessible):
args_fit <- lapply(setNames(nm = names(args_fit)), function(args_fit_nm) {
  if (args_fit[[args_fit_nm]]$pkg_nm == "brms" &&
      "file" %in% names(args_fit[[args_fit_nm]])) {
    args_fit[[args_fit_nm]]$file <- file.path(
      args_fit[[args_fit_nm]]$file,
      paste0("bfit_", args_fit_nm)
    )
  }
  return(args_fit[[args_fit_nm]])
})

if (!run_more && length(pkg_nms)) {
  sel_fits <- c(
    "rstanarm.glm.gauss.stdformul.with_wobs.with_offs",
    "rstanarm.glm.brnll.stdformul.without_wobs.without_offs",
    "rstanarm.glmm.gauss.spclformul.with_wobs.with_offs",
    "rstanarm.gam.gauss.spclformul.with_wobs.without_offs",
    "rstanarm.gamm.brnll.stdformul.without_wobs.without_offs",
    "brms.glm.poiss.stdformul.with_wobs.with_offs",
    "brms.glmm.brnll.stdformul.without_wobs.without_offs",
    # "brms.gam.binom.stdformul.without_wobs.without_offs",
    "brms.gamm.binom.stdformul.without_wobs.without_offs",
    # grep(paste(paste0("\\.", fam_nms_aug, "\\."), collapse = "|"),
    #      names(args_fit), value = TRUE)
    "rstanarm.glm.cumul.stdformul.without_wobs.with_offs",
    "brms.glm.cumul.stdformul.without_wobs.with_offs",
    "brms.glm.srtio.stdformul.without_wobs.with_offs",
    "brms.glm.crtio.stdformul.without_wobs.with_offs",
    "brms.glm.adcat.stdformul.without_wobs.with_offs",
    "brms.glm.categ.stdformul.without_wobs.without_offs",
    "brms.glmm.cumul.stdformul.without_wobs.with_offs",
    "brms.glmm.categ.stdformul.without_wobs.without_offs"
  )
  if (!use_fac) {
    # rstanarm::stan_polr() cannot deal with a numeric response:
    sel_fits <- grep("^rstanarm\\.glm\\.cumul\\.", sel_fits, value = TRUE,
                     invert = TRUE)
  } else {
    # The non-multilevel (and non-additive) brms::cumulative() case is
    # redundant, given the corresponding rstanarm::stan_polr() case and the
    # multilevel (and non-additive) brms::cumulative() case:
    sel_fits <- grep("^brms\\.glm\\.cumul\\.", sel_fits, value = TRUE,
                     invert = TRUE)
  }
  args_fit <- args_fit[names(args_fit) %in% sel_fits]
  if (run_brms && "rstanarm" %in% pkg_nms) {
    stopifnot(setequal(names(args_fit), sel_fits))
  } else if (run_brms && !"rstanarm" %in% pkg_nms) {
    stopifnot(setequal(names(args_fit),
                       grep("^brms\\.", sel_fits, value = TRUE)))
  } else {
    stopifnot(setequal(names(args_fit),
                       grep("^brms\\.", sel_fits, value = TRUE, invert = TRUE)))
  }
}

## Run --------------------------------------------------------------------

fits <- suppressWarnings(lapply(args_fit, function(args_fit_i) {
  fit_fun_nm <- get_fit_fun_nm(args_fit_i)
  if (args_fit_i$pkg_nm == "rstanarm" && args_fit_i$fam_nm == "cumul") {
    fit_fun_nm <- "stan_polr"
    args_fit_i$family <- NULL
    args_fit_i$prior <- quote(rstanarm::R2(location = 0.5, what = "median"))
    args_fit_i$QR <- NULL
  }
  ### Option 1:
  # do.call(fit_fun_nm,
  #         excl_nonargs(args_fit_i),
  #         envir = as.environment(asNamespace(args_fit_i$pkg_nm)))
  ###
  ### Option 2:
  do.call(get(fit_fun_nm, asNamespace(args_fit_i$pkg_nm)),
          excl_nonargs(args_fit_i))
  ###
}))

# projpred ----------------------------------------------------------------

## Setup ------------------------------------------------------------------

seed_tst <- 20411346
seed2_tst <- 866028
seed3_tst <- 1208499

nclusters_tst <- 2L
nclusters_pred_tst <- 3L
### Later, we will subtract 1L and still wish to have `nclusters_pred >= 2` in
### order to differentiate this from `nclusters_pred == 1` which is more or less
### a special case:
stopifnot(nclusters_pred_tst >= 3)
###
if (!run_more) {
  ndr_ncl_pred_tst <- list()
} else {
  ndr_ncl_pred_tst <- list(default_ndr_ncl = list())
}
ndr_ncl_pred_tst <- c(ndr_ncl_pred_tst, list(
  noclust = list(ndraws = nclusters_pred_tst),
  clust = list(nclusters = nclusters_pred_tst),
  clust1 = list(nclusters = 1L)
))
if (any(unlist(lapply(ndr_ncl_pred_tst, "[[", "ndraws")) <= 20)) {
  # Suppress the message concerning small `ndraws` or `ndraws_pred` values:
  options(projpred.mssg_ndraws = FALSE)
}
nresample_clusters_tst <- c(1L, 100L)

meth_tst <- list(
  default_meth = list(),
  L1 = list(method = "L1"),
  forward = list(method = "forward")
)

# Suppress the message when cutting off the search at `nterms_max = 19`:
options(projpred.mssg_cut_search = FALSE)
# Suppress the message for the runtime of the forward search:
options(projpred.mssg_time = FALSE)
# Suppress the PSIS warnings:
options(projpred.warn_psis = FALSE)
# Suppress the warnings for the K reference model refits in case of K-fold CV:
options(projpred.warn_kfold_refits = FALSE)
# Suppress the warning for interaction terms being selected before all involved
# main effects have been selected (only concerns L1 search):
options(projpred.warn_L1_interactions = FALSE)
# Suppress the warning thrown by proj_predict() in case of observation weights
# that are not all equal to `1`:
options(projpred.warn_wobs_ppd = FALSE)
# Suppress the verbose-mode progress bar in project():
options(projpred.verbose_project = FALSE)
# Suppress instability warnings:
options(projpred.warn_instable_projections = FALSE)
# Run additional checks:
options(projpred.additional_checks = TRUE)
# Suppress the warning thrown if `cvrefbuilder` is `NULL` (here in the tests,
# this should only be relevant for `datafit`s):
options(projpred.warn_cvrefbuilder_NULL = FALSE)
# Suppress warnings thrown while fitting the submodels:
options(projpred.warn_prj_drawwise = FALSE)
# Don't use the convergence checker:
options(projpred.check_conv = FALSE)
# Set default number of significant digits to be printed:
options(projpred.digits = getOption("digits"))

search_trms_tst <- list(
  default_search_trms = list(),
  alltrms = list(search_terms = setdiff(trms_common, "offset(offs_col)")),
  fixed = list(search_terms = c("xco.1", "xco.1 + xco.2", "xco.1 + xco.3",
                                "xco.1 + xco.2 + xco.3")),
  excluded = list(search_terms = c("xco.2", "xco.3", "xco.2 + xco.3")),
  empty_size = list(search_terms = c("xco.1 + xco.2", "xco.1 + xco.3",
                                     "xco.2 + xco.3", "xco.1 + xco.2 + xco.3"))
)

K_tst <- 2L
cvmeth_tst <- list(
  default_cvmeth = list(),
  LOO = list(cv_method = "LOO"),
  kfold = list(cv_method = "kfold")
)

nloo_tst <- list(
  default_nloo = list(),
  subsmpl = list(nloo = as.integer(nobsv %/% 10))
)

resp_oscale_tst <- list(
  default_r_oscale = list(),
  r_oscale_F = list(resp_oscale = FALSE)
)

vsel_funs <- nlist("summary.vsel", "plot.vsel", "suggest_size.vsel")
# Performance statistics common across all families, when using the traditional
# projection (or the latent projection with `resp_oscale = FALSE` or the latent
# projection with `resp_oscale = TRUE`, but the latter only in combination with
# `<refmodel>$family$cats` being `NULL`):
stats_common <- c("elpd", "mlpd", "gmpd", "mse", "rmse", "R2")
# Performance statistics for the binomial() family when using the traditional
# projection (or the latent projection with `resp_oscale = TRUE` and
# `<refmodel>$family$cats` being `NULL`):
stats_binom <- c(stats_common, "acc", "auc")
# For creating test setups:
stats_tst <- list(
  default_stats = list(),
  common_stats = list(stats = stats_common),
  binom_stats = list(stats = stats_binom),
  augdat_stats = list(stats = c("elpd", "mlpd", "gmpd", "acc"))
)
type_tst <- c("mean", "lower", "upper", "se")

rk_abbv_tst <- list(
  default_abbv = list(),
  abbv3 = list(ranking_abbreviate = TRUE,
               ranking_abbreviate_args = list(minlength = 3))
)

rk_repel_tst <- list(
  default_repel = list(),
  repelText = list(ranking_repel = "text",
                   ranking_repel_args = list(seed = seed3_tst))
)

rk_col_tst <- as.list(setNames(nm = c(FALSE, TRUE)))
names(rk_col_tst) <- paste0("col", names(rk_col_tst))

cumulate_tst <- as.list(setNames(nm = c(FALSE, TRUE)))
names(cumulate_tst) <- paste0("cu", names(cumulate_tst))

angle_tst <- list(
  default_angle = list(),
  angle45 = list(text_angle = 45)
)

### nterms ----------------------------------------------------------------

ntermss <- sapply(mod_nms, function(mod_nm) {
  get(paste("nterms", mod_nm, sep = "_"))
})
# The `nterms_max` setting which will be used throughout the tests, except for
# the special `search_terms` tests:
nterms_max_tst <- min(ntermss)
if (!run_more) {
  nterms_max_tst <- min(nterms_max_tst, 2L)
}

nterms_unavail <- list(
  single = nterms_max_tst + 130L,
  vec = c(nterms_max_tst + 130L, nterms_max_tst + 290L)
)
if (!run_more) {
  nterms_avail <- list()
} else {
  nterms_avail <- list(default_nterms = NULL)
}
nterms_avail <- c(nterms_avail, list(
  empty = 0L,
  single = nterms_max_tst %/% 2L,
  subvec = as.integer(round(seq(0, nterms_max_tst, length.out = 2))),
  full = 0:nterms_max_tst
))

nterms_max_smmry <- list(
  default_nterms_max_smmry = NULL,
  halfway = nterms_max_tst %/% 2L,
  zero = 0L
)

nterms_max_rk <- list(
  default_nterms_max_rk = list(),
  halfway = list(nterms_max = nterms_max_tst %/% 2L),
  zero = list(nterms_max = 0L)
)

rk_max_tst <- list(
  default_rk_max = list(),
  rk_max_NA = list(ranking_nterms_max = NA),
  rk_max_1 = list(ranking_nterms_max = 1L)
)

## Reference model --------------------------------------------------------

args_ref <- lapply(setNames(nm = names(fits)), function(tstsetup_fit) {
  if (args_fit[[tstsetup_fit]]$pkg_nm == "brms" &&
      packageVersion("brms") >= "2.16.4") {
    pkg_args <- list(brms_seed = seed2_tst)
  } else {
    pkg_args <- list()
  }

  if (args_fit[[tstsetup_fit]]$fam_nm == "brnll") {
    # In this case, test the latent projection and the augmented-data projection
    # (the latter only in a slightly reduced set of test setups). For the
    # corresponding traditional projection case (needed for comparison), use the
    # keyword `trad_compare` to be able to find this case more easily later.
    augdat_args <- list(
      trad_compare = list(),
      latent = list(latent = TRUE) # , latent_y_unqs = c("0", "1")
    )
    if (!args_fit[[tstsetup_fit]]$mod_nm %in% c("gam", "gamm")) {
      augdat_args <- c(augdat_args, list(
        augdat = list(augdat_y_unqs = c("0", "1"),
                      augdat_link = quote(augdat_link_binom),
                      augdat_ilink = quote(augdat_ilink_binom))
      ))
    }
  } else if (args_fit[[tstsetup_fit]]$fam_nm %in% fam_nms_aug) {
    augdat_args <- list(augdat = list())
    if (args_fit[[tstsetup_fit]]$fam_nm %in% "cumul") {
      augdat_args <- c(augdat_args, list(
        latent = list(latent = TRUE)
      ))
    }
  } else {
    augdat_args <- list(trad = list())
  }

  lapply(setNames(nm = names(augdat_args)), function(augdat_args_nm) {
    return(c(nlist(tstsetup_fit),
             only_nonargs(args_fit[[tstsetup_fit]]),
             list(prj_nm = augdat_args_nm),
             pkg_args,
             augdat_args[[augdat_args_nm]]))
  })
})
args_ref <- unlist_cust(args_ref)

refmods <- lapply(args_ref, function(args_ref_i) {
  do.call(get_refmodel, c(
    list(object = fits[[args_ref_i$tstsetup_fit]]),
    excl_nonargs(args_ref_i)
  ))
})

## Variable selection -----------------------------------------------------

### varsel() --------------------------------------------------------------

if (run_vs) {
  # Some families are not supported yet, apart from the creation of a `refmodel`
  # object:
  tstsetups_vs_ref <- grep(fam_nms_unsupp_regex, names(refmods), value = TRUE,
                           invert = TRUE)
  if (!run_more) {
    tstsetups_vs_ref <- grep(paste0("\\.glmm", fam_nms_aug_regex),
                             tstsetups_vs_ref, value = TRUE, invert = TRUE)
  }
  tstsetups_vs_ref <- setNames(nm = tstsetups_vs_ref)
  args_vs <- lapply(tstsetups_vs_ref, function(tstsetup_ref) {
    mod_crr <- args_ref[[tstsetup_ref]]$mod_nm
    fam_crr <- args_ref[[tstsetup_ref]]$fam_nm
    prj_crr <- args_ref[[tstsetup_ref]]$prj_nm
    if (prj_crr == "trad" && mod_crr == "glm" && fam_crr == "gauss") {
      # Here, we test the default `method` (forward search) as well as L1
      # search:
      meth <- meth_tst[setdiff(names(meth_tst), "forward")]
    } else if (prj_crr == "trad" && mod_crr == "glm") {
      # Here, we only test L1 search:
      meth <- meth_tst["L1"]
    } else {
      # Here, we only test the default `method` (forward search):
      meth <- meth_tst["default_meth"]
    }
    lapply(meth, function(meth_i) {
      if (mod_crr == "glm" && fam_crr == "gauss" &&
          grepl("\\.stdformul\\.", tstsetup_ref) &&
          !identical(meth_i$method, "L1")) {
        # Here, we also test non-NULL `search_terms`:
        search_trms <- search_trms_tst
      } else {
        search_trms <- search_trms_tst["default_search_trms"]
      }
      lapply(search_trms, function(search_trms_i) {
        if (length(search_trms_i) &&
            !identical(search_trms_i$search_terms,
                       search_trms_tst$alltrms$search_terms)) {
          nterms_max_tst <- count_terms_chosen(search_trms_i$search_terms) - 1L
        }
        if (mod_crr == "glmm" && fam_crr == "categ") {
          # Quick-and-dirty solution to get some working results (it's probably
          # due to unfortunate test data simulated here that convergence at the
          # default settings is not given):
          extra_args <- list(avoid.increase = TRUE)
        } else {
          extra_args <- list()
        }
        return(c(
          nlist(tstsetup_ref), only_nonargs(args_ref[[tstsetup_ref]]),
          list(
            nclusters = nclusters_tst, nclusters_pred = nclusters_pred_tst,
            nterms_max = nterms_max_tst, verbose = FALSE, seed = seed_tst
          ),
          meth_i, search_trms_i, extra_args
        ))
      })
    })
  })
  args_vs <- unlist_cust(args_vs)
  stopifnot(sum(sapply(args_vs, function(args_vs_i) {
    !is.null(args_vs_i$search_terms)
  })) >= 1)

  vss <- lapply(args_vs, function(args_vs_i) {
    do.call(varsel, c(
      list(object = refmods[[args_vs_i$tstsetup_ref]]),
      excl_nonargs(args_vs_i)
    ))
  })
}

### cv_varsel() -----------------------------------------------------------

if (run_cvvs) {
  tstsetups_cvvs_ref <- tstsetups_vs_ref
  # Even in the `run_more = TRUE` case (which is not run by default), we need to
  # impose some restrictions to have the tests run through in a reasonable
  # amount of time:
  tstsetups_cvvs_ref <- grep(
    paste0("\\.glmm", fam_nms_aug_regex), tstsetups_cvvs_ref, value = TRUE,
    invert = TRUE
  )
  if (!run_more) {
    tstsetups_cvvs_ref <- grep("\\.gam\\.", tstsetups_cvvs_ref, value = TRUE,
                               invert = TRUE)
  }
  # Under the special test settings used here, Bernoulli GAMMs often seem to run
  # into lme4 errors. However, since these Bernoulli GAMMs are basically
  # redundant given the other tested models, we can simply skip them:
  tstsetups_cvvs_ref <- grep("\\.gamm\\.brnll\\.", tstsetups_cvvs_ref,
                             value = TRUE, invert = TRUE)
  tstsetups_cvvs_ref <- setNames(nm = tstsetups_cvvs_ref)
  args_cvvs <- lapply(tstsetups_cvvs_ref, function(tstsetup_ref) {
    pkg_crr <- args_ref[[tstsetup_ref]]$pkg_nm
    mod_crr <- args_ref[[tstsetup_ref]]$mod_nm
    fam_crr <- args_ref[[tstsetup_ref]]$fam_nm
    prj_crr <- args_ref[[tstsetup_ref]]$prj_nm
    if (prj_crr == "trad" && mod_crr == "glm") {
      if (run_more && fam_crr == "gauss" &&
          grepl("\\.stdformul\\.", tstsetup_ref)) {
        # Needed for testing non-default `search_terms`:
        meth <- meth_tst["default_meth"]
      } else {
        meth <- meth_tst["L1"]
      }
    } else {
      meth <- meth_tst["default_meth"]
    }
    if (grepl("\\.without_wobs", tstsetup_ref)) {
      # In principle, we want to use K-fold CV here and LOO CV else because
      # rstanarm:::kfold.stanreg() doesn't support observation weights. However,
      # there are some special cases to take care of:
      if (pkg_crr == "brms" && packageVersion("brms") <= "2.16.3") {
        # For brms versions <= 2.16.3, there is a reproducibility issue when
        # using K-fold CV, so use LOO CV:
        cvmeth <- cvmeth_tst["default_cvmeth"]
      } else if (pkg_crr == "brms" && mod_crr == "gamm") {
        # For GAMMs fitted by brms with the rstan backend, there is a (random,
        # i.e., only occasional) reproducibility issue when using K-fold CV (see
        # rstan issue stan-dev/rstan#989), so use LOO CV:
        cvmeth <- cvmeth_tst["default_cvmeth"]
      } else if (prj_crr %in% c("latent", "augdat") && fam_crr != "brnll") {
        # We also want to test the latent and the augmented-data projection with
        # LOO CV:
        cvmeth <- cvmeth_tst["default_cvmeth"]
      } else {
        cvmeth <- cvmeth_tst["kfold"]
      }
    } else {
      cvmeth <- cvmeth_tst["default_cvmeth"]
    }
    lapply(meth, function(meth_i) {
      if (run_more && mod_crr == "glm" && fam_crr == "gauss" &&
          grepl("\\.stdformul\\.", tstsetup_ref) && pkg_crr == "brms") {
        # Needed for testing non-default `search_terms` with K-fold CV:
        cvmeth <- cvmeth_tst["kfold"]
      }
      lapply(cvmeth, function(cvmeth_i) {
        if (!identical(meth_i$method, "L1") && !run_valsearch_always &&
            (!prj_crr %in% c("latent", "augdat", "trad_compare") ||
             (prj_crr %in% c("latent", "augdat", "trad_compare") &&
              !run_valsearch_aug_lat))) {
          cvmeth_i <- c(cvmeth_i, list(validate_search = FALSE))
        }
        if (identical(cvmeth_i$cv_method, "kfold")) {
          nloo_tst <- nloo_tst["default_nloo"]
        } else if (!((prj_crr == "trad" && mod_crr == "glm" &&
                      fam_crr == "gauss") ||
                     (prj_crr %in% c("augdat", "latent") && mod_crr == "glm" &&
                      fam_crr == "cumul"))) {
          nloo_tst <- nloo_tst["subsmpl"]
        }
        if (run_more && mod_crr == "glm" && fam_crr == "gauss" &&
            grepl("\\.stdformul\\.", tstsetup_ref)) {
          # Here, we also test non-NULL `search_terms`:
          search_trms <- search_trms_tst
        } else {
          search_trms <- search_trms_tst["default_search_trms"]
        }
        lapply(search_trms, function(search_trms_i) {
          if (length(search_trms_i) &&
              !identical(search_trms_i$search_terms,
                         search_trms_tst$alltrms$search_terms)) {
            nterms_max_tst <- count_terms_chosen(search_trms_i$search_terms) -
              1L
          }
          lapply(nloo_tst, function(nloo_i) {
            if (!is.null(nloo_i$nloo) && nloo_i$nloo < nobsv &&
                identical(cvmeth_i$validate_search, FALSE)) {
              cvmeth_i$validate_search <- NULL
            }
            return(c(
              nlist(tstsetup_ref), only_nonargs(args_ref[[tstsetup_ref]]),
              list(
                nclusters = nclusters_tst, nclusters_pred = nclusters_pred_tst,
                nterms_max = nterms_max_tst, verbose = FALSE, seed = seed_tst
              ),
              meth_i, cvmeth_i, nloo_i, search_trms_i
            ))
          })
        })
      })
    })
  })
  args_cvvs <- unlist_cust(args_cvvs)

  args_cvvs_kfold <- args_cvvs[
    sapply(lapply(args_cvvs, "[[", "cv_method"), identical, "kfold")
  ]
  tstsetups_ref_kfold <- setNames(nm = unique(unname(
    sapply(args_cvvs_kfold, "[[", "tstsetup_ref")
  )))
  cvfitss <- lapply(tstsetups_ref_kfold, function(tstsetup_ref) {
    # Due to rstanarm:::kfold.stanreg() failing sometimes, we have to wrap the
    # call to run_cvfun() in try():
    return(try(run_cvfun(object = refmods[[tstsetup_ref]], K = K_tst,
                         seed = seed3_tst), silent = TRUE))
  })
  success_cvfits <- !sapply(cvfitss, inherits, "try-error")
  err_ok_cvfits <- sapply(cvfitss[!success_cvfits], function(cvfits_err) {
    attr(cvfits_err, "condition")$message ==
      "pwrssUpdate did not converge in (maxit) iterations"
  })
  expect_true(
    all(err_ok_cvfits),
    info = paste("Unexpected error for",
                 paste(names(cvfitss[!success_cvfits])[!err_ok_cvfits],
                       collapse = ", "))
  )
  args_cvvs_kfold <- args_cvvs_kfold[!sapply(
    lapply(args_cvvs_kfold, "[[", "tstsetup_ref"),
    `%in%`, names(cvfitss)[!success_cvfits]
  )]
  args_cvvs <- args_cvvs[!sapply(lapply(args_cvvs, "[[", "tstsetup_ref"),
                                 `%in%`, names(cvfitss)[!success_cvfits])]

  cvvss <- lapply(args_cvvs, function(args_cvvs_i) {
    cvvs_expr <- expression(do.call(cv_varsel, c(
      list(object = refmods[[args_cvvs_i$tstsetup_ref]],
           cvfits = if (identical(args_cvvs_i$cv_method, "kfold")) {
             cvfitss[[args_cvvs_i$tstsetup_ref]]
           } else {
             refmods[[args_cvvs_i$tstsetup_ref]]$cvfits # should be `NULL`
           }),
      excl_nonargs(args_cvvs_i)
    )))
    if (args_cvvs_i$mod_nm == "gamm" &&
        !identical(args_cvvs_i$cv_method, "kfold")) {
      # Due to issue #239, we have to wrap the call to cv_varsel() in try():
      return(try(eval(cvvs_expr), silent = TRUE))
    } else {
      return(eval(cvvs_expr))
    }
  })
  success_cvvs <- !sapply(cvvss, inherits, "try-error")
  err_ok_cvvs <- sapply(cvvss[!success_cvvs], function(cvvs_err) {
    attr(cvvs_err, "condition")$message ==
      "Not enough (non-NA) data to do anything meaningful"
  })
  expect_true(
    all(err_ok_cvvs),
    info = paste("Unexpected error for",
                 paste(names(cvvss[!success_cvvs])[!err_ok_cvvs],
                       collapse = ", "))
  )
  cvvss <- cvvss[success_cvvs]
  args_cvvs <- args_cvvs[success_cvvs]
}

## Projection -------------------------------------------------------------

### From `refmodel` -------------------------------------------------------

if (run_prj) {
  # Some families are not supported yet, apart from the creation of a `refmodel`
  # object:
  tstsetups_prj_ref <- grep(fam_nms_unsupp_regex, names(refmods), value = TRUE,
                            invert = TRUE)
  tstsetups_prj_ref <- setNames(nm = tstsetups_prj_ref)
  args_prj <- lapply(tstsetups_prj_ref, function(tstsetup_ref) {
    pkg_crr <- args_ref[[tstsetup_ref]]$pkg_nm
    mod_crr <- args_ref[[tstsetup_ref]]$mod_nm
    fam_crr <- args_ref[[tstsetup_ref]]$fam_nm
    prj_crr <- args_ref[[tstsetup_ref]]$prj_nm
    if (grepl("\\.spclformul", tstsetup_ref)) {
      prd_trms_x <- prd_trms_spcl
    }
    prd_trms <- nlist(empty = character(), prd_trms_x)
    if (prj_crr %in% c("augdat", "trad_compare") && fam_crr == "brnll" &&
        mod_crr == "glmm") {
      # We need a single group-level term (which only consists of group-level
      # intercepts) to be able to use `nAGQ` later:
      prd_trms_z <- setdiff(prd_trms_z, "(xco.1 | z.1)")
    } else {
      prd_trms_z <- setdiff(prd_trms_z, "(1 | z.1)")
    }
    if (mod_crr %in% c("glmm", "gamm")) {
      prd_trms <- c(prd_trms,
                    nlist(prd_trms_z, prd_trms_xz = c(prd_trms_x, prd_trms_z)))
    }
    if (mod_crr %in% c("gam", "gamm")) {
      prd_trms <- c(prd_trms,
                    nlist(prd_trms_s, prd_trms_xs = c(prd_trms_x, prd_trms_s)))
    }
    if (mod_crr == "gamm") {
      prd_trms <- c(prd_trms,
                    nlist(prd_trms_sz = c(prd_trms_s, prd_trms_z),
                          prd_trms_xsz = c(prd_trms_x, prd_trms_s, prd_trms_z)))
    }
    if (!run_more &&
        (fam_crr != "gauss" || grepl("\\.spclformul", tstsetup_ref))) {
      prd_trms <- tail(prd_trms, 1)
    }
    lapply(setNames(nm = names(prd_trms)), function(prd_trms_nm_i) {
      if (pkg_crr == "rstanarm" && mod_crr == "glm" &&
          fam_crr == "gauss" && prd_trms_nm_i == "prd_trms_x") {
        ndr_ncl_pred <- ndr_ncl_pred_tst
      } else if (pkg_crr == "rstanarm" && mod_crr == "glm" &&
                 fam_crr == "gauss" && prd_trms_nm_i == "empty") {
        ndr_ncl_pred <- ndr_ncl_pred_tst[c("noclust", "clust", "clust1")]
      } else if (
        (run_more && (
          (pkg_crr == "rstanarm" && mod_crr == "glmm" &&
           fam_crr == "brnll" && prd_trms_nm_i == "prd_trms_xz") ||
          (pkg_crr == "rstanarm" && mod_crr == "gam" &&
           fam_crr == "binom" && prd_trms_nm_i == "prd_trms_xs") ||
          (pkg_crr == "rstanarm" && mod_crr == "gamm" &&
           fam_crr == "brnll" && prd_trms_nm_i == "prd_trms_xsz")
        )) ||
        (!run_more && mod_crr %in% c("glmm", "gam", "gamm")) ||
        prj_crr %in% c("latent", "augdat", "trad_compare")
      ) {
        # The `noclust` setting is important for the test "non-clustered
        # projection does not require a seed" in `test_project.R`.
        ndr_ncl_pred <- ndr_ncl_pred_tst[c("noclust", "clust")]
      } else {
        ndr_ncl_pred <- ndr_ncl_pred_tst[c("clust")]
      }
      if (prj_crr %in% c("augdat", "trad_compare") && fam_crr == "brnll" &&
          mod_crr == "glmm" && grepl("z", prd_trms_nm_i)) {
        # We need an increased accuracy to be able to compare traditional and
        # augmented-data projection:
        divmin_args <- list(nAGQ = 30L)
      } else {
        divmin_args <- list()
      }
      lapply(ndr_ncl_pred, function(ndr_ncl_pred_i) {
        if (mod_crr == "glmm" && fam_crr == "categ") {
          # Quick-and-dirty solution to get some working results (it's probably
          # due to unfortunate test data simulated here that convergence at the
          # default settings is not given):
          divmin_args <- c(divmin_args, list(avoid.increase = TRUE))
        }
        return(c(
          nlist(tstsetup_ref), only_nonargs(args_ref[[tstsetup_ref]]),
          list(predictor_terms = prd_trms[[prd_trms_nm_i]], seed = seed_tst),
          ndr_ncl_pred_i, divmin_args
        ))
      })
    })
  })
  args_prj <- unlist_cust(args_prj)

  prjs <- lapply(args_prj, function(args_prj_i) {
    do.call(project, c(
      list(object = refmods[[args_prj_i$tstsetup_ref]]),
      excl_nonargs(args_prj_i)
    ))
  })
}

### From `vsel` -----------------------------------------------------------

# A helper function to create the argument list for project() for a given
# character vector of test setups (referring to either `vss` or `cvvss`):
cre_args_prj_vsel <- function(tstsetups_prj_vsel) {
  vsel_type <- deparse(substitute(tstsetups_prj_vsel))
  args_obj <- switch(vsel_type,
                     "tstsetups_prj_vs" = args_vs,
                     "tstsetups_prj_cvvs" = args_cvvs,
                     stop("Unexpected `vsel_type`."))
  lapply(tstsetups_prj_vsel, function(tstsetup_vsel) {
    args_out <- c(
      nlist(tstsetup_vsel), only_nonargs(args_obj[[tstsetup_vsel]]),
      list(nclusters = nclusters_pred_tst, seed = seed_tst)
    )
    if (args_obj[[tstsetup_vsel]]$mod_nm != "glm" ||
        !is.null(args_obj[[tstsetup_vsel]]$search_terms) ||
        args_obj[[tstsetup_vsel]]$prj_nm %in% c("latent", "augdat") ||
        grepl("\\.spclformul", tstsetup_vsel)) {
      nterms_avail <- nterms_avail["subvec"]
    }
    if (!is.null(args_obj[[tstsetup_vsel]]$search_terms)) {
      nterms_max_cut <- args_obj[[tstsetup_vsel]]$nterms_max
      if (all(grepl("\\+", args_obj[[tstsetup_vsel]]$search_terms))) {
        # This is the "empty_size" setting, so we have to subtract the skipped
        # model size (see issue #307):
        nterms_max_cut <- nterms_max_cut - 1L
      }
      nterms_avail <- lapply(nterms_avail, function(nterms_avail_i) {
        pmin(nterms_avail_i, nterms_max_cut)
      })
    }
    lapply(nterms_avail, function(nterms_crr) {
      if (!is.null(nterms_crr)) {
        args_out <- c(args_out, list(nterms = nterms_crr))
      }
      return(args_out)
    })
  })
}

#### varsel() -------------------------------------------------------------

if (run_vs) {
  tstsetups_prj_vs <- unlist(lapply(mod_nms, function(mod_nm) {
    if (any(grepl(paste0("\\.", mod_nm, "\\.gauss\\."), names(vss)))) {
      tstsetups_out <- grep(paste0("\\.", mod_nm, "\\.gauss\\."), names(vss),
                            value = TRUE)
    } else {
      tstsetups_out <- grep(paste0("\\.", mod_nm, "\\."), names(vss),
                            value = TRUE)
    }
    if (any(grepl("\\.L1\\.", tstsetups_out))) {
      tstsetups_out <- grep("\\.L1\\.", tstsetups_out, value = TRUE)
    }
    if (!run_more) {
      tstsetups_out <- head(tstsetups_out, 1)
    }
    return(tstsetups_out)
  }))
  tstsetups_prj_vs <- union(
    tstsetups_prj_vs,
    grep("\\.default_search_trms", names(vss), value = TRUE, invert = TRUE)
  )
  tstsetups_prj_vs <- union(
    tstsetups_prj_vs,
    grep("\\.(latent|augdat)\\.", names(vss), value = TRUE)
  )
  tstsetups_prj_vs <- setNames(nm = tstsetups_prj_vs)
  stopifnot(length(tstsetups_prj_vs) > 0)
  args_prj_vs <- cre_args_prj_vsel(tstsetups_prj_vs)
  args_prj_vs <- unlist_cust(args_prj_vs)

  prjs_vs <- lapply(args_prj_vs, function(args_prj_vs_i) {
    do.call(project, c(
      list(object = vss[[args_prj_vs_i$tstsetup_vsel]]),
      excl_nonargs(args_prj_vs_i)
    ))
  })
}

#### cv_varsel() ----------------------------------------------------------

if (run_cvvs) {
  tstsetups_prj_cvvs <- unlist(lapply(mod_nms, function(mod_nm) {
    if (any(grepl(paste0("\\.", mod_nm, "\\.gauss\\."), names(cvvss)))) {
      tstsetups_out <- grep(paste0("\\.", mod_nm,
                                   "\\.gauss\\..*\\.default_cvmeth"),
                            names(cvvss), value = TRUE)
    } else {
      tstsetups_out <- grep(paste0("\\.", mod_nm, "\\..*\\.default_cvmeth"),
                            names(cvvss), value = TRUE)
    }
    if (any(grepl("\\.L1\\.", tstsetups_out))) {
      tstsetups_out <- grep("\\.L1\\.", tstsetups_out, value = TRUE)
    }
    if (!run_more) {
      tstsetups_out <- head(tstsetups_out, 1)
    }
    return(tstsetups_out)
  }))
  tstsetups_prj_cvvs <- union(
    tstsetups_prj_cvvs,
    grep("\\.default_search_trms", names(cvvss), value = TRUE, invert = TRUE)
  )
  tstsetups_prj_cvvs <- union(
    tstsetups_prj_cvvs,
    grep("\\.(latent|augdat)\\.", names(cvvss), value = TRUE)
  )
  tstsetups_prj_cvvs <- setNames(nm = tstsetups_prj_cvvs)
  stopifnot(length(tstsetups_prj_cvvs) > 0)
  args_prj_cvvs <- cre_args_prj_vsel(tstsetups_prj_cvvs)
  args_prj_cvvs <- unlist_cust(args_prj_cvvs)

  prjs_cvvs <- lapply(args_prj_cvvs, function(args_prj_cvvs_i) {
    do.call(project, c(
      list(object = cvvss[[args_prj_cvvs_i$tstsetup_vsel]]),
      excl_nonargs(args_prj_cvvs_i)
    ))
  })
}

## Prediction -------------------------------------------------------------

### From `projection` -----------------------------------------------------

if (run_prj) {
  pls <- lapply(prjs, proj_linpred, allow_nonconst_wdraws_prj = TRUE,
                .seed = seed2_tst)
  pps <- lapply(prjs, proj_predict, .seed = seed2_tst)
}

### From `proj_list` ------------------------------------------------------

#### varsel() -------------------------------------------------------------

if (run_vs) {
  pls_vs <- lapply(prjs_vs, proj_linpred, allow_nonconst_wdraws_prj = TRUE,
                   .seed = seed2_tst)
  pps_vs <- lapply(prjs_vs, proj_predict, .seed = seed2_tst)
}

#### cv_varsel() ----------------------------------------------------------

if (run_cvvs) {
  pls_cvvs <- lapply(prjs_cvvs, proj_linpred, allow_nonconst_wdraws_prj = TRUE,
                     .seed = seed2_tst)
  pps_cvvs <- lapply(prjs_cvvs, proj_predict, .seed = seed2_tst)
}

## summary.vsel() ---------------------------------------------------------

cre_args_smmry_vsel <- function(args_obj) {
  tstsetups <- names(args_obj)
  # Choose all test setups which are for special `search_terms` settings:
  tstsetups_smmry_vsel <- tstsetups[sapply(tstsetups, function(tstsetup_vsel) {
    !is.null(args_obj[[tstsetup_vsel]]$search_terms)
  })]
  # Choose all test setups which are for the latent or the augmented-data
  # projection or which are corresponding to such:
  tstsetups_smmry_vsel <- union(
    tstsetups_smmry_vsel,
    tstsetups[sapply(tstsetups, function(tstsetup_vsel) {
      args_obj[[tstsetup_vsel]]$prj_nm %in% c("latent", "augdat",
                                              "trad_compare")
    })]
  )

  # Ensure that from each model type (`mod_nm`) and each family (`fam_nm`), we
  # have at least one test setup:
  mods_fams_existing <- sapply(tstsetups_smmry_vsel, function(tstsetup_vsel) {
    paste0(args_obj[[tstsetup_vsel]]$mod_nm, ".",
           args_obj[[tstsetup_vsel]]$fam_nm)
  })
  mods_fams_possible <- apply(expand.grid(mod_nms, fam_nms), 1, paste,
                              collapse = ".")
  mods_fams_missing <- setdiff(mods_fams_possible, mods_fams_existing)
  tstsetups_smmry_vsel <- union(
    tstsetups_smmry_vsel,
    unlist(lapply(mods_fams_missing, function(mod_fam) {
      head(
        grep(paste0(".", mod_fam, "."), tstsetups, value = TRUE, fixed = TRUE),
        1
      )
    }))
  )

  # In case of `run_more = TRUE`, we need to make sure to include the following:
  if (run_more) {
    tstsetups_smmry_vsel <- union(
      tstsetups_smmry_vsel,
      head(
        tstsetups[sapply(tstsetups, function(tstsetup_vsel) {
          args_obj[[tstsetup_vsel]]$mod_nm == "glm" &&
            args_obj[[tstsetup_vsel]]$fam_nm == "gauss" &&
            is.null(args_obj[[tstsetup_vsel]]$search_terms)
        })],
        1
      )
    )
  }

  tstsetups_smmry_vsel <- setNames(nm = tstsetups_smmry_vsel)
  stopifnot(length(tstsetups_smmry_vsel) > 0)
  lapply(tstsetups_smmry_vsel, function(tstsetup_vsel) {
    mod_crr <- args_obj[[tstsetup_vsel]]$mod_nm
    fam_crr <- args_obj[[tstsetup_vsel]]$fam_nm
    prj_crr <- args_obj[[tstsetup_vsel]]$prj_nm
    add_stats <- switch(mod_crr,
                        "glm" = switch(prj_crr,
                                       "augdat" = "augdat_stats",
                                       "latent" = "augdat_stats",
                                       switch(fam_crr,
                                              "brnll" = "binom_stats",
                                              "binom" = "binom_stats",
                                              "common_stats")),
                        character())
    if (!run_more && !is.null(args_obj[[tstsetup_vsel]]$search_terms)) {
      add_stats <- character()
    }
    stats_tst <- stats_tst[c("default_stats", add_stats)]
    lapply(stats_tst, function(stats_crr) {
      if (!run_more) {
        if (!is.null(args_obj[[tstsetup_vsel]]$search_terms)) {
          nterms_tst <- nterms_max_smmry["default_nterms_max_smmry"]
        } else {
          nterms_tst <- nterms_max_smmry["halfway"]
        }
      } else {
        if (mod_crr == "glm" && fam_crr == "gauss" &&
            is.null(args_obj[[tstsetup_vsel]]$search_terms) &&
            length(stats_crr) == 0) {
          nterms_tst <- nterms_max_smmry[c("default_nterms_max_smmry",
                                           "halfway", "zero")]
        } else {
          nterms_tst <- nterms_max_smmry["default_nterms_max_smmry"]
        }
      }
      if (length(stats_crr)) {
        cumulate_tst <- cumulate_tst["cuFALSE"]
      }
      lapply(nterms_tst, function(nterms_crr) {
        if (prj_crr != "latent") {
          resp_oscale_tst <- resp_oscale_tst["default_r_oscale"]
        }
        lapply(resp_oscale_tst, function(resp_oscale_crr) {
          if (isFALSE(resp_oscale_crr$resp_oscale) &&
              any(setdiff(stats_binom, stats_common) %in% stats_crr$stats)) {
            return(dummy_glob)
          }
          lapply(cumulate_tst, function(cumulate_crr) {
            return(c(
              nlist(tstsetup_vsel), only_nonargs(args_obj[[tstsetup_vsel]]),
              list(type = type_tst, nterms_max = nterms_crr),
              stats_crr, resp_oscale_crr, list(cumulate = cumulate_crr)
            ))
          })
        })
      })
    })
  })
}

### varsel() --------------------------------------------------------------

if (run_vs) {
  args_smmry_vs <- cre_args_smmry_vsel(args_vs)
  args_smmry_vs <- unlist_cust(args_smmry_vs)
  args_smmry_vs <- rm_dummies(args_smmry_vs)
  has_zero_vs <- any(sapply(lapply(args_smmry_vs, "[[", "nterms_max"),
                            identical, 0L))

  smmrys_vs <- lapply(args_smmry_vs, function(args_smmry_vs_i) {
    if (any(c("auc") %in% args_smmry_vs_i$stats)) {
      smmry_seed <- list(seed = seed3_tst)
    } else {
      smmry_seed <- list()
    }
    do.call(summary, c(
      list(object = vss[[args_smmry_vs_i$tstsetup_vsel]]),
      excl_nonargs(args_smmry_vs_i),
      smmry_seed
    ))
  })
}

### cv_varsel() -----------------------------------------------------------

if (run_cvvs) {
  args_smmry_cvvs <- cre_args_smmry_vsel(args_cvvs)
  args_smmry_cvvs <- unlist_cust(args_smmry_cvvs)
  args_smmry_cvvs <- rm_dummies(args_smmry_cvvs)
  has_zero_cvvs <- any(sapply(lapply(args_smmry_cvvs, "[[", "nterms_max"),
                              identical, 0L))

  smmrys_cvvs <- lapply(args_smmry_cvvs, function(args_smmry_cvvs_i) {
    if (any(c("auc") %in% args_smmry_cvvs_i$stats)) {
      smmry_seed <- list(seed = seed3_tst)
    } else {
      smmry_seed <- list()
    }
    do.call(summary, c(
      list(object = cvvss[[args_smmry_cvvs_i$tstsetup_vsel]]),
      excl_nonargs(args_smmry_cvvs_i),
      smmry_seed
    ))
  })
}

if (run_more) {
  has_zero_combined <- logical()
  if (run_vs) {
    has_zero_combined <- c(has_zero_combined, has_zero_vs)
  }
  if (run_cvvs) {
    has_zero_combined <- c(has_zero_combined, has_zero_cvvs)
  }
  if (length(has_zero_combined)) {
    stopifnot(any(has_zero_combined))
  }
}

## performances() ---------------------------------------------------------

### varsel() --------------------------------------------------------------

if (run_vs) {
  args_perf_vs <- lapply(
    setNames(nm = names(smmrys_vs)),
    function(tstsetup_smmry_vsel) {
      return(c(
        nlist(tstsetup_smmry_vsel),
        only_nonargs(args_smmry_vs[[tstsetup_smmry_vsel]])
      ))
    }
  )
  args_perf_vs <- unlist_cust(args_perf_vs)

  perfs_vs <- lapply(args_perf_vs, function(args_perf_vs_i) {
    do.call(performances, c(
      list(object = smmrys_vs[[args_perf_vs_i$tstsetup_smmry_vsel]]),
      excl_nonargs(args_perf_vs_i)
    ))
  })
}

### cv_varsel() -----------------------------------------------------------

if (run_cvvs) {
  args_perf_cvvs <- lapply(
    setNames(nm = names(smmrys_cvvs)),
    function(tstsetup_smmry_vsel) {
      return(c(
        nlist(tstsetup_smmry_vsel),
        only_nonargs(args_smmry_cvvs[[tstsetup_smmry_vsel]])
      ))
    }
  )
  args_perf_cvvs <- unlist_cust(args_perf_cvvs)

  perfs_cvvs <- lapply(args_perf_cvvs, function(args_perf_cvvs_i) {
    do.call(performances, c(
      list(object = smmrys_cvvs[[args_perf_cvvs_i$tstsetup_smmry_vsel]]),
      excl_nonargs(args_perf_cvvs_i)
    ))
  })
}

## plot.vsel() ------------------------------------------------------------

cre_args_plot_vsel <- function(args_obj) {
  tstsetups <- grep("\\.brnll\\..*\\.trad", names(args_obj), value = TRUE)
  tstsetups <- union(
    tstsetups,
    grep("\\.default_search_trms|\\.alltrms", names(args_obj), value = TRUE,
         invert = TRUE)
  )
  tstsetups <- union(
    tstsetups,
    head(grep("\\.spclformul", names(args_obj), value = TRUE), 1)
  )
  lapply(
    setNames(nm = tstsetups),
    function(tstsetup_vsel) {
      nterms_max_plot <- nterms_max_smmry[c("default_nterms_max_smmry",
                                            "halfway")]
      lapply(nterms_max_plot, function(nterms_crr) {
        lapply(rk_max_tst, function(rk_max_crr) {
          lapply(rk_abbv_tst, function(rk_abbv_crr) {
            lapply(rk_repel_tst, function(rk_repel_crr) {
              lapply(rk_col_tst, function(rk_col_crr) {
                lapply(cumulate_tst, function(cumulate_crr) {
                  lapply(angle_tst, function(angle_crr) {
                    return(c(
                      nlist(tstsetup_vsel),
                      only_nonargs(args_obj[[tstsetup_vsel]]),
                      list(nterms_max = nterms_crr),
                      rk_max_crr, rk_abbv_crr, rk_repel_crr,
                      list(ranking_colored = rk_col_crr,
                           cumulate = cumulate_crr),
                      angle_crr
                    ))
                  })
                })
              })
            })
          })
        })
      })
    })
}

### varsel() --------------------------------------------------------------

if (run_vs) {
  args_plot_vs <- cre_args_plot_vsel(args_vs)
  args_plot_vs <- unlist_cust(args_plot_vs)

  plots_vs <- lapply(args_plot_vs, function(args_plot_vs_i) {
    do.call(plot, c(
      list(x = vss[[args_plot_vs_i$tstsetup_vsel]]),
      excl_nonargs(args_plot_vs_i)
    ))
  })
}

### cv_varsel() -----------------------------------------------------------

if (run_cvvs) {
  args_plot_cvvs <- cre_args_plot_vsel(args_cvvs)
  args_plot_cvvs <- unlist_cust(args_plot_cvvs)

  plots_cvvs <- lapply(args_plot_cvvs, function(args_plot_cvvs_i) {
    do.call(plot, c(
      list(x = cvvss[[args_plot_cvvs_i$tstsetup_vsel]]),
      excl_nonargs(args_plot_cvvs_i)
    ))
  })
}

## ranking() --------------------------------------------------------------

### varsel() --------------------------------------------------------------

if (run_vs) {
  args_rk_vs <- lapply(setNames(nm = names(vss)), function(tstsetup_vsel) {
    lapply(nterms_max_rk, function(nterms_crr) {
      return(c(
        nlist(tstsetup_vsel), only_nonargs(args_vs[[tstsetup_vsel]]),
        nterms_crr
      ))
    })
  })
  args_rk_vs <- unlist_cust(args_rk_vs)

  rks_vs <- lapply(args_rk_vs, function(args_rk_vs_i) {
    do.call(ranking, c(
      list(object = vss[[args_rk_vs_i$tstsetup_vsel]]),
      excl_nonargs(args_rk_vs_i)
    ))
  })
}

### cv_varsel() -----------------------------------------------------------

if (run_cvvs) {
  args_rk_cvvs <- lapply(setNames(nm = names(cvvss)), function(tstsetup_vsel) {
    lapply(nterms_max_rk, function(nterms_crr) {
      return(c(
        nlist(tstsetup_vsel), only_nonargs(args_cvvs[[tstsetup_vsel]]),
        nterms_crr
      ))
    })
  })
  args_rk_cvvs <- unlist_cust(args_rk_cvvs)

  rks_cvvs <- lapply(args_rk_cvvs, function(args_rk_cvvs_i) {
    do.call(ranking, c(
      list(object = cvvss[[args_rk_cvvs_i$tstsetup_vsel]]),
      excl_nonargs(args_rk_cvvs_i)
    ))
  })
}

## cv_proportions() -------------------------------------------------------

err_no_foldwise_rk <- "Could not find fold-wise predictor rankings"

### varsel() --------------------------------------------------------------

if (run_vs) {
  args_pr_vs <- lapply(setNames(nm = names(rks_vs)), function(tstsetup_rk) {
    return(c(
      nlist(tstsetup_rk), only_nonargs(args_rk_vs[[tstsetup_rk]])
    ))
  })
  args_pr_vs <- unlist_cust(args_pr_vs)

  prs_vs <- lapply(args_pr_vs, function(args_pr_vs_i) {
    err_expected <- err_no_foldwise_rk
    expect_error(
      do.call(cv_proportions, c(
        list(object = rks_vs[[args_pr_vs_i$tstsetup_rk]]),
        excl_nonargs(args_pr_vs_i)
      )),
      err_expected,
      info = args_pr_vs_i$tstsetup_rk
    )
    return(dummy_glob)
  })
  keep_prs_vs <- rm_dummies(prs_vs, return_logical = TRUE)
  prs_vs <- prs_vs[keep_prs_vs]
  args_pr_vs <- args_pr_vs[keep_prs_vs]
}

### cv_varsel() -----------------------------------------------------------

if (run_cvvs) {
  args_pr_cvvs <- lapply(setNames(nm = names(rks_cvvs)), function(tstsetup_rk) {
    lapply(cumulate_tst, function(cumulate_crr) {
      return(c(
        nlist(tstsetup_rk), only_nonargs(args_rk_cvvs[[tstsetup_rk]]),
        list(cumulate = cumulate_crr)
      ))
    })
  })
  args_pr_cvvs <- unlist_cust(args_pr_cvvs)

  prs_cvvs <- lapply(args_pr_cvvs, function(args_pr_cvvs_i) {
    if (isFALSE(args_cvvs[[args_pr_cvvs_i$tstsetup_vsel]]$validate_search)) {
      err_expected <- err_no_foldwise_rk
    } else if (isTRUE(
      args_rk_cvvs[[args_pr_cvvs_i$tstsetup_rk]]$nterms_max == 0
    )) {
      err_expected <- "Needing `nterms_max >= 1`"
    } else {
      err_expected <- NA
    }
    expect_error(
      pr_out <- do.call(cv_proportions, c(
        list(object = rks_cvvs[[args_pr_cvvs_i$tstsetup_rk]]),
        excl_nonargs(args_pr_cvvs_i)
      )),
      err_expected,
      info = args_pr_cvvs_i$tstsetup_rk
    )
    if (is.na(err_expected)) {
      return(pr_out)
    } else {
      return(dummy_glob)
    }
  })
  keep_prs_cvvs <- rm_dummies(prs_cvvs, return_logical = TRUE)
  prs_cvvs <- prs_cvvs[keep_prs_cvvs]
  args_pr_cvvs <- args_pr_cvvs[keep_prs_cvvs]
}

## plot.cv_proportions() --------------------------------------------------

if (run_cvvs) {
  args_plotpr <- lapply(setNames(nm = names(prs_cvvs)), function(tstsetup_pr) {
    return(c(
      nlist(tstsetup_pr), only_nonargs(args_pr_cvvs[[tstsetup_pr]])
    ))
  })
  args_plotpr <- unlist_cust(args_plotpr)

  plotprs <- lapply(args_plotpr, function(args_plotpr_i) {
    do.call(plot, c(
      list(x = prs_cvvs[[args_plotpr_i$tstsetup_pr]]),
      excl_nonargs(args_plotpr_i)
    ))
  })
}

## Output names -----------------------------------------------------------

# Output elements of `vsel` objects:
vsel_nms <- c(
  "refmodel", "nobs_train", "search_path", "predictor_ranking",
  "predictor_ranking_cv", "ce", "type_test", "y_wobs_test", "nobs_test",
  "summaries", "summaries_fast", "nterms_all", "nterms_max", "method",
  "cv_method", "nloo", "loo_inds", "K",
  "validate_search", "cvfits", "args_search", "clust_used_search",
  "clust_used_eval", "nprjdraws_search", "nprjdraws_eval", "refit_prj",
  "projpred_version"
)
# Output elements of `vsel` objects that may be influenced by `cvfits`:
vsel_nms_cvfits <- c("refmodel", "cvfits", "summaries", "predictor_ranking_cv")
vsel_nms_cvfits_opt <- c("predictor_ranking_cv")
# Sub-elements of `summaries`'s `sub` and `ref` elements:
vsel_smmrs_sub_nms <- vsel_smmrs_ref_nms <- c("mu", "lppd")

## Defaults ---------------------------------------------------------------

nclusters_default <- 20L
ndraws_pred_default <- 400L
nresample_clusters_default <- 1000L
regul_default <- 1e-4

# Seed --------------------------------------------------------------------

if (run_randRNG) {
  set.seed(NULL)
}
stan-dev/projpred documentation built on April 13, 2025, 12:55 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
stan-dev/projpred
Projection Predictive Feature Selection

tests/testthat/setup.R
In stan-dev/projpred: Projection Predictive Feature Selection

R Package Documentation

Browse R Packages

We want your feedback!

stan-dev/projpred Projection Predictive Feature Selection

tests/testthat/setup.R In stan-dev/projpred: Projection Predictive Feature Selection

R Package Documentation

Browse R Packages

We want your feedback!

stan-dev/projpred
Projection Predictive Feature Selection

tests/testthat/setup.R
In stan-dev/projpred: Projection Predictive Feature Selection