# resLearn.R
# ::rtemis::
# 2022 E.D. Gennatas www.lambdamd.org
# new version of resLearn using future
#' \pkg{rtemis} internal: Resample Learn
#'
#' Train an \pkg{rtemis} learner on a set of resamples
#'
#' Input: features (x) and outcome (y)
#' Procedure: [resample], train learners
#' Output: trained learners
#' This is used internally by [train_cv] and for bagging, when the `bag.resampler`
#' argument is set in a learner.
#' @param x features - training set
#' @param y outcome - training set
#' @param alg Character: \pkg{rtemis} supervised learning algorithm to use. See `select_learn` for available options.
#' @param resample.params List: output of [rtset] (or a list of same structure)
#' @param params List of named elements, each is a single value
#' @param verbose Logical: If TRUE, print messages to screen
#' @param res.verbose Logical: Will be passed to each algorithm's `verbose` argument
#' @param save.mods Logical: If TRUE, save all models, otherwise discard after training.
#' Use with [train_cv] when training a large number of resamples. Default = TRUE
#' @param outdir Character: Path to save output. Default = NULL
#' @param n.workers Integer: Number of cores to use.
#'
#' @author E.D. Gennatas
#' @keywords internal
#' @noRd
resLearn <- function(x, y, alg,
resample.params = setup.cv.resample(),
weights = NULL,
params = list(),
mtry = NULL,
.preprocess = NULL,
verbose = TRUE,
res.verbose = FALSE,
trace = 0,
save.mods = TRUE,
outdir = NULL,
n.workers = 1) {
# Dependencies ----
dependency_check("future.apply")
# Intro ----
start.time <- intro(
verbose = trace > 0,
message = "Starting resLearn...",
newline.pre = TRUE
)
future::plan(rtPlan)
# rtemis_init(n.workers, context = "Outer resampling")
# Arguments ----
if (missing(x) || missing(y)) {
print(args(resLearn))
stop("Input missing")
}
mod.name <- toupper(alg)
if (!is.null(outdir)) {
outdir <- normalizePath(outdir, mustWork = FALSE)
if (!dir.exists(outdir)) {
dir.create(outdir,
showWarnings = TRUE,
recursive = TRUE,
mode = "0777"
)
}
}
# Resamples ----
learner <- select_learn(alg)
res <- resample(y, rtset = resample.params, verbosity = trace)
resampler <- attr(res, "resampler") # for res.group and res.index
n.workers <- min(n.workers, resample.params$n.resamples)
# Parallel ----
if (n.workers == 1) {
future::plan(list("sequential", rtPlan), workers = n.workers)
if (verbose) {
msg2(magenta(
"Outer resampling plan set to",
bold("sequential")
))
}
} else {
future::plan(list(rtPlan, "sequential"), workers = n.workers)
if (verbose) {
msg2(magenta(
"Outer resampling: Future plan set to", bold(rtPlan),
"with", bold(n.workers), "workers"
))
}
}
# learner1 ----
p <- progressr::progressor(along = res)
learner1 <- function(index, learner,
x, y,
weights = NULL,
mtry,
res,
params,
.preprocess,
verbose,
outdir,
save.mods,
nres) {
if (verbose) msg2("Running resample #", index, sep = "")
res1 <- res[[index]]
if (is.null(mtry)) {
feat.index <- seq_len(NCOL(x))
} else {
feat.index <- sample(seq_len(NCOL(x)), mtry, replace = FALSE)
}
x.train1 <- x[res1, feat.index, drop = FALSE]
y.train1 <- y[res1]
x.test1 <- x[-res1, feat.index, drop = FALSE]
y.test1 <- y[-res1]
if (!is.null(.preprocess)) {
# This allows imputing training and testing sets separately
preproc.params <- c(
list(x = x.train1),
.preprocess,
verbose = verbose
)
x.train1 <- do.call(preprocess, preproc.params)
preproc.params <- c(
list(x = x.test1),
.preprocess,
verbose = verbose
)
x.test1 <- do.call(preprocess, preproc.params)
}
if (!is.null(weights)) weights <- weights[res1]
if (!is.null(outdir)) {
outdir1 <- paste0(outdir, "/", mod.name, ".resLearn.", index)
} else {
outdir1 <- NULL
}
if (!is.null(weights)) params$weights <- weights
args <- list(
x = x.train1, y = y.train1,
x.test = x.test1, y.test = y.test1,
print.plot = FALSE,
verbose = res.verbose,
outdir = outdir1
)
## Get id.strat for resample
if (!is.null(attr(res, "id.strat")) && !is.null(params$grid.resample.params)) {
params$grid.resample.params$id.strat <- attr(res, "id.strat")[res1]
}
args <- c(args, params)
mod1 <- do.call(learner, args = args)
if (!save.mods) mod1$mod <- NA
p(sprintf("Outer resample: %i/%i...", index, nres))
list(
mod1 = mod1,
res.id = index,
params = params
)
}
# Res run ----
if (trace > 0) {
desc <- switch(resampler,
kfold = "independent folds",
strat.sub = "stratified subsamples",
strat.boot = "stratified bootstraps",
bootstrap = "bootstrap resamples",
loocv = "independent folds (LOOCV)",
"custom resamples"
)
msg20("Training ", select_learn(alg, desc = TRUE), " on ",
length(res), " ", desc, "...",
newline.pre = FALSE
)
}
res.run <- future.apply::future_lapply(
seq_along(res), learner1,
learner,
x, y,
weights,
mtry,
res,
params,
.preprocess,
verbose = res.verbose,
outdir = outdir,
save.mods = save.mods,
future.seed = TRUE,
length(res)
)
names(res.run) <- paste0(toupper(alg), seq(res))
if (res.verbose) cat("\n")
# Outro ----
outro(start.time, verbose = trace > 0)
list(res = res, mods = res.run)
} # rtemis::resLearn
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.