# Easy fit
#'
#'
#' This function performs the training of the chosen models to a group of outcomes (y)
#' @param dy outcomes
#' @param dx co-variables
#' @param models vector of models to be fitted. Uses algortims names from Caret package.
#' @param nfolds Number of folds to be build in crossvalidation
#' @param resample_ resample method 'boot', 'boot632', 'optimism_boot', 'boot_all', 'cv', 'repeatedcv', 'LOOCV', 'LGOCV','none', 'oob', 'timeslice', 'adaptive_cv', 'adaptive_boot', 'adaptive_LGOCV'
#' @param repeats repeats
#' @param cpu_cores Number of CPU cores to be used in parallel processing
#' @param tune_length By default, this argument is the number of levels for each tuning parameters that should be generated by train.
#' @param tolerance_RFE tolerance
#' @param metric metric used to evaluate model fit. For numeric outcome ("RMSE", "Rsquared)
#' @param seeds seeds
#' @param fun_RFE A list of functions for model fitting, prediction and variable importance
#' @param nfolds_RFE nfolds to recursive feature elimination
#' @param repeats_RFE repeats to recursive feature elimination
#' @param sizes_RFE A numeric vector of integers corresponding to the number of features that should be retained
#' @param metric_RFE metric to recursive feature elimination
#' @param preprocess pre process
#' @param verbose show results and messages from processing functions
#' @keywords Train kappa
#' @importFrom caret pickSizeTolerance
#' @importFrom caret rfFuncs
#' @importFrom caret contr.ltfr
#' @importFrom caret dummyVars
#' @importFrom dplyr select tibble one_of
#' @importFrom stats as.formula na.omit predict
#' @author Elpidio Filho, \email{elpidio@ufv.br}
#' @details details
#' @export
#' @examples
#' \dontrun{
#' easy_fit(dy,dx,train,models = c("ridge", "rf", "cubist"), metric = "Rsquared", tolerance = 2)
#' }
easy_fit <- function(dy, dx,
nfolds= 10,
repeats = NA,
resample_ = 'cv',
metric = ifelse(is.factor(dy[, 1]), "Kappa", "Rsquared"),
cpu_cores = 7,
tune_length = 5,
fun_RFE = caret::rfFuncs,
repeats_RFE = 1,
metric_RFE = ifelse(is.factor(dy[, 1]), "Kappa", "Rsquared"),
nfolds_RFE = 5,
sizes_RFE = c(2:10, 15),
tolerance_RFE = 0,
models = c("ridge", "rf", "cubist", "pls",
"foba", "gbm", "glmboost"),
preprocess = NULL,
seeds = NULL,
verbose = TRUE) {
dy = data.frame(dy)
ny <- ncol(dy)
list.model <- dplyr::tibble(
var = character(ny), selec = character(ny),
models = vector(mode = "list", length = ny))
vm <- caret::dummyVars(~ ., data = dx)
dxm <- predict(vm, dx) %>%
data.frame()
for (i in 1:ny) {
maximize <- TRUE
if (is.numeric(dy[, i])) {
if (is.null(metric) | metric == "Rsquared") {
maximize <- TRUE
}
} else {
if (is.null(metric) | metric == "Kappa") {
maximize <- TRUE
}
}
el <- names(dy)[i]
y <- dy[, i]
xy <- data.frame(y, dxm)
xy <- xy %>%
na.omit() %>%
data.frame()
names(xy)[1] <- el
set.seed(313)
fit <- recursive_feature_elimination(
xy,
sizes = sizes_RFE,
fun = fun_RFE,
cpu_cores = cpu_cores,
nfolds = nfolds_RFE,
metric = metric_RFE,
seeds = seeds,
verbose = verbose
)
if (tolerance_RFE > 0) {
tol5 <- caret::pickSizeTolerance(
fit$results, metric = "Rsquared",
tol = tolerance_RFE, maximize = maximize
)
vs <- c(el, fit$optVariables[1:tol5])
} else {
vs <- c(el, fit$optVariables)
}
dsel <- xy %>%
dplyr::select(one_of(vs))
fit.reg <- run_models(
dsel,
models = models,
resample_ = resample_,
preprocess = preprocess,
metric = metric,
cpu_cores = cpu_cores,
tune_length = tune_length,
nfolds = nfolds,
repeats = repeats,
seeds = seeds,
verbose = verbose
)
list.model$var[i] <- el
list.model$selec[i] <- list(vs)
list.model$models[i] <- list(fit.reg)
}
return(list.model)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.