#' Run Models
#'
#'
#' This function run many models using the same data
#' @param df Training dataframe
#' @param formula A formula of the form y ~ x1 + x2 + ... If users don't inform formula, the first column will be used as Y values and the others columns with x1,x2....xn
#' @param preprocess pre process
#' @param index Users cross validation folds. Default = NULL
#' @param models chosen models to be used to train model. Uses algortims names from Caret package.
#' @param rsample resample method 'boot', 'boot632', 'optimism_boot', 'boot_all', 'cv', 'repeatedcv', 'LOOCV', 'LGOCV','none', 'oob', 'timeslice', 'adaptive_cv', 'adaptive_boot', 'adaptive_LGOCV'
#' @param nfolds Number of folds to be build in crossvalidation
#' @param repeats number of repeats to resample method repeatedcv
#' @param cpu_cores Number of CPU cores to be used in parallel processing
#' @param tune_length This argument is the number of levels for each tuning parameters that should be generated by train
#' @param search search option "grid" or "random"
#' @param metric metric used to evaluate model fit. For numeric outcome ('RMSE', 'Rsquared)
#' @param seeds generate random seeds to allow reproductible results
#' @param verbose prints results during the execution of the function
#' @importFrom utils flush.console
#' @importFrom caret getModelInfo
#' @importFrom utils install.packages installed.packages
#' @keywords models
#' @author Elpidio Filho, \email{elpidio@ufv.br}
#' @details details
#' @export
#' @examples
#' \dontrun{
#' models = c('ridge', 'rf', 'cubist','pls','pcr','foba','gbm','glmboost')
#' fit_models = run_models(df,models = models)
#' }
run_models <- function(df, models = 'rf',
formula = NULL,
preprocess = NULL,
index = NULL,
rsample = "cv",
nfolds = 10,
repeats = NA,
tune_length = 5,
search = "grid",
cpu_cores = 0,
metric = NULL,
seeds = NULL,
verbose = TRUE) {
if (class(df) != "data.frame") stop("df is not a data frame.")
if (is.null(formula) == FALSE) {
if (class(formula) != 'formula') {
formula =as.formula(formula)
}
}
mod = is_factor_income(df, formula)
# if (is.factor(df[, 1]) == TRUE) {
# mod <- 1
# } else {
# mod <- 0
# }
vlib <- character()
for (i in seq_along(models)) {
md <- caret::getModelInfo(models[i], regex = FALSE)[[1]]
vlib <- c(vlib, md$library)
if (length(md) == 0) {
stop(paste(
"Model", models[i],
"is not in caret's built-in library"
), call. = FALSE)
} else {
if (mod == 0) {
if (!("Regression" %in% md$type)) {
stop(paste(
"Model", models[i],
"is not in a regression model"
), call. = FALSE)
}
} else {
if (!("Classification" %in% md$type)) {
stop(paste(
"Model", models[i],
"is not in a classification model"
), call. = FALSE)
}
}
}
}
plataforma <- .Platform$OS.type
remove <- c("NA", " ")
idx <- which(vlib %in% remove)
if (length(idx) > 0) {
vlib <- vlib[-idx]
}
pkglist <- unique(vlib)
inst <- vlib %in% utils::installed.packages()
if (length(pkglist[!inst]) > 0) {
np <- paste(pkglist[!inst], collapse = ", ")
# if (plataforma == "windows") {
# print(paste("packages ", np, " will be installed"))
# utils::install.packages(pkglist[!inst], dep = TRUE)
# } else {
print(paste("Warning : packages ", np, " needs to installed"))
#}
}
package.inicio <- search()[ifelse(unlist(
gregexpr("package:", search())
) == 1, TRUE, FALSE)]
if (verbose == TRUE) {
label1 <- models[i]
}
list.model <- vector("list")
cont <- 1
failed <- character()
inicio_total <- Sys.time()
for (j in seq_along(models)) {
if (verbose == TRUE) {
inicio <- Sys.time()
if (j == 1) {
nm <- length(models)
width <- 40
label1 <- stringr::str_pad(models[j], 15, "right")
cat("\r", label1)
}
}
if (mod == 0) {
if (is.null(metric)) { metric = 'Rsquared'}
fit.reg <- tryCatch({
regression(
df.train = df,
formula = formula,
index = index,
search = search,
rsample = rsample,
regressor = models[j],
preprocess = preprocess,
nfolds = nfolds,
cpu_cores = cpu_cores,
repeats = repeats,
metric = metric,
tune_length = tune_length,
seeds = vector_seeds(seeds, repeats, nfolds)
)
},
error = function(e) {
print(paste("Error:", conditionMessage(e)))
return(NULL)
}
)
if (is.null(fit.reg) == FALSE) {
list.model[cont] <- list(fit.reg)
names(list.model)[cont] <- models[j]
cont <- cont + 1
} else {
failed <- c(failed, models[j])
}
} else {
if (is.null(metric)) { metric = 'Kappa'}
fit.class <- tryCatch({
classification(
df.train = df,
formula = formula,
rsample = rsample,
index = index,
search = search,
classifier = models[j],
preprocess = preprocess,
nfolds = nfolds,
cpu_cores = cpu_cores,
repeats = repeats,
metric = metric,
tune_length = tune_length,
seeds = vector_seeds(seeds, repeats, nfolds)
)
},
error = function(e) {
print(" ")
print(e)
}
)
if (is.null(fit.class) == FALSE) {
list.model[cont] <- list(fit.class)
names(list.model)[cont] <- models[j]
cont <- cont + 1
}
}
if (verbose == TRUE) {
nm <- length(models)
width <- 40
tdif <- until_now(inicio)
tdif_total <- until_now(inicio_total)
label1 <- stringr::str_pad(models[j], 15, "right")
s1 <- paste0(rep("#", j / nm * width), collapse = "")
s2 <- paste0(rep(".", (nm - j) / nm * width), collapse = "")
cat("\r", label1, s1, s2, tdif, tdif_total)
if (j == nm) cat("\n")
}
}
package.fim <- search()[ifelse(unlist(gregexpr(
"package:",
search()
)) == 1, TRUE, FALSE)]
package.list <- setdiff(package.fim, package.inicio)
if (length(package.list) > 0) {
for (package in package.list)
detach(package, character.only = TRUE)
}
list.model <- list.model[!sapply(list.model, is.null)]
if (verbose == TRUE) {
if (length(failed) > 0) {
print("failed models")
print(failed)
}
}
return(list.model)
}
vector_seeds <- function(seeds, repeats, nfolds) {
if (is.null(seeds)) {
vseed <- NULL
} else {
set.seed(seeds)
if (is.na(repeats) == FALSE) {
nel <- (nfolds * repeats) + 1
} else {
nel <- nfolds + 1
}
vseed <- vector(mode = "list", length = nel)
for (i in 1:nel) vseed[[i]] <- sample.int(n = 100000, 1000)
vseed[[nel + 1]] <- sample.int(100000, 1)
}
return(vseed)
}
run_nested_models <- function(df, models ,
formula = NULL,
preprocess = NULL,
index = NULL,
rsample = "cv",
nfolds = 10,
repeats = NA,
tune_length = 5,
cpu_cores = 0,
metric = ifelse(is.factor(df[, 1]),
"Kappa", "Rsquared"
),
seeds = NULL,
verbose = T)
{
}
is_factor_income <- function(d, formula = NULL){
if (is.null(formula)) {
if (is.factor(d[, 1]) == TRUE) {
mod <- 1
} else {
mod <- 0
}
} else {
if (class(formula) != 'formula') {
formula =as.formula(formula)
}
v = all.vars(formula)[1]
qc = d %>% dplyr::select(one_of(v)) %>% dplyr::pull() %>% class
if (qc == 'factor') {
mod <- 1
} else {
mod <- 0
}
}
return(mod)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.