#' Fit a regression model
#'
#' This function performs the training of the chosen regressor. It builds a
#' resampling specification with \code{caret::trainControl}, optionally
#' registers a PSOCK cluster as the parallel backend, and calls
#' \code{caret::train}.
#'
#' @param df.train Training dataframe
#' @param formula A formula of the form y ~ x1 + x2 + ... If no formula is given, the first column is used as the Y values and the remaining columns as x1, x2, ..., xn
#' @param preprocess Pre-processing specification, passed to \code{caret::train} as \code{preProcess}
#' @param regressor Choice of regressor to be used to train the model. Uses algorithm names from the caret package.
#' @param resample_ Resampling method, one of 'boot', 'boot632', 'optimism_boot', 'boot_all', 'cv', 'repeatedcv', 'LOOCV', 'LGOCV','none', 'oob', 'timeslice', 'adaptive_cv', 'adaptive_boot', 'adaptive_LGOCV'
#' @param nfolds Number of folds to be built in cross-validation (passed to \code{caret::trainControl} as \code{number})
#' @param repeats Passed to \code{caret::trainControl} as \code{repeats}
#' @param index Passed to \code{caret::trainControl} as \code{index}
#' @param cpu_cores Number of CPU cores to be used in parallel processing. When 0 (the default) no cluster is created.
#' @param tune_length This argument is the number of levels for each tuning parameter that should be generated by train
#' @param metric Metric used to evaluate model fit. For numeric outcomes ("RMSE", "Rsquared")
#' @param seeds Passed to \code{caret::trainControl} as \code{seeds}
#' @param verbose If \code{TRUE}, report the elapsed time and, when training succeeded, print \code{caret::getTrainPerf} for the fitted model
#' @return The object returned by \code{caret::train}, or \code{NULL} (with a
#'   warning carrying the original error message) if training failed.
#' @keywords Train regression RMSE Rsquared
#' @importFrom parallel makePSOCKcluster stopCluster
#' @importFrom doParallel registerDoParallel
#' @importFrom caret trainControl train getTrainPerf
#' @importFrom stats as.formula
#' @importFrom foreach registerDoSEQ
#' @author Elpidio Filho, \email{elpidio@ufv.br}
#' @details An invalid \code{resample_} value stops with an error before any
#'   model is trained. Errors raised during training are caught: they are
#'   reported as a warning and the function returns \code{NULL}.
#' @export
#' @examples
#' \dontrun{
#' regression(df.train = df, regressor = "rf", metric = "Rsquared", seeds = 313)
#' }
regression <- function(df.train, formula = NULL, preprocess = NULL,
                       regressor = "rf", resample_ = 'cv', nfolds = 10,
                       repeats = NA, index = NULL, cpu_cores = 0,
                       tune_length = 5, metric = "Rsquared",
                       seeds = NULL, verbose = FALSE) {
  resample_methods <- c('boot', 'boot632', 'optimism_boot', 'boot_all', 'cv',
                        'repeatedcv', 'LOOCV', 'LGOCV', 'none', 'oob',
                        'timeslice', 'adaptive_cv', 'adaptive_boot',
                        'adaptive_LGOCV')
  # Exactly one known method is required; the message reports the value the
  # caller actually supplied.
  if (length(resample_) != 1L || !(resample_ %in% resample_methods)) {
    stop(paste("resample method", paste(resample_, collapse = ", "),
               "does not exist"))
  }

  started <- Sys.time()
  tc <- caret::trainControl(method = resample_, number = nfolds,
                            repeats = repeats, index = index,
                            seeds = seeds)

  if (cpu_cores > 0) {
    cl <- parallel::makePSOCKcluster(cpu_cores)
    # Register the cleanup before the backend so the sequential backend is
    # restored and the workers are stopped on every exit path.
    on.exit({
      foreach::registerDoSEQ()
      parallel::stopCluster(cl)
    }, add = TRUE)
    doParallel::registerDoParallel(cl)
  }

  # Training failures still yield NULL, but the cause is surfaced as a warning
  # instead of being discarded.
  on_train_error <- function(e) {
    warning("training of regressor '", regressor, "' failed: ",
            conditionMessage(e), call. = FALSE)
    NULL
  }

  fit <- tryCatch(
    if (is.null(formula)) {
      # drop = FALSE keeps a single predictor as a one-column table;
      # drop = TRUE makes the response a plain vector.
      caret::train(x = df.train[, -1, drop = FALSE],
                   y = df.train[, 1, drop = TRUE],
                   method = regressor, metric = metric,
                   trControl = tc, tuneLength = tune_length,
                   preProcess = preprocess)
    } else {
      caret::train(formula, data = df.train, method = regressor,
                   metric = metric, trControl = tc,
                   tuneLength = tune_length,
                   preProcess = preprocess)
    },
    error = on_train_error
  )

  if (isTRUE(verbose)) {
    message("time elapsed : ", format(difftime(Sys.time(), started)))
    if (!is.null(fit)) {
      print(caret::getTrainPerf(fit))
    }
  }

  fit
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.