R/easy_fit.R

# Easy fit
#'
#'
#' This function performs the training of the chosen models to a group of outcomes (y)
#' @param dy outcomes
#' @param dx co-variables
#' @param models vector of models to be fitted. Uses  algortims names from Caret package.
#' @param nfolds  Number of folds to be build in crossvalidation
#' @param resample_ resample method 'boot', 'boot632', 'optimism_boot', 'boot_all', 'cv', 'repeatedcv', 'LOOCV', 'LGOCV','none', 'oob', 'timeslice', 'adaptive_cv', 'adaptive_boot', 'adaptive_LGOCV'
#' @param repeats repeats
#' @param cpu_cores  Number of CPU cores to be used in parallel processing
#' @param tune_length By default, this argument is the number of levels for each tuning parameters that should be generated by train.
#' @param tolerance_RFE tolerance
#' @param metric metric used to evaluate model fit. For numeric outcome ("RMSE", "Rsquared)
#' @param seeds  seeds
#' @param fun_RFE A list of functions for model fitting, prediction and variable importance
#' @param nfolds_RFE  nfolds to recursive feature elimination
#' @param repeats_RFE repeats to recursive feature elimination
#' @param sizes_RFE A numeric vector of integers corresponding to the number of features that should be retained
#' @param metric_RFE metric to recursive feature elimination
#' @param preprocess pre process
#' @param verbose show results and messages from processing functions
#' @keywords Train kappa
#' @importFrom caret pickSizeTolerance
#' @importFrom caret rfFuncs
#' @importFrom caret contr.ltfr
#' @importFrom caret dummyVars
#' @importFrom dplyr select tibble one_of
#' @importFrom stats as.formula na.omit predict
#' @author Elpidio Filho, \email{elpidio@ufv.br}
#' @details details
#' @export
#' @examples
#' \dontrun{
#' easy_fit(dy,dx,train,models = c("ridge", "rf", "cubist"), metric = "Rsquared", tolerance = 2)
#' }



easy_fit <- function(dy, dx,
                     nfolds= 10,
                     repeats = NA,
                     resample_ = 'cv',
                     metric = ifelse(is.factor(dy[, 1]), "Kappa", "Rsquared"),
                     cpu_cores = 7,
                     tune_length = 5,
                     fun_RFE = caret::rfFuncs,
                     repeats_RFE = 1,
                     metric_RFE = ifelse(is.factor(dy[, 1]), "Kappa", "Rsquared"),
                     nfolds_RFE = 5,
                     sizes_RFE = c(2:10, 15),
                     tolerance_RFE = 0,
                     models = c("ridge", "rf", "cubist", "pls",
                                "foba", "gbm", "glmboost"),
                     preprocess = NULL,
                     seeds = NULL,
                     verbose = TRUE) {


  dy = data.frame(dy)
  ny <- ncol(dy)

  list.model <- dplyr::tibble(
    var = character(ny), selec = character(ny),
    models = vector(mode = "list", length = ny))

  vm <- caret::dummyVars(~ ., data = dx)
  dxm <- predict(vm, dx) %>%
    data.frame()

  for (i in 1:ny) {
    maximize <- TRUE
    if (is.numeric(dy[, i])) {
      if (is.null(metric) | metric == "Rsquared") {
        maximize <- TRUE
      }
    } else {
      if (is.null(metric) | metric == "Kappa") {
        maximize <- TRUE
      }
    }
    el <- names(dy)[i]
    y <- dy[, i]
    xy <- data.frame(y, dxm)
    xy <- xy %>%
      na.omit() %>%
      data.frame()
    names(xy)[1] <- el
    set.seed(313)
    fit <- recursive_feature_elimination(
      xy,
      sizes = sizes_RFE,
      fun = fun_RFE,
      cpu_cores = cpu_cores,
      nfolds = nfolds_RFE,
      metric = metric_RFE,
      seeds = seeds,
      verbose = verbose
    )
    if (tolerance_RFE > 0) {
      tol5 <- caret::pickSizeTolerance(
        fit$results, metric = "Rsquared",
        tol = tolerance_RFE, maximize = maximize
      )
      vs <- c(el, fit$optVariables[1:tol5])
    } else {
      vs <- c(el, fit$optVariables)
    }

    dsel <- xy %>%
      dplyr::select(one_of(vs))

    fit.reg <- run_models(
      dsel,
      models = models,
      resample_ = resample_,
      preprocess = preprocess,
      metric = metric,
      cpu_cores = cpu_cores,
      tune_length = tune_length,
      nfolds = nfolds,
      repeats = repeats,
      seeds = seeds,
      verbose = verbose
    )

    list.model$var[i] <- el
    list.model$selec[i] <- list(vs)
    list.model$models[i] <- list(fit.reg)
  }
  return(list.model)
}
elpidiofilho/easyFit documentation built on May 28, 2019, 8:36 p.m.