# Echo code in the rendered document; hide chunk messages and warnings.
knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE)
# library() stops immediately when a package is missing, whereas require()
# only returns FALSE and lets the script fail later with a less obvious error.
suppressPackageStartupMessages(library(tidyverse))
suppressPackageStartupMessages(library(pryr))
suppressPackageStartupMessages(library(recipes))

caret offers a unified modelling syntax for a variety of modelling packages and is compatible with recipes and rsample.

Tree

# Example data: the attrition data set exported by rsample.
data <- rsample::attrition

# Model attrition from three predictors.
formula <- Attrition ~ JobSatisfaction + Gender + MonthlyIncome

# Candidate complexity parameters for rpart: 2^-10, 2^-9, ..., 2^10.
grid <- expand.grid(cp = 2^(-10:10))

# Tune an rpart tree over the cp grid; no trControl is given, so caret's
# default resampling settings apply.
car <- caret::train(
  formula,
  data = data,
  method = "rpart",
  tuneGrid = grid
)

# Print the tuning summary.
car

Tree + recipes

A recipe can be used in place of the formula in the caret syntax.

# Wrap the formula in a recipe and add a step that centres every numeric
# variable.
rec <- recipes::step_center(
  recipes::recipe(formula, data),
  all_numeric()
)

# Same tuning run as with the formula interface, but driven by the recipe.
car <- caret::train(
  rec,
  data,
  method = "rpart",
  tuneGrid = grid
)

# Print the tuning summary.
car

Tree + rsample

# Build 10-fold cross-validation splits repeated 10 times and convert them to
# the index lists that caret::trainControl() accepts.
rs <- rsample::vfold_cv(data, v = 10, repeats = 10) %>%
  rsample::rsample2caret()

# Tune the tree on exactly those resamples and keep the hold-out predictions.
car <- caret::train(
  formula,
  data = data,
  method = "rpart",
  tuneGrid = grid,
  trControl = caret::trainControl(
    index = rs$index,
    indexOut = rs$indexOut,
    method = "cv",
    # TRUE is spelled out: T is an ordinary variable that can be reassigned.
    verboseIter = TRUE,
    savePredictions = TRUE
  )
)

# Which cp values and which resamples appear in the saved predictions?
unique(car$pred$cp)
unique(car$pred$Resample)

# Number of saved predictions per original data row (first few rows only).
tibble::as_tibble(car$pred) %>%
  dplyr::group_by(rowIndex) %>%
  dplyr::count() %>%
  head()

# Reference sizes: rows in the data and candidate values in the tuning grid.
nrow(data)
nrow(grid)

# Number of distinct data rows that have at least one saved prediction.
tibble::as_tibble(car$pred) %>%
  dplyr::group_by(rowIndex) %>%
  dplyr::count() %>%
  nrow()

# Memory footprint of the fitted object compared with the raw data.
pryr::object_size(car)
pryr::object_size(data)

Bringing it all together

Using all three packages, we fit two different kinds of models with two different formulas, organised in a modelling data frame.

# library() fails loudly if recipes is missing; require() would only return
# FALSE.
library(recipes)

data <- rsample::attrition

# Two competing model specifications.
formula1 <- Attrition ~ JobSatisfaction + Gender + MonthlyIncome
formula2 <- Attrition ~ JobSatisfaction + Gender + MonthlyIncome +
  DistanceFromHome

# One row per method, with its tuning grid stored in the list column `grid`.
# The unnamed-column / `.key` form of nest() is kept so the chunk still runs
# on tidyr releases older than 1.0; newer releases spell it nest(grid = cp).
grid_tree <- expand.grid(
  cp = 2^(-10:10),
  method = "rpart",
  stringsAsFactors = FALSE
) %>%
  as_tibble() %>%
  nest(cp, .key = "grid")

# NOTE: despite its name, this grid tunes `mtry` for method "rf"; the name is
# kept unchanged so existing references keep working.
grid_svm <- expand.grid(
  mtry = 1:3,
  method = "rf",
  stringsAsFactors = FALSE
) %>%
  as_tibble() %>%
  nest(mtry, .key = "grid")

# Cross every formula with every method/grid combination.
df <- expand.grid(
  formula = list(formula1, formula2),
  grid = list(grid_tree, grid_svm)
) %>%
  unnest(grid) %>%
  as_tibble()

# Attach to each row a recipe (scale, then centre, all numeric variables) and
# one 5-fold CV split; the single split object is recycled across all rows.
df <- df %>%
  mutate(
    rec = map(formula, recipe, data),
    rec = map(rec, step_scale, all_numeric()),
    rec = map(rec, step_center, all_numeric()),
    cv = list(rsample::vfold_cv(data, v = 5, repeats = 1)),
    cv_rs = map(cv, rsample::rsample2caret)
  )

# Memory footprint of the modelling data frame compared with the raw data.
pryr::object_size(df)
pryr::object_size(data)

Wrapper for caret::train()

#' Fit a caret model from a recipe and return its saved resampling predictions
#'
#' @param recipe Recipe passed as the first argument to `caret::train()`.
#' @param rsample List with elements `index` and `indexOut` that define the
#'   resamples handed to `caret::trainControl()`.
#' @param method Model method forwarded to `caret::train()`.
#' @param grid Tuning grid; coerced to a plain data.frame before use.
#' @param data Data set passed to `caret::train()` alongside the recipe.
#' @return A tibble built from the `pred` element of the fitted train object.
car <- function(recipe, rsample, method, grid, data) {
  tune_grid <- as.data.frame(grid)

  control <- caret::trainControl(
    index = rsample$index,
    indexOut = rsample$indexOut,
    method = "cv",
    verboseIter = TRUE,
    savePredictions = TRUE,
    classProbs = TRUE
  )

  # Named `fit` rather than `car` so the local does not shadow this function.
  fit <- caret::train(
    recipe,
    data,
    method = method,
    tuneGrid = tune_grid,
    trControl = control
  )

  # as_tibble() replaces the deprecated as.tibble().
  tibble::as_tibble(fit$pred)
}

Apply wrapper

# Run the wrapper once per row. The list elements are named after the
# wrapper's parameters, so pmap() matches them by name.
df <- df %>%
  mutate(
    preds = pmap(
      list(recipe = rec, rsample = cv_rs, method = method, grid = grid),
      car,
      data = data
    )
  )

df


erblast/oetteR documentation built on May 27, 2019, 12:11 p.m.