# Global knitr chunk options: echo the code, but keep package messages and
# warnings out of the rendered document.
knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE)
# Attach the packages whose functions are called unqualified further down.
# `library()` is used instead of `require()` so that a missing package stops
# the document right here rather than failing later with an unrelated error.
suppressPackageStartupMessages(library(tidyverse))
suppressPackageStartupMessages(library(pryr))
suppressPackageStartupMessages(library(recipes))
caret offers a unified modelling syntax for a variety of modelling packages, and it is compatible with recipes and rsample.
# Baseline: fit a classification tree through caret's formula interface.
# NOTE(review): newer rsample releases appear to have moved `attrition` to the
# modeldata package -- confirm against the installed rsample version.
data <- rsample::attrition

formula <- Attrition ~ JobSatisfaction + Gender + MonthlyIncome

# Candidate values for rpart's complexity parameter: 2^-10, 2^-9, ..., 2^10.
grid <- expand.grid(cp = 2^(-10:10))

car <- caret::train(
  formula,
  data = data,
  method = "rpart",
  tuneGrid = grid
)

# Print the fitted caret object.
car
recipes
A recipe can be used in place of the formula in the caret syntax.
# Same model as before, but the preprocessing is described by a recipe that
# is handed to caret::train() in place of the formula.
rec <- recipes::recipe(formula, data) %>%
  recipes::step_center(recipes::all_numeric())

car <- caret::train(
  rec,
  data,
  method = "rpart",
  tuneGrid = grid
)

# Print the fitted caret object.
car
rsample
# Create the resamples with rsample (10-fold CV, repeated 10 times) and
# convert them to the index / indexOut lists that caret::trainControl() takes.
rs <- rsample::vfold_cv(data, v = 10, repeats = 10) %>%
  rsample::rsample2caret()

car <- caret::train(
  formula,
  data = data,
  method = "rpart",
  tuneGrid = grid,
  trControl = caret::trainControl(
    index = rs$index,
    indexOut = rs$indexOut,
    method = "cv",
    verboseIter = TRUE,
    savePredictions = TRUE
  )
)

# Which tuning values and which resamples show up in the saved predictions?
unique(car$pred$cp)
unique(car$pred$Resample)

# Number of saved predictions per original data row. Computed once and reused;
# count(rowIndex) gives the same counts as group_by() + count() without
# leaving the result grouped.
preds_per_row <- car$pred %>%
  as_tibble() %>%
  dplyr::count(rowIndex)

head(preds_per_row)

# Reference sizes for interpreting the counts above.
nrow(data)
nrow(grid)
nrow(preds_per_row)

# Memory footprint of the fitted object compared with the raw data.
pryr::object_size(car)
pryr::object_size(data)
Using all three packages to fit two different kinds of models with two different formulas in a modelling data frame
# Build a modelling data frame with one row per (formula, model type)
# combination, carrying the recipe, the resamples and the tuning grid for
# that row.
library(recipes)

data <- rsample::attrition

formula1 <- Attrition ~ JobSatisfaction + Gender + MonthlyIncome
formula2 <- Attrition ~ JobSatisfaction + Gender + MonthlyIncome +
  DistanceFromHome

# Each grid_* object is a tibble with a `method` column and a list-column
# `grid` holding the tuning values for that method.
# NOTE(review): `nest(cp, .key = "grid")` is the older tidyr calling
# convention (newer tidyr spells it `nest(grid = cp)`); it is kept as written
# so the chunk still runs on the tidyr version this document targets.
grid_tree <- expand.grid(
  cp = 2^(-10:10),
  method = "rpart",
  stringsAsFactors = FALSE
) %>%
  as_tibble() %>%
  nest(cp, .key = "grid")

# Tuning grid for the random forest (`method = "rf"`).
grid_rf <- expand.grid(
  mtry = 1:3,
  method = "rf",
  stringsAsFactors = FALSE
) %>%
  as_tibble() %>%
  nest(mtry, .key = "grid")

# Cross every formula with every model grid, then unnest so that `method`
# and `grid` become regular columns of the modelling data frame.
df <- expand.grid(
  formula = list(formula1, formula2),
  grid = list(grid_tree, grid_rf)
) %>%
  unnest(grid) %>%
  as_tibble()

# Per row: a recipe built from the formula with scale and center steps on the
# numeric columns, plus one shared 5-fold CV object and its caret-style
# index lists.
df <- df %>%
  mutate(
    rec = map(formula, recipe, data),
    rec = map(rec, step_scale, all_numeric()),
    rec = map(rec, step_center, all_numeric()),
    cv = list(rsample::vfold_cv(data, v = 5, repeats = 1)),
    cv_rs = map(cv, rsample::rsample2caret)
  )

# Memory footprint of the modelling data frame compared with the raw data.
pryr::object_size(df)
pryr::object_size(data)
caret::train()
#' Fit one caret model from a recipe and return its resampled predictions
#'
#' @param recipe A recipe passed as the first argument to `caret::train()`.
#' @param rsample A list with `index` and `indexOut` elements, forwarded to
#'   `caret::trainControl()`.
#' @param method The caret method string (passed to `train(method = )`).
#' @param grid Tuning grid; coerced with `as.data.frame()` before being
#'   passed as `tuneGrid`.
#' @param data The data set passed to `caret::train()`.
#' @return A tibble built from the `pred` element of the fitted object.
car <- function(recipe, rsample, method, grid, data) {
  tune_grid <- as.data.frame(grid)

  control <- caret::trainControl(
    index = rsample$index,
    indexOut = rsample$indexOut,
    method = "cv",
    verboseIter = TRUE,
    savePredictions = TRUE,
    classProbs = TRUE
  )

  # Named `fit` rather than `car` so the local does not shadow this function.
  fit <- caret::train(
    recipe,
    data,
    method = method,
    tuneGrid = tune_grid,
    trControl = control
  )

  as_tibble(fit$pred)
}
# Fit one model per row of the modelling data frame: pmap() walks the recipe,
# resample indices, method and grid columns in parallel and calls car() on
# each combination, always with the same `data`.
df <- df %>%
  mutate(
    preds = pmap(list(rec, cv_rs, method, grid), car, data = data)
  )

# Print the modelling data frame, now including the `preds` list-column.
df
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.