knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  warning = FALSE,
  message = FALSE
)
This document showcases the two basic tidytune methods for hyperparameter optimization: grid search and random search.
We will be using the following libraries for this example and for most of the other examples in the package.
library(recipes)
library(magrittr)
library(tidytune)
library(rsample)
library(ParamHelpers)
library(MLmetrics) # for LogLoss
library(dplyr)
data("attrition") attrition %<>% mutate(Attrition = ifelse(Attrition == 'Yes', 1, 0)) resamples <- rsample::vfold_cv(attrition, v = 2) rec <- recipe(attrition) %>% add_role(Attrition, new_role = 'outcome') %>% add_role(-Attrition, new_role = 'predictor') %>% step_novel(all_nominal(), -Attrition) %>% step_dummy(all_nominal(), -Attrition) %>% step_zv(all_predictors())
library(xgboost)

xgboost_classif_score <- function(train_df, target_var, params, eval_df, ...){

  X_train <- train_df %>% select(-matches(target_var)) %>% as.matrix()
  y_train <- train_df[[target_var]]
  xgb_train_data <- xgb.DMatrix(X_train, label = y_train)

  X_eval <- eval_df %>% select(-matches(target_var)) %>% as.matrix()
  y_eval <- eval_df[[target_var]]
  xgb_eval_data <- xgb.DMatrix(X_eval, label = y_eval)

  model <- xgb.train(params = params,
                     data = xgb_train_data,
                     watchlist = list(train = xgb_train_data, eval = xgb_eval_data),
                     objective = 'binary:logistic',
                     verbose = FALSE,
                     ...)

  preds <- predict(model, xgb_eval_data)

  list(logloss = LogLoss(preds, y_eval),
       acc = Accuracy(ifelse(preds > 0.5, 1, 0), y_eval))

  # You can also return a simple vector score:
  # LogLoss(preds, y_eval)
}
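As the comment above notes, the scoring function may also return a single numeric value rather than a named list. A minimal sketch of such a variant (hypothetical, mirroring the signature above and reporting log loss only):

xgboost_logloss_score <- function(train_df, target_var, params, eval_df, ...){
  # Build the training and evaluation matrices exactly as above
  X_train <- train_df %>% select(-matches(target_var)) %>% as.matrix()
  xgb_train_data <- xgb.DMatrix(X_train, label = train_df[[target_var]])
  X_eval <- eval_df %>% select(-matches(target_var)) %>% as.matrix()
  y_eval <- eval_df[[target_var]]

  model <- xgb.train(params = params,
                     data = xgb_train_data,
                     objective = 'binary:logistic',
                     verbose = FALSE,
                     ...)

  # Return a single numeric score instead of a named list
  LogLoss(predict(model, xgb.DMatrix(X_eval)), y_eval)
}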
set.seed(123)

xgboost_param_grid <- expand.grid(eta = c(0.1, 0.05), max_depth = c(3:10))

results_grid_search <- grid_search(
  resamples = resamples,
  recipe = rec,
  param_grid = xgboost_param_grid,
  scoring_func = xgboost_classif_score,
  nrounds = 100,
  verbosity = FALSE
)

head(results_grid_search)
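The exact columns of results_grid_search depend on your scoring function; assuming it contains one row per fold with the parameter columns (eta, max_depth) and the metrics returned above (logloss, acc), the fold-level results can be averaged the same way as shown for random search below:

results_grid_search %>%
  group_by(eta, max_depth) %>%
  summarise(logloss = mean(logloss), accuracy = mean(acc)) %>%
  arrange(logloss)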
set.seed(123)

xgboost_random_params <- makeParamSet(
  makeIntegerParam('max_depth', lower = 1, upper = 15),
  makeNumericParam('eta', lower = 0.01, upper = 0.1),
  makeNumericParam('gamma', lower = 0, upper = 5),
  makeIntegerParam('min_child_weight', lower = 1, upper = 100),
  makeNumericParam('subsample', lower = 0.25, upper = 0.9),
  makeNumericParam('colsample_bytree', lower = 0.25, upper = 0.9)
)

results_random_search <- random_search(
  resamples = resamples,
  recipe = rec,
  param_set = xgboost_random_params,
  scoring_func = xgboost_classif_score,
  nrounds = 100,
  n = 4,
  verbosity = FALSE
)

head(results_random_search)
To get the performance of each parameter combination averaged across folds and to extract your optimal parameters, simply do:
library(dplyr)

results_random_search %>%
  group_by_at(getParamIds(xgboost_random_params)) %>%
  summarise(logloss = mean(logloss), accuracy = mean(acc)) %>%
  arrange(logloss, accuracy)
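From there, one way (not a tidytune function, just ordinary dplyr) to pull out the single best parameter combination is to keep the row with the lowest mean log loss:

best_params <- results_random_search %>%
  group_by_at(getParamIds(xgboost_random_params)) %>%
  summarise(logloss = mean(logloss), accuracy = mean(acc)) %>%
  ungroup() %>%
  arrange(logloss) %>%
  slice(1)

best_params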