# scrap/test_surrogate.R

library(recipes)
library(magrittr)
library(tidytune)
library(rsample)
library(ParamHelpers)
library(MLmetrics)
library(knitr)

# Load the `attrition` dataset into the workspace.
data("attrition")

# Recode the Attrition column: 1 where it equals 'Yes', 0 otherwise.
attrition <- mutate(
  attrition,
  Attrition = ifelse(Attrition == 'Yes', 1, 0)
)

# 5-fold cross-validation splits of the recoded data.
resamples <- rsample::vfold_cv(attrition, v = 5)

# Preprocessing recipe, built one call at a time:
#   - Attrition gets the 'outcome' role, every other column the 'predictor' role;
#   - step_novel() then step_dummy() are applied to the nominal columns
#     (Attrition explicitly excluded);
#   - step_zv() is applied to all predictors.
rec <- recipe(attrition)
rec <- add_role(rec, Attrition, new_role = 'outcome')
rec <- add_role(rec, -Attrition, new_role = 'predictor')
rec <- step_novel(rec, all_nominal(), -Attrition)
rec <- step_dummy(rec, all_nominal(), -Attrition)
rec <- step_zv(rec, all_predictors())

# Parameter set passed as `param_set` to random_search() further down:
# one integer or numeric parameter per tuned name, each with lower/upper bounds.
xgboost_random_params <- makeParamSet(
  params = list(
    makeIntegerParam('max_depth', lower = 1, upper = 15),
    makeNumericParam('eta', lower = 0.01, upper = 0.1),
    makeNumericParam('gamma', lower = 0, upper = 5),
    makeIntegerParam('min_child_weight', lower = 1, upper = 100),
    makeNumericParam('subsample', lower = 0.25, upper = 0.9),
    makeNumericParam('colsample_bytree', lower = 0.25, upper = 0.9)
  )
)

# Reference time for the per-run "Elapsed" messages.
t1 <- proc.time()

# Run the random search once per seed and row-bind, for every run, a table
# with the run index and the mean logloss of each `param_id`.
results_random_search <- 
  map_dfr(
    1:2,
    
    function(i){
      
      cat(paste('Run', i, '\n'))
      
      # Seed with the run index so each run draws its own reproducible sample.
      set.seed(i)
      
      run_results <- 
        random_search(
          resamples = resamples, 
          recipe = rec, 
          param_set = xgboost_random_params, 
          train_predict_func = xgboost_classifier, 
          nrounds = 1000,
          early_stopping_rounds = 20,
          verbose = FALSE,
          eval_metric = 'logloss',
          n = 10,
          metrics = list(logloss = MLmetrics::LogLoss),
          verbosity = TRUE
        )
      
      # summarise() returns a new table; it must be assigned, otherwise the
      # per-param_id averaging is silently thrown away.
      run_summary <- 
        run_results %>%
        group_by(param_id) %>%
        summarise(logloss = mean(logloss))
      
      run_scores <- 
        bind_cols(
          run = rep(i, nrow(run_summary)),
          logloss = run_summary$logloss
        )
      
      cat(paste('Elapsed:', round((proc.time() - t1)['elapsed']/60, 1), 'mins.\n'))
      
      # The table has to be the last expression: cat() returns NULL, and a
      # function ending on it would give map_dfr() nothing to bind.
      run_scores
    }
  )

# Timestamp taken once all runs have finished.
t2 <- proc.time()
# artichaud1/cook documentation built on May 21, 2019, 9:23 a.m.