R/scratch.R

# library(tidymodels)
# library(tidyverse)
# library(tidycrossval)
# library(extrarecipes)
#
# # load the example iris dataset
# data(iris)
#
# rec <- iris %>%
#   recipe(Species ~ .) %>%
#   step_scale(all_predictors(), id =  "scale") %>%
#   step_center(all_predictors(), id = "center") %>%
#   step_corr(all_predictors(), threshold = varying(), id = "correlation_filter")
#
# clf <- decision_tree(mode = "classification", tree_depth = varying()) %>%
#   set_engine("rpart")
#
# folds <- iris %>%
#   nested_cv(outside = vfold_cv(v = 10), inside = vfold_cv(v = 5))
#
# folds <- iris %>% vfold_cv(v = 5)
#
# threshold <- new_quant_param(
#   type = "double",
#   range = c(0.7, 1.0),
#   inclusive = c(TRUE, TRUE),
#   trans = NULL,
#   label = c(correlation_filter__threshold = "threshold"))
#
# params <- grid_regular(
#   tree_depth(),
#   threshold %>% range_set(c(0.8, 1.0)),
#   levels = 2L
# )
#
# clf <- pipeline(rec, clf)
#
# library(future)
# library(furrr)
# plan(multiprocess)
# scores <- folds %>%
#   tune(model_spec = clf,
#        param_grid = params,
#        scoring = accuracy,
#        maximize = TRUE,
#        .options = future_options(packages = c("recipes", "extrarecipes")))
#
# best_pars <- select_best(scores, maximize = TRUE)
#
# scores <- scores %>%
#   cross_validate(
#     model_spec = clf %>% update(!!!best_pars),
#     scoring = metric_set(accuracy, f_meas), keep_preds = T, keep_models = T,
#     .options = future_options(packages = c("recipes", "extrarecipes")))
# plan(sequential)
#
#
# # pipelines
#
# # create a preprocessing recipe and set the threshold parameter to varying
# # because we are going to tune this parameter like a model hyperparameter
# rec <- iris %>%
#  recipe(Species ~ .) %>%
#  step_scale(all_predictors()) %>%
#  step_center(all_predictors()) %>%
#  # step_corr(all_predictors(), threshold = varying()) %>%
#  step_infgain(all_predictors(), target = "Species", k = 0.5, id = "infogain")
#
# # create a model specification
# clf <- decision_tree(mode = "classification", tree_depth = varying()) %>%
#     set_engine("rpart")
#
# # create a pipeline
# clf <- pipeline(rec, clf)
#
# # create a nested_cv rsample object
# folds <- iris %>%
#     nested_cv(outside = vfold_cv(v = 2), inside = mc_cv(times = 1))
#
# # define a threshold tuning parameter that relates to the corr step
# corr__threshold <- new_quant_param(
#     type = "double",
#     range = c(0.7, 1.0),
#     inclusive = c(TRUE, TRUE),
#     trans = NULL,
#     label = c(corr__threshold = "corr__threshold"))
#
# params <- grid_regular(
#     tree_depth,
#     corr__threshold %>% range_set(c(0.8, 1.0)),
#     levels = 2L
#    )
#
# # perform hyperparameter tuning on the inner folds
# scores <- folds %>%
#   tune(clf, param_grid = params, scoring = accuracy, maximize = TRUE)
#
# # fit and score the outer folds
# scores <- scores %>%
#     cross_validate(clf, scoring = metric_set(accuracy, f_meas))
#
# # create a new model and recipe using the best overall scoring hyperparameters
# clf_tuned <- clf %>% set_args(tree_depth = select_best(scores)$tree_depth)
# rec_tuned <- rec %>% update_recipe(corr__threshold = select_best(scores)$corr__threshold)
#
# # refit the model and recipe on all of the training data
# rec_tuned <- prep(rec_tuned)
# clf_fitted <- fit(clf_tuned, formula(rec_tuned), data = iris)
stevenpawley/tidycrossval documentation built on Oct. 3, 2019, 3:32 p.m.