```r
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  eval = FALSE
)
```
```r
library(mlr3)
library(mlr3learners.lightgbm)
library(paradox)
library(mlbench)
```
data("PimaIndiansDiabetes2") dataset = data.table::as.data.table(PimaIndiansDiabetes2) target_col = "diabetes" vec = setdiff(colnames(dataset), target_col) dataset = cbind( dataset[, c(target_col), with = F], lightgbm::lgb.convert_with_rules(dataset[, vec, with = F])[[1]] ) task = mlr3::TaskClassif$new( id = "pima", backend = dataset, target = target_col, positive = "pos" )
```r
set.seed(17)
split = list(
  train_index = sample(seq_len(task$nrow), size = 0.7 * task$nrow)
)
split$test_index = setdiff(seq_len(task$nrow), split$train_index)
```
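As another optional check (an addition to the original code), one can verify the split sizes and the class balance of the training rows via the task's `$truth()` method:

```r
# Sketch: check split sizes and the class distribution in the training set
length(split$train_index)
length(split$test_index)
table(task$truth(split$train_index))
```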
Initially, the `classif.lightgbm` learner needs to be instantiated:
```r
learner = mlr3::lrn("classif.lightgbm", objective = "binary")
```
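The learner's available hyperparameters can be listed via its parameter set (standard mlr3/paradox API):

```r
# List the IDs of all hyperparameters exposed by the learner
learner$param_set$ids()
```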
Here, we switch off lightgbm's internal parallelization by setting the parameter `num_threads = 1L`. Instead, we will later parallelize the resampling using the `future` package, as recommended by the mlr3 team.
```r
learner$param_set$values = mlr3misc::insert_named(
  learner$param_set$values,
  list(
    "learning_rate" = 0.1,
    "bagging_freq" = 5L,
    "seed" = 17L,
    "metric" = "auc",
    "num_threads" = 1
  )
)

# define the hyperparameter search space
tune_ps = ParamSet$new(list(
  ParamDbl$new("bagging_fraction", lower = 0.4, upper = 1),
  ParamInt$new("min_data_in_leaf", lower = 5, upper = 30)
))

# generate a grid of design points over the search space
design = paradox::generate_design_grid(
  tune_ps,
  param_resolutions = c(
    bagging_fraction = 2,
    min_data_in_leaf = 5
  )
)

# shuffle the order of the design points
set.seed(17)
shuffle = sample(seq_len(nrow(design$data)), size = nrow(design$data))
design$data = design$data[shuffle, ]
```
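With the resolutions chosen above, the grid contains 2 × 5 = 10 candidate configurations; the shuffled design can be printed to verify this:

```r
# Inspect the shuffled grid of candidate hyperparameter settings
design$data
nrow(design$data)
```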
```r
resampling = mlr3::rsmp("cv", folds = 5)
measure = mlr3::msr("classif.auc")
```
```r
# alternative: a plain grid search tuner
# grid_resolution = 2
# tuner = mlr3tuning::tnr("grid_search", resolution = grid_resolution, batch_size = 1)
tuner = mlr3tuning::tnr("design_points", design = design$data, batch_size = 1)
```
```r
# use one evaluation per design point
# n_iterations = (grid_resolution ^ tune_ps$length)
n_iterations = nrow(design$data)
n_iterations

terminator = mlr3tuning::term("evals", n_evals = n_iterations)
```
```r
at = mlr3tuning::AutoTuner$new(
  learner = learner,
  resampling = resampling,
  measures = measure,
  tune_ps = tune_ps,
  terminator = terminator,
  tuner = tuner
)
at
```
future::plan("multisession") set.seed(17) at$train(task, row_ids = split$train_index) future::plan("sequential")
```r
at$tuning_result
best = at$tuning_instance$best()
best$score(mlr3::msr("classif.auc"))
```
```r
mlr3viz::autoplot(at$tuning_instance$best(), type = "roc")
```
```r
at$tuning_instance$archive(unnest = "params")[
  , c("bagging_fraction", "min_data_in_leaf", "classif.auc")
]
```
```r
at$tuning_instance$result$params
```
```r
importance = at$learner$importance()
importance
```
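The importance values come back as a named numeric vector; a minimal sketch for visualizing them with base graphics:

```r
# Sort the gain-based importance and draw a horizontal bar plot
graphics::barplot(sort(importance), horiz = TRUE, las = 1)
```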
```r
predictions = at$predict(task, row_ids = split$test_index)
head(predictions$response)
```
```r
predictions$confusion
```
```r
predictions$score(mlr3::msr("classif.logloss"))
predictions$score(mlr3::msr("classif.auc"))
```
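Other binary classification measures shipped with mlr3 can be scored in the same way, for example accuracy:

```r
# Additional evaluation measure: classification accuracy
predictions$score(mlr3::msr("classif.acc"))
```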
```r
mlr3viz::autoplot(predictions)
```
```r
mlr3viz::autoplot(predictions, type = "roc")
```