knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  eval = FALSE
)
library(mlr3)
library(mlr3learners.lightgbm)
library(paradox)
library(mlbench)

Load the dataset

data("PimaIndiansDiabetes2")
dataset = data.table::as.data.table(PimaIndiansDiabetes2)
target_col = "diabetes"

vec = setdiff(colnames(dataset), target_col)

dataset = cbind(
  dataset[, c(target_col), with = FALSE],
  lightgbm::lgb.convert_with_rules(dataset[, vec, with = FALSE])[[1]]
)
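
lgb.convert_with_rules() returns the converted data as its first list element (the second element holds the conversion rules). To verify that all feature columns were converted as expected, the resulting backend data can be inspected (an optional check, not part of the original workflow):

# optional: inspect the converted backend data
str(dataset)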

task = mlr3::TaskClassif$new(
  id = "pima",
  backend = dataset,
  target = target_col,
  positive = "pos"
)
set.seed(17)
split = list(
  train_index = sample(seq_len(task$nrow), size = 0.7 * task$nrow)
)
split$test_index = setdiff(seq_len(task$nrow), split$train_index)
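
As a quick, optional sanity check, you can confirm that the two index sets are disjoint and together cover all rows of the task:

# optional sanity check: indices should be disjoint and cover all rows
stopifnot(
  length(intersect(split$train_index, split$test_index)) == 0,
  length(split$train_index) + length(split$test_index) == task$nrow
)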

Instantiate the lightgbm learner

First, the classif.lightgbm learner needs to be instantiated:

learner = mlr3::lrn("classif.lightgbm", objective = "binary")
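
To see which hyperparameters the learner exposes, you can list the IDs of its parameter set (an optional check using the paradox ParamSet API):

# optional: list the available hyperparameter names
learner$param_set$ids()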

Configure the learner

Here, we switch off the internal parallelization of the lightgbm learner by setting the parameter num_threads = 1L. Instead, we will later parallelize the resampling using the future package, as recommended by the mlr3 team.

learner$param_set$values = mlr3misc::insert_named(
  learner$param_set$values,
  list(
    "learning_rate" = 0.1,
    "bagging_freq" = 5L,
    "seed" = 17L,
    "metric" = "auc",
    "num_threads" = 1L
  )
)
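
The configured values can be printed to confirm that the settings took effect (optional):

# optional: confirm the configured hyperparameter values
learner$param_set$values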

tune_ps = ParamSet$new(list(
  ParamDbl$new("bagging_fraction", lower = 0.4, upper = 1),
  ParamInt$new("min_data_in_leaf", lower = 5, upper = 30)
))

# design_points
design = paradox::generate_design_grid(
  tune_ps,
  param_resolutions = c(
    bagging_fraction = 2,
    min_data_in_leaf = 5
  ))

# shuffle order of design
set.seed(17)
shuffle = sample(seq_len(nrow(design$data)), size = nrow(design$data))
design$data = design$data[shuffle, ]
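
With the chosen resolutions, the design contains 2 x 5 = 10 candidate configurations. It can be inspected before tuning (optional):

# optional: inspect the (shuffled) grid of candidate configurations
design$data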

Create the resampling strategy and the measure

resampling = mlr3::rsmp("cv", folds = 5)
measure = mlr3::msr("classif.auc")

Create the tuner

# grid_resolution = 2
# tuner = mlr3tuning::tnr("grid_search", resolution = grid_resolution, batch_size = 1)

tuner = mlr3tuning::tnr("design_points", design = design$data, batch_size = 1)

Create the terminator

# using a specific number of iterations
# n_iterations = (grid_resolution ^ tune_ps$length)

n_iterations = nrow(design$data)

n_iterations
terminator = mlr3tuning::term("evals", n_evals = n_iterations)

Instantiate the AutoTuner instance

at = mlr3tuning::AutoTuner$new(
  learner = learner,
  resampling = resampling,
  measures = measure,
  tune_ps = tune_ps,
  terminator = terminator,
  tuner = tuner
)
at

Train the tuner

future::plan("multisession")
set.seed(17)
at$train(task, row_ids = split$train_index)
future::plan("sequential")

Evaluate the best model

at$tuning_result
best = at$tuning_instance$best()
best$score(mlr3::msr("classif.auc"))
mlr3viz::autoplot(at$tuning_instance$best(), type = "roc")
at$tuning_instance$archive(unnest = "params")[, c("bagging_fraction", "min_data_in_leaf", "classif.auc")]

Best parameters

at$tuning_instance$result$params

Importance

importance = at$learner$importance()
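
The importance() method returns a named numeric vector of feature importances. A minimal sketch for visualizing it with base R graphics (assuming the usual mlr3 convention of a named, decreasingly sorted vector):

# optional: visualize the feature importances as a bar plot
barplot(sort(importance, decreasing = TRUE), las = 2)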

Predict test data with best model

predictions = at$predict(task, row_ids = split$test_index)
head(predictions$response)
predictions$confusion
predictions$score(mlr3::msr("classif.logloss"))
predictions$score(mlr3::msr("classif.auc"))
mlr3viz::autoplot(predictions)

ROC

mlr3viz::autoplot(predictions, type = "roc")
