Nothing
## ----setup--------------------------------------------------------------------
# nolint start
library(mlexperiments)
library(mllrnrs)
## -----------------------------------------------------------------------------
library(mlbench)
data("PimaIndiansDiabetes2")
dataset <- PimaIndiansDiabetes2 |>
data.table::as.data.table() |>
na.omit()
feature_cols <- colnames(dataset)[1:8]
target_col <- "diabetes"
## -----------------------------------------------------------------------------
seed <- 123
if (isTRUE(as.logical(Sys.getenv("_R_CHECK_LIMIT_CORES_")))) {
# on cran
ncores <- 2L
} else {
ncores <- ifelse(
test = parallel::detectCores() > 4,
yes = 4L,
no = ifelse(
test = parallel::detectCores() < 2L,
yes = 1L,
no = parallel::detectCores()
)
)
}
options("mlexperiments.bayesian.max_init" = 10L)
## -----------------------------------------------------------------------------
data_split <- splitTools::partition(
y = dataset[, get(target_col)],
p = c(train = 0.7, test = 0.3),
type = "stratified",
seed = seed
)
train_x <- model.matrix(
~ -1 + .,
dataset[data_split$train, .SD, .SDcols = feature_cols]
)
train_y <- as.integer(dataset[data_split$train, get(target_col)]) - 1L
test_x <- model.matrix(
~ -1 + .,
dataset[data_split$test, .SD, .SDcols = feature_cols]
)
test_y <- as.integer(dataset[data_split$test, get(target_col)]) - 1L
## -----------------------------------------------------------------------------
fold_list <- splitTools::create_folds(
y = train_y,
k = 3,
type = "stratified",
seed = seed
)
## -----------------------------------------------------------------------------
# required learner arguments, not optimized
learner_args <- list(
family = "binomial",
type.measure = "class",
standardize = TRUE
)
# set arguments for predict function and performance metric,
# required for mlexperiments::MLCrossValidation and
# mlexperiments::MLNestedCV
predict_args <- list(type = "response")
performance_metric <- metric("auc")
performance_metric_args <- list(positive = "1")
return_models <- FALSE
# required for grid search and initialization of bayesian optimization
parameter_grid <- expand.grid(
alpha = seq(0, 1, 0.05)
)
# reduce to a maximum of 10 rows
if (nrow(parameter_grid) > 10) {
set.seed(123)
sample_rows <- sample(seq_len(nrow(parameter_grid)), 10, FALSE)
parameter_grid <- kdry::mlh_subset(parameter_grid, sample_rows)
}
# required for bayesian optimization
parameter_bounds <- list(
alpha = c(0., 1.)
)
optim_args <- list(
iters.n = ncores,
kappa = 3.5,
acq = "ucb"
)
## -----------------------------------------------------------------------------
tuner <- mlexperiments::MLTuneParameters$new(
learner = mllrnrs::LearnerGlmnet$new(
metric_optimization_higher_better = FALSE
),
strategy = "grid",
ncores = ncores,
seed = seed
)
tuner$parameter_grid <- parameter_grid
tuner$learner_args <- learner_args
tuner$split_type <- "stratified"
tuner$set_data(
x = train_x,
y = train_y
)
tuner_results_grid <- tuner$execute(k = 3)
#>
#> Parameter settings [==================>-----------------------------------------------------------------------------] 2/10 ( 20%)
#> Parameter settings [============================>-------------------------------------------------------------------] 3/10 ( 30%)
#> Parameter settings [=====================================>----------------------------------------------------------] 4/10 ( 40%)
#> Parameter settings [===============================================>------------------------------------------------] 5/10 ( 50%)
#> Parameter settings [=========================================================>--------------------------------------] 6/10 ( 60%)
#> Parameter settings [==================================================================>-----------------------------] 7/10 ( 70%)
#> Parameter settings [============================================================================>-------------------] 8/10 ( 80%)
#> Parameter settings [=====================================================================================>----------] 9/10 ( 90%)
#> Parameter settings [===============================================================================================] 10/10 (100%)
head(tuner_results_grid)
#> setting_id metric_optim_mean lambda alpha family type.measure standardize
#> 1: 1 0.1751825 0.094027663 0.70 binomial class TRUE
#> 2: 2 0.1788321 0.080262968 0.90 binomial class TRUE
#> 3: 3 0.1788321 0.101260561 0.65 binomial class TRUE
#> 4: 4 0.1751825 0.006282777 0.10 binomial class TRUE
#> 5: 5 0.1751825 0.110644301 0.45 binomial class TRUE
#> 6: 6 0.1751825 0.006551691 0.05 binomial class TRUE
## -----------------------------------------------------------------------------
tuner <- mlexperiments::MLTuneParameters$new(
learner = mllrnrs::LearnerGlmnet$new(
metric_optimization_higher_better = FALSE
),
strategy = "bayesian",
ncores = ncores,
seed = seed
)
tuner$parameter_grid <- parameter_grid
tuner$parameter_bounds <- parameter_bounds
tuner$learner_args <- learner_args
tuner$optim_args <- optim_args
tuner$split_type <- "stratified"
tuner$set_data(
x = train_x,
y = train_y
)
tuner_results_bayesian <- tuner$execute(k = 3)
#>
#> Registering parallel backend using 4 cores.
head(tuner_results_bayesian)
#> Epoch setting_id alpha gpUtility acqOptimum inBounds Elapsed Score metric_optim_mean lambda errorMessage family
#> 1: 0 1 0.70 NA FALSE TRUE 0.934 -0.1751825 0.1751825 0.094027663 NA binomial
#> 2: 0 2 0.90 NA FALSE TRUE 0.971 -0.1788321 0.1788321 0.080262968 NA binomial
#> 3: 0 3 0.65 NA FALSE TRUE 0.948 -0.1788321 0.1788321 0.101260561 NA binomial
#> 4: 0 4 0.10 NA FALSE TRUE 0.931 -0.1751825 0.1751825 0.006282777 NA binomial
#> 5: 0 5 0.45 NA FALSE TRUE 0.027 -0.1751825 0.1751825 0.110644301 NA binomial
#> 6: 0 6 0.05 NA FALSE TRUE 0.030 -0.1751825 0.1751825 0.006551691 NA binomial
#> type.measure standardize
#> 1: class TRUE
#> 2: class TRUE
#> 3: class TRUE
#> 4: class TRUE
#> 5: class TRUE
#> 6: class TRUE
## -----------------------------------------------------------------------------
validator <- mlexperiments::MLCrossValidation$new(
learner = mllrnrs::LearnerGlmnet$new(
metric_optimization_higher_better = FALSE
),
fold_list = fold_list,
ncores = ncores,
seed = seed
)
validator$learner_args <- tuner$results$best.setting[-1]
validator$predict_args <- predict_args
validator$performance_metric <- performance_metric
validator$performance_metric_args <- performance_metric_args
validator$return_models <- return_models
validator$set_data(
x = train_x,
y = train_y
)
validator_results <- validator$execute()
#>
#> CV fold: Fold1
#>
#> CV fold: Fold2
#>
#> CV fold: Fold3
head(validator_results)
#> fold performance alpha lambda family type.measure standardize
#> 1: Fold1 0.8773136 0.7568403 0.1047508 binomial class TRUE
#> 2: Fold2 0.8630354 0.7568403 0.1047508 binomial class TRUE
#> 3: Fold3 0.8304127 0.7568403 0.1047508 binomial class TRUE
## -----------------------------------------------------------------------------
validator <- mlexperiments::MLNestedCV$new(
learner = mllrnrs::LearnerGlmnet$new(
metric_optimization_higher_better = FALSE
),
strategy = "grid",
fold_list = fold_list,
k_tuning = 3L,
ncores = ncores,
seed = seed
)
validator$parameter_grid <- parameter_grid
validator$learner_args <- learner_args
validator$split_type <- "stratified"
validator$predict_args <- predict_args
validator$performance_metric <- performance_metric
validator$performance_metric_args <- performance_metric_args
validator$return_models <- return_models
validator$set_data(
x = train_x,
y = train_y
)
validator_results <- validator$execute()
#>
#> CV fold: Fold1
#>
#> Parameter settings [==================>-----------------------------------------------------------------------------] 2/10 ( 20%)
#> Parameter settings [============================>-------------------------------------------------------------------] 3/10 ( 30%)
#> Parameter settings [=====================================>----------------------------------------------------------] 4/10 ( 40%)
#> Parameter settings [===============================================>------------------------------------------------] 5/10 ( 50%)
#> Parameter settings [=========================================================>--------------------------------------] 6/10 ( 60%)
#> Parameter settings [==================================================================>-----------------------------] 7/10 ( 70%)
#> Parameter settings [============================================================================>-------------------] 8/10 ( 80%)
#> Parameter settings [=====================================================================================>----------] 9/10 ( 90%)
#> Parameter settings [===============================================================================================] 10/10 (100%)
#> CV fold: Fold2
#> CV progress [====================================================================>-----------------------------------] 2/3 ( 67%)
#>
#> Parameter settings [==================>-----------------------------------------------------------------------------] 2/10 ( 20%)
#> Parameter settings [============================>-------------------------------------------------------------------] 3/10 ( 30%)
#> Parameter settings [=====================================>----------------------------------------------------------] 4/10 ( 40%)
#> Parameter settings [===============================================>------------------------------------------------] 5/10 ( 50%)
#> Parameter settings [=========================================================>--------------------------------------] 6/10 ( 60%)
#> Parameter settings [==================================================================>-----------------------------] 7/10 ( 70%)
#> Parameter settings [============================================================================>-------------------] 8/10 ( 80%)
#> Parameter settings [=====================================================================================>----------] 9/10 ( 90%)
#> Parameter settings [===============================================================================================] 10/10 (100%)
#> CV fold: Fold3
#> CV progress [========================================================================================================] 3/3 (100%)
#>
#> Parameter settings [==================>-----------------------------------------------------------------------------] 2/10 ( 20%)
#> Parameter settings [============================>-------------------------------------------------------------------] 3/10 ( 30%)
#> Parameter settings [=====================================>----------------------------------------------------------] 4/10 ( 40%)
#> Parameter settings [===============================================>------------------------------------------------] 5/10 ( 50%)
#> Parameter settings [=========================================================>--------------------------------------] 6/10 ( 60%)
#> Parameter settings [==================================================================>-----------------------------] 7/10 ( 70%)
#> Parameter settings [============================================================================>-------------------] 8/10 ( 80%)
#> Parameter settings [=====================================================================================>----------] 9/10 ( 90%)
#> Parameter settings [===============================================================================================] 10/10 (100%)
head(validator_results)
#> fold performance lambda alpha family type.measure standardize
#> 1: Fold1 0.8741407 0.00093823 0.7 binomial class TRUE
#> 2: Fold2 0.8646219 0.09563561 0.7 binomial class TRUE
#> 3: Fold3 0.8648954 0.03175575 0.7 binomial class TRUE
## -----------------------------------------------------------------------------
validator <- mlexperiments::MLNestedCV$new(
learner = mllrnrs::LearnerGlmnet$new(
metric_optimization_higher_better = FALSE
),
strategy = "bayesian",
fold_list = fold_list,
k_tuning = 3L,
ncores = ncores,
seed = 312
)
validator$parameter_grid <- parameter_grid
validator$learner_args <- learner_args
validator$split_type <- "stratified"
validator$parameter_bounds <- parameter_bounds
validator$optim_args <- optim_args
validator$predict_args <- predict_args
validator$performance_metric <- performance_metric
validator$performance_metric_args <- performance_metric_args
validator$return_models <- TRUE
validator$set_data(
x = train_x,
y = train_y
)
validator_results <- validator$execute()
#>
#> CV fold: Fold1
#>
#> Registering parallel backend using 4 cores.
#>
#> CV fold: Fold2
#> CV progress [====================================================================>-----------------------------------] 2/3 ( 67%)
#>
#> Registering parallel backend using 4 cores.
#>
#> CV fold: Fold3
#> CV progress [========================================================================================================] 3/3 (100%)
#>
#> Registering parallel backend using 4 cores.
head(validator_results)
#> fold performance alpha lambda family type.measure standardize
#> 1: Fold1 0.8773136 0.9 0.08390173 binomial class TRUE
#> 2: Fold2 0.8767848 0.1 0.15109601 binomial class TRUE
#> 3: Fold3 0.8507631 0.5 0.11271736 binomial class TRUE
## -----------------------------------------------------------------------------
preds_glmnet <- mlexperiments::predictions(
object = validator,
newdata = test_x
)
## -----------------------------------------------------------------------------
perf_glmnet <- mlexperiments::performance(
object = validator,
prediction_results = preds_glmnet,
y_ground_truth = test_y,
type = "binary"
)
perf_glmnet
#> model performance auc prauc sensitivity specificity ppv npv tn tp fn fp tnr tpr fnr
#> 1: Fold1 0.7656605 0.7656605 0.5841923 0.3333333 0.8860759 0.5909091 0.7291667 70 13 26 9 0.8860759 0.3333333 0.6666667
#> 2: Fold2 0.7831873 0.7831873 0.5822704 0.3846154 0.8860759 0.6250000 0.7446809 70 15 24 9 0.8860759 0.3846154 0.6153846
#> 3: Fold3 0.7627394 0.7627394 0.5747411 0.3589744 0.8607595 0.5600000 0.7311828 68 14 25 11 0.8607595 0.3589744 0.6410256
#> fpr bbrier acc ce fbeta
#> 1: 0.1139241 0.1831706 0.7033898 0.2966102 0.4262295
#> 2: 0.1139241 0.1786208 0.7203390 0.2796610 0.4761905
#> 3: 0.1392405 0.1861673 0.6949153 0.3050847 0.4375000
#>
## ----include=FALSE------------------------------------------------------------
# nolint end
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.