load("config.Rda")
# Example configurations for interactive runs (this block is never executed;
# pick one line and evaluate it manually to override the loaded config):
if (FALSE) {
  config = list(task = "spam", type = "mlr", learner = "classif_lrn_cboost1")
  #config = list(task = "4534", type = "oml", learner = "classif_lrn_xgboost")
  #config = list(task = "4534", type = "oml", learner = "classif_lrn_acwb_bin")
  config = list(task = "spam", type = "mlr", learner = "classif_lrn_cwb")
  config = list(task = "7592", type = "oml", learner = "classif_lrn_hcwb_notune_bin")
  config = list(task = "168335", type = "oml", learner = "classif_lrn_hcwb_notune")
  config = list(task = "albert", type = "omldata-albert", learner = "classif_lrn_hcwb_notune")
  config = list(task = "359994", type = "oml", learner = "classif_lrn_hcwb_notune")
  config = list(task = "9977", type = "oml", learner = "classif_lrn_hcwb_notune")
}
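# Sanity check (sketch, not part of the original setup): fail early if the
# loaded config does not provide the three fields used throughout this script.
stopifnot(is.list(config), all(c("task", "type", "learner") %in% names(config)))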
suppressMessages(library(mlr3))
suppressMessages(library(mlr3tuning))
suppressMessages(library(mlrintermbo))
suppressMessages(library(mlr3learners))
suppressMessages(library(mlr3extralearners))
suppressMessages(library(mlr3pipelines))
suppressMessages(library(paradox))
base_dir = here::here()
bm_dir = paste0(base_dir, "/benchmark/mlr-bmr/")
library(R6)
# Custom learner implementations used by the benchmark:
source(paste0(bm_dir, "learner-src/classifCompboost.R"))
source(paste0(bm_dir, "learner-src/classifInterpretML_reticulate.R"))
### Benchmark:
### ==========================================
seed = 31415L
# Exactly one of these switches selects the benchmark mode (tuning budget and
# resampling scheme); see the guard right below this block:
bm_test = FALSE
bm_small = FALSE
bm_full = TRUE
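# Guard (sketch, not in the original setup): with zero or several switches set,
# getResampleInstance() below would be undefined or silently overwritten.
stopifnot(sum(bm_test, bm_small, bm_full) == 1L)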
if (bm_test) {
  n_evals_per_dim = 40L
  getResampleInstance = function(task) {
    resampling_inner = rsmp("holdout")
    resampling_outer = rsmp("holdout", ratio = 0.2)
    resampling_outer$instantiate(task)
    return(list(inner = resampling_inner, outer = resampling_outer))
  }
}
if (bm_small) {
  n_evals_per_dim = 40L
  getResampleInstance = function(task) {
    resampling_inner = rsmp("cv", folds = 2)
    resampling_outer = rsmp("cv", folds = 2)
    resampling_outer$instantiate(task)
    return(list(inner = resampling_inner, outer = resampling_outer))
  }
}
if (bm_full) {
  n_evals_per_dim = 50L
  # The resampling scheme adapts to the task size: heavy repeated CV for small
  # tasks, plain CV for medium tasks, and a single holdout split for large tasks.
  getResampleInstance = function(task) {
    if (task$nrow <= 2000) {
      resampling_inner = rsmp("cv", folds = 3)
      resampling_outer = rsmp("repeated_cv", folds = 5, repeats = 10L)
    } else if (task$nrow <= 100000) {
      resampling_inner = rsmp("cv", folds = 3)
      resampling_outer = rsmp("cv", folds = 5)
    } else {
      resampling_inner = rsmp("holdout", ratio = 0.33)
      resampling_outer = rsmp("holdout", ratio = 0.33)
    }
    resampling_outer$instantiate(task)
    return(list(inner = resampling_inner, outer = resampling_outer))
  }
}
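# Usage sketch (assumption: the built-in mlr3 "spam" task): with the bm_full
# settings above, its 4601 rows fall into the middle branch, i.e. 3-fold inner
# CV nested in 5-fold outer CV.
if (FALSE) {
  res = getResampleInstance(tsk("spam"))
  print(res$inner)  # 3-fold CV, not yet instantiated
  print(res$outer)  # 5-fold CV, instantiated on the task
}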
# Target measure for tuning and evaluation:
measure_classif = msr("classif.auc")
# Optional scratch: compare training time of compboost with and without feature
# binning (kept inactive; requires updatePars() and cwb_pars from the sourced
# param-set files below):
if (FALSE) {
  library(mlr3oml)
  task = tsk("oml", task_id = 9977)
  classif_lrn_cwb = lrn("classif.compboost", id = "ps_cwb1", predict_type = "prob",
    optimizer = "cod", restart = FALSE)
  classif_lrn_cwb$param_set$values = updatePars(classif_lrn_cwb, cwb_pars)
  classif_lrn_cwb_bin = lrn("classif.compboost", id = "ps_cwb1_bin", predict_type = "prob",
    optimizer = "cod", restart = FALSE, bin_root = 2L)
  classif_lrn_cwb_bin$param_set$values = updatePars(classif_lrn_cwb_bin, cwb_pars)
  mb = microbenchmark::microbenchmark(
    nobin = classif_lrn_cwb$train(task),
    bin = classif_lrn_cwb_bin$train(task),
    times = 2L
  )
  mb
}
# Exploratory plot of boosting iterations (inactive: it depends on a data frame
# `diter` that is not created by this script and would otherwise stop the run):
if (FALSE) {
  library(ggplot2)
  library(dplyr)
  diter %>% filter(!grepl("notune", learner)) %>%
    ggplot(aes(x = iters_acwb, color = learner)) +
    geom_density() +
    facet_wrap(. ~ task)
}
source(paste0(bm_dir, "extract-archive.R"))
source(paste0(bm_dir, "tasks.R"))
source(paste0(bm_dir, "param-sets.R"))
source(paste0(bm_dir, "learners.R"))
source(paste0(bm_dir, "design.R"))
## Run benchmark:
## -----------------------
# Measures that are tracked:
msrs_classif = c("time_train", "time_predict", "time_both",
  "classif.auc", "classif.ce", "classif.bbrier")
cat("\n>> [", as.character(Sys.time()), "] BENCHMARK:\n", sep = "")
# Per-run log file (the directory log-files/ must already exist for sink() to work):
logfile = paste0(bm_dir, "log-files/mlr3log-", format(Sys.Date(), "%Y-%m-%d"),
  "-task", config$task, "-", config$learner, ".txt")
cat("\t>> [", as.character(Sys.time()), "] Starting benchmark\n", sep = "")
e = try({
  options(mlr3.debug = TRUE)
  # Maximum log verbosity ("trace" is the most verbose level and covers "info"):
  lgr::get_logger("mlr3")$set_threshold("trace")
  lgr::get_logger("mlr3tuning")$set_threshold("trace")
  lgr::get_logger("bbotk")$set_threshold("trace")

  # Divert console output into the log file while the benchmark runs:
  sink(logfile)
  time = proc.time()
  bmr = benchmark(design_classif, store_models = TRUE)
  time = proc.time() - time
  sink()
cat(" >> [", as.character(Sys.time()), "] Finish benchmark in ", time[3], " seconds\n", sep = "")
cat(" >> [", as.character(Sys.time()), "] Aggregate results and store data\n", sep = "")
bmr_arx = try(extractArchive(bmr), silent = TRUE)
#bmr_nest_arx = NULL
#if (! grepl("notune", config$learner)) bmr_nest_arx = extractNestedArchive(bmr)
lrners = as.data.table(bmr)$learner
bmr_tune_res = lapply(lrners, function(b) b$tuning_result)
bmr_aggr = bmr$aggregate(msrs(msrs_classif))
idx_aggr_rmv = which(names(bmr_aggr) %in% "resample_result")
bmr_aggr = as.data.frame(bmr_aggr)[, -idx_aggr_rmv]
bmr_score = bmr$score(msrs(msrs_classif))
idx_score_rmv = which(names(bmr_score) %in% c("task", "resampling", "learner", "prediction"))
bmr_score = as.data.frame(bmr_score)[, -idx_score_rmv]
if (nrow(design_classif) == 1) {
bmr_score$n_evals = design_classif$learner[[1]]$instance_args$terminator$param_set$values$n_evals
}
bmr_res = list(bmr_tune_res, bmr_aggr, bmr_score, archive = bmr_arx)#,
#nested_archive = bmr_nest_arx)
bm_file = paste0(bm_dir, "res-results/bmr-", format(Sys.Date(),
"%Y-%m-%d"), "-task", config$task, "-", config$learner, ".Rda")
save(bmr_res, file = bm_file)
cat(" >> [", as.character(Sys.time()), "] Save ", bm_file, "\n", sep = "")
rm(bmr, bmr_tune_res, bmr_aggr, bmr_res, bmr_score)
})
if ("try-error" %in% class(e)) {
cat(e)
}
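# Reload sketch (assumption: a successful run wrote bm_file above): the saved
# object is the list bmr_res holding the tuning results, aggregated scores,
# per-fold scores, and the extracted tuning archive.
if (FALSE) {
  load(bm_file)  # restores bmr_res
  str(bmr_res, max.level = 1)
}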