#' @title Create an AutoTuner with a single line of code
#'
#' @description
#' Small utility function that creates an [AutoTuner][mlr3tuning::AutoTuner] for a given [learner][mlr3::Learner]. \cr
#' Uses the same interface as the [AutoTuner][mlr3tuning::AutoTuner], but provides defaults for all arguments. \cr
#' Parameter spaces are identical to the ones used in [mlr3automl].
#' @param learner (`character(1)` | [Learner][mlr3::Learner]) \cr
#' Learner inside the [AutoTuner][mlr3tuning::AutoTuner]. Parameter sets are predefined for
#' the `ranger`, `xgboost`, `liblinear`, `svm` and `cv_glmnet` learners, for both
#' classification and regression. Other learners obtain an empty parameter set.
#' @param resampling ([Resampling][mlr3::Resampling]) \cr
#' Resampling strategy used during tuning. Defaults to 10-fold cross-validation.
#' @param measure ([Measure][mlr3::Measure]) \cr
#' Performance measure to optimize during tuning. Defaults to the default measure
#' of the learner's task type.
#' @param terminator ([Terminator][bbotk::Terminator]) \cr
#' Termination criterion for the tuning. Defaults to a runtime budget of 60 seconds.
#' @param tuner ([Tuner][mlr3tuning::Tuner] | [TunerHyperband][mlr3hyperband::TunerHyperband]) \cr
#' Tuner used for optimization. Defaults to random search. Hyperband is supported
#' by creating a [`GraphLearner`][mlr3pipelines::GraphLearner]
#' with [`PipeOpSubsample`][mlr3pipelines::PipeOpSubsample].
#' @param num_effective_vars (`integer(1)`) \cr
#' Number of features in the dataset. Only required for the parameter
#' transformation of `mtry` in Random Forest, which is tuned over
#' `num_effective_vars^0.1` to `num_effective_vars^0.9` (e.g. with 10 features,
#' `mtry` ranges from roughly 1.3 to 7.9).
#' @return [AutoTuner][mlr3tuning::AutoTuner]
#' @examples
#' \donttest{
#' library(mlr3automl)
#' my_autotuner = create_autotuner(lrn("classif.svm"))
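#'
#' # further sketches (additionally require mlr3learners resp. mlr3hyperband):
#' # a random forest needs the feature count for the mtry transformation
#' rf_autotuner = create_autotuner(lrn("classif.ranger"), num_effective_vars = 10)
#' # hyperband wraps the learner in a GraphLearner with PipeOpSubsample
#' hb_autotuner = create_autotuner(lrn("classif.xgboost"), tuner = tnr("hyperband"))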
#' }
#' @export
create_autotuner = function(
  learner = lrn("classif.xgboost"), resampling = rsmp("cv", folds = 10),
  measure = NULL, terminator = trm("run_time", secs = 60), tuner = tnr("random_search"),
  num_effective_vars = NULL) {

  # accept a learner key (e.g. "classif.svm") as well as a Learner object
  if (is.character(learner)) {
    assert_character(learner, any.missing = FALSE, len = 1)
    learner = lrn(learner)
  }
  assert_learner(learner)
  assert_resampling(resampling)
  if (!is.null(measure)) assert_measure(measure)
  assert_int(num_effective_vars, null.ok = TRUE)

  # derive the task type ("classif" or "regr") and its default measure
  task_type = sub("\\..*", "", learner$id)
  default_msr = get(task_type, mlr_reflections$default_measures)
if (grepl("ranger", learner$id) && is.null(num_effective_vars)) {
warning("For tuning of Random Forest, the number of features in the dataset
should be provided. Defaulting to 10")
num_effective_vars = 10
}
if ("TunerHyperband" %in% class(tuner)) {
learner = GraphLearner$new(po("subsample") %>>% learner, id = learner$id)
using_hyperband = TRUE
} else {
using_hyperband = FALSE
}
params = default_params(learner_list = learner$id,
feature_counts = matrix(num_effective_vars, ncol = 2, nrow = 3, byrow = TRUE, dimnames = list(c("no_encoding", "one_hot_encoding", "impact_encoding"), c("numeric_cols", "all_cols"))),
using_hyperband,
using_prefixes = using_hyperband,
preprocessing = "none")
  return(AutoTuner$new(
    learner = learner,
    resampling = resampling,
    measure = measure %??% msr(default_msr),
    search_space = params,
    terminator = terminator,
    tuner = tuner))
}
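
# Usage sketch for the returned AutoTuner (the task, learner and terminator
# below are illustrative assumptions; guarded by `if (FALSE)` so it does not
# run when this file is sourced): it behaves like a regular Learner and runs
# the tuning during $train().
if (FALSE) {
  at = create_autotuner(lrn("classif.svm"), terminator = trm("evals", n_evals = 10))
  at$train(tsk("sonar"))
  at$predict(tsk("sonar"))
}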
remove_existing_edges = function(current_pipeline, existing_pipeop) {
  # remove source and destination edges of the existing node; Graph is an R6
  # object, so the edge table is modified in place
  current_pipeline$edges = current_pipeline$edges[
    !(current_pipeline$edges$src_id == existing_pipeop |
        current_pipeline$edges$dst_id == existing_pipeop), ]
}
get_predecessor_successor = function(current_pipeline, existing_pipeop) {
  # ids of the pipeops directly upstream and downstream of existing_pipeop
  predecessor = current_pipeline$edges[current_pipeline$edges$dst_id == existing_pipeop, ]$src_id
  successor = current_pipeline$edges[current_pipeline$edges$src_id == existing_pipeop, ]$dst_id
  return(c(predecessor = predecessor, successor = successor))
}
add_branching = function(current_pipeline, choices, id, columns) {
  # add a branching pipeop with one output channel per choice
  current_pipeline$add_pipeop(po("branch", options = choices, id = id))
  # add the new pipeops and connect each one to its branch output channel
  for (pipeop in choices) {
    add_pipeop(current_pipeline, pipeop, columns)
    current_pipeline$add_edge(id, pipeop, src_channel = pipeop)
  }
}
add_unbranching = function(current_pipeline, choices, id) {
  # collect the branches again: each choice feeds into its own input channel
  current_pipeline$add_pipeop(po("unbranch", options = length(choices), id = id))
  for (pipeop_idx in seq_along(choices)) {
    current_pipeline$add_edge(src_id = choices[pipeop_idx], dst_id = id, dst_channel = pipeop_idx)
  }
}
add_pipeop = function(current_pipeline, pipeop, columns) {
  if (!(pipeop %in% current_pipeline$ids())) {
    # ids follow the pattern "<prefix>.<key>"; the key after the last dot
    # selects the PipeOp to construct
    pipeop_name = sub(".*\\.", "", pipeop)
    if (pipeop_name != "nop") {
      current_pipeline$add_pipeop(po(pipeop_name, affect_columns = selector_type(columns), id = pipeop))
    } else {
      # "nop" passes data through unchanged and takes no affect_columns
      current_pipeline$add_pipeop(po(pipeop_name, id = pipeop))
    }
  }
}
replace_existing_node = function(current_pipeline, existing_pipeop, pipeop_choices, branching_prefix, columns) {
  # get predecessor and successor of the node to be replaced
  neighbor_nodes = get_predecessor_successor(current_pipeline, existing_pipeop)
  # remove source and destination edges of the existing node
  remove_existing_edges(current_pipeline, existing_pipeop)

  if (length(pipeop_choices) > 1) {
    # add new branching
    add_branching(current_pipeline,
      choices = pipeop_choices,
      id = paste0(branching_prefix, "branch"),
      columns = columns)
    # add unbranching pipeop and connect
    add_unbranching(current_pipeline,
      choices = pipeop_choices,
      id = paste0(branching_prefix, "unbranch"))
  } else {
    # a single choice needs no branching
    add_pipeop(current_pipeline, pipeop_choices[[1]], columns)
  }

  # connect the new subgraph to predecessor and successor
  if (!is.na(neighbor_nodes["successor"])) {
    if (length(pipeop_choices) > 1) {
      current_pipeline$add_edge(paste0(branching_prefix, "unbranch"), neighbor_nodes["successor"])
    } else {
      current_pipeline$add_edge(pipeop_choices[[1]], neighbor_nodes["successor"])
    }
  }
  if (!is.na(neighbor_nodes["predecessor"])) {
    if (length(pipeop_choices) > 1) {
      current_pipeline$add_edge(neighbor_nodes["predecessor"], paste0(branching_prefix, "branch"))
    } else {
      current_pipeline$add_edge(neighbor_nodes["predecessor"], pipeop_choices[[1]])
    }
  }

  # drop the replaced pipeop unless it is one of the new choices
  if (!(existing_pipeop %in% pipeop_choices)) {
    current_pipeline$pipeops[[existing_pipeop]] = NULL
  }
}
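
# Illustrative sketch of how the helpers above compose (the graph, pipeop ids
# and choices are assumptions for this example, not package conventions);
# guarded by `if (FALSE)` so it does not run when this file is sourced.
if (FALSE) {
  library(mlr3pipelines)
  graph = po("imputemedian") %>>% po("pca")
  # replace the single imputation node with a tunable branch over two methods
  replace_existing_node(graph,
    existing_pipeop = "imputemedian",
    pipeop_choices = c("numeric.imputemedian", "numeric.imputemean"),
    branching_prefix = "numeric.impute.",
    columns = "numeric")
  print(graph)
}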