#' @title LearnerKnn R6 class
#'
#' @description
#' This learner is a wrapper around [class::knn()] to perform k-nearest
#' neighbor classification.
#'
#' @details
#' Optimization metric: classification error rate ([mlr3measures::ce()]).
#'
#' Can be used with
#' * [mlexperiments::MLTuneParameters]
#' * [mlexperiments::MLCrossValidation]
#' * [mlexperiments::MLNestedCV]
#'
#' Implemented methods:
#' * `$fit` To fit the model.
#' * `$predict` To predict new data with the model.
#' * `$cross_validation` To perform a grid search (hyperparameter
#' optimization).
#' * `$bayesian_scoring_function` To perform a Bayesian hyperparameter
#' optimization.
#'
#' For the two hyperparameter optimization strategies ("grid" and "bayesian"),
#' the parameter `metric_optimization_higher_better` of the learner is
#' set to `FALSE` by default as the classification error rate
#' ([mlr3measures::ce()]) is used as the optimization metric.
#'
#' @seealso [class::knn()], [mlr3measures::ce()]
#'
#' @examples
#' LearnerKnn$new()
#'
#' @export
#'
LearnerKnn <- R6::R6Class( # nolint
  classname = "LearnerKnn",
  inherit = mlexperiments::MLLearnerBase,
  public = list(
    #'
    #' @description
    #' Create a new `LearnerKnn` object.
    #'
    #' @details
    #' This learner is a wrapper around [class::knn()] to perform k-nearest
    #' neighbor classification. The following experiments are implemented:
    #' * [mlexperiments::MLTuneParameters]
    #' * [mlexperiments::MLCrossValidation]
    #' * [mlexperiments::MLNestedCV]
    #'
    #' For the two hyperparameter optimization strategies ("grid" and
    #' "bayesian"), the parameter `metric_optimization_higher_better` of the
    #' learner is set to `FALSE` by default as the classification error rate
    #' ([mlr3measures::ce()]) is used as the optimization metric.
    #'
    #' @seealso [class::knn()], [mlr3measures::ce()]
    #'
    #' @examples
    #' LearnerKnn$new()
    #'
    initialize = function() {
      if (!requireNamespace("class", quietly = TRUE)) {
        stop(
          paste0(
            "Package \"class\" must be installed to use ",
            "'learner = \"LearnerKnn\"'."
          ),
          call. = FALSE
        )
      }
      super$initialize(
        metric_optimization_higher_better = FALSE # classification error
      )
      self$environment <- "mlexperiments"
      self$cluster_export <- knn_ce()
      private$fun_optim_cv <- knn_optimization
      private$fun_fit <- knn_fit
      private$fun_predict <- knn_predict
      private$fun_bayesian_scoring_function <- knn_bsF
    }
  )
)
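
# Usage sketch (illustrative only; the surrounding API calls follow the
# mlexperiments vignettes and should be treated as assumptions, not as part
# of this file): a grid search over `k` with 3-fold cross-validation on the
# built-in `iris` data.
#
#   tuner <- mlexperiments::MLTuneParameters$new(
#     learner = LearnerKnn$new(),
#     strategy = "grid",
#     ncores = 1L,
#     seed = 123L
#   )
#   tuner$parameter_grid <- expand.grid(k = c(3L, 5L, 7L))
#   tuner$split_type <- "stratified"
#   tuner$set_data(
#     x = data.matrix(iris[, 1:4]),
#     y = iris$Species
#   )
#   tuner_results <- tuner$execute(k = 3)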
knn_ce <- function() {
  # names of the objects that must be exported to the parallel workers
  c("knn_optimization", "knn_fit", "knn_predict", "metric")
}

knn_bsF <- function(...) { # nolint
  # `x`, `y`, `seed`, and `method_helper` are not arguments: they are
  # expected to be available in the environment in which this function is
  # evaluated (mlexperiments makes them available when running the
  # Bayesian search)
  params <- list(...)
  # call knn_optimization here with ncores = 1, since the Bayesian search
  # is parallelized already / "FUN is fitted n times in m threads"
  set.seed(seed)
  bayes_opt_knn <- knn_optimization(
    x = x,
    y = y,
    params = params,
    fold_list = method_helper$fold_list,
    ncores = 1L, # important, as bayesian search is already parallelized
    seed = seed
  )
  ret <- kdry::list.append(
    list("Score" = bayes_opt_knn$metric_optim_mean),
    bayes_opt_knn
  )
  return(ret)
}
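
# Illustrative sketch (an assumption, not package code): because `knn_bsF()`
# resolves `x`, `y`, `seed`, and `method_helper` from its evaluation
# environment, it can be exercised interactively by defining those objects
# first. This assumes mlexperiments is attached (so that `metric()` and
# `metric_types_helper()` are found) and uses splitTools, which returns a
# named list of training-row indices, to build the fold list:
#
#   library(mlexperiments)
#   x <- data.matrix(iris[, 1:4])
#   y <- iris$Species
#   seed <- 123L
#   method_helper <- list(
#     fold_list = splitTools::create_folds(y, k = 3, seed = seed)
#   )
#   knn_bsF(k = 5L)  # returns a list whose "Score" is the mean CV error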
knn_optimization <- function(x, y, params, fold_list, ncores, seed) {
  stopifnot(
    "`params` must be a list" = is.list(params),
    "One item of `params` must be `k`" = "k" %in% names(params)
  )
  # initialize a data.table to store the per-fold results; the column name
  # must match the one appended below and averaged at the end
  results_df <- data.table::data.table(
    "fold" = character(0),
    "validation_metric" = numeric(0)
  )
  # drop a user-supplied `test` set, as it is constructed explicitly for
  # each fold below
  params[["test"]] <- NULL

  # loop over the folds
  for (fold in names(fold_list)) {

    # get row-ids of the current fold
    train_idx <- fold_list[[fold]]

    # train the model for this cv-fold
    args <- kdry::list.append(
      list(
        x = kdry::mlh_subset(x, train_idx),
        test = kdry::mlh_subset(x, -train_idx),
        y = kdry::mlh_subset(y, train_idx),
        use.all = FALSE,
        ncores = ncores,
        seed = seed
      ),
      params
    )
    set.seed(seed)
    cvfit <- do.call(knn_fit, args)

    # evaluate the held-out observations with the optimization metric,
    # the classification error rate
    FUN <- metric("ce") # nolint
    perf_args <- list(
      predictions = knn_predict(
        model = cvfit,
        newdata = kdry::mlh_subset(x, -train_idx),
        ncores = ncores,
        type = "response"
      ),
      ground_truth = kdry::mlh_subset(y, -train_idx)
    )
    perf <- metric_types_helper(
      FUN = FUN,
      y = y,
      perf_args = perf_args
    )

    results_df <- data.table::rbindlist(
      l = list(
        results_df,
        list(
          "fold" = fold,
          "validation_metric" = perf
        )
      ),
      fill = TRUE
    )
  }

  res <- list(
    "metric_optim_mean" = mean(results_df$validation_metric)
  )
  return(res)
}
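
# Illustrative sketch of calling knn_optimization() directly (an assumption:
# requires mlexperiments to be attached so that `metric()` and
# `metric_types_helper()` are found; the fold list is built with
# splitTools::create_folds(), which returns training-row indices as used
# above):
#
#   fold_list <- splitTools::create_folds(iris$Species, k = 3, seed = 123L)
#   knn_optimization(
#     x = data.matrix(iris[, 1:4]),
#     y = iris$Species,
#     params = list(k = 5L),
#     fold_list = fold_list,
#     ncores = 1L,
#     seed = 123L
#   )
#   # $metric_optim_mean: the mean classification error across the 3 folds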
knn_fit <- function(x, y, ncores, seed, ...) {
  # `ncores` is part of the uniform learner signature but is not used here,
  # as class::knn() is single-threaded
  kwargs <- list(...)
  stopifnot("`k` is a required argument" = "k" %in% names(kwargs))
  args <- kdry::list.append(
    list(
      train = x,
      cl = y
    ),
    kwargs
  )
  # always request the vote proportions so that `knn_predict()` can serve
  # `type = "prob"`
  args$prob <- TRUE
  set.seed(seed)
  fit <- do.call(class::knn, args)
  return(fit)
}
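
# Illustrative sketch (assumes the built-in `iris` data): class::knn() is a
# lazy learner, so the "model" returned by knn_fit() is already the factor
# of predicted classes for `test`, with the vote proportions of the winning
# class stored in its "prob" attribute.
#
#   model <- knn_fit(
#     x = data.matrix(iris[-(1:15), 1:4]),
#     y = iris$Species[-(1:15)],
#     ncores = 1L,
#     seed = 123L,
#     k = 5L,
#     test = data.matrix(iris[1:15, 1:4])
#   )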
knn_predict <- function(model, newdata, ncores, ...) {
  # `newdata` and `ncores` are part of the uniform learner signature; the
  # predictions were already computed for the `test` data when fitting
  kwargs <- list(...)
  stopifnot("`type` is a required argument" = "type" %in% names(kwargs))
  if (kwargs$type == "response") {
    # class::knn() directly returns the predicted classes for the test data
    return(model)
  } else if (kwargs$type == "prob") {
    # there is no knn-model but the probabilities predicted for the test
    # data, stored as an attribute of the predictions
    return(attr(model, "prob"))
  }
  stop("`type` must be either \"response\" or \"prob\".", call. = FALSE)
}
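
# Continuing the knn_fit() sketch above (illustrative only): `newdata` is
# ignored by design, as the hold-out predictions are fixed at fit time.
#
#   knn_predict(model = model, newdata = NULL, ncores = 1L,
#               type = "response")
#   knn_predict(model = model, newdata = NULL, ncores = 1L, type = "prob")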