#' @title Logistic Regression Classification Learner
#'
#' @name mlr_learners_classif.log_reg
#'
#' @description
#' Classification via logistic regression.
#' Calls [stats::glm()] with `family` set to `"binomial"`.
#'
#' @section Internal Encoding:
#' Starting with \CRANpkg{mlr3} v0.5.0, the order of class labels is reversed prior to
#' model fitting to comply with the [stats::glm()] convention that the negative class is provided
#' as the first factor level.
#'
#' @section Initial parameter values:
#' - `model`:
#'   - Actual default: `TRUE`.
#'   - Adjusted default: `FALSE`.
#'   - Reason for change: Save some memory.
#'
#' @section Offset:
#' If a `Task` has a column with the role `offset`, it will automatically be used during training.
#' The offset is incorporated through the formula interface to ensure compatibility with [stats::glm()].
#' We add it to the model formula as `offset(<column_name>)` and also include it in the training data.
#' During prediction, the default behavior is to use the offset column from the test set (enabled by `use_pred_offset = TRUE`).
#' Otherwise, if the user sets `use_pred_offset = FALSE`, a zero offset is applied, effectively disabling the offset adjustment during prediction.
#'
#' @templateVar id classif.log_reg
#' @template learner
#'
#' @template section_contrasts
#'
#' @export
#' @template seealso_learner
#' @template example
LearnerClassifLogReg = R6Class("LearnerClassifLogReg",
  inherit = LearnerClassif,

  public = list(
    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
    initialize = function() {
      ps = ps(
        dispersion = p_uty(default = NULL, tags = "predict"),
        epsilon = p_dbl(default = 1e-8, tags = c("train", "control")),
        etastart = p_uty(tags = "train"),
        maxit = p_dbl(default = 25, tags = c("train", "control")),
        model = p_lgl(default = TRUE, tags = "train"),
        mustart = p_uty(tags = "train"),
        singular.ok = p_lgl(default = TRUE, tags = "train"),
        start = p_uty(default = NULL, tags = "train"),
        trace = p_lgl(default = FALSE, tags = c("train", "control")),
        x = p_lgl(default = FALSE, tags = "train"),
        y = p_lgl(default = TRUE, tags = "train"),
        use_pred_offset = p_lgl(default = TRUE, tags = "predict")
      )
      ps$set_values(use_pred_offset = TRUE)

      super$initialize(
        id = "classif.log_reg",
        param_set = ps,
        predict_types = c("response", "prob"),
        feature_types = c("logical", "integer", "numeric", "character", "factor", "ordered"),
        properties = c("weights", "twoclass", "offset"),
        packages = c("mlr3learners", "stats"),
        label = "Logistic Regression",
        man = "mlr3learners::mlr_learners_classif.log_reg"
      )
    }
  ),

  private = list(
    .train = function(task) {
      pv = self$param_set$get_values(tags = "train")
      pv$weights = get_weights(task, private)

      form = task$formula()
      data = task$data()

      if ("offset" %in% task$properties) {
        # use the formula interface, as `offset = ...` doesn't work during prediction
        offset_colname = task$col_roles$offset
        # re-write the formula: append `offset(<column_name>)` to the feature terms
        formula_terms = c(task$feature_names, paste0("offset(", offset_colname, ")"))
        # needs both the `env = ...` and `quote = "left"` arguments to work
        form = mlr3misc::formulate(lhs = task$target_names, rhs = formula_terms, env = environment(), quote = "left")
        # add the offset column to the training data
        data = data[, (offset_colname) := task$offset$offset][]
      }

      # glm() expects the first factor level to be the negative class, contrary
      # to the mlr3 convention that the positive class comes first
      tn = task$target_names
      data[[tn]] = swap_levels(data[[tn]])

      invoke(stats::glm,
        formula = form, data = data,
        family = "binomial", model = FALSE, .args = pv, .opts = opts_default_contrasts)
    },

    .predict = function(task) {
      pv = self$param_set$get_values(tags = "predict")
      lvls = c(task$negative, task$positive)
      newdata = ordered_features(task, self)

      if ("offset" %in% task$properties) {
        # add the offset column to the test data; a zero offset disables the adjustment
        offset_colname = task$col_roles$offset
        newdata[, (offset_colname) := if (isTRUE(pv$use_pred_offset)) task$offset$offset else 0]
      }

      p = unname(invoke(predict, object = self$model, newdata = newdata, type = "response", .args = pv))

      if (self$predict_type == "response") {
        list(response = ifelse(p < 0.5, lvls[1L], lvls[2L]))
      } else {
        list(prob = pvec2mat(p, lvls))
      }
    }
  )
)
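
# Illustrative sketch (not part of the learner) of the formula that `.train()`
# builds when a task has a column with the `offset` role; the target ("y"),
# feature ("x1", "x2"), and offset ("off") names are made up for the example.
# `mlr3misc::formulate()` collapses the terms into `y ~ x1 + x2 + offset(off)`,
# and the offset column is appended to the training data so that stats::glm()
# can resolve the `offset()` term. Wrapped in `if (FALSE)` so it never runs.
if (FALSE) {
  mlr3misc::formulate(
    lhs = "y",
    rhs = c("x1", "x2", "offset(off)"),
    quote = "left"
  )
}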
#' @include aaa.R
learners[["classif.log_reg"]] = LearnerClassifLogReg
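
# A minimal usage sketch, wrapped in `if (FALSE)` so it never runs when the
# package is built or loaded; it assumes the mlr3 ecosystem is attached and
# uses the bundled two-class "sonar" task purely for illustration.
if (FALSE) {
  library(mlr3)
  library(mlr3learners)

  task = tsk("sonar")
  learner = lrn("classif.log_reg", predict_type = "prob")

  learner$train(task, row_ids = 1:150)
  prediction = learner$predict(task, row_ids = 151:208)
  prediction$score(msr("classif.acc"))
}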