# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
#' @title Store data in RAM
#'
#' @description
#' This data container stores a vector as-is in RAM and makes it
#' accessible to [Compboost].
#'
#' @format [S4] object.
#' @name InMemoryData
#'
#' @section Usage:
#' \preformatted{
#' InMemoryData$new()
#' InMemoryData$new(data_mat, data_identifier)
#' InMemoryData$new(data_mat, data_identifier, use_sparse)
#' }
#'
#' @param data_mat (`matrix()`)\cr
#' The data matrix.
#' @param data_identifier (`character(1)`)\cr
#' Data id, e.g. a feature name.
#' @param use_sparse (`logical(1)`)\cr
#' Whether the data matrix should be stored as a sparse matrix.
#'
#' @section Fields:
#' This class doesn't contain public fields.
#'
#' @section Methods:
#' * `$getData()`: `() -> matrix()`
#' * `$getIdentifier()`: `() -> character(1)`
#' @template section-data-base-methods
#'
#' @examples
#' # Sample data:
#' data_mat = cbind(rnorm(10))
#'
#' # Create new data object:
#' data_obj = InMemoryData$new(data_mat, "my_data_name")
#'
#' # Get data and identifier:
#' data_obj$getData()
#' data_obj$getIdentifier()
#'
#' @export InMemoryData
NULL
#' @title Data class for categorical variables
#'
#' @description
#' [CategoricalDataRaw] creates a data object which can be used as a source
#' object to instantiate categorical base learners.
#'
#' @format [S4] object.
#' @name CategoricalDataRaw
#'
#' @section Usage:
#' \preformatted{
#' CategoricalDataRaw$new(x, data_identifier)
#' }
#'
#' @param x (`character()`)\cr
#' Categorical vector.
#' @param data_identifier (`character(1)`)\cr
#' Data id, e.g. a feature name.
#'
#' @section Fields:
#' This class doesn't contain public fields.
#'
#' @section Methods:
#' * `$getData()`: `() -> stop()`\cr Throws error because no representation is calculated.
#' * `$getRawData()`: `() -> character()`
#' * `$getIdentifier()`: `() -> character(1)`
#' @template section-data-base-methods
#'
#' @examples
#' # Sample data:
#' x = sample(c("one","two", "three"), 20, TRUE)
#'
#' # Create new data object:
#' data_obj = CategoricalDataRaw$new(x, "cat_raw")
#'
#' # Get data and identifier:
#' data_obj$getRawData()
#' data_obj$getIdentifier()
#'
#' @export CategoricalDataRaw
NULL
#' @title Polynomial base learner
#'
#' @description
#' [BaselearnerPolynomial] creates a polynomial base learner object.
#' The base learner takes one feature and calculates the polynomials (with
#' intercept) \eqn{1 + x + x^2 + \dots + x^d} for a given degree \eqn{d}.
#'
#' @format [S4] object.
#' @name BaselearnerPolynomial
#'
#' @section Usage:
#' \preformatted{
#' BaselearnerPolynomial$new(data_source, list(degree, intercept, bin_root))
#' BaselearnerPolynomial$new(data_source, blearner_type, list(degree, intercept, bin_root))
#' }
#'
#' @param data_source ([InMemoryData]) \cr
#' Data object which contains the raw data (see \code{?InMemoryData}).
#' @param blearner_type (`character(1)`) \cr
#' Type of the base learner (if not specified, `blearner_type = paste0("poly", d)` is used).
#' The unique id of the base learner is defined by appending `blearner_type` to
#' the feature name: `paste0(data_source$getIdentifier(), "_", blearner_type)`.
#' @param degree (`integer(1)`)\cr
#' Polynomial degree.
#' @param intercept (`logical(1)`)\cr
#' Whether an intercept should be included.
#' @template param-bin_root
#'
#' @section Fields:
#' This class doesn't contain public fields.
#'
#' @section Methods:
#' * `$summarizeFactory()`: `() -> ()`
#' * `$transformData(newdata)`: `list(InMemoryData) -> matrix()`
#' * `$getMeta()`: `() -> list()`
#' @template section-bl-base-methods
#'
#' @examples
#' # Sample data:
#' x = runif(100)
#' y = 1 + 2*x + rnorm(100, 0, 0.2)
#' dat = data.frame(x, y)
#'
#' # S4 wrapper
#'
#' # Create new data object, a matrix is required as input:
#' data_mat = cbind(x)
#' data_source = InMemoryData$new(data_mat, "my_data_name")
#'
#' # Create new linear base learner factory:
#' bl_lin = BaselearnerPolynomial$new(data_source,
#' list(degree = 1))
#' bl_cub = BaselearnerPolynomial$new(data_source,
#' list(intercept = FALSE, degree = 3, bin_root = 2))
#'
#' # Get the transformed data:
#' head(bl_lin$getData())
#' head(bl_cub$getData())
#'
#' # Summarize factory:
#' bl_lin$summarizeFactory()
#'
#' # Transform "new data":
#' newdata = list(InMemoryData$new(cbind(rnorm(5)), "my_data_name"))
#' bl_lin$transformData(newdata)
#' bl_cub$transformData(newdata)
#'
#' # R6 wrapper
#'
#' cboost_lin = Compboost$new(dat, "y")
#' cboost_lin$addBaselearner("x", "lin", BaselearnerPolynomial, degree = 1)
#' cboost_lin$train(100, 0)
#'
#' cboost_cub = Compboost$new(dat, "y")
#' cboost_cub$addBaselearner("x", "cubic", BaselearnerPolynomial,
#' intercept = FALSE, degree = 3, bin_root = 2)
#' cboost_cub$train(100, 0)
#'
#' # Access base learner directly from the API (n = sqrt(100) = 10 with binning):
#' head(cboost_lin$baselearner_list$x_lin$factory$getData())
#' cboost_cub$baselearner_list$x_cubic$factory$getData()
#'
#' gg_lin = plotPEUni(cboost_lin, "x")
#' gg_cub = plotPEUni(cboost_cub, "x")
#'
#' library(ggplot2)
#' library(patchwork)
#'
#' (gg_lin | gg_cub) &
#' geom_point(data = dat, aes(x = x, y = y - c(cboost_lin$offset)), alpha = 0.2)
#' @export BaselearnerPolynomial
NULL
#' @title Non-parametric B or P-spline base learner
#'
#' @description
#' [BaselearnerPSpline] creates a spline base learner object.
#' The object calculates the B-spline basis functions and in the case
#' of P-splines also the penalty. Instead of defining the penalty
#' term directly, one should consider to restrict the flexibility by
#' setting the degrees of freedom.
#'
#' @format [S4] object.
#' @name BaselearnerPSpline
#'
#' @section Usage:
#' \preformatted{
#' BaselearnerPSpline$new(data_source, list(degree, n_knots, penalty, differences, df, bin_root))
#' BaselearnerPSpline$new(data_source, blearner_type, list(degree, n_knots, penalty, differences, df, bin_root))
#' }
#'
#' @param data_source ([InMemoryData]) \cr
#' Data object which contains the raw data (see `?InMemoryData`).
#' @param blearner_type (`character(1)`) \cr
#' Type of the base learner (if not specified, `blearner_type = "spline"` is used).
#' The unique id of the base learner is defined by appending `blearner_type` to
#' the feature name: `paste0(data_source$getIdentifier(), "_", blearner_type)`.
#' @param degree (`integer(1)`)\cr
#' Degree of the piecewise polynomial (default `degree = 3` for cubic splines).
#' @param n_knots (`integer(1)`)\cr
#' Number of inner knots (default `n_knots = 20`). The inner knots are expanded by
#' `degree - 1` additional knots at each side to prevent unstable behavior on the edges.
#' @param penalty (`numeric(1)`)\cr
#' Penalty term for P-splines (default `penalty = 2`). Set to zero for B-splines.
#' @param differences (`integer(1)`)\cr
#' The number of differences that are penalized. A higher value leads to smoother curves.
#' @template param-df
#' @template param-bin_root
#'
#' @section Fields:
#' This class doesn't contain public fields.
#'
#' @section Methods:
#' * `$summarizeFactory()`: `() -> ()`
#' * `$transformData(newdata)`: `list(InMemoryData) -> matrix()`
#' * `$getMeta()`: `() -> list()`
#' @template section-bl-base-methods
#'
#' @section Details:
#' For performance reasons, the data matrix is instantiated as a transposed
#' sparse matrix. The member function `$getData()` accounts for that, while
#' `$transformData()` returns the raw data matrix as a p x n matrix.
#'
#' @examples
#' # Sample data:
#' x = runif(100, 0, 10)
#' y = sin(x) + rnorm(100, 0, 0.2)
#' dat = data.frame(x, y)
#'
#' # S4 wrapper
#'
#' # Create new data object, a matrix is required as input:
#' data_mat = cbind(x)
#' data_source = InMemoryData$new(data_mat, "my_data_name")
#'
#' # Create new linear base learner factory:
#' bl_sp_df2 = BaselearnerPSpline$new(data_source,
#' list(n_knots = 10, df = 2, bin_root = 2))
#' bl_sp_df5 = BaselearnerPSpline$new(data_source,
#' list(n_knots = 15, df = 5))
#'
#' # Get the transformed data:
#' dim(bl_sp_df2$getData())
#' dim(bl_sp_df5$getData())
#'
#' # Summarize factory:
#' bl_sp_df2$summarizeFactory()
#'
#' # Get full meta data such as penalty term or matrix as well as knots:
#' str(bl_sp_df2$getMeta())
#' bl_sp_df2$getPenalty()
#' bl_sp_df5$getPenalty() # The penalty here is smaller due to more flexibility
#'
#' # Transform "new data":
#' newdata = list(InMemoryData$new(cbind(rnorm(5)), "my_data_name"))
#' bl_sp_df2$transformData(newdata)
#' bl_sp_df5$transformData(newdata)
#'
#' # R6 wrapper
#'
#' cboost_df2 = Compboost$new(dat, "y")
#' cboost_df2$addBaselearner("x", "sp", BaselearnerPSpline,
#' n_knots = 10, df = 2, bin_root = 2)
#' cboost_df2$train(200, 0)
#'
#' cboost_df5 = Compboost$new(dat, "y")
#' cboost_df5$addBaselearner("x", "sp", BaselearnerPSpline,
#' n_knots = 15, df = 5)
#' cboost_df5$train(200, 0)
#'
#' # Access base learner directly from the API (n = sqrt(100) = 10 with binning):
#' str(cboost_df2$baselearner_list$x_sp$factory$getData())
#' str(cboost_df5$baselearner_list$x_sp$factory$getData())
#'
#' gg_df2 = plotPEUni(cboost_df2, "x")
#' gg_df5 = plotPEUni(cboost_df5, "x")
#'
#' library(ggplot2)
#' library(patchwork)
#'
#' (gg_df2 | gg_df5) &
#' geom_point(data = dat, aes(x = x, y = y - c(cboost_df2$offset)), alpha = 0.2)
#' @export BaselearnerPSpline
NULL
#' @title Row-wise tensor product base learner
#'
#' @description
#' This class combines base learners. The base learner is defined by a data matrix
#' calculated as row-wise tensor product of the two data matrices given in the
#' base learners to combine.
#'
#' @format [S4] object.
#' @name BaselearnerTensor
#'
#' @section Usage:
#' \preformatted{
#' BaselearnerTensor$new(blearner1, blearner2, blearner_type)
#' BaselearnerTensor$new(blearner1, blearner2, blearner_type, anisotrop)
#' }
#'
#' @param blearner1 (`Baselearner*`)\cr
#' First base learner.
#' @param blearner2 (`Baselearner*`)\cr
#' Second base learner.
#' @param blearner_type (`character(1)`) \cr
#' Type of the base learner (if not specified, `blearner_type = "spline"` is used).
#' The unique id of the base learner is defined by appending `blearner_type` to
#' the feature name:
#' `paste0(blearner1$getDataSource()$getIdentifier(), "_",
#' blearner2$getDataSource()$getIdentifier(), "_", blearner_type)`.
#' @param anisotrop (`logical(1)`)\cr
#' Defines how the penalty is added up. If `anisotrop = TRUE`, the marginal effects
#' are penalized as defined in the underlying factories. If `anisotrop = FALSE`, an isotropic
#' penalty is used, which means that both directions get penalized equally.
#'
#' @section Fields:
#' This class doesn't contain public fields.
#'
#' @section Methods:
#' * `$summarizeFactory()`: `() -> ()`
#' * `$transformData(newdata)`: `list(InMemoryData) -> matrix()`
#' * `$getMeta()`: `() -> list()`
#' @template section-bl-base-methods
#'
#' @examples
#' # Sample data:
#' x1 = runif(100, 0, 10)
#' x2 = runif(100, 0, 10)
#' y = sin(x1) * cos(x2) + rnorm(100, 0, 0.2)
#' dat = data.frame(x1, x2, y)
#'
#' # S4 wrapper
#'
#' # Create new data object, a matrix is required as input:
#' ds1 = InMemoryData$new(cbind(x1), "x1")
#' ds2 = InMemoryData$new(cbind(x2), "x2")
#'
#' # Create new linear base learner factory:
#' bl1 = BaselearnerPSpline$new(ds1, "sp", list(n_knots = 10, df = 5))
#' bl2 = BaselearnerPSpline$new(ds2, "sp", list(n_knots = 10, df = 5))
#'
#' tensor = BaselearnerTensor$new(bl1, bl2, "row_tensor")
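#'
#' # Optionally, an anisotropic penalty can be requested via the fourth
#' # constructor argument (see the usage section); this keeps the marginal
#' # penalties as defined in `bl1` and `bl2`:
#' tensor_aniso = BaselearnerTensor$new(bl1, bl2, "row_tensor_aniso", TRUE)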
#'
#' # Get the transformed data:
#' dim(tensor$getData())
#'
#' # Get full meta data such as penalty term or matrix as well as knots:
#' str(tensor$getMeta())
#'
#' # Transform "new data":
#' newdata = list(InMemoryData$new(cbind(runif(5)), "x1"),
#' InMemoryData$new(cbind(runif(5)), "x2"))
#' str(tensor$transformData(newdata))
#'
#' # R6 wrapper
#'
#' cboost = Compboost$new(dat, "y")
#' cboost$addTensor("x1", "x2", df = 5)
#' cboost$train(50, 0)
#'
#' table(cboost$getSelectedBaselearner())
#' plotTensor(cboost, "x1_x2_tensor")
#' @export BaselearnerTensor
NULL
#' @title Centering a base learner by another one
#'
#' @description
#' This base learner subtracts the effect of one base learner from another
#' (both usually defined on the same feature). After the subtraction, the
#' resulting base learner is not able to predict the subtracted effect.
#' This becomes handy for decomposing effects into, e.g., a linear and a
#' non-linear component in which the non-linear component is not capable
#' of capturing the linear part and hence is selected only after the
#' linear effect is estimated.
#'
#' @format [S4] object.
#' @name BaselearnerCentered
#'
#' @section Fields:
#' This class doesn't contain public fields.
#'
#' @section Methods:
#' * `$summarizeFactory()`: `() -> ()`
#' * `$transformData(newdata)`: `list(InMemoryData) -> matrix()`
#' * `$getMeta()`: `() -> list()`
#' * `$getRotation()`: `() -> matrix()`
#' @template section-bl-base-methods
#'
#' @examples
#' # Sample data:
#' x = runif(100, 0, 10)
#' y = 2 * sin(x) + 2 * x + rnorm(100, 0, 0.5)
#' dat = data.frame(x, y)
#'
#' # S4 wrapper
#'
#' # Create new data object, a matrix is required as input:
#' data_mat = cbind(x)
#' data_source = InMemoryData$new(data_mat, "x")
#'
#' # Prerequisite: Create a linear and spline base learner:
#' bl_lin = BaselearnerPolynomial$new(data_source,
#' list(degree = 1, intercept = TRUE))
#' bl_sp = BaselearnerPSpline$new(data_source,
#' list(n_knots = 15, df = 5))
#'
#' # Now, subtract the linear effect from the spline:
#' bl_ctr = BaselearnerCentered$new(bl_sp, bl_lin, "ctr")
#'
#' # Note that the data matrix of this base learner has
#' # `nrow(bl_sp$getData()) - ncol(bl_lin$getData())` columns:
#' dim(bl_ctr$getData())
#' str(bl_ctr$getMeta())
#'
#' # The data matrix is created by rotating the spline data matrix:
#' all.equal(t(bl_sp$getData()) %*% bl_ctr$getRotation(), bl_ctr$getData())
#'
#' # Transform "new data". Internally, the basis of the spline is build and
#' # then rotated by the rotation matrix to subtract the linear part:
#' newdata = list(InMemoryData$new(cbind(rnorm(5)), "x"))
#' bl_ctr$transformData(newdata)
#'
#' # R6 wrapper
#'
#' cboost = Compboost$new(dat, "y")
#'
#' # Compboost has a wrapper called `$addComponents()` that automatically
#' # creates and adds the linear base learner and a centered base learner
#' # as above (the `...` args are passed to `BaselearnerPSpline$new()`):
#' cboost$addComponents("x", n_knots = 10, df = 5, bin_root = 2)
#'
#' # Note that we have used binning to save memory, hence the data matrix
#' # is reduced to 10 observations:
#' dim(cboost$baselearner_list$x_x_spline_centered$factory$getData())
#'
#' cboost$train(200, 0)
#'
#' library(ggplot2)
#'
#' plotPEUni(cboost, "x") +
#' geom_point(data = dat, aes(x = x, y = y - c(cboost$offset)), alpha = 0.2)
#' @export BaselearnerCentered
NULL
#' @title One-hot encoded base learner for a categorical feature
#'
#' @description
#' This base learner can be used to estimate effects of categorical
#' features. The classes are included similarly to a linear model by
#' using a one-hot encoded data matrix. Additionally, a Ridge penalty
#' allows unbiased feature selection.
#'
#' @format [S4] object.
#' @name BaselearnerCategoricalRidge
#'
#' @section Usage:
#' \preformatted{
#' BaselearnerCategoricalRidge$new(data_source, list(df))
#' BaselearnerCategoricalRidge$new(data_source, blearner_type, list(df))
#' }
#'
#' @param data_source [CategoricalDataRaw]\cr
#' Data container of the raw categorical feature.
#' @param blearner_type (`character(1)`) \cr
#' Type of the base learner (if not specified, `blearner_type = "ridge"` is used).
#' The unique id of the base learner is defined by appending `blearner_type` to
#' the feature name: `paste0(data_source$getIdentifier(), "_", blearner_type)`.
#' @template param-df
#'
#' @section Fields:
#' This class doesn't contain public fields.
#'
#' @section Methods:
#' * `$summarizeFactory()`: `() -> ()`
#' * `$transformData(newdata)`: `list(InMemoryData) -> matrix()`
#' * `$getMeta()`: `() -> list()`
#' @template section-bl-base-methods
#'
#' @examples
#' # Sample data:
#' x = sample(c("one","two"), 20, TRUE)
#' y = c(one = 0.8, two = -1.2)[x] + rnorm(20, 0, 0.2)
#' dat = data.frame(x, y)
#'
#' # S4 API:
#' ds = CategoricalDataRaw$new(x, "cat")
#' bl = BaselearnerCategoricalRidge$new(ds, list(df = 1))
#'
#' bl$getData()
#' bl$summarizeFactory()
#' bl$transformData(list(ds))
#' bl$getBaselearnerId()
#' bl$transformData(list(ds))
#' bl$getBaselearnerId()
#'
#' # R6 API:
#' cboost = Compboost$new(dat, "y")
#' cboost$addBaselearner("x", "binary", BaselearnerCategoricalRidge)
#' cboost$train(100, 0)
#' table(cboost$getSelectedBaselearner())
#' plotPEUni(cboost, "x", individual = FALSE)
#'
#' @export BaselearnerCategoricalRidge
NULL
#' @title Base learner to encode one single class of a categorical feature
#'
#' @description
#' This class creates a one-column one-hot encoded data matrix with ones where
#' `x == class_name` and zeros otherwise.
#'
#' @format [S4] object.
#' @name BaselearnerCategoricalBinary
#'
#' @section Usage:
#' \preformatted{
#' BaselearnerCategoricalBinary$new(data_source, class_name)
#' BaselearnerCategoricalBinary$new(data_source, class_name, blearner_type)
#' }
#'
#' @param data_source [CategoricalDataRaw]\cr
#' The raw data object. Must be an object generated by [CategoricalDataRaw].
#' @param class_name (`character(1)`)\cr
#' The class for which a binary vector is created as data representation.
#' @param blearner_type (`character(1)`) \cr
#' Type of the base learner (if not specified, `blearner_type = "binary"` is used).
#' The unique id of the base learner is defined by appending `blearner_type` to
#' the feature name: `paste0(data_source$getIdentifier(), "_", class_name, "_", blearner_type)`.
#'
#' @section Fields:
#' This class doesn't contain public fields.
#'
#' @section Methods:
#' * `$summarizeFactory()`: `() -> ()`
#' * `$transformData(newdata)`: `list(InMemoryData) -> matrix()`
#' * `$getMeta()`: `() -> list()`
#' @template section-bl-base-methods
#'
#' @examples
#' # Sample data:
#' x = sample(c("one","two"), 20, TRUE)
#' y = c(one = 0.8, two = -1.2)[x] + rnorm(20, 0, 0.2)
#' dat = data.frame(x, y)
#'
#' # S4 API:
#' ds = CategoricalDataRaw$new(x, "cat")
#' bl = BaselearnerCategoricalBinary$new(ds, "one")
#'
#' bl$getData()
#' bl$summarizeFactory()
#' bl$transformData(list(ds))
#' bl$getBaselearnerId()
#'
#' # R6 API:
#' cboost = Compboost$new(dat, "y")
#' cboost$addBaselearner("x", "binary", BaselearnerCategoricalBinary)
#' cboost$train(500, 0)
#' table(cboost$getSelectedBaselearner())
#' plotPEUni(cboost, "x", individual = FALSE)
#' @export BaselearnerCategoricalBinary
NULL
#' @title Custom base learner using `R` functions.
#'
#' @description
#' This class defines a custom base learner factory by
#' passing `R` functions for instantiation, fitting, and predicting.
#'
#' @format [S4] object.
#' @name BaselearnerCustom
#'
#' @section Usage:
#' \preformatted{
#' BaselearnerCustom$new(data_source, list(instantiate_fun,
#' train_fun, predict_fun, param_fun))
#' }
#'
#' @template param-data_source
#' @param instantiate_fun (`function`)\cr
#' `R` function to transform the source data.
#' @param train_fun (`function`)\cr
#' `R` function to train the base learner on the target data.
#' @param predict_fun (`function`)\cr
#' `R` function to predict on the object returned by `train_fun`.
#' @param param_fun (`function`)\cr
#' `R` function to extract the parameter of the object returned by `train_fun`.
#'
#' @section Details:
#' The functions must have the following structure:
#'
#' \code{instantiateData(X) { ... return (X_trafo) }} With a matrix argument
#' \code{X} and a matrix as return object.
#'
#' \code{train(y, X) { ... return (SEXP) }} With a vector argument \code{y}
#' and a matrix argument \code{X}. The target data is used in \code{X} while
#' \code{y} contains the response. The function can return any \code{R}
#' object which is stored within a \code{SEXP}.
#'
#' \code{predict(model, newdata) { ... return (prediction) }} The returned
#' object of the \code{train} function is passed to the \code{model}
#' argument while \code{newdata} contains a new matrix used for predicting.
#'
#' \code{extractParameter(model) { ... return (parameters) }} Again, \code{model}
#' contains the object returned by \code{train}. The returned object must be
#' a matrix containing the estimated parameters. If no parameters should be
#' estimated one can return \code{NA}.
#'
#' For an example see the \code{Examples}.
#'
#' @section Fields:
#' This class doesn't contain public fields.
#'
#' @section Methods:
#' * `$summarizeFactory()`: `() -> ()`
#' * `$transformData(newdata)`: `list(InMemoryData) -> matrix()`
#' * `$getMeta()`: `() -> list()`
#' @template section-bl-base-methods
#'
#' @examples
#' # Sample data:
#' data_mat = cbind(1, 1:10)
#' y = 2 + 3 * 1:10
#'
#' # Create new data object:
#' data_source = InMemoryData$new(data_mat, "my_data_name")
#'
#' instantiateDataFun = function (X) {
#' return(X)
#' }
#' # Ordinary least squares estimator:
#' trainFun = function (y, X) {
#' return(solve(t(X) %*% X) %*% t(X) %*% y)
#' }
#' predictFun = function (model, newdata) {
#' return(as.matrix(newdata %*% model))
#' }
#' extractParameter = function (model) {
#' return(as.matrix(model))
#' }
#'
#' # Create new custom linear base learner factory:
#' custom_lin_factory = BaselearnerCustom$new(data_source,
#' list(instantiate_fun = instantiateDataFun, train_fun = trainFun,
#' predict_fun = predictFun, param_fun = extractParameter))
#'
#' # Get the transformed data:
#' custom_lin_factory$getData()
#'
#' # Summarize factory:
#' custom_lin_factory$summarizeFactory()
#'
#' @export BaselearnerCustom
NULL
#' Base learner factory list to define the set of base learners
#'
#' \code{BlearnerFactoryList} creates an object in which base learner factories
#' can be registered. This object can then be passed to compboost as the set of
#' base learners from which the optimizer selects the best base learner in each
#' iteration.
#'
#' @format [S4] object.
#' @name BlearnerFactoryList
#'
#' @section Usage:
#' \preformatted{
#' BlearnerFactoryList$new()
#' }
#'
#' @section Fields:
#' This class doesn't contain public fields.
#'
#' @section Methods:
#' \describe{
#' \item{\code{registerFactory(BaselearnerFactory)}}{Takes an object of the
#' class \code{BaseLearnerFactory} and adds this factory to the set of
#' base learners.}
#' \item{\code{printRegisteredFactories()}}{Get all registered factories.}
#' \item{\code{clearRegisteredFactories()}}{Remove all registered factories.
#' Note that the factories are not deleted, just removed from the map.}
#' \item{\code{getModelFrame()}}{Get all target data matrices combined into one
#' big matrix.}
#' \item{\code{getNumberOfRegisteredFactories()}}{Get the number of registered
#' factories.}
#' }
#' @examples
#' # Sample data:
#' data_mat = cbind(1:10)
#'
#' # Create new data object:
#' data_source = InMemoryData$new(data_mat, "my_data_name")
#'
#' lin_factory = BaselearnerPolynomial$new(data_source,
#' list(degree = 1, intercept = TRUE))
#' poly_factory = BaselearnerPolynomial$new(data_source,
#' list(degree = 2, intercept = TRUE))
#'
#' # Create new base learner list:
#' my_bl_list = BlearnerFactoryList$new()
#'
#' # Register factories:
#' my_bl_list$registerFactory(lin_factory)
#' my_bl_list$registerFactory(poly_factory)
#'
#' # Get registered factories:
#' my_bl_list$printRegisteredFactories()
#'
#' # Get all target data matrices in one big matrix:
#' my_bl_list$getModelFrame()
#'
#' # Clear list:
#' my_bl_list$clearRegisteredFactories()
#'
#' # Get number of registered factories:
#' my_bl_list$getNumberOfRegisteredFactories()
#'
#' @export BlearnerFactoryList
NULL
#' Quadratic loss for regression tasks.
#'
#' This loss can be used for regression with \eqn{y \in \mathrm{R}}.
#'
#' \strong{Loss Function:}
#' \deqn{
#' L(y, f(x)) = \frac{1}{2}( y - f(x))^2
#' }
#' \strong{Gradient:}
#' \deqn{
#' \frac{\delta}{\delta f(x)}\ L(y, f(x)) = f(x) - y
#' }
#' \strong{Initialization:}
#' \deqn{
#' \hat{f}^{[0]}(x) = \mathrm{arg~min}_{c\in\mathrm{R}}\ \frac{1}{n}\sum\limits_{i=1}^n
#' L\left(y^{(i)}, c\right) = \bar{y}
#' }
#'
#' @format [S4] object.
#' @name LossQuadratic
#'
#' @section Usage:
#' \preformatted{
#' LossQuadratic$new()
#' LossQuadratic$new(offset)
#' }
#'
#' @template section-loss-base-methods
#' @template param-offset
#'
#' @examples
#' # Create new loss object:
#' quadratic_loss = LossQuadratic$new()
#' quadratic_loss
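#'
#' # Sketch: the optimal constant under the quadratic loss is the mean
#' # (illustrative check of the initialization formula above):
#' y = rnorm(20)
#' optimize(function(c) mean(0.5 * (y - c)^2), range(y))$minimum
#' mean(y)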
#'
#' @export LossQuadratic
NULL
#' Absolute loss for regression tasks.
#'
#' This loss can be used for regression with \eqn{y \in \mathrm{R}}.
#'
#' \strong{Loss Function:}
#' \deqn{
#' L(y, f(x)) = | y - f(x)|
#' }
#' \strong{Gradient:}
#' \deqn{
#' \frac{\delta}{\delta f(x)}\ L(y, f(x)) = -\mathrm{sign}(y - f(x))
#' }
#' \strong{Initialization:}
#' \deqn{
#' \hat{f}^{[0]}(x) = \mathrm{arg~min}_{c\in R}\ \frac{1}{n}\sum\limits_{i=1}^n
#' L(y^{(i)}, c) = \mathrm{median}(y)
#' }
#'
#' @format [S4] object.
#' @name LossAbsolute
#'
#' @section Usage:
#' \preformatted{
#' LossAbsolute$new()
#' LossAbsolute$new(offset)
#' }
#'
#' @template section-loss-base-methods
#' @template param-offset
#'
#' @examples
#'
#' # Create new loss object:
#' absolute_loss = LossAbsolute$new()
#' absolute_loss
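#'
#' # Sketch: the optimal constant under the absolute loss is the median
#' # (illustrative check of the initialization formula above):
#' y = rnorm(20)
#' optimize(function(c) mean(abs(y - c)), range(y))$minimum
#' median(y)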
#'
#' @export LossAbsolute
NULL
#' Quantile loss for regression tasks.
#'
#' This loss can be used for regression with \eqn{y \in \mathrm{R}}.
#'
#' \strong{Loss Function:}
#' \deqn{
#' L(y, f(x)) = h| y - f(x)|
#' }
#' where the weight \eqn{h} depends on the quantile \eqn{q}: positive residuals
#' (\eqn{y > f(x)}) are weighted with \eqn{q}, negative ones with \eqn{1 - q}.
#' \strong{Gradient:}
#' \deqn{
#' \frac{\delta}{\delta f(x)}\ L(y, f(x)) = -h\mathrm{sign}( y - f(x))
#' }
#' \strong{Initialization:}
#' \deqn{
#' \hat{f}^{[0]}(x) = \mathrm{arg~min}_{c\in R}\ \frac{1}{n}\sum\limits_{i=1}^n
#' L(y^{(i)}, c) = \mathrm{quantile}(y, q)
#' }
#'
#' @format [S4] object.
#' @name LossQuantile
#'
#' @section Usage:
#' \preformatted{
#' LossQuantile$new()
#' LossQuantile$new(quantile)
#' LossQuantile$new(offset, quantile)
#' }
#'
#' @template section-loss-base-methods
#' @template param-offset
#' @param quantile (`numeric(1)`)\cr
#' Numerical value between 0 and 1 that defines the quantile that is modeled.
#'
#' @examples
#'
#' # Create new loss object (modeling the 30% quantile):
#' quantile_loss = LossQuantile$new(0.3)
#' quantile_loss
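#'
#' # Sketch: the optimal constant under the quantile (pinball) loss is the
#' # q-quantile (illustrative check with q = 0.3):
#' y = rnorm(100)
#' pinball = function(c) mean(0.3 * pmax(y - c, 0) + 0.7 * pmax(c - y, 0))
#' optimize(pinball, range(y))$minimum
#' quantile(y, 0.3)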
#'
#' @export LossQuantile
NULL
#' Huber loss for regression tasks.
#'
#' This loss can be used for regression with \eqn{y \in \mathrm{R}}.
#'
#' \strong{Loss Function:}
#' \deqn{
#' L(y, f(x)) = 0.5(y - f(x))^2 \ \ \mathrm{if} \ \ |y - f(x)| < d
#' }
#' \deqn{
#' L(y, f(x)) = d|y - f(x)| - 0.5d^2 \ \ \mathrm{otherwise}
#' }
#' \strong{Gradient:}
#' \deqn{
#' \frac{\delta}{\delta f(x)}\ L(y, f(x)) = f(x) - y \ \ \mathrm{if} \ \ |y - f(x)| < d
#' }
#' \deqn{
#' \frac{\delta}{\delta f(x)}\ L(y, f(x)) = -d\mathrm{sign}(y - f(x)) \ \ \mathrm{otherwise}
#' }
#'
#' @format [S4] object.
#' @name LossHuber
#'
#' @section Usage:
#' \preformatted{
#' LossHuber$new()
#' LossHuber$new(delta)
#' LossHuber$new(offset, delta)
#' }
#'
#' @template section-loss-base-methods
#' @template param-offset
#' @param delta (`numeric(1)`)\cr
#' Numerical value greater than 0 to specify the interval around 0 for the
#' quadratic error measuring (default `delta = 1`).
#'
#' @examples
#'
#' # Create new loss object:
#' huber_loss = LossHuber$new()
#' huber_loss
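#'
#' # Sketch: the Huber loss is quadratic near zero and linear in the tails
#' # (illustrative, evaluated on residuals r with delta = 1):
#' r = seq(-3, 3, by = 0.5)
#' ifelse(abs(r) < 1, 0.5 * r^2, abs(r) - 0.5)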
#'
#' @export LossHuber
NULL
#' 0-1 Loss for binary classification derived from the binomial distribution
#'
#' This loss can be used for binary classification. The coding we have chosen
#' here acts on
#' \eqn{y \in \{-1, 1\}}.
#'
#' \strong{Loss Function:}
#' \deqn{
#' L(y, f(x)) = \log(1 + \mathrm{exp}(-2yf(x)))
#' }
#' \strong{Gradient:}
#' \deqn{
#' \frac{\delta}{\delta f(x)}\ L(y, f(x)) = - \frac{y}{1 + \mathrm{exp}(2yf(x))}
#' }
#' \strong{Initialization:}
#' \deqn{
#' \hat{f}^{[0]}(x) = \frac{1}{2}\mathrm{log}(p / (1 - p))
#' }
#' with
#' \deqn{
#' p = \frac{1}{n}\sum\limits_{i=1}^n\mathrm{1}_{\{y^{(i)} = 1\}}
#' }
#'
#' @format [S4] object.
#' @name LossBinomial
#'
#' @section Usage:
#' \preformatted{
#' LossBinomial$new()
#' LossBinomial$new(offset)
#' }
#'
#' @template section-loss-base-methods
#' @template param-offset
#'
#' @examples
#'
#' # Create new loss object:
#' bin_loss = LossBinomial$new()
#' bin_loss
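#'
#' # Sketch: the initialization is half the log-odds of the positive class
#' # (illustrative check of the formula above):
#' y = sample(c(-1, 1), 20, TRUE)
#' p = mean(y == 1)
#' 0.5 * log(p / (1 - p))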
#'
#' @export LossBinomial
NULL
#' Create LossCustom by using R functions.
#'
#' \code{LossCustom} creates a custom loss by using
#' \code{Rcpp::Function} to set \code{R} functions.
#'
#' @format [S4] object.
#' @name LossCustom
#'
#' @section Usage:
#' \preformatted{
#' LossCustom$new(lossFun, gradientFun, initFun)
#' }
#'
#' @template section-loss-base-methods
#' @param lossFun (`function`)\cr
#' `R` function to calculate the loss.
#' @param gradientFun (`function`)\cr
#' `R` function to calculate the gradient.
#' @param initFun (`function`)\cr
#' `R` function to calculate the constant initialization.
#'
#' @section Details:
#' The functions must have the following structure:
#'
#' \code{lossFun(truth, prediction) { ... return (loss) }} With a vector
#' argument \code{truth} containing the real values and a vector of
#' predictions \code{prediction}. The function must return a vector
#' containing the loss for each component.
#'
#' \code{gradientFun(truth, prediction) { ... return (grad) }} With a vector
#' argument \code{truth} containing the real values and a vector of
#' predictions \code{prediction}. The function must return a vector
#' containing the gradient of the loss for each component.
#'
#' \code{initFun(truth) { ... return (init) }} With a vector
#' argument \code{truth} containing the real values. The function must
#' return a numeric value containing the offset for the constant
#' initialization.
#'
#' @examples
#'
#' # Loss function:
#' myLoss = function (true_values, prediction) {
#' return (0.5 * (true_values - prediction)^2)
#' }
#' # Gradient of loss function:
#' myGradient = function (true_values, prediction) {
#' return (prediction - true_values)
#' }
#' # Constant initialization:
#' myConstInit = function (true_values) {
#' return (mean(true_values))
#' }
#'
#' # Create new custom quadratic loss:
#' my_loss = LossCustom$new(myLoss, myGradient, myConstInit)
#'
#' @export LossCustom
NULL
#' Create response object for regression.
#'
#' \code{ResponseRegr} creates a response object that is used as the target
#' during the fitting process.
#'
#' @format [S4] object.
#' @name ResponseRegr
#'
#' @section Usage:
#' \preformatted{
#' ResponseRegr$new(target_name, response)
#' ResponseRegr$new(target_name, response, weights)
#' }
#'
#' @examples
#'
#' response_regr = ResponseRegr$new("target", cbind(rnorm(10)))
#' response_regr$getResponse()
#' response_regr$getTargetName()
#'
#' @export ResponseRegr
NULL
#' Create response object for binary classification.
#'
#' \code{ResponseBinaryClassif} creates a response object that is used as the
#' target during the fitting process.
#'
#' @format [S4] object.
#' @name ResponseBinaryClassif
#'
#' @section Usage:
#' \preformatted{
#' ResponseBinaryClassif$new(target_name, pos_class, response)
#' ResponseBinaryClassif$new(target_name, pos_class, response, weights)
#' }
#'
#' @examples
#'
#' response_binary = ResponseBinaryClassif$new("target", "A", sample(c("A", "B"), 10, TRUE))
#' response_binary$getResponse()
#' response_binary$getPrediction()
#' response_binary$getPredictionTransform() # Applies sigmoid to prediction scores
#' response_binary$getPredictionResponse() # Categorizes depending on the transformed predictions
#' response_binary$getTargetName()
#' response_binary$setThreshold(0.7)
#' response_binary$getThreshold()
#' response_binary$getPositiveClass()
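#'
#' # Sketch of the transformation applied by `$getPredictionTransform()`
#' # (illustrative; the sigmoid maps scores into (0, 1)):
#' sigmoid = function(f) 1 / (1 + exp(-f))
#' sigmoid(response_binary$getPrediction())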
#'
#' @export ResponseBinaryClassif
NULL
#' Logger class to log the current iteration
#'
#' @format [S4] object.
#' @name LoggerIteration
#'
#' @section Usage:
#' \preformatted{
#' LoggerIteration$new(logger_id, use_as_stopper, max_iterations)
#' }
#'
#' @template param-logger_id
#' @template param-use_as_stopper
#' @param max_iterations (`integer(1)`)\cr
#' If the logger is used as stopper, this argument defines the maximal number of iterations.
#'
#' @section Fields:
#' This class doesn't contain public fields.
#'
#' @section Methods:
#' * `$summarizeLogger()`: `() -> ()`
#'
#' @examples
#' # Define logger:
#' log_iters = LoggerIteration$new("iterations", FALSE, 100)
#'
#' # Summarize logger:
#' log_iters$summarizeLogger()
#'
#' @export LoggerIteration
NULL
#' @title Log the train risk.
#'
#' @description
#' This class logs the train risk for a specific loss function.
#'
#' @format [S4] object.
#' @name LoggerInbagRisk
#'
#' @section Usage:
#' \preformatted{
#' LoggerInbagRisk$new(logger_id, use_as_stopper, loss, eps_for_break, patience)
#' }
#'
#' @template param-logger_id
#' @template param-use_as_stopper
#' @template param-loss
#' @param eps_for_break (`numeric(1)`)\cr
#' This argument becomes active if the logger is also used as stopper. If the relative
#' improvement of the logged inbag risk falls below this boundary, the stopper
#' returns `TRUE`.
#' @template param-patience
#'
#' @section Details:
#' This logger computes the risk for the training data
#' \eqn{\mathcal{D} = \{(x^{(i)},\ y^{(i)})\ |\ i \in \{1, \dots, n\}\}}
#' and stores it into a vector. The empirical risk \eqn{\mathcal{R}_\mathrm{emp}} for
#' iteration \eqn{m} is calculated by:
#' \deqn{
#' \mathcal{R}_\mathrm{emp}^{[m]} = \frac{1}{n}\sum\limits_{i = 1}^n L(y^{(i)}, \hat{f}^{[m]}(x^{(i)}))
#' }
#' __Note:__
#' * If \eqn{m=0} then \eqn{\hat{f}} is just the offset.
#' * The implementation to calculate \eqn{\mathcal{R}_\mathrm{emp}^{[m]}} is done in two steps:
#' 1. Calculate vector \code{risk_temp} of losses for every observation for
#' given response \eqn{y^{(i)}} and prediction \eqn{\hat{f}^{[m]}(x^{(i)})}.
#' 2. Average over \code{risk_temp}.
#'
#' This procedure ensures that it is possible to use, e.g., the AUC or any
#' other arbitrary performance measure for risk logging. Such a measure yields
#' just one value for \code{risk_temp}, and therefore the average equals the
#' measure itself, which is then returned.
#'
#' @section Fields:
#' This class doesn't contain public fields.
#'
#' @section Methods:
#' * `$summarizeLogger()`: `() -> ()`
#'
#' @examples
#' # Used loss:
#' log_bin = LossBinomial$new()
#'
#' # Define logger:
#' log_inbag_risk = LoggerInbagRisk$new("inbag", FALSE, log_bin, 0.05, 5)
#'
#' # Summarize logger:
#' log_inbag_risk$summarizeLogger()
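#'
#' # Sketch of the two-step risk computation described in the details
#' # (illustrative only; compboost computes this internally in C++):
#' y = c(1, -1, 1)
#' f_hat = c(0.8, -0.3, 0.4)
#' risk_temp = log(1 + exp(-2 * y * f_hat)) # loss per observation
#' mean(risk_temp)                          # empirical risk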
#'
#' @export LoggerInbagRisk
NULL
#' @title Log the validation/test/out-of-bag risk
#'
#' @description
#' This class logs the out of bag risk for a specific loss function.
#'
#' @format [S4] object.
#' @name LoggerOobRisk
#'
#' @section Usage:
#' \preformatted{
#' LoggerOobRisk$new(logger_id, use_as_stopper, loss, eps_for_break,
#' patience, oob_data, oob_response)
#' }
#'
#' @template param-logger_id
#' @template param-use_as_stopper
#' @template param-loss
#' @param eps_for_break (`numeric(1)`)\cr
#' This argument becomes active if the logger is also used as stopper. If the relative
#' improvement of the logged out-of-bag risk falls below this boundary, the stopper
#' returns `TRUE`.
#' @template param-patience
#' @param oob_data (`list()`)\cr
#' A list of data source objects that correspond to the source data of each
#' registered factory. The source data objects should contain the out-of-bag
#' data. This data is then used to calculate the prediction in each step.
#' @param oob_response ([ResponseRegr] | [ResponseBinaryClassif])\cr
#' The response object used for the predictions on the validation data.
#'
#' @section Details:
#' This logger computes the risk for a given new dataset
#' \eqn{\mathcal{D}_\mathrm{oob} = \{(x^{(i)},\ y^{(i)})\ |\ i \in I_\mathrm{oob}\}}
#' and stores it into a vector. The OOB risk \eqn{\mathcal{R}_\mathrm{oob}} for
#' iteration \eqn{m} is calculated by:
#' \deqn{
#' \mathcal{R}_\mathrm{oob}^{[m]} = \frac{1}{|\mathcal{D}_\mathrm{oob}|}\sum\limits_{(x,y) \in \mathcal{D}_\mathrm{oob}}
#' L(y, \hat{f}^{[m]}(x))
#' }
#' __Note:__
#' * If \eqn{m=0} then \eqn{\hat{f}} is just the offset.
#' * The implementation to calculate \eqn{\mathcal{R}_\mathrm{oob}^{[m]}} is done in two steps:
#' 1. Calculate vector \code{risk_temp} of losses for every observation for
#' given response \eqn{y^{(i)}} and prediction \eqn{\hat{f}^{[m]}(x^{(i)})}.
#' 2. Average over \code{risk_temp}.
#'
#' This procedure ensures that it is possible to use, e.g., the AUC or any
#' other arbitrary performance measure for risk logging. Such a measure yields
#' just one value for \code{risk_temp}, and therefore the average equals the
#' measure itself, which is then returned.
#'
#' @section Fields:
#' This class doesn't contain public fields.
#'
#' @section Methods:
#' * `$summarizeLogger()`: `() -> ()`
#'
#' @examples
#' # Define data:
#' X1 = cbind(1:10)
#' X2 = cbind(10:1)
#' data_source1 = InMemoryData$new(X1, "x1")
#' data_source2 = InMemoryData$new(X2, "x2")
#'
#' oob_list = list(data_source1, data_source2)
#'
#' set.seed(123)
#' y_oob = rnorm(10)
#'
#' # Used loss (the response is a regression response, so we log a
#' # regression loss):
#' log_quadratic = LossQuadratic$new()
#'
#' # Define response object of oob data:
#' oob_response = ResponseRegr$new("oob_response", as.matrix(y_oob))
#'
#' # Define logger:
#' log_oob_risk = LoggerOobRisk$new("oob", FALSE, log_quadratic, 0.05, 5, oob_list, oob_response)
#'
#' # Summarize logger:
#' log_oob_risk$summarizeLogger()
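#'
#' # Sketch of the OOB risk computation described in the details
#' # (illustrative only; here for the quadratic loss at iteration m = 0,
#' # with an assumed offset-only prediction of zero):
#' f_hat_oob = rep(0, 10)
#' mean(0.5 * (y_oob - f_hat_oob)^2)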
#'
#' @export LoggerOobRisk
NULL
#' @title Log the runtime
#'
#' @description
#' This class logs the runtime of the algorithm. The logger can also be used
#' to stop the algorithm after a defined time budget. The available time units are:
#' * minutes
#' * seconds
#' * microseconds
#'
#' @format [S4] object.
#' @name LoggerTime
#'
#' @section Usage:
#' \preformatted{
#' LoggerTime$new(logger_id, use_as_stopper, max_time, time_unit)
#' }
#'
#' @template param-logger_id
#' @template param-use_as_stopper
#' @param max_time (`integer(1)`)\cr
#' If the logger is used as stopper, this argument contains the maximal time
#' available to train the model.
#' @param time_unit (`character(1)`)\cr
#' The unit in which the time is measured. Choices are `minutes`,
#' `seconds` or `microseconds`.
#'
#' @section Fields:
#' This class doesn't contain public fields.
#'
#' @section Methods:
#' * `$summarizeLogger()`: `() -> ()`
#'
#' @examples
#' # Define logger:
#' log_time = LoggerTime$new("time_minutes", FALSE, 20, "minutes")
#'
#' # Summarize logger:
#' log_time$summarizeLogger()
#'
#' @export LoggerTime
NULL
#' @title Collect loggers
#'
#' @description
#' This class collects all loggers that are used in the algorithm and
#' takes care about stopping strategies and tracing.
#'
#' @format [S4] object.
#' @name LoggerList
#'
#' @section Usage:
#' \preformatted{
#' LoggerList$new()
#' }
#'
#' @section Fields:
#' This class doesn't contain public fields.
#'
#' @section Methods:
#' * `$registerLogger()`: `Logger* -> ()`
#' * `$printRegisteredLogger()`: `() -> ()`
#' * `$clearRegisteredLogger()`: `() -> ()`
#' * `$getNumberOfRegisteredLogger()`: `() -> integer(1)`
#' * `$getNamesOfRegisteredLogger()`: `() -> character()`
#' * `$isStopper()`: `() -> logical()`
#'
#' @examples
#' # Define logger:
#' log_iters = LoggerIteration$new("iteration", TRUE, 100)
#' log_time = LoggerTime$new("time", FALSE, 20, "minutes")
#'
#' # Create logger list:
#' logger_list = LoggerList$new()
#'
#' # Register new logger:
#' logger_list$registerLogger(log_iters)
#' logger_list$registerLogger(log_time)
#'
#' # Print registered logger:
#' logger_list$printRegisteredLogger()
#'
#' # Remove all logger:
#' logger_list$clearRegisteredLogger()
#'
#' # Get number of registered logger:
#' logger_list$getNumberOfRegisteredLogger()
#'
#' @export LoggerList
NULL
#' @title Coordinate descent
#'
#' @description
#' This class defines a new object to conduct gradient descent in function space.
#' Because of the component-wise structure, this is more like a block-wise
#' coordinate descent.
#'
#' @format [S4] object.
#' @name OptimizerCoordinateDescent
#'
#' @section Usage:
#' \preformatted{
#' OptimizerCoordinateDescent$new()
#' OptimizerCoordinateDescent$new(ncores)
#' }
#'
#' @template param-ncores
#'
#' @section Fields:
#' This class doesn't contain public fields.
#'
#' @section Methods:
#' * `$getOptimizerType()`: `() -> character(1)`
#' * `$getStepSize()`: `() -> numeric()`
#'
#' @examples
#'
#' # Define optimizer:
#' optimizer = OptimizerCoordinateDescent$new()
#'
#' @export OptimizerCoordinateDescent
NULL
#' @title Coordinate descent with cosine annealing
#'
#' @description
#' Same as [OptimizerCoordinateDescent] but with a cosine annealing scheduler to
#' adjust the learning rate during the fitting process.
#'
#' @format [S4] object.
#' @name OptimizerCosineAnnealing
#'
#' @section Usage:
#' \preformatted{
#' OptimizerCosineAnnealing$new()
#' OptimizerCosineAnnealing$new(ncores)
#' OptimizerCosineAnnealing$new(nu_min, nu_max, cycles, anneal_iter_max)
#' OptimizerCosineAnnealing$new(nu_min, nu_max, cycles, anneal_iter_max, ncores)
#' }
#'
#' @template param-ncores
#' @param nu_min (`numeric(1)`)\cr
#' Minimal learning rate.
#' @param nu_max (`numeric(1)`)\cr
#' Maximal learning rate.
#' @param cycles (`integer(1)`)\cr
#' Number of annealing cycles from `nu_max` to `nu_min` between iteration 1 and `anneal_iter_max`.
#' @param anneal_iter_max (`integer(1)`)\cr
#' Maximal number of iterations for which the annealing is conducted. `nu_min` is used as
#' fixed learning rate after `anneal_iter_max`.
#'
#' @section Fields:
#' This class doesn't contain public fields.
#'
#' @section Methods:
#' * `$getOptimizerType()`: `() -> character(1)`
#' * `$getStepSize()`: `() -> numeric()`
#'
#' @examples
#'
#' # Define optimizer:
#' optimizer = OptimizerCosineAnnealing$new()
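#'
#' # With a custom schedule (illustrative values; the argument order
#' # nu_min, nu_max, cycles, anneal_iter_max follows the usage section):
#' optimizer_custom = OptimizerCosineAnnealing$new(0.001, 0.3, 4, 100)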
#'
#' @export OptimizerCosineAnnealing
NULL
#' @title Coordinate descent with line search
#'
#' @description
#' Same as [OptimizerCoordinateDescent] but with a line search in each iteration.
#'
#' @format [S4] object.
#' @name OptimizerCoordinateDescentLineSearch
#'
#' @section Usage:
#' \preformatted{
#' OptimizerCoordinateDescentLineSearch$new()
#' OptimizerCoordinateDescentLineSearch$new(ncores)
#' }
#'
#' @template param-ncores
#'
#' @section Fields:
#' This class doesn't contain public fields.
#'
#' @section Methods:
#' * `$getOptimizerType()`: `() -> character(1)`
#' * `$getStepSize()`: `() -> numeric()`
#'
#' @examples
#'
#' # Define optimizer:
#' optimizer = OptimizerCoordinateDescentLineSearch$new()
#'
#' @export OptimizerCoordinateDescentLineSearch
NULL
#' @title Nesterov's momentum
#'
#' @description
#' This class defines a new object to conduct Nesterov's momentum in function space.
#'
#' @format [S4] object.
#' @name OptimizerAGBM
#'
#' @section Usage:
#' \preformatted{
#' OptimizerAGBM$new(momentum)
#' OptimizerAGBM$new(momentum, ncores)
#' }
#'
#' @template param-ncores
#' @param momentum (`numeric(1)`)\cr
#' Momentum term used to accelerate the fitting process. If chosen large, the algorithm trains
#' faster but also tends to overfit faster.
#'
#' @section Fields:
#' This class doesn't contain public fields.
#'
#' @section Methods:
#' * `$getOptimizerType()`: `() -> character(1)`
#' * `$getStepSize()`: `() -> numeric()`
#' * `$getMomentumParameter()`: `() -> numeric(1)`
#' * `$getSelectedMomentumBaselearner()`: `() -> character()`
#' * `$getParameterMatrix()`: `() -> list(matrix())`
#' * `$getErrorCorrectedPseudoResiduals()`: `() -> matrix()`
#'
#' @examples
#'
#' optimizer = OptimizerAGBM$new(0.1)
#'
#' @export OptimizerAGBM
NULL
#' @title Internal Compboost Class
#'
#' @description
#' This class is the raw `C++` counterpart and is still quite high-level.
#' It is the base for the [Compboost] [R6] class and provides
#' many convenient wrappers to access data and execute methods by calling
#' the `C++` methods.
#'
#' @format [S4] object.
#' @name Compboost_internal
#'
#' @section Usage:
#' \preformatted{
#' Compboost_internal$new(response, learning_rate, stop_if_all_stopper_fulfilled,
#' factory_list, loss, logger_list, optimizer)
#' }
#'
#' @param response ([ResponseRegr] | [ResponseBinaryClassif])\cr
#' The response object containing the target variable.
#' @param learning_rate (`numeric(1)`)\cr
#' The learning rate.
#' @param stop_if_all_stopper_fulfilled (`logical(1)`)\cr
#' Boolean to indicate which stopping strategy is used. If `TRUE`,
#' the algorithm stops if the conditions of all loggers for stopping apply.
#' @param factory_list ([BlearnerFactoryList])\cr
#' List of base learner factories from which one base learner is selected
#' in each iteration by the optimizer.
#' @template param-loss
#' @param logger_list ([LoggerList])\cr
#' The [LoggerList] object with all loggers.
#' @template param-optimizer
#'
#' @section Fields:
#' This class doesn't contain public fields.
#'
#' @section Methods:
#' * `$train()`: `() -> ()`
#' * `$continueTraining()`: `() -> ()`
#' * `$getLearningRate()`: `() -> numeric(1)`
#' * `$getPrediction()`: `() -> matrix()`
#' * `$getSelectedBaselearner()`: `() -> character()`
#' * `$getLoggerData()`: `() -> list(character(), matrix())`
#' * `$getEstimatedParameter()`: `() -> list(matrix())`
#' * `$getParameterAtIteration()`: `() -> list(matrix())`
#' * `$getParameterMatrix()`: `() -> matrix()`
#' * `$predictFactoryTrainData()`: `() -> matrix()`
#' * `$predictFactoryNewData()`: `list(Data*) -> matrix()`
#' * `$predictIndividualTrainData()`: `() -> list(matrix())` Get the linear contribution of each base learner for the training data.
#' * `$predictIndividual()`: `list(Data*) -> list(matrix())` Get the linear contribution of each base learner for new data.
#' * `$predict()`: `list(Data*), logical(1) -> matrix()`
#' * `$summarizeCompboost()`: `() -> ()`
#' * `$isTrained()`: `() -> logical(1)`
#' * `$setToIteration()`: `() -> ()`
#' * `$saveJson()`: `() -> ()`
#' * `$getOffset()`: `() -> numeric(1) | matrix()`
#' * `$getRiskVector()`: `() -> numeric()`
#' * `$getResponse()`: `() -> Response*`
#' * `$getOptimizer()`: `() -> Optimizer*`
#' * `$getLoss()`: `() -> Loss*`
#' * `$getLoggerList()`: `() -> LoggerList`
#' * `$getBaselearnerList()`: `() -> BlearnerFactoryList`
#' * `$useGlobalStopping()`: `() -> logical(1)`
#' * `$getFactoryMap()`: `() -> list(Baselearner*)`
#' * `$getDataMap()`: `() -> list(Data*)`
#' @examples
#'
#' # Some data:
#' df = mtcars
#' df$mpg_cat = ifelse(df$mpg > 20, "high", "low")
#'
#' # # Create new variable to check the polynomial base learner with degree 2:
#' # df$hp2 = df[["hp"]]^2
#'
#' # Data for the baselearner are matrices:
#' X_hp = as.matrix(df[["hp"]])
#' X_wt = as.matrix(df[["wt"]])
#'
#' # Target variable:
#' response = ResponseBinaryClassif$new("mpg_cat", "high", df[["mpg_cat"]])
#'
#' data_source_hp = InMemoryData$new(X_hp, "hp")
#' data_source_wt = InMemoryData$new(X_wt, "wt")
#'
#' # List for oob logging:
#' oob_data = list(data_source_hp, data_source_wt)
#'
#' # List to test prediction on newdata:
#' test_data = oob_data
#'
#' # Factories:
#' linear_factory_hp = BaselearnerPolynomial$new(data_source_hp,
#' list(degree = 1, intercept = TRUE))
#' linear_factory_wt = BaselearnerPolynomial$new(data_source_wt,
#' list(degree = 1, intercept = TRUE))
#' quadratic_factory_hp = BaselearnerPolynomial$new(data_source_hp,
#' list(degree = 2, intercept = TRUE))
#' spline_factory_wt = BaselearnerPSpline$new(data_source_wt,
#' list(degree = 3, n_knots = 10, penalty = 2, differences = 2))
#'
#' # Create new factory list:
#' factory_list = BlearnerFactoryList$new()
#'
#' # Register factories:
#' factory_list$registerFactory(linear_factory_hp)
#' factory_list$registerFactory(linear_factory_wt)
#' factory_list$registerFactory(quadratic_factory_hp)
#' factory_list$registerFactory(spline_factory_wt)
#'
#' # Define loss:
#' loss_bin = LossBinomial$new()
#'
#' # Define optimizer:
#' optimizer = OptimizerCoordinateDescent$new()
#'
#' ## Logger
#'
#' # Define logger. We want just the iterations as stopper but also track
#' # the time:
#' log_iterations = LoggerIteration$new("iteration_logger", TRUE, 500)
#' log_time = LoggerTime$new("time_logger", FALSE, 500, "microseconds")
#'
#' # Define new logger list:
#' logger_list = LoggerList$new()
#'
#' # Register the logger:
#' logger_list$registerLogger(log_iterations)
#' logger_list$registerLogger(log_time)
#'
#' # Run compboost:
#' # --------------
#'
#' # Initialize object:
#' cboost = Compboost_internal$new(
#' response = response,
#' learning_rate = 0.05,
#' stop_if_all_stopper_fulfilled = FALSE,
#' factory_list = factory_list,
#' loss = loss_bin,
#' logger_list = logger_list,
#' optimizer = optimizer
#' )
#'
#' # Train the model (we want to print the trace):
#' cboost$train(trace = 50)
#' cboost
#'
#' # Get estimated parameter:
#' cboost$getEstimatedParameter()
#'
#' # Get trace of selected base learner:
#' cboost$getSelectedBaselearner()
#'
#' # Set to iteration 200:
#' cboost$setToIteration(200, 30)
#'
#' # Get new parameter values:
#' cboost$getEstimatedParameter()
#'
#' @export Compboost_internal
NULL