R/RcppExports.R

# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

#' In memory data class to store data in RAM
#'
#' \code{InMemoryData} creates a data object which can be used as source or
#' target object within the base-learner factories of \code{compboost}. The
#' convention to initialize target data is to call the constructor without
#' any arguments.
#'
#' @format \code{\link{S4}} object.
#' @name InMemoryData
#'
#' @section Usage:
#' \preformatted{
#' InMemoryData$new()
#' InMemoryData$new(data.mat, data.identifier)
#' }
#'
#' @section Arguments:
#' \describe{
#' \item{\code{data.mat} [\code{matrix}]}{
#'   Matrix containing the source data. This source data is later transformed
#'   to obtain the design matrix a base-learner uses for training.
#' }
#' \item{\code{data.identifier} [\code{character(1)}]}{
#'   The name for the data specified in \code{data.mat}. Note that it is
#'   important to have the same data names for train and evaluation data.
#' }
#' }
#'
#'
#' @section Details:
#'   The \code{data.mat} needs to suit the base-learner. For instance, the
#'   spline base-learner only takes a one-column matrix since just
#'   one-dimensional splines are supported at the moment. For the polynomial
#'   base-learner, the \code{data.mat} can additionally be used to control
#'   whether an intercept is fitted by adding a column containing just ones.
#'   It is also possible to add other columns to estimate multiple features
#'   simultaneously. However, this is not recommended in terms of unbiased
#'   feature selection.
#'
#'   The \code{data.mat} and \code{data.identifier} of a target data object
#'   are set automatically by passing the source and target object to the
#'   desired factory. \code{getData()} can then be used to access the
#'   transformed data of the target object.
#'
#'   This class is a wrapper around the pure \code{C++} implementation. To see
#'   the functionality of the \code{C++} class visit
#'   \url{https://schalkdaniel.github.io/compboost/cpp_man/html/classdata_1_1_in_memory_data.html}.
#'
#' @section Fields:
#'   This class doesn't contain public fields.
#'
#' @section Methods:
#' \describe{
#' \item{\code{getData()}}{Extract the \code{data.mat} from the data object.}
#' \item{\code{getIdentifier()}}{Extract the used name from the data object.}
#' }
#' @examples
#' # Sample data:
#' data.mat = cbind(1:10)
#'
#' # Create new data object:
#' data.obj = InMemoryData$new(data.mat, "my.data.name")
#'
#' # Get data and identifier:
#' data.obj$getData()
#' data.obj$getIdentifier()
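#'
#' # A small sketch of the target-data mechanics described in the details
#' # (assuming the BaselearnerPolynomial factory of this package): passing a
#' # source and an empty target to a factory fills the target automatically.
#' data.target = InMemoryData$new()
#' lin.factory = BaselearnerPolynomial$new(data.obj, data.target, 1, TRUE)
#'
#' # Transformed data and identifier inherited from the source:
#' data.target$getData()
#' data.target$getIdentifier()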
#'
#' @export InMemoryData
NULL

#' Base-learner factory to do polynomial regression
#'
#' \code{BaselearnerPolynomial} creates a polynomial base-learner factory
#'  object which can be registered within a base-learner list and then used
#'  for training.
#'
#' @format \code{\link{S4}} object.
#' @name BaselearnerPolynomial
#'
#' @section Usage:
#' \preformatted{
#' BaselearnerPolynomial$new(data_source, data_target, degree, intercept)
#' BaselearnerPolynomial$new(data_source, data_target, blearner_type, degree, intercept)
#' }
#'
#' @section Arguments:
#' \describe{
#' \item{\code{data_source} [\code{Data} Object]}{
#'   Data object which contains the source data.
#' }
#' \item{\code{data_target} [\code{Data} Object]}{
#'   Data object which gets the transformed source data.
#' }
#' \item{\code{degree} [\code{integer(1)}]}{
#'   This argument is used for transforming the source data. Each element is
#'   taken to the power of the \code{degree} argument.
#' }
#' \item{\code{intercept} [\code{logical(1)}]}{
#'   Indicates whether an intercept should be added. Default is \code{TRUE}.
#' }
#' }
#'
#'
#' @section Details:
#'   The polynomial base-learner factory takes any matrix the user wants to
#'   pass; the number of columns indicates how many parameters are estimated.
#'   An intercept is added by default (see the \code{intercept} argument).
#'   Alternatively, an intercept can be obtained by adding a column of ones
#'   to the source data matrix.
#'
#'   This class is a wrapper around the pure \code{C++} implementation. To see
#'   the functionality of the \code{C++} class visit
#'   \url{https://schalkdaniel.github.io/compboost/cpp_man/html/classblearnerfactory_1_1_polynomial_blearner_factory.html}.
#'
#' @section Fields:
#'   This class doesn't contain public fields.
#'
#' @section Methods:
#' \describe{
#' \item{\code{getData()}}{Get the data matrix of the target data which is used
#'   for modeling.}
#' \item{\code{transformData(X)}}{Transform a data matrix as defined within the
#'   factory. The argument has to be a matrix with one column.}
#' \item{\code{summarizeFactory()}}{Summarize the base-learner factory object.}
#' }
#' @examples
#' # Sample data:
#' data.mat = cbind(1:10)
#'
#' # Create new data object:
#' data.source = InMemoryData$new(data.mat, "my.data.name")
#' data.target1 = InMemoryData$new()
#' data.target2 = InMemoryData$new()
#'
#' # Create new linear base-learner factory:
#' lin.factory = BaselearnerPolynomial$new(data.source, data.target1, 
#'   degree = 2, intercept = FALSE)
#' lin.factory.int = BaselearnerPolynomial$new(data.source, data.target2, 
#'   degree = 2, intercept = TRUE)
#'
#' # Get the transformed data:
#' lin.factory$getData()
#' lin.factory.int$getData()
#'
#' # Summarize factory:
#' lin.factory$summarizeFactory()
#'
#' # Transform data manually:
#' lin.factory$transformData(data.mat)
#' lin.factory.int$transformData(data.mat)
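#'
#' # What the transformation computes, sketched in plain R (an illustration
#' # of the degree argument, not the exact C++ code):
#' data.mat^2           # degree = 2 without intercept
#' cbind(1, data.mat^2) # degree = 2 with intercept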
#'
#' @export BaselearnerPolynomial
NULL

#' Base-learner factory to do non-parametric B or P-spline regression
#'
#' \code{BaselearnerPSpline} creates a spline base-learner factory
#'  object which can be registered within a base-learner list and then used
#'  for training.
#'
#' @format \code{\link{S4}} object.
#' @name BaselearnerPSpline
#'
#' @section Usage:
#' \preformatted{
#' BaselearnerPSpline$new(data_source, data_target, degree, n_knots, penalty,
#'   differences)
#' }
#'
#' @section Arguments:
#' \describe{
#' \item{\code{data_source} [\code{Data} Object]}{
#'   Data object which contains the source data.
#' }
#' \item{\code{data_target} [\code{Data} Object]}{
#'   Data object which gets the transformed source data.
#' }
#' \item{\code{degree} [\code{integer(1)}]}{
#'   Degree of the spline functions to interpolate the knots.
#' }
#' \item{\code{n_knots} [\code{integer(1)}]}{
#'   Number of \strong{inner knots}. To prevent unstable behavior at the
#'   edges, the inner knots are expanded by \eqn{\mathrm{degree} - 1}
#'   additional knots.
#' }
#' \item{\code{penalty} [\code{numeric(1)}]}{
#'   Positive numeric value to specify the penalty parameter. Setting the
#'   penalty to 0 yields ordinary B-splines for the fitting.
#' }
#' \item{\code{differences} [\code{integer(1)}]}{
#'   The number of differences which are penalized. A higher value leads to
#'   smoother curves.
#' }
#' }
#'
#' @section Details:
#'   The data matrix of the source data is restricted to have just one column.
#'   The spline bases are created for this single feature. Multidimensional
#'   splines are not supported at the moment.
#'
#'   This class is a wrapper around the pure \code{C++} implementation. To see
#'   the functionality of the \code{C++} class visit
#'   \url{https://schalkdaniel.github.io/compboost/cpp_man/html/classblearnerfactory_1_1_p_spline_blearner_factory.html}.
#'
#' @section Fields:
#'   This class doesn't contain public fields.
#'
#' @section Methods:
#' \describe{
#' \item{\code{getData()}}{Get the data matrix of the target data which is used
#'   for modeling.}
#' \item{\code{transformData(X)}}{Transform a data matrix as defined within the
#'   factory. The argument has to be a matrix with one column.}
#' \item{\code{summarizeFactory()}}{Summarize the base-learner factory object.}
#' }
#' @examples
#' # Sample data:
#' data.mat = cbind(1:10)
#' y = sin(1:10)
#'
#' # Create new data object:
#' data.source = InMemoryData$new(data.mat, "my.data.name")
#' data.target = InMemoryData$new()
#'
#' # Create new spline base-learner factory:
#' spline.factory = BaselearnerPSpline$new(data.source, data.target,
#'   degree = 3, n_knots = 4, penalty = 2, differences = 2)
#'
#' # Get the transformed data:
#' spline.factory$getData()
#'
#' # Summarize factory:
#' spline.factory$summarizeFactory()
#'
#' # Transform data manually:
#' spline.factory$transformData(data.mat)
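#'
#' # As noted above, setting the penalty to 0 yields ordinary B-splines.
#' # A sketch for comparison, using a fresh target object:
#' bspline.factory = BaselearnerPSpline$new(data.source, InMemoryData$new(),
#'   degree = 3, n_knots = 4, penalty = 0, differences = 2)
#' bspline.factory$summarizeFactory()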
#'
#' @export BaselearnerPSpline
NULL

#' Create custom base-learner factory by using R functions.
#'
#' \code{BaselearnerCustom} creates a custom base-learner factory by
#'   setting custom \code{R} functions. This factory object can be registered
#'   within a base-learner list and then used for training.
#'
#' @format \code{\link{S4}} object.
#' @name BaselearnerCustom
#'
#' @section Usage:
#' \preformatted{
#' BaselearnerCustom$new(data_source, data_target, instantiateData, train,
#'   predict, extractParameter)
#' }
#'
#' @section Arguments:
#' \describe{
#' \item{\code{data_source} [\code{Data} Object]}{
#'   Data object which contains the source data.
#' }
#' \item{\code{data_target} [\code{Data} Object]}{
#'   Data object which gets the transformed source data.
#' }
#' \item{\code{instantiateData} [\code{function}]}{
#'   \code{R} function to transform the source data. For details see the
#'   \code{Details}.
#' }
#' \item{\code{train} [\code{function}]}{
#'   \code{R} function to train the base-learner on the target data. For
#'   details see the \code{Details}.
#' }
#' \item{\code{predict} [\code{function}]}{
#'   \code{R} function to predict on the object returned by \code{train}.
#'   For details see the \code{Details}.
#' }
#' \item{\code{extractParameter} [\code{function}]}{
#'   \code{R} function to extract the parameter of the object returned by
#'   \code{train}. For details see the \code{Details}.
#' }
#' }
#'
#' @section Details:
#'   The functions must have the following structure:
#'
#'   \code{instantiateData(X) { ... return (X.trafo) }} With a matrix argument
#'   \code{X} and a matrix as return object.
#'
#'   \code{train(y, X) { ... return (SEXP) }} With a vector argument \code{y}
#'   and a matrix argument \code{X}. The target data is used in \code{X} while
#'   \code{y} contains the response. The function can return any \code{R}
#'   object which is stored within a \code{SEXP}.
#'
#'   \code{predict(model, newdata) { ... return (prediction) }} The returned
#'   object of the \code{train} function is passed to the \code{model}
#'   argument while \code{newdata} contains a new matrix used for predicting.
#'
#'   \code{extractParameter(model) { ... return (parameters) }} Again,
#'   \code{model} contains the object returned by \code{train}. The returned
#'   object must be a matrix containing the estimated parameters. If no
#'   parameters are estimated, one can return \code{NA}.
#'
#'   For an example see the \code{Examples}.
#'
#'   This class is a wrapper around the pure \code{C++} implementation. To see
#'   the functionality of the \code{C++} class visit
#'   \url{https://schalkdaniel.github.io/compboost/cpp_man/html/classblearnerfactory_1_1_custom_blearner_factory.html}.
#'
#' @section Fields:
#'   This class doesn't contain public fields.
#'
#' @section Methods:
#' \describe{
#' \item{\code{getData()}}{Get the data matrix of the target data which is used
#'   for modeling.}
#' \item{\code{transformData(X)}}{Transform a data matrix as defined within the
#'   factory. The argument has to be a matrix with one column.}
#' \item{\code{summarizeFactory()}}{Summarize the base-learner factory object.}
#' }
#' @examples
#' # Sample data:
#' data.mat = cbind(1, 1:10)
#' y = 2 + 3 * 1:10
#'
#' # Create new data object:
#' data.source = InMemoryData$new(data.mat, "my.data.name")
#' data.target = InMemoryData$new()
#'
#' instantiateDataFun = function (X) {
#'   return(X)
#' }
#' # Ordinary least squares estimator:
#' trainFun = function (y, X) {
#'   return(solve(t(X) %*% X) %*% t(X) %*% y)
#' }
#' predictFun = function (model, newdata) {
#'   return(as.matrix(newdata %*% model))
#' }
#' extractParameter = function (model) {
#'   return(as.matrix(model))
#' }
#'
#' # Create new custom linear base-learner factory:
#' custom.lin.factory = BaselearnerCustom$new(data.source, data.target,
#'   instantiateDataFun, trainFun, predictFun, extractParameter)
#'
#' # Get the transformed data:
#' custom.lin.factory$getData()
#'
#' # Summarize factory:
#' custom.lin.factory$summarizeFactory()
#'
#' # Transform data manually:
#' custom.lin.factory$transformData(data.mat)
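#'
#' # The signature contract from the details section, checked in plain R:
#' model = trainFun(y, instantiateDataFun(data.mat))
#' predictFun(model, data.mat) # fitted values
#' extractParameter(model)     # estimated parameters as matrix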
#'
#' @export BaselearnerCustom
NULL

#' Create custom cpp base-learner factory by using cpp functions and external
#' pointers.
#'
#' \code{BaselearnerCustomCpp} creates a custom base-learner factory by
#'   setting custom \code{C++} functions. This factory object can be registered
#'   within a base-learner list and then used for training.
#'
#' @format \code{\link{S4}} object.
#' @name BaselearnerCustomCpp
#'
#' @section Usage:
#' \preformatted{
#' BaselearnerCustomCpp$new(data_source, data_target, instantiate_data_ptr,
#'   train_ptr, predict_ptr)
#' }
#'
#' @section Arguments:
#' \describe{
#' \item{\code{data_source} [\code{Data} Object]}{
#'   Data object which contains the source data.
#' }
#' \item{\code{data_target} [\code{Data} Object]}{
#'   Data object which gets the transformed source data.
#' }
#' \item{\code{instantiate_data_ptr} [\code{externalptr}]}{
#'   External pointer to the \code{C++} instantiate data function.
#' }
#' \item{\code{train_ptr} [\code{externalptr}]}{
#'   External pointer to the \code{C++} train function.
#' }
#' \item{\code{predict_ptr} [\code{externalptr}]}{
#'   External pointer to the \code{C++} predict function.
#' }
#' }
#'
#' @section Details:
#'   For an example see the extending compboost vignette or the function
#'   \code{getCustomCppExample}.
#'
#'   This class is a wrapper around the pure \code{C++} implementation. To see
#'   the functionality of the \code{C++} class visit
#'   \url{https://schalkdaniel.github.io/compboost/cpp_man/html/classblearnerfactory_1_1_custom_cpp_blearner_factory.html}.
#'
#' @section Fields:
#'   This class doesn't contain public fields.
#'
#' @section Methods:
#' \describe{
#' \item{\code{getData()}}{Get the data matrix of the target data which is used
#'   for modeling.}
#' \item{\code{transformData(X)}}{Transform a data matrix as defined within the
#'   factory. The argument has to be a matrix with one column.}
#' \item{\code{summarizeFactory()}}{Summarize the base-learner factory object.}
#' }
#' @examples
#' \donttest{
#' # Sample data:
#' data.mat = cbind(1, 1:10)
#' y = 2 + 3 * 1:10
#'
#' # Create new data object:
#' data.source = InMemoryData$new(data.mat, "my.data.name")
#' data.target = InMemoryData$new()
#'
#' # Source the external pointer exposed by using XPtr:
#' Rcpp::sourceCpp(code = getCustomCppExample(silent = TRUE))
#'
#' # Create new custom cpp base-learner factory:
#' custom.cpp.factory = BaselearnerCustomCpp$new(data.source, data.target,
#'   dataFunSetter(), trainFunSetter(), predictFunSetter())
#'
#' # Get the transformed data:
#' custom.cpp.factory$getData()
#'
#' # Summarize factory:
#' custom.cpp.factory$summarizeFactory()
#'
#' # Transform data manually:
#' custom.cpp.factory$transformData(data.mat)
#' }
#' @export BaselearnerCustomCpp
NULL

#' Base-learner factory list to define the set of base-learners
#'
#' \code{BlearnerFactoryList} creates an object in which base-learner factories
#' can be registered. This object can then be passed to compboost as the set
#' of base-learners from which the optimizer selects the best base-learner in
#' each iteration.
#'
#' @format \code{\link{S4}} object.
#' @name BlearnerFactoryList
#'
#' @section Usage:
#' \preformatted{
#' BlearnerFactoryList$new()
#' }
#'
#' @section Details:
#'
#'   This class is a wrapper around the pure \code{C++} implementation. To see
#'   the functionality of the \code{C++} class visit
#'   \url{https://schalkdaniel.github.io/compboost/cpp_man/html/classblearnerlist_1_1_baselearner_factory_list.html}.
#'
#' @section Fields:
#'   This class doesn't contain public fields.
#'
#' @section Methods:
#' \describe{
#' \item{\code{registerFactory(BaselearnerFactory)}}{Takes an object of the
#'   class \code{BaselearnerFactory} and adds this factory to the set of
#'   base-learners.}
#' \item{\code{printRegisteredFactories()}}{Get all registered factories.}
#' \item{\code{clearRegisteredFactories()}}{Remove all registered factories.
#'   Note that the factories are not deleted, just removed from the map.}
#' \item{\code{getModelFrame()}}{Get all target data matrices combined into
#'   one big matrix.}
#' \item{\code{getNumberOfRegisteredFactories()}}{Get the number of registered
#'   factories.}
#' }
#' @examples
#' # Sample data:
#' data.mat = cbind(1:10)
#'
#' # Create new data object:
#' data.source = InMemoryData$new(data.mat, "my.data.name")
#' data.target1 = InMemoryData$new()
#' data.target2 = InMemoryData$new()
#'
#' lin.factory = BaselearnerPolynomial$new(data.source, data.target1, 1, TRUE)
#' poly.factory = BaselearnerPolynomial$new(data.source, data.target2, 2, TRUE)
#'
#' # Create new base-learner list:
#' my.bl.list = BlearnerFactoryList$new()
#'
#' # Register factories:
#' my.bl.list$registerFactory(lin.factory)
#' my.bl.list$registerFactory(poly.factory)
#'
#' # Get registered factories:
#' my.bl.list$printRegisteredFactories()
#'
#' # Get all target data matrices in one big matrix:
#' my.bl.list$getModelFrame()
#'
#' # Clear list:
#' my.bl.list$clearRegisteredFactories()
#'
#' # Get number of registered factories:
#' my.bl.list$getNumberOfRegisteredFactories()
#'
#' @export BlearnerFactoryList
NULL

#' Quadratic loss for regression tasks.
#'
#' This loss can be used for regression with \eqn{y \in \mathrm{R}}.
#'
#' \strong{Loss Function:}
#' \deqn{
#'   L(y, f(x)) = \frac{1}{2}( y - f(x))^2
#' }
#' \strong{Gradient:}
#' \deqn{
#'   \frac{\delta}{\delta f(x)}\ L(y, f(x)) = f(x) - y
#' }
#' \strong{Initialization:}
#' \deqn{
#'   \hat{f}^{[0]}(x) = \mathrm{arg~min}_{c\in\mathrm{R}}\ \frac{1}{n}\sum\limits_{i=1}^n
#'   L\left(y^{(i)}, c\right) = \bar{y}
#' }
#'
#' @format \code{\link{S4}} object.
#' @name LossQuadratic
#'
#' @section Usage:
#' \preformatted{
#' LossQuadratic$new()
#' LossQuadratic$new(offset)
#' }
#'
#' @section Arguments:
#' \describe{
#' \item{\code{offset} [\code{numeric(1)}]}{
#'   Numerical value which can be used to set a custom offset. If so, this
#'   value is returned instead of the loss optimal initialization.
#' }
#' }
#'
#' @section Details:
#'
#'   This class is a wrapper around the pure \code{C++} implementation. To see
#'   the functionality of the \code{C++} class visit
#'   \url{https://schalkdaniel.github.io/compboost/cpp_man/html/classloss_1_1_quadratic_loss.html}.
#'
#' @examples
#'
#' # Create new loss object:
#' quadratic.loss = LossQuadratic$new()
#' quadratic.loss
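#'
#' # A custom offset replaces the loss optimal initialization (here 0
#' # instead of the mean of the response):
#' quadratic.loss.offset = LossQuadratic$new(0)
#' quadratic.loss.offset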
#'
#' @export LossQuadratic
NULL

#' Absolute loss for regression tasks.
#'
#' This loss can be used for regression with \eqn{y \in \mathrm{R}}.
#'
#' \strong{Loss Function:}
#' \deqn{
#'   L(y, f(x)) = | y - f(x)|
#' }
#' \strong{Gradient:}
#' \deqn{
#'   \frac{\delta}{\delta f(x)}\ L(y, f(x)) = \mathrm{sign}( f(x) - y)
#' }
#' \strong{Initialization:}
#' \deqn{
#'   \hat{f}^{[0]}(x) = \mathrm{arg~min}_{c\in\mathrm{R}}\ \frac{1}{n}\sum\limits_{i=1}^n
#'   L(y^{(i)}, c) = \mathrm{median}(y)
#' }
#'
#' @format \code{\link{S4}} object.
#' @name LossAbsolute
#'
#' @section Usage:
#' \preformatted{
#' LossAbsolute$new()
#' LossAbsolute$new(offset)
#' }
#'
#' @section Arguments:
#' \describe{
#' \item{\code{offset} [\code{numeric(1)}]}{
#'   Numerical value which can be used to set a custom offset. If so, this
#'   value is returned instead of the loss optimal initialization.
#' }
#' }
#'
#' @section Details:
#'
#'   This class is a wrapper around the pure \code{C++} implementation. To see
#'   the functionality of the \code{C++} class visit
#'   \url{https://schalkdaniel.github.io/compboost/cpp_man/html/classloss_1_1_absolute_loss.html}.
#'
#' @examples
#'
#' # Create new loss object:
#' absolute.loss = LossAbsolute$new()
#' absolute.loss
#'
#' @export LossAbsolute
NULL

#' 0-1 Loss for binary classification derived from the binomial distribution
#'
#' This loss can be used for binary classification. The coding we have chosen
#' here acts on \eqn{y \in \{-1, 1\}}.
#'
#' \strong{Loss Function:}
#' \deqn{
#'   L(y, f(x)) = \log(1 + \mathrm{exp}(-2yf(x)))
#' }
#' \strong{Gradient:}
#' \deqn{
#'   \frac{\delta}{\delta f(x)}\ L(y, f(x)) = - \frac{y}{1 + \mathrm{exp}(2yf(x))}
#' }
#' \strong{Initialization:}
#' \deqn{
#'   \hat{f}^{[0]}(x) = \frac{1}{2}\mathrm{log}(p / (1 - p))
#' }
#' with
#' \deqn{
#'   p = \frac{1}{n}\sum\limits_{i=1}^n\mathrm{1}_{\{y^{(i)} = 1\}}
#' }
#'
#' @format \code{\link{S4}} object.
#' @name LossBinomial
#'
#' @section Usage:
#' \preformatted{
#' LossBinomial$new()
#' LossBinomial$new(offset)
#' }
#'
#' @section Arguments:
#' \describe{
#' \item{\code{offset} [\code{numeric(1)}]}{
#'   Numerical value which can be used to set a custom offset. If so, this
#'   value is returned instead of the loss optimal initialization.
#' }
#' }
#'
#' @section Details:
#'
#'   This class is a wrapper around the pure \code{C++} implementation. To see
#'   the functionality of the \code{C++} class visit
#'   \url{https://schalkdaniel.github.io/compboost/cpp_man/html/classloss_1_1_binomial_loss.html}.
#'
#' @examples
#' 
#' # Create new loss object:
#' bin.loss = LossBinomial$new()
#' bin.loss
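#'
#' # The loss optimal initialization from the formula above, in plain R:
#' y = c(1, 1, -1, 1)
#' p = mean(y == 1)
#' 0.5 * log(p / (1 - p))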
#'
#' @export LossBinomial
NULL

#' Create LossCustom by using R functions.
#'
#' \code{LossCustom} creates a custom loss by using
#' \code{Rcpp::Function} to set \code{R} functions.
#'
#' @format \code{\link{S4}} object.
#' @name LossCustom
#'
#' @section Usage:
#' \preformatted{
#' LossCustom$new(lossFun, gradientFun, initFun)
#' }
#'
#' @section Arguments:
#' \describe{
#' \item{\code{lossFun} [\code{function}]}{
#'   \code{R} function to calculate the loss. For details see the
#'   \code{Details}.
#' }
#' \item{\code{gradientFun} [\code{function}]}{
#'   \code{R} function to calculate the gradient. For details see the
#'   \code{Details}.
#' }
#' \item{\code{initFun} [\code{function}]}{
#'   \code{R} function to calculate the constant initialization. For
#'   details see the \code{Details}.
#' }
#' }
#'
#' @section Details:
#'   The functions must have the following structure:
#'
#'   \code{lossFun(truth, prediction) { ... return (loss) }} With a vector
#'   argument \code{truth} containing the real values and a vector of
#'   predictions \code{prediction}. The function must return a vector
#'   containing the loss for each component.
#'
#'   \code{gradientFun(truth, prediction) { ... return (grad) }} With a vector
#'   argument \code{truth} containing the real values and a vector of
#'   predictions \code{prediction}. The function must return a vector
#'   containing the gradient of the loss for each component.
#'
#'   \code{initFun(truth) { ... return (init) }} With a vector
#'   argument \code{truth} containing the real values. The function must
#'   return a numeric value containing the offset for the constant
#'   initialization.
#'
#'   For an example see the \code{Examples}.
#'
#'   This class is a wrapper around the pure \code{C++} implementation. To see
#'   the functionality of the \code{C++} class visit
#'   \url{https://schalkdaniel.github.io/compboost/cpp_man/html/classloss_1_1_custom_loss.html}.
#'
#' @examples
#'
#' # Loss function:
#' myLoss = function (true.values, prediction) {
#'   return (0.5 * (true.values - prediction)^2)
#' }
#' # Gradient of loss function:
#' myGradient = function (true.values, prediction) {
#'   return (prediction - true.values)
#' }
#' # Constant initialization:
#' myConstInit = function (true.values) {
#'   return (mean(true.values))
#' }
#'
#' # Create new custom quadratic loss:
#' my.loss = LossCustom$new(myLoss, myGradient, myConstInit)
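#'
#' # Sanity check of the custom functions in plain R, independent of
#' # compboost:
#' y = rnorm(10)
#' pred = rep(myConstInit(y), 10)
#' mean(myLoss(y, pred))     # empirical risk of the constant initialization
#' mean(myGradient(y, pred)) # (numerically) zero at the mean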
#'
#' @export LossCustom
NULL

#' Create custom cpp losses by using cpp functions and external pointers.
#'
#' \code{LossCustomCpp} creates a custom loss by using
#' \code{Rcpp::XPtr} to set \code{C++} functions.
#'
#' @format \code{\link{S4}} object.
#' @name LossCustomCpp
#'
#' @section Usage:
#' \preformatted{
#' LossCustomCpp$new(loss_ptr, grad_ptr, const_init_ptr)
#' }
#'
#' @section Arguments:
#' \describe{
#' \item{\code{loss_ptr} [\code{externalptr}]}{
#'   External pointer to the \code{C++} loss function.
#' }
#' \item{\code{grad_ptr} [\code{externalptr}]}{
#'   External pointer to the \code{C++} gradient function.
#' }
#' \item{\code{const_init_ptr} [\code{externalptr}]}{
#'   External pointer to the \code{C++} constant initialization function.
#' }
#' }
#'
#' @section Details:
#'   For an example see the extending compboost vignette or the function
#'   \code{getCustomCppExample(example = "loss")}.
#'
#'   This class is a wrapper around the pure \code{C++} implementation. To see
#'   the functionality of the \code{C++} class visit
#'   \url{https://schalkdaniel.github.io/compboost/cpp_man/html/classloss_1_1_custom_cpp_loss.html}.
#'
#' @examples
#' \donttest{
#' # Load loss functions:
#' Rcpp::sourceCpp(code = getCustomCppExample(example = "loss", silent = TRUE))
#'
#' # Create new custom quadratic loss:
#' my.cpp.loss = LossCustomCpp$new(lossFunSetter(), gradFunSetter(), constInitFunSetter())
#' }
#' @export LossCustomCpp
NULL

#' Logger class to log the current iteration
#'
#' This class may seem superfluous, but it gives more control over the
#' algorithm and is consistent with the object-oriented design. Additionally,
#' it is quite convenient to have this class instead of tracking the iteration
#' as another vector at every stage of the fitting within the compboost
#' object.
#'
#' @format \code{\link{S4}} object.
#' @name LoggerIteration
#'
#' @section Usage:
#' \preformatted{
#' LoggerIteration$new(use_as_stopper, max_iterations)
#' }
#'
#' @section Arguments:
#' \describe{
#' \item{\code{use_as_stopper} [\code{logical(1)}]}{
#'   Boolean to indicate if the logger should also be used as stopper.
#' }
#' \item{\code{max_iterations} [\code{integer(1)}]}{
#'   If the logger is used as stopper this argument defines the maximal
#'   number of iterations.
#' }
#' }
#'
#' @section Details:
#'
#'   This class is a wrapper around the pure \code{C++} implementation. To see
#'   the functionality of the \code{C++} class visit
#'   \url{https://schalkdaniel.github.io/compboost/cpp_man/html/classlogger_1_1_iteration_logger.html}.
#'
#' @section Fields:
#'   This class doesn't contain public fields.
#'
#' @section Methods:
#' \describe{
#' \item{\code{summarizeLogger()}}{Summarize the logger object.}
#' }
#' @examples
#' # Define logger:
#' log.iters = LoggerIteration$new(FALSE, 100)
#'
#' # Summarize logger:
#' log.iters$summarizeLogger()
#'
#' @export LoggerIteration
NULL

#' Logger class to log the inbag risk
#'
#' This class logs the inbag risk for a specific loss function. It is also
#' possible to use custom losses to log performance measures. For details
#' see the use case or extending compboost vignette.
#'
#' @format \code{\link{S4}} object.
#' @name LoggerInbagRisk
#'
#' @section Usage:
#' \preformatted{
#' LoggerInbagRisk$new(use_as_stopper, used_loss, eps_for_break)
#' }
#'
#' @section Arguments:
#' \describe{
#' \item{\code{use_as_stopper} [\code{logical(1)}]}{
#'   Boolean to indicate if the logger should also be used as stopper.
#' }
#' \item{\code{used_loss} [\code{Loss} object]}{
#'   The loss used to calculate the empirical risk by taking the mean of the
#'   loss values returned by the loss object.
#' }
#' \item{\code{eps_for_break} [\code{numeric(1)}]}{
#'   This argument is used if the logger is also used as stopper. If the
#'   relative improvement of the logged inbag risk falls below this boundary
#'   the stopper returns \code{TRUE}.
#' }
#' }
#'
#' @section Details:
#'
#' This logger computes the risk for the given training data
#' \eqn{\mathcal{D} = \{(x^{(i)},\ y^{(i)})\ |\ i \in \{1, \dots, n\}\}}
#' and stores it into a vector. The empirical risk \eqn{\mathcal{R}} for
#' iteration \eqn{m} is calculated by:
#' \deqn{
#'   \mathcal{R}_\mathrm{emp}^{[m]} = \frac{1}{n}\sum\limits_{i = 1}^n L(y^{(i)}, \hat{f}^{[m]}(x^{(i)}))
#' }
#'
#' \strong{Note:}
#' \itemize{
#'   \item
#'     If \eqn{m=0} then \eqn{\hat{f}} is just the offset.
#'
#'   \item
#'     The implementation to calculate \eqn{\mathcal{R}_\mathrm{emp}^{[m]}} is
#'     done in two steps:
#'       \enumerate{
#'        \item
#'          Calculate vector \code{risk_temp} of losses for every observation for
#'          given response \eqn{y^{(i)}} and prediction \eqn{\hat{f}^{[m]}(x^{(i)})}.
#'
#'        \item
#'          Average over \code{risk_temp}.
#'      }
#'    }
#'    This procedure ensures that it is possible to use, e.g., the AUC or any
#'    arbitrary performance measure for risk logging. If the measure returns
#'    just one value (as the AUC does), \code{risk_temp} has length one and
#'    the average equals the measure itself, which is then returned.
#'
#'   This class is a wrapper around the pure \code{C++} implementation. To see
#'   the functionality of the \code{C++} class visit
#'   \url{https://schalkdaniel.github.io/compboost/cpp_man/html/classlogger_1_1_inbag_risk_logger.html}.
#'
#' @section Fields:
#'   This class doesn't contain public fields.
#'
#' @section Methods:
#' \describe{
#'   \item{\code{summarizeLogger()}}{Summarize the logger object.}
#' }
#' @examples
#' # Used loss:
#' log.bin = LossBinomial$new()
#'
#' # Define logger:
#' log.inbag.risk = LoggerInbagRisk$new(FALSE, log.bin, 0.05)
#'
#' # Summarize logger:
#' log.inbag.risk$summarizeLogger()
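#'
#' # The two-step risk computation sketched in plain R, here with the
#' # binomial loss (an illustration, not the C++ code):
#' y = c(1, -1, 1)
#' f.hat = c(0.8, -0.3, 0.2)
#' risk.temp = log(1 + exp(-2 * y * f.hat)) # loss for every observation
#' mean(risk.temp)                          # empirical risk of iteration m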
#'
#' @export LoggerInbagRisk
NULL

#' Logger class to log the out of bag risk
#'
#' This class logs the out of bag risk for a specific loss function. It is
#' also possible to use custom losses to log performance measures. For details
#' see the use case or extending compboost vignette.
#'
#' @format \code{\link{S4}} object.
#' @name LoggerOobRisk
#'
#' @section Usage:
#' \preformatted{
#' LoggerOobRisk$new(use_as_stopper, used_loss, eps_for_break, oob_data,
#'   oob_response)
#' }
#'
#' @section Arguments:
#' \describe{
#' \item{\code{use_as_stopper} [\code{logical(1)}]}{
#'   Boolean to indicate if the logger should also be used as stopper.
#' }
#' \item{\code{used_loss} [\code{Loss} object]}{
#'   The loss used to calculate the empirical risk by taking the mean of the
#'   returned defined loss within the loss object.
#' }
#' \item{\code{eps_for_break} [\code{numeric(1)}]}{
#'   This argument is used if the logger is also used as stopper. If the
#'   relative improvement of the logged out of bag risk falls below this
#'   boundary the stopper returns \code{TRUE}.
#' }
#' \item{\code{oob_data} [\code{list}]}{
#'   A list which contains data source objects which corresponds to the
#'   source data of each registered factory. The source data objects should
#'   contain the out of bag data. This data is then used to calculate the
#'   prediction in each step.
#' }
#' \item{\code{oob_response} [\code{numeric}]}{
#'   Vector which contains the response for the out of bag data given within
#'   the \code{list}.
#' }
#' }
#'
#' @section Details:
#'
#' This logger computes the risk for a given new dataset
#' \eqn{\mathcal{D}_\mathrm{oob} = \{(x^{(i)},\ y^{(i)})\ |\ i \in I_\mathrm{oob}\}}
#' and stores it into a vector. The OOB risk \eqn{\mathcal{R}_\mathrm{oob}} for
#' iteration \eqn{m} is calculated by:
#' \deqn{
#'   \mathcal{R}_\mathrm{oob}^{[m]} = \frac{1}{|\mathcal{D}_\mathrm{oob}|}\sum\limits_{(x,y) \in \mathcal{D}_\mathrm{oob}}
#'   L(y, \hat{f}^{[m]}(x))
#' }
#'
#' \strong{Note:}
#'   \itemize{
#'
#'   \item
#'     If \eqn{m=0} then \eqn{\hat{f}} is just the offset.
#'
#'   \item
#'     The implementation to calculate \eqn{\mathcal{R}_\mathrm{oob}^{[m]}} is
#'     done in two steps:
#'       \enumerate{
#'
#'       \item
#'         Calculate vector \code{risk_temp} of losses for every observation for
#'         given response \eqn{y^{(i)}} and prediction \eqn{\hat{f}^{[m]}(x^{(i)})}.
#'
#'       \item
#'         Average over \code{risk_temp}.
#'      }
#'    }
#'
#'    This procedure ensures that it is possible to use, e.g., the AUC or any
#'    arbitrary performance measure for risk logging. If the measure returns
#'    just one value (as the AUC does), \code{risk_temp} has length one and
#'    the average equals the measure itself, which is then returned.
#'
#'   This class is a wrapper around the pure \code{C++} implementation. To see
#'   the functionality of the \code{C++} class visit
#'   \url{https://schalkdaniel.github.io/compboost/cpp_man/html/classlogger_1_1_oob_risk_logger.html}.
#'
#' @section Fields:
#'   This class doesn't contain public fields.
#'
#' @section Methods:
#' \describe{
#' \item{\code{summarizeLogger()}}{Summarize the logger object.}
#' }
#' @examples
#' # Define data:
#' X1 = cbind(1:10)
#' X2 = cbind(10:1)
#' data.source1 = InMemoryData$new(X1, "x1")
#' data.source2 = InMemoryData$new(X2, "x2")
#'
#' oob.list = list(data.source1, data.source2)
#'
#' set.seed(123)
#' y.oob = rnorm(10)
#'
#' # Used loss:
#' log.bin = LossBinomial$new()
#'
#' # Define logger:
#' log.oob.risk = LoggerOobRisk$new(FALSE, log.bin, 0.05, oob.list, y.oob)
#'
#' # Summarize logger:
#' log.oob.risk$summarizeLogger()
#'
#' @export LoggerOobRisk
NULL

#' Logger class to log the elapsed time
#'
#' This class just logs the elapsed time. This is very handy if one wants to
#' run the algorithm for, say, 2 hours and see how far it gets within that
#' time. There are three time units available for logging:
#' \itemize{
#'   \item minutes
#'   \item seconds
#'   \item microseconds
#' }
#'
#' @format \code{\link{S4}} object.
#' @name LoggerTime
#'
#' @section Usage:
#' \preformatted{
#' LoggerTime$new(use_as_stopper, max_time, time_unit)
#' }
#'
#' @section Arguments:
#' \describe{
#' \item{\code{use_as_stopper} [\code{logical(1)}]}{
#'   Boolean to indicate if the logger should also be used as stopper.
#' }
#' \item{\code{max_time} [\code{integer(1)}]}{
#'   If the logger is used as stopper this argument contains the maximal time
#'   which is available to train the model.
#' }
#' \item{\code{time_unit} [\code{character(1)}]}{
#'   Character to specify the time unit. Possible choices are \code{minutes},
#'   \code{seconds} or \code{microseconds}.
#' }
#' }
#'
#' @section Details:
#'
#'   This class is a wrapper around the pure \code{C++} implementation. To see
#'   the functionality of the \code{C++} class visit
#'   \url{https://schalkdaniel.github.io/compboost/cpp_man/html/classlogger_1_1_time_logger.html}.
#'
#' @section Fields:
#'   This class doesn't contain public fields.
#'
#' @section Methods:
#' \describe{
#' \item{\code{summarizeLogger()}}{Summarize the logger object.}
#' }
#' @examples
#' # Define logger:
#' log.time = LoggerTime$new(FALSE, 20, "minutes")
#'
#' # Summarize logger:
#' log.time$summarizeLogger()
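#'
#' # Used as stopper, e.g. to stop the training after (at most) two hours:
#' log.time.stopper = LoggerTime$new(TRUE, 120, "minutes")
#' log.time.stopper$summarizeLogger()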
#'
#' @export LoggerTime
NULL

#' Logger list class to collect all loggers
#'
#' This class is meant to define all loggers that should be used to track
#' the progress of the algorithm.
#'
#' @format \code{\link{S4}} object.
#' @name LoggerList
#'
#' @section Usage:
#' \preformatted{
#' LoggerList$new()
#' }
#'
#' @section Details:
#'
#'   This class is a wrapper around the pure \code{C++} implementation. To see
#'   the functionality of the \code{C++} class visit
#'   \url{https://schalkdaniel.github.io/compboost/cpp_man/html/classloggerlist_1_1_logger_list.html}.
#'
#' @section Fields:
#'   This class doesn't contain public fields.
#'
#' @section Methods:
#' \describe{
#' \item{\code{clearRegisteredLogger()}}{Removes all registered loggers
#'   from the list. The loggers themselves are not deleted, just removed from
#'   the map.}
#' \item{\code{getNamesOfRegisteredLogger()}}{Returns the names of the
#'   registered loggers as character vector.}
#' \item{\code{getNumberOfRegisteredLogger()}}{Returns the number of registered
#'   loggers as integer.}
#' \item{\code{printRegisteredLogger()}}{Prints all registered loggers.}
#' \item{\code{registerLogger(logger.id, logger)}}{Includes a new \code{logger}
#'   into the logger list with the \code{logger.id} as key.}
#' }
#' @examples
#' # Define logger:
#' log.iters = LoggerIteration$new(TRUE, 100)
#' log.time = LoggerTime$new(FALSE, 20, "minutes")
#'
#' # Create logger list:
#' logger.list = LoggerList$new()
#'
#' # Register new logger:
#' logger.list$registerLogger("iteration", log.iters)
#' logger.list$registerLogger("time", log.time)
#'
#' # Print registered logger:
#' logger.list$printRegisteredLogger()
#'
#' # Important: The keys have to be unique:
#' logger.list$registerLogger("iteration", log.iters)
#'
#' # Still just two loggers:
#' logger.list$printRegisteredLogger()
#'
#' # Remove all logger:
#' logger.list$clearRegisteredLogger()
#'
#' # Get number of registered logger:
#' logger.list$getNumberOfRegisteredLogger()
#'
#' @export LoggerList
NULL

#' Greedy Optimizer
#'
#' This class defines a new object for the greedy optimizer. The optimizer
#' calculates the sum of squared errors (SSE) for each base-learner and
#' returns the base-learner with the smallest SSE.
#'
#' @format \code{\link{S4}} object.
#' @name OptimizerCoordinateDescent
#'
#' @section Usage:
#' \preformatted{
#' OptimizerCoordinateDescent$new()
#' }
#'
#' @section Details:
#'
#'   This class is a wrapper around the pure \code{C++} implementation. To see
#'   the functionality of the \code{C++} class visit
#'   \url{https://schalkdaniel.github.io/compboost/cpp_man/html/classoptimizer_1_1_greedy_optimizer.html}.
#'
#' @examples
#'
#' # Define optimizer:
#' optimizer = OptimizerCoordinateDescent$new()
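#'
#' # The selection criterion in plain R (a conceptual sketch, not the C++
#' # code): the base-learner with the smallest SSE w.r.t. the pseudo
#' # residuals is selected.
#' sse = function (pseudo.residuals, blearner.prediction) {
#'   sum((pseudo.residuals - blearner.prediction)^2)
#' }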
#'
#' @export OptimizerCoordinateDescent
NULL

#' Main Compboost Class
#'
#' This class collects all parts such as the factory list or the used logger
#' and passes them to \code{C++}. The main algorithm then runs on the
#' \code{C++} side.
#'
#' @format \code{\link{S4}} object.
#' @name Compboost_internal
#'
#' @section Usage:
#' \preformatted{
#' Compboost_internal$new(response, learning_rate, stop_if_all_stopper_fulfilled,
#'   factory_list, loss, logger_list, optimizer)
#' }
#'
#' @section Arguments:
#' \describe{
#' \item{\code{response} [\code{numeric}]}{
#'   Vector of the true values which should be modeled.
#' }
#' \item{\code{learning_rate} [\code{numeric(1)}]}{
#'   The learning rate which is used to shrink the parameter in each iteration.
#' }
#' \item{\code{stop_if_all_stopper_fulfilled} [\code{logical(1)}]}{
#'   Boolean to indicate which stopping strategy is used. If \code{TRUE} then
#'   the algorithm stops if all registered logger stopper are fulfilled.
#' }
#' \item{\code{factory_list} [\code{BlearnerFactoryList} object]}{
#'   List of base-learner factories from which one base-learner is selected
#'   in each iteration by using the optimizer.
#' }
#' \item{\code{loss} [\code{Loss} object]}{
#'   The loss which should be used to calculate the pseudo residuals in each
#'   iteration.
#' }
#' \item{\code{logger_list} [\code{LoggerList} object]}{
#'   The list with all registered logger which are used to track the algorithm.
#' }
#' \item{\code{optimizer} [\code{Optimizer} object]}{
#'   The optimizer which is used to select in each iteration one good
#'   base-learner.
#' }
#' }
#'
#' @section Details:
#'
#'   This class is a wrapper around the pure \code{C++} implementation. To see
#'   the functionality of the \code{C++} class visit
#'   \url{https://schalkdaniel.github.io/compboost/cpp_man/html/classcboost_1_1_compboost.html}.
#'
#' @section Fields:
#'   This class doesn't contain public fields.
#'
#' @section Methods:
#' \describe{
#' \item{\code{train(trace)}}{Initial training of the model. The integer
#'   argument \code{trace} indicates whether the logger progress should be
#'   printed and, if so, which iterations are printed.}
#' \item{\code{continueTraining(trace, logger_list)}}{Continue the training
#'   by using an additional \code{logger_list}. The retraining is stopped if
#'   the first logger says that the algorithm should be stopped.}
#' \item{\code{getPrediction()}}{Get the inbag prediction which is done during
#'   the fitting process.}
#' \item{\code{getSelectedBaselearner()}}{Returns a character vector with the
#'   names of the base-learners in the order they were selected.}
#' \item{\code{getLoggerData()}}{Returns a list of all logged data. If the
#'   algorithm is retrained, then the list contains for each training one
#'   element.}
#' \item{\code{getEstimatedParameter()}}{Returns a list with the estimated
#'   parameter for base-learner which was selected at least once.}
#' \item{\code{getParameterAtIteration(k)}}{Calculates the parameter at
#'   iteration \code{k}.}
#' \item{\code{getParameterMatrix()}}{Calculates a matrix where row \code{i}
#'   includes the parameter at iteration \code{i}. There are as many rows
#'   as iterations performed.}
#' \item{\code{isTrained()}}{This function returns just a boolean value which
#'   indicates if the initial training was already done.}
#' \item{\code{predict(newdata)}}{Prediction on new data organized within a
#'   list of source data objects. It is important that the names of the source
#'   data objects match the ones used to define the factories.}
#' \item{\code{predictAtIteration(newdata, k)}}{Prediction on new data by using
#'   another iteration \code{k}.}
#' \item{\code{setToIteration(k)}}{Set the whole model to another iteration
#'   \code{k}. After calling this function all other elements such as the
#'   parameters or the prediction are calculated corresponding to \code{k}.}
#' \item{\code{summarizeCompboost()}}{Summarize the \code{Compboost} object.}
#' }
#' @examples
#'
#' # Some data:
#' df = mtcars
#' df$mpg.cat = ifelse(df$mpg > 20, 1, -1)
#'
#' # # Create new variable to check the polynomial base-learner with degree 2:
#' # df$hp2 = df[["hp"]]^2
#'
#' # Data for the baselearner are matrices:
#' X.hp = as.matrix(df[["hp"]])
#' X.wt = as.matrix(df[["wt"]])
#'
#' # Target variable:
#' y = df[["mpg.cat"]]
#'
#' data.source.hp = InMemoryData$new(X.hp, "hp")
#' data.source.wt = InMemoryData$new(X.wt, "wt")
#'
#' data.target.hp1 = InMemoryData$new()
#' data.target.hp2 = InMemoryData$new()
#' data.target.wt1 = InMemoryData$new()
#' data.target.wt2 = InMemoryData$new()
#'
#' # List for oob logging:
#' oob.data = list(data.source.hp, data.source.wt)
#'
#' # List to test prediction on newdata:
#' test.data = oob.data
#'
#' # Factories:
#' linear.factory.hp = BaselearnerPolynomial$new(data.source.hp, data.target.hp1, 1, TRUE)
#' linear.factory.wt = BaselearnerPolynomial$new(data.source.wt, data.target.wt1, 1, TRUE)
#' quadratic.factory.hp = BaselearnerPolynomial$new(data.source.hp, data.target.hp2, 2, TRUE)
#' spline.factory.wt = BaselearnerPSpline$new(data.source.wt, data.target.wt2, 3, 10, 2, 2)
#'
#' # Create new factory list:
#' factory.list = BlearnerFactoryList$new()
#'
#' # Register factories:
#' factory.list$registerFactory(linear.factory.hp)
#' factory.list$registerFactory(linear.factory.wt)
#' factory.list$registerFactory(quadratic.factory.hp)
#' factory.list$registerFactory(spline.factory.wt)
#'
#' # Define loss:
#' loss.bin = LossBinomial$new()
#'
#' # Define optimizer:
#' optimizer = OptimizerCoordinateDescent$new()
#'
#' ## Logger
#'
#' # Define logger. We want just the iterations as stopper but also track the
#' # time, inbag risk and oob risk:
#' log.iterations  = LoggerIteration$new(TRUE, 500)
#' log.time        = LoggerTime$new(FALSE, 500, "microseconds")
#' log.inbag       = LoggerInbagRisk$new(FALSE, loss.bin, 0.05)
#' log.oob         = LoggerOobRisk$new(FALSE, loss.bin, 0.05, oob.data, y)
#'
#' # Define new logger list:
#' logger.list = LoggerList$new()
#'
#' # Register the logger:
#' logger.list$registerLogger(" iteration.logger", log.iterations)
#' logger.list$registerLogger("time.logger", log.time)
#' logger.list$registerLogger("inbag.binomial", log.inbag)
#' logger.list$registerLogger("oob.binomial", log.oob)
#'
#' # Run compboost:
#' # --------------
#'
#' # Initialize object:
#' cboost = Compboost_internal$new(
#'   response      = y,
#'   learning_rate = 0.05,
#'   stop_if_all_stopper_fulfilled = FALSE,
#'   factory_list = factory.list,
#'   loss         = loss.bin,
#'   logger_list  = logger.list,
#'   optimizer    = optimizer
#' )
#'
#' # Train the model (we want to print the trace):
#' cboost$train(trace = 50)
#' cboost
#'
#' # Get estimated parameter:
#' cboost$getEstimatedParameter()
#'
#' # Get trace of selected base-learner:
#' cboost$getSelectedBaselearner()
#'
#' # Set to iteration 200:
#' cboost$setToIteration(200)
#'
#' # Get new parameter values:
#' cboost$getEstimatedParameter()
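#'
#' # Predict on new data organized as a list of source data objects; the
#' # names must match those used to define the factories (here the training
#' # sources are reused):
#' cboost$predict(test.data)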
#'
#' @export Compboost_internal
NULL
