R/LearnerSurvPenalized.R

#' @title Survival L1 and L2 Penalized Regression Learner
#'
#' @name mlr_learners_surv.penalized
#'
#' @description
#' A [mlr3proba::LearnerSurv] implementing penalized from package
#'   \CRANpkg{penalized}.
#' Calls [penalized::penalized()].
#'
#' @details
#' The `penalized` and `unpenalized` arguments in the learner are implemented slightly
#' differently than in [penalized::penalized()]. Here, there is no parameter for `penalized` but
#' instead it is assumed that every variable is penalized unless stated in the `unpenalized`
#' parameter, see examples.
#'
#' @templateVar id surv.penalized
#' @template section_dictionary_learner
#'
#' @references
#' Goeman JJ (2009). “L1Penalized Estimation in the Cox Proportional Hazards Model.”
#' Biometrical Journal
#' doi: 10.1002/bimj.200900028.
#'
#' @template seealso_learner
#' @template example
#' @export
LearnerSurvPenalized = R6Class("LearnerSurvPenalized",
  inherit = LearnerSurv,

  public = list(
    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
    initialize = function() {
      ps = ParamSet$new(
        params = list(
          ParamUty$new(id = "unpenalized", tags = c("train", "predict")),
          ParamUty$new(id = "lambda1", default = 0, tags = "train"),
          ParamUty$new(id = "lambda2", default = 0, tags = "train"),
          ParamLgl$new(id = "positive", default = FALSE, tags = "train"),
          ParamLgl$new(id = "fusedl", default = FALSE, tags = "train"),
          ParamDbl$new(id = "startbeta", tags = "train"),
          ParamDbl$new(id = "startgamma", tags = "train"),
          ParamInt$new(id = "steps", lower = 1L, default = 1L, tags = "train"),
          ParamDbl$new(id = "epsilon", default = 1.0e-10, lower = 0, upper = 1, tags = "train"),
          ParamInt$new(id = "maxiter", lower = 1, tags = "train"),
          ParamLgl$new(id = "standardize", default = FALSE, tags = "train"),
          ParamLgl$new(id = "trace", default = TRUE, tags = "train")
        )
      )

      super$initialize(
        # see the mlr3book for a description: https://mlr3book.mlr-org.com/extending-mlr3.html
        id = "surv.penalized",
        packages = "penalized",
        feature_types = c("integer", "numeric", "factor", "logical"),
        predict_types = c("distr", "crank"),
        param_set = ps,
        # the help file name is the one used as @name in the roxygen2 block
        man = "mlr3learners.penalized::mlr_learners_surv.penalized"
      )
    }
  ),

  private = list(
    .train = function(task) {

      # Checks missing data early to prevent crashing, which is not caught earlier by task/train

      if (any(task$missings() > 0)) {
        stop("Missing data is not supported by ", self$id)
      }

      # Changes the structure of the penalized and unpenalized parameters to be more user friendly.
      # Now the user supplies the column names as a vector and these are added to the formula as
      # required.
      pars = self$param_set$get_values(tags = "train")
      if (length(pars$unpenalized) == 0) {
        penalized = formulate(rhs = task$feature_names)
      } else {
        penalized = formulate(rhs = task$feature_names[task$feature_names %nin% pars$unpenalized])
        pars$unpenalized = formulate(rhs = pars$unpenalized)
      }

      mlr3misc::with_package("penalized", {
        mlr3misc::invoke(penalized::penalized,
          response = task$truth(), penalized = penalized,
          data = task$data(cols = task$feature_names), model = "cox", .args = pars)
      })
    },

    .predict = function(task) {
      # Again the penalized and unpenalized covariates are automatically converted to the
      # correct formula

      pars = self$param_set$get_values(tags = "predict")
      if (length(pars$unpenalized) == 0) {
        penalized = formulate(rhs = task$feature_names)
      } else {
        penalized = formulate(rhs = task$feature_names[task$feature_names %nin% pars$unpenalized])
        pars$unpenalized = formulate(rhs = pars$unpenalized)
      }

      surv = mlr3misc::with_package("penalized", {
        mlr3misc::invoke(penalized::predict, self$model,
          penalized = penalized,
          data = task$data(cols = task$feature_names),
          .args = pars)
      })

      # define WeightedDiscrete distr6 object from predicted survival function
      x = rep(list(list(x = surv@time, cdf = 0)), task$nrow)
      for (i in 1:task$nrow) {
        x[[i]]$cdf = 1 - surv@curves[i, ]
      }

      distr = distr6::VectorDistribution$new(
        distribution = "WeightedDiscrete", params = x,
        decorators = c("CoreStatistics", "ExoticStatistics"))

      crank = as.numeric(sapply(x, function(y) sum(y$time * c(y$cdf[1], diff(y$cdf)))))

      mlr3proba::PredictionSurv$new(task = task, distr = distr, crank = crank)
    }
  )
)
mlr3learners/mlr3learners.penalized documentation built on Aug. 4, 2020, 5:19 a.m.