#' @title Accelerated Oblique Random Survival Forest Learner
#' @name mlr_learners_surv.aorsf
#' @author bcjaeger
#'
#' @description
#' Accelerated oblique random survival forest.
#' Calls [aorsf::orsf()] from \CRANpkg{aorsf}.
#' Note that although the learner has the property `"missings"` and can in principle deal with missing values,
#' the behaviour has to be configured using the parameter `na_action`.
#' @template learner
#' @templateVar id surv.aorsf
#'
#' @section Initial parameter values:
#' - `mtry`:
#'   - This hyperparameter can alternatively be set via the added hyperparameter `mtry_ratio`
#'     as `mtry = max(ceiling(mtry_ratio * n_features), 1)`.
#'     Note that `mtry` and `mtry_ratio` are mutually exclusive.
#'
#' @references
#' `r format_bib("jaeger_2019")`
#'
#' `r format_bib("jaeger_2022")`
#'
#'
#' @template seealso_learner
#' @template example
#' @export
LearnerSurvAorsf = R6Class("LearnerSurvAorsf",
  inherit = mlr3proba::LearnerSurv,
  public = list(
    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
    initialize = function() {
      # Hyperparameter space. Parameters tagged "train" are collected in
      # `.train()`, those tagged "predict" in `.predict()`. The `control_*`
      # parameters are not passed on to `aorsf::orsf()` directly; `.train()`
      # uses them to build the `control` argument.
      param_set = ps(
        n_tree = p_int(default = 500L, lower = 1L, tags = "train"),
        n_split = p_int(default = 5L, lower = 1L, tags = "train"),
        n_retry = p_int(default = 3L, lower = 0L, tags = "train"),
        n_thread = p_int(default = 0L, lower = 0L, tags = c("train", "predict")),
        pred_aggregate = p_lgl(default = TRUE, tags = "predict"),
        pred_simplify = p_lgl(default = FALSE, tags = "predict"),
        mtry = p_int(default = NULL, lower = 1L, special_vals = list(NULL), tags = "train"),
        mtry_ratio = p_dbl(lower = 0, upper = 1, tags = "train"),
        sample_with_replacement = p_lgl(default = TRUE, tags = "train"),
        sample_fraction = p_dbl(lower = 0, upper = 1, default = .632, tags = "train"),
        control_type = p_fct(levels = c("fast", "cph", "net"), default = "fast", tags = "train"),
        split_rule = p_fct(levels = c("logrank", "cstat"), default = "logrank", tags = "train"),
        control_fast_do_scale = p_lgl(default = FALSE, tags = "train"),
        control_fast_ties = p_fct(levels = c("efron", "breslow"), default = "efron", tags = "train"),
        control_cph_ties = p_fct(levels = c("efron", "breslow"), default = "efron", tags = "train"),
        control_cph_eps = p_dbl(default = 1e-9, lower = 0, tags = "train"),
        control_cph_iter_max = p_int(default = 20L, lower = 1L, tags = "train"),
        control_net_alpha = p_dbl(default = 0.5, tags = "train"),
        control_net_df_target = p_int(default = NULL, lower = 1L,
          special_vals = list(NULL),
          tags = "train"),
        leaf_min_events = p_int(default = 1L, lower = 1L, tags = "train"),
        leaf_min_obs = p_int(default = 5L, lower = 1L, tags = "train"),
        split_min_events = p_int(default = 5L, lower = 1L, tags = "train"),
        split_min_obs = p_int(default = 10L, lower = 1L, tags = "train"),
        split_min_stat = p_dbl(default = NULL, special_vals = list(NULL), lower = 0, tags = "train"),
        oobag_pred_type = p_fct(levels = c("none", "surv", "risk", "chf"), default = "surv", tags = "train"),
        importance = p_fct(levels = c("none", "anova", "negate", "permute"), default = "anova", tags = "train"),
        importance_max_pvalue = p_dbl(default = 0.01, lower = 0.0001, upper = .9999, tags = "train"),
        oobag_pred_horizon = p_dbl(default = NULL, special_vals = list(NULL), tags = "train", lower = 0),
        oobag_eval_every = p_int(default = NULL, special_vals = list(NULL), lower = 1L, tags = "train"),
        attach_data = p_lgl(default = TRUE, tags = "train"),
        verbose_progress = p_lgl(default = FALSE, tags = "train"),
        na_action = p_fct(levels = c("fail", "omit", "impute_meanmode"), default = "fail", tags = "train")
      )

      super$initialize(
        id = "surv.aorsf",
        packages = c("mlr3extralearners", "aorsf", "pracma"),
        feature_types = c("integer", "numeric", "factor", "ordered"),
        predict_types = c("crank", "distr"),
        param_set = param_set,
        properties = c("oob_error", "importance", "missings"),
        man = "mlr3extralearners::mlr_learners_surv.aorsf",
        label = "Oblique Random Forest"
      )
    },

    #' @description
    #' OOB concordance error extracted from the model slot
    #' `eval_oobag$stat_values`. The value in the last row (first column)
    #' of that slot is subtracted from 1.
    #' Raises an error if no model is stored.
    #' @return `numeric()`.
    oob_error = function() {
      # Same guard as `importance()`: without it an untrained learner
      # silently yields `numeric(0)`.
      if (is.null(self$model)) {
        stopf("No model stored")
      }
      stat_values = self$model$eval_oobag$stat_values
      # NOTE(review): presumably this slot is only filled when
      # `oobag_pred_type` is not "none" -- confirm against aorsf.
      1 - stat_values[nrow(stat_values), 1L]
    },

    #' @description
    #' The importance scores are extracted from the model via
    #' [aorsf::orsf_vi()] (with `group_factors = TRUE`) and sorted in
    #' decreasing order.
    #' Raises an error if no model is stored.
    #' @return Named `numeric()`.
    importance = function() {
      if (is.null(self$model)) {
        stopf("No model stored")
      }
      scores = aorsf::orsf_vi(self$model, group_factors = TRUE)
      sort(scores, decreasing = TRUE)
    }
  ),

  private = list(
    # Fits the forest: gathers the "train" parameters, resolves `mtry_ratio`,
    # builds the aorsf control object from the `control_*` parameters and
    # calls `aorsf::orsf()` on the task data. Returns the fitted model.
    .train = function(task) {
      pv = self$param_set$get_values(tags = "train")
      pv = convert_ratio(pv, "mtry", "mtry_ratio",
        length(task$feature_names))

      # Value the user set for `slot_name`, or the declared parameter
      # default when it was left unset.
      value_or_default = function(params, slot_name) {
        out = params[[slot_name]]
        if (is.null(out)) {
          out = self$param_set$default[[slot_name]]
        }
        out
      }

      # default value for oobag_eval_every is ntree, but putting
      # default = ntree in p_int() above would be problematic, so:
      if (is.null(pv$oobag_eval_every)) {
        pv$oobag_eval_every = value_or_default(pv, "n_tree")
      }

      control = switch(
        value_or_default(pv, "control_type"),
        "fast" = aorsf::orsf_control_survival(
          method = "glm",
          scale_x = value_or_default(pv, "control_fast_do_scale"),
          ties = value_or_default(pv, "control_fast_ties"),
          max_iter = 1
        ),
        "cph" = aorsf::orsf_control_survival(
          method = "glm",
          scale_x = TRUE, # should always scale with max_iter > 1
          ties = value_or_default(pv, "control_cph_ties"),
          epsilon = value_or_default(pv, "control_cph_eps"),
          max_iter = value_or_default(pv, "control_cph_iter_max")
        ),
        "net" = aorsf::orsf_control_survival(
          method = "net",
          net_mix = value_or_default(pv, "control_net_alpha"),
          target_df = value_or_default(pv, "control_net_df_target")
        )
      )

      # these parameters are used to organize the control arguments
      # above but are not used directly by aorsf::orsf(), so:
      control_only = c(
        "control_type",
        "control_fast_do_scale",
        "control_fast_ties",
        "control_cph_ties",
        "control_cph_eps",
        "control_cph_iter_max",
        "control_net_alpha",
        "control_net_df_target"
      )
      pv = remove_named(pv, control_only)

      invoke(
        aorsf::orsf,
        data = task$data(),
        formula = task$formula(),
        # `task$weights` is a table of row ids and weights (see the mlr3 Task
        # documentation); only the numeric weight column is handed to orsf.
        # This stays NULL when the task has no weights.
        weights = task$weights$weight,
        control = control,
        no_fit = FALSE,
        .args = pv
      )
    },

    # Predicts survival probabilities for `task` at the unique event times
    # found in the data stored on the model, and hands them to
    # `mlr3proba::.surv_return()`.
    .predict = function(task) {
      pv = self$param_set$get_values(tags = "predict")

      # Training targets are read back from the fitted model.
      # NOTE(review): this relies on `self$model$data` being present,
      # presumably only the case when `attach_data` is TRUE -- confirm.
      target_names = task$target_names
      time = self$model$data[[target_names[1]]]
      status = self$model$data[[target_names[2]]]

      # prediction horizons: sorted unique times with status == 1
      utime = sort(unique(time[status == 1]), decreasing = FALSE)

      surv = invoke(predict,
        self$model,
        new_data = ordered_features(task, self),
        pred_horizon = utime,
        pred_type = "surv",
        .args = pv
      )

      mlr3proba::.surv_return(times = utime, surv = surv)
    }
  )
)
# Add a generator for this learner to `.extralrns_dict` under the key
# "surv.aorsf"; each call of the generator constructs a new LearnerSurvAorsf.
.extralrns_dict$add(
  "surv.aorsf",
  function() {
    LearnerSurvAorsf$new()
  }
)
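# NOTE: the two lines below are web-page boilerplate left over from extraction and are
# not part of the learner; they are kept as comments so the file parses as R.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.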