Examples

knitr::opts_chunk$set(cache = TRUE)

First we load some relevant packages

set.seed(329730)
suppressPackageStartupMessages({
 library(aorsf)
 library(survival)
 library(ranger)
 library(riskRegression)
})

Accelerated linear combinations

The accelerated ORSF ensemble is the default because it has a nice balance of computational speed and prediction accuracy. It runs a single iteration of Newton Raphson scoring on the Cox partial likelihood function to find linear combinations of predictors.

fit_accel <- orsf(pbc_orsf, 
                  control = orsf_control_survival(),
                  formula = Surv(time, status) ~ . - id,
                  tree_seeds = 329)

Linear combinations with Cox regression

Setting inputs in orsf_control_survival to scale the X matrix and repeat iterations until convergence allows you to run Cox regression in each non-terminal node of each survival tree, using the regression coefficients to create linear combinations of predictors:

control_cph <- orsf_control_survival(method = 'glm', 
                                     scale_x = TRUE, 
                                     max_iter = 20)

fit_cph <- orsf(pbc_orsf, 
                control = control_cph,
                formula = Surv(time, status) ~ . - id,
                tree_seeds = 329)

Linear combinations with penalized cox regression

Setting method == 'net' runs penalized Cox regression in each non-terminal node of each survival tree. This can be really helpful if you want to do feature selection within the node, but it is a lot slower than the 'glm' option.

# select 3 predictors out of 5 to be used in
# each linear combination of predictors.

control_net <- orsf_control_survival(method = 'net', target_df = 3)

fit_net <- orsf(pbc_orsf, 
                control = control_net,
                formula = Surv(time, status) ~ . - id,
                tree_seeds = 329)

Linear combinations with your own function

In addition to the built-in methods, customized functions can be used to identify linear combinations of predictors. We'll demonstrate a few here.

f_rando <- function(x_node, y_node, w_node){
 matrix(runif(ncol(x_node)), ncol=1) 
}
f_pca <- function(x_node, y_node, w_node) { 

 # estimate two principal components.
 pca <- stats::prcomp(x_node, rank. = 2)
 # use the second principal component to split the node
 pca$rotation[, 1L, drop = FALSE]

}
f_rlt <- function(x_node, y_node, w_node){

 colnames(y_node) <- c('time', 'status')
 colnames(x_node) <- paste("x", seq(ncol(x_node)), sep = '')

 data <- as.data.frame(cbind(y_node, x_node))

 if(nrow(data) <= 10) 
  return(matrix(runif(ncol(x_node)), ncol = 1))

 fit <- ranger::ranger(data = data, 
                       formula = Surv(time, status) ~ ., 
                       num.trees = 25, 
                       num.threads = 1,
                       min.node.size = 5,
                       importance = 'permutation')

 out <- sort(fit$variable.importance, decreasing = TRUE)

 # "mute" the least two important variables
 n_vars <- length(out)
 if(n_vars > 4){
   out[c(n_vars, n_vars-1)] <- 0
 }

 # ensure out has same variable order as input
 out <- out[colnames(x_node)]

 # protect yourself
 out[is.na(out)] <- 0

 matrix(out, ncol = 1)

}

We can plug these functions into orsf_control_custom(), and then pass the result into orsf():

fit_rando <- orsf(pbc_orsf,
                  Surv(time, status) ~ . - id,
                  control = orsf_control_survival(method = f_rando),
                  tree_seeds = 329)

fit_pca <- orsf(pbc_orsf,
                Surv(time, status) ~ . - id,
                control = orsf_control_survival(method = f_pca),
                tree_seeds = 329)

fit_rlt <- orsf(pbc_orsf, time + status ~ . - id, 
                control = orsf_control_survival(method = f_rlt),
                tree_seeds = 329)

So which fit seems to work best in this example? Let's find out by evaluating the out-of-bag survival predictions.

risk_preds <- list(
 accel = fit_accel$pred_oobag,
 cph   = fit_cph$pred_oobag,
 net   = fit_net$pred_oobag,
 rando = fit_rando$pred_oobag,
 pca   = fit_pca$pred_oobag,
 rlt   = fit_rlt$pred_oobag
)

sc <- Score(object = risk_preds, 
            formula = Surv(time, status) ~ 1, 
            data = pbc_orsf, 
            summary = 'IPA',
            times = fit_accel$pred_horizon)

The AUC values, from highest to lowest:

sc$AUC$score[order(-AUC)]

And the indices of prediction accuracy:

sc$Brier$score[order(-IPA), .(model, times, IPA)]

From inspection,



bcjaeger/aorsf documentation built on April 3, 2025, 4:16 p.m.