inst/doc/oobag.R

## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.height = 5, 
  fig.width = 7
)

## ----setup--------------------------------------------------------------------

library(aorsf)
library(survival)
library(SurvMetrics)


## -----------------------------------------------------------------------------

fit <- orsf(data = pbc_orsf, 
            formula = Surv(time, status) ~ . - id,
            oobag_pred_type = 'surv',
            n_tree = 5,
            oobag_pred_horizon = 2000)

hist(fit$pred_oobag, 
     main = 'Ensemble out-of-bag survival predictions at t=3,500')


## -----------------------------------------------------------------------------

# what function is used to evaluate out-of-bag predictions?
fit$eval_oobag$stat_type

# what is the output from this function?
fit$eval_oobag$stat_values


## -----------------------------------------------------------------------------

fit <- orsf(data = pbc_orsf,
            formula = Surv(time, status) ~ . - id,
            n_tree = 20,
            tree_seeds = 2,
            oobag_pred_type = 'surv',
            oobag_pred_horizon = 2000,
            oobag_eval_every = 1)

plot(
 x = seq(1, 20, by = 1),
 y = fit$eval_oobag$stat_values, 
 main = 'Out-of-bag C-statistic computed after each new tree is grown.',
 xlab = 'Number of trees grown',
 ylab = fit$eval_oobag$stat_type
)

lines(x=seq(1, 20), y = fit$eval_oobag$stat_values)


## -----------------------------------------------------------------------------

oobag_fun_brier <- function(y_mat, w_vec, s_vec){

 # output is numeric vector of length 1
 as.numeric(
  SurvMetrics::Brier(
   object = Surv(time = y_mat[, 1], event = y_mat[, 2]), 
   pre_sp = s_vec,
   # t_star in Brier() should match oob_pred_horizon in orsf()
   t_star = 2000
  )
 )
 
}

## -----------------------------------------------------------------------------

oobag_fun_brier(y_mat = pbc_orsf[,c('time', 'status')],
                s_vec = fit$pred_oobag)


## -----------------------------------------------------------------------------

fit <- orsf(data = pbc_orsf,
            formula = Surv(time, status) ~ . - id,
            n_tree = 20,
            tree_seeds = 2,
            oobag_pred_horizon = 2000,
            oobag_fun = oobag_fun_brier,
            oobag_eval_every = 1)

plot(
 x = seq(1, 20, by = 1),
 y = fit$eval_oobag$stat_values, 
 main = 'Out-of-bag error computed after each new tree is grown.',
 sub = 'For the Brier score, lower values indicate more accurate predictions',
 xlab = 'Number of trees grown',
 ylab = "Brier score"
)

lines(x=seq(1, 20), y = fit$eval_oobag$stat_values)


## -----------------------------------------------------------------------------

# Helper code to make sure your oobag_fun function will work with aorsf

# time and status values
test_time <- seq(from = 1, to = 5, length.out = 100)
test_status <- rep(c(0,1), each = 50)

# y-matrix is presumed to contain time and status (with column names)
y_mat <- cbind(time = test_time, status = test_status)
# s_vec is presumed to be a vector of survival probabilities
s_vec <- seq(0.9, 0.1, length.out = 100)

# see 1 in the checklist above
names(formals(oobag_fun_brier)) == c("y_mat", "w_vec", "s_vec")

test_output <- oobag_fun_brier(y_mat = y_mat, 
                               w_vec = w_vec,
                               s_vec = s_vec)

# test output should be numeric
is.numeric(test_output)
# test_output should be a numeric value of length 1
length(test_output) == 1

Try the aorsf package in your browser

Any scripts or data that you put into this service are public.

aorsf documentation built on Oct. 26, 2023, 5:08 p.m.