tests/testthat/setup.R

set.seed(329)

library(survival)

# flchain ----

data("flchain", package = 'survival')

flc <- flchain
flc$chapter <- NULL
flc <- na.omit(flc)
flc <- flc[flc$futime > 0, ]

names(flc)[names(flc) == 'futime'] <- 'time'
names(flc)[names(flc) == 'death'] <- 'status'

# make sorted x and y matrices for testing internal cpp functions
flc_mats <- prep_test_matrices(flc, outcomes = c("time", "status"))

# lung ----

lcd <- survival::lung
lcd$inst <- NULL
lcd <- na.omit(lcd)

# make sorted x and y matrices for testing internal cpp functions
lcd_mats <- prep_test_matrices(lcd, outcomes = c("time", "status"))

# pbc ----

pbc <- pbc_orsf

pbc$id <- NULL

pbc_status_12 <- pbc
pbc_status_12$status <- pbc_status_12$status + 1

pbc_scale <- pbc_noise <- pbc

vars <- c('bili', 'chol', 'albumin', 'copper', 'alk.phos', 'ast')

for(i in vars){
 pbc_noise[[i]] <- add_noise(pbc_noise[[i]])
 pbc_scale[[i]] <- change_scale(pbc_scale[[i]])
}

# make sorted x and y matrices for testing internal cpp functions
pbc_mats <- prep_test_matrices(pbc, outcomes = c("time", "status"))

# penguins ----

penguins <- penguins_orsf

penguins_binary <- penguins

penguins_binary$species <- factor(
 penguins_binary$species,
 levels = c("Adelie", "Chinstrap", "Gentoo"),
 labels = c("Adelie", "not_Adelie", "not_Adelie")
)

penguins_scale <- penguins_noise <- penguins

vars <- c("bill_length_mm", "bill_depth_mm",
          "flipper_length_mm", "body_mass_g")

for(i in vars){
 penguins_noise[[i]] <- add_noise(penguins_noise[[i]])
 penguins_scale[[i]] <- change_scale(penguins_scale[[i]])
}

# make sorted x and y matrices for testing internal cpp functions
penguins_mats <- prep_test_matrices(penguins, outcomes = c("species"))

# data lists ----

data_list_pbc <- list(pbc_standard = pbc,
                      pbc_status_12 = pbc_status_12,
                      pbc_scaled = pbc_scale,
                      pbc_noised = pbc_noise)

data_list_penguins <- list(penguins_standard = penguins,
                           penguins_binary = penguins_binary,
                           penguins_scaled = penguins_scale,
                           penguins_noised = penguins_noise)


# matrix lists ----

mat_list_surv <- list(pbc = pbc_mats,
                      flc = flc_mats,
                      lcd = lcd_mats)

# standard fits ----

# standards used to check validity of other fits

seeds_standard <- 329
n_tree_test <- 5

controls_surv <- list(
 fast = orsf_control_survival(method = 'glm', scale_x = FALSE, max_iter = 1),
 net = orsf_control_survival(method = 'net'),
 custom = orsf_control_survival(method = f_pca)
)

fit_standard_pbc <- lapply(
 controls_surv,
 function(cntrl){
  orsf(pbc,
       formula = time + status ~ .,
       n_tree = n_tree_test,
       control = cntrl,
       tree_seed = seeds_standard)
 }
)

controls_clsf <- list(
 fast = orsf_control_classification(method = 'glm', scale_x = FALSE, max_iter = 1),
 net = orsf_control_classification(method = 'net'),
 custom = orsf_control_classification(method = f_pca)
)

fit_standard_penguin_species <- lapply(
 controls_clsf,
 function(cntrl){
  orsf(penguins,
       formula = species ~ .,
       n_tree = n_tree_test,
       control = cntrl,
       tree_seed = seeds_standard)
 }
)

controls_regr <- list(
 fast = orsf_control_regression(method = 'glm', scale_x = FALSE, max_iter = 1),
 net = orsf_control_regression(method = 'net'),
 custom = orsf_control_regression(method = f_pca)
)

fit_standard_penguin_bills <- lapply(
 controls_regr,
 function(cntrl){
  orsf(penguins,
       formula = bill_length_mm ~ .,
       n_tree = n_tree_test,
       control = cntrl,
       tree_seed = seeds_standard)
 }
)

# training and testing data ----

pred_types_surv <- c(risk = 'risk',
                     surv = 'surv',
                     chf = 'chf',
                     mort = 'mort',
                     leaf = 'leaf',
                     time = 'time')

pred_types_clsf <- c(prob = 'prob',
                     class = 'class',
                     leaf = 'leaf')

pred_types_regr <- c(mean = 'mean',
                     leaf = 'leaf')

pbc_train_rows <- sample(nrow(pbc_orsf), size = 170)
pbc_train <- pbc[pbc_train_rows, ]
pbc_test <- pbc[-pbc_train_rows, ]

penguins_train_rows <- sample(nrow(penguins_orsf), size = 180)
penguins_train <- penguins[penguins_train_rows, ]
penguins_test <- penguins[-penguins_train_rows, ]

penguins_binary_train <- penguins_binary[penguins_train_rows, ]
penguins_binary_test <- penguins_binary[-penguins_train_rows, ]
bcjaeger/aorsf documentation built on April 3, 2025, 4:16 p.m.