3. Prognostic Workflow

# Chunk defaults: fold each chunk's source and printed output into a single
# block, and prefix every output line with "#>".
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

library(E2E)

# Start two worker processes and register them as the parallel backend.
# The count is deliberately held at 2 so the vignette stays within the core
# limit that R CMD check imposes on CI/CD platforms.
cl <- parallel::makeCluster(2)
doParallel::registerDoParallel(cl)

Prognostic Models (Survival Analysis)

This track is dedicated to survival prediction tasks.

1. Initialization

First, initialize the prognostic modeling system.

# One-time setup for the prognostic track; run it before the *_pro calls below.
# NOTE(review): presumably registers the built-in survival models so they can
# be requested by name -- confirm against the package documentation.
initialize_modeling_system_pro()

2. Training Single Models with models_pro

The models_pro function trains one or more standard survival models. For this demonstration, we'll run only two of them: lasso_pro and rsf_pro.

# Fit only two of the available prognostic models on the training data
results_all_pro <- models_pro(
  train_pro,
  model = c("lasso_pro", "rsf_pro")
)

# Show the summary for the Random Survival Forest fit
print_model_summary_pro("rsf_pro", results_all_pro$rsf_pro)

3. Ensemble Modeling

Bagging (bagging_pro)

Builds a Bagging ensemble for survival models.

# Bagging ensemble that uses lasso as its base survival model.
# Only 5 estimators are requested so the vignette executes quickly; the seed
# is fixed at 123.
bagging_lasso_pro_results <- bagging_pro(
  train_pro,
  base_model_name = "lasso_pro",
  n_estimators = 5,
  seed = 123
)
print_model_summary_pro("Bagging (LASSO)", bagging_lasso_pro_results)

Stacking (stacking_pro)

Builds a Stacking ensemble for survival models.

# Stacking ensemble: the single models trained earlier (results_all_pro) are
# combined, with lasso acting as the meta-model.
stacking_lasso_pro_results <- stacking_pro(
  data = train_pro,
  results_all_models = results_all_pro,
  meta_model_name = "lasso_pro"
)
print_model_summary_pro("Stacking (LASSO)", stacking_lasso_pro_results)

4. Applying Models to New Data (apply_pro)

Generate prognostic scores for a new dataset.

# Score the test set with the fitted stacking ensemble.
# NOTE(review): time_unit is "day" here but "days" in the figure_pro() calls
# later in this vignette -- confirm both spellings against the function docs.
pro_pred_new <- apply_pro(
  trained_model_object = stacking_lasso_pro_results$model_object,
  new_data = test_pro,
  time_unit = "day"
)

# Assess the new prognostic scores at years 1, 3 and 5
eval_pro_new <- evaluate_predictions_pro(
  prediction_df = pro_pred_new,
  years_to_evaluate = c(1, 3, 5)
)
print(eval_pro_new)

5. Visualization (figure_pro)

Generate Kaplan-Meier (KM) and time-dependent ROC (tdROC) curves.

# Kaplan-Meier curve for the stacking ensemble
p4 <- figure_pro(
  type = "km",
  data = stacking_lasso_pro_results,
  time_unit = "days"
)
# print(p4)  # uncomment to display the plot

# Time-dependent ROC curve for the same ensemble
p5 <- figure_pro(
  type = "tdroc",
  data = stacking_lasso_pro_results,
  time_unit = "days"
)
# print(p5)  # uncomment to display the plot

# Shut down the worker processes started at the top of the vignette
parallel::stopCluster(cl)


Try the E2E package in your browser

Any scripts or data that you put into this service are public.

E2E documentation built on Aug. 27, 2025, 1:09 a.m.