#' Calculate the optimism-corrected performance of a model using bootstrap validation.
#'
#' Approach originally from Harrell Jr, FE. Regression modeling strategies, 2nd Edition. Method as described in Steyerberg EW. Clinical Prediction Models, 2nd Edition. pp 106--107
#'
#' @export
#' @import data.table
#'
#' @param mwf A function that represents the entire modeling workflow. Should take arguments ``form` and `data` fit a model then return it to make predictions.
#' @param form A formula describing the model.
#' @param data A data frame including all variables in `form`.
#' @param metric A performance metric as function that takes (`preds`, `obs`) to calculate. Default = `sbrier`
#' @param reps The number of bootstrap replicates. Default = 250.
#' @param get_probs A predict function to extract class probabilities. Default = `predict`
#' @param event The outcome class for an event, especially for factors. Default = `1`
#' @examples
#' glm_mwf <- \(form, data) { glm(form, data = data, family = binomial) }
#' mymtcars <- mtcars
#' mymtcars$mpg20 <- as.numeric(mymtcars$mpg > 20)
#' ocp(glm_mwf, form = mpg20 ~ cyl + disp + hp, data = mymtcars,
#' get_probs = \(m, d) predict(m, newdata = d, type = 'response'))
#'
ocp <- function(mwf, form, data, metric = sbrier, reps = 250,
get_probs = predict, event = '1') {
# Identify vars
.y <- all.vars(form)[1]
dd <- as.data.table(data)
# Step 1: Determine performance from model on entire dataset
orig_model <- mwf(form, dd)
orig_preds <- get_probs(orig_model, dd)
perf_orig <- metric(orig_preds, dd[, get(.y)] == event)
# Step 6: Repeat this many times...
optimism_list <- sapply(1:reps, \(r) {
# Step 2: draw bootstrap sample
idx <- sample(nrow(dd), replace = TRUE)
# Step 3: Construct a model and determine apparent performance
boot_model <- mwf(form, dd[idx])
boot_preds <- get_probs(boot_model, dd[idx])
perf_apparent <- metric(boot_preds, dd[idx, get(.y)] == event)
# Step 4: Apply model to original sample and determine test performance
boot_orig_preds <- get_probs(boot_model, dd)
perf_test <- metric(boot_orig_preds, dd[, get(.y)] == event)
# Step 5: Calculate optimism
optimism <- perf_apparent - perf_test
return(optimism)
})
# Step 7: Substract mean optimism from apparent perf in step 1
# This yields the optimism-corrected performance estimated
perf_opt_corrected <- perf_orig - mean(optimism_list)
return(perf_opt_corrected)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.