#' @title Calibration Indices
#'
#' @description Get summary calibration indices for binary outcomes and class probabilities. Optionally, get bootstrap-based confidence intervals using validation data.
#'
#' @param data Data frame.
#' @param y Binary outcome.
#' @param p Probabilites.
#' @param val Data frame.
#' @param conf.int Logical. Default is FALSE.
#' @param level Confidence level. Default is 0.95.
#' @param na.rm Logical. Default is FALSE.
#' @param span Passed on to \code{\link[stats]{loess}}.
#' @param p.calibrate Calibrated predictions.
#' @param ... Arguments passed on to \code{\link[rsample]{bootstraps}}.
#'
#' @return A tibble with as many rows and calibration indices. The number of columns can vary based on whether confident intervals are requested.
#'
#' @name cal
#'
#' @examples
#' dd <- data.frame(y = rbinom(50, 1, 0.5), p = runif(100, 0, 1))
#' cal_ind(dd, y, p)
#'
#' @export
cal_ind <- function(data, y, p, val = NULL, conf.int = FALSE, level = 0.95,
na.rm = FALSE, span = 0.75, ...) {
assertthat::assert_that(is.data.frame(data))
formula <- as.formula(paste(substitute(y), '~', substitute(p)))
p <- rlang::enquo(p)
p <- dplyr::pull(data, {{p}})
p.calibrate <- smoother(data, formula, span)
dd <- tibble::tribble(
~index, ~estimate,
"ICI", cal_ICI(p, p.calibrate),
"E50", cal_E50(p, p.calibrate),
"E90", cal_E90(p, p.calibrate),
"Emax", cal_Emax(p, p.calibrate),
)
if (conf.int & !is.null(val)) {
confs <- confs(data = data, val = val, y = y, p = p,
level = level, na.rm = na.rm, ...)
dd <- dplyr::left_join(dd, confs, by = 'index')
} else if (conf.int & is.null(val)) {
warning('You must provide a validation data set to compute bootstrap confidence intervals!')
}
dd$index <- factor(dd$index, levels = sort(unique(dd$index)))
dd
}
#' @rdname cal
#' @export
cal_ICI <- function(p, p.calibrate) {
ici <- mean(abs(p.calibrate - p))
ici
}
#' @rdname cal
#' @export
cal_E50 <- function(p, p.calibrate) {
e50 <- median(abs(p.calibrate - p))
e50
}
#' @rdname cal
#' @export
cal_E90 <- function(p, p.calibrate) {
e90 <- quantile(abs(p.calibrate - p), probs = 0.9)
names(e90) <- NULL
e90
}
#' @rdname cal
#' @export
cal_Emax <- function(p, p.calibrate) {
emax <- max(abs(p.calibrate - p))
emax
}
confs <- function(data, y, p, val, level = 0.95, na.rm = FALSE, ...){
y <- rlang::enquo(y)
p <- rlang::enquo(p)
ci.low = (1 - ci) / 2
ci.high = (1 + ci) / 2
assertthat::assert_that(is.data.frame(train))
assertthat::assert_that(is.data.frame(test))
train_boot <- rsample::bootstraps(train, ...)
test_boot <- rsample::bootstraps(test, ...)
dd <- dplyr::bind_cols(train_boot, test_boot)
dd <- dplyr::rename(dd, splits_train = splits, splits_test = splits1,
id_train = id, id_test = id1)
dd <- dplyr::mutate(dd,
data_train = purrr::map(splits_train,
rsample::training),
data_test = purrr::map(splits_test, rsample::training)
)
dd <- dplyr::mutate(dd, p_meth = purrr::map(data_train, ~loess(y ~ p, .)))
dd <- dplyr::mutate(dd, test_pr = purrr::map2(p_meth, data_test,
~predict(.x, newdata = .y)))
dd <- tidyr::hoist(dd, data_test,
test_y = rlang::as_label(y),
test_p = rlang::as_label(p),
.remove = FALSE)
dd <- dplyr::mutate(dd,
ICI = purrr::map2_dbl(test_pr, test_p, ~mean(abs(.x - .y),
na.rm = na.rm)),
E50 = purrr::map2_dbl(test_pr, test_p, ~median(abs(.x - .y),
na.rm = na.rm)),
E90 = purrr::map2_dbl(test_pr, test_p, ~quantile(abs(.x - .y),
probs = 0.9,
na.rm = na.rm)),
Emax = purrr::map2_dbl(test_pr, test_p, ~max(abs(.x - .y),
na.rm = na.rm))
)
dd <- dplyr::select(dd, ICI, E50, E90, Emax)
dd <- tidyr::pivot_longer(dd, 1:4, names_to = 'index', values_to = 'boot_values')
dd <- tidyr::chop(dd, boot_values)
dd <- dplyr::mutate(dd,
conf.low = purrr::map_dbl(boot_values, ~quantile(.,
probs = ci.low)),
conf.high = purrr::map_dbl(boot_values, ~quantile(.,
probs = ci.high))
)
dd
}
smoother <- function(data, formula, span) {
loess.calibrate <- loess(formula, data = data, span = span)
p.calibrate <- predict(loess.calibrate, newdata = data)
p.calibrate
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.