R/calibration-indices.R

Defines functions cal_ind cal_ICI cal_E50 cal_E90 cal_Emax confs

Documented in cal_E50 cal_E90 cal_Emax cal_ICI cal_ind

#' @title Calibration Indices
#'
#' @description Get summary calibration indices for binary outcomes and class probabilities. Optionally, get bootstrap-based confidence intervals using validation data.
#'
#' @param data Data frame.
#' @param y Binary outcome.
#' @param p Probabilites.
#' @param val Data frame.
#' @param conf.int Logical. Default is FALSE.
#' @param level Confidence level. Default is 0.95.
#' @param na.rm Logical. Default is FALSE.
#' @param span Passed on to \code{\link[stats]{loess}}.
#' @param p.calibrate Calibrated predictions.
#' @param ... Arguments passed on to \code{\link[rsample]{bootstraps}}.
#'
#' @return A tibble with as many rows and calibration indices. The number of columns can vary based on whether confident intervals are requested.
#'
#' @name cal
#'
#' @examples
#' dd <- data.frame(y = rbinom(50, 1, 0.5), p = runif(100, 0, 1))
#' cal_ind(dd, y, p)
#'
#' @export

cal_ind <- function(data, y, p, val = NULL, conf.int = FALSE, level = 0.95,
                    na.rm = FALSE, span = 0.75, ...) {
  assertthat::assert_that(is.data.frame(data))

  formula <- as.formula(paste(substitute(y), '~', substitute(p)))
  p <- rlang::enquo(p)

  p <- dplyr::pull(data, {{p}})

  p.calibrate <- smoother(data, formula, span)

  dd <- tibble::tribble(
    ~index, ~estimate,
    "ICI", cal_ICI(p, p.calibrate),
    "E50", cal_E50(p, p.calibrate),
    "E90", cal_E90(p, p.calibrate),
    "Emax", cal_Emax(p, p.calibrate),
  )

  if (conf.int & !is.null(val)) {
    confs <- confs(data = data, val = val, y = y, p = p,
                   level = level, na.rm = na.rm, ...)
    dd <- dplyr::left_join(dd, confs, by = 'index')
  } else if (conf.int & is.null(val)) {
    warning('You must provide a validation data set to compute bootstrap confidence intervals!')
  }

  dd$index <- factor(dd$index, levels = sort(unique(dd$index)))
  dd
}

#' @rdname cal
#' @export
cal_ICI <- function(p, p.calibrate) {
  ici <- mean(abs(p.calibrate - p))
  ici
}

#' @rdname cal
#' @export
cal_E50 <- function(p, p.calibrate) {
  e50 <- median(abs(p.calibrate - p))
  e50
}

#' @rdname cal
#' @export
cal_E90 <- function(p, p.calibrate) {
  e90 <- quantile(abs(p.calibrate - p), probs = 0.9)
  names(e90) <- NULL
  e90
}

#' @rdname cal
#' @export
cal_Emax <- function(p, p.calibrate) {
  emax <- max(abs(p.calibrate - p))
  emax
}

confs <- function(data, y, p, val, level = 0.95, na.rm = FALSE, ...){
  y <- rlang::enquo(y)
  p <- rlang::enquo(p)

  ci.low  = (1 - ci) / 2
  ci.high = (1 + ci) / 2

  assertthat::assert_that(is.data.frame(train))
  assertthat::assert_that(is.data.frame(test))

  train_boot <- rsample::bootstraps(train, ...)
  test_boot <- rsample::bootstraps(test, ...)

  dd <- dplyr::bind_cols(train_boot, test_boot)
  dd <- dplyr::rename(dd, splits_train = splits, splits_test = splits1,
                      id_train = id, id_test = id1)

  dd <- dplyr::mutate(dd,
                      data_train = purrr::map(splits_train,
                                                  rsample::training),
                      data_test = purrr::map(splits_test, rsample::training)
                      )
  dd <- dplyr::mutate(dd, p_meth = purrr::map(data_train, ~loess(y ~ p, .)))
  dd <- dplyr::mutate(dd, test_pr = purrr::map2(p_meth, data_test,
                                         ~predict(.x, newdata = .y)))
  dd <- tidyr::hoist(dd, data_test,
                     test_y = rlang::as_label(y),
                     test_p = rlang::as_label(p),
                     .remove = FALSE)
  dd <- dplyr::mutate(dd,
               ICI = purrr::map2_dbl(test_pr, test_p, ~mean(abs(.x - .y),
                                                            na.rm = na.rm)),
               E50 = purrr::map2_dbl(test_pr, test_p, ~median(abs(.x - .y),
                                                              na.rm = na.rm)),
               E90 = purrr::map2_dbl(test_pr, test_p, ~quantile(abs(.x - .y),
                                                            probs = 0.9,
                                                            na.rm = na.rm)),
               Emax = purrr::map2_dbl(test_pr, test_p, ~max(abs(.x - .y),
                                                            na.rm = na.rm))
               )

  dd <- dplyr::select(dd, ICI, E50, E90, Emax)
  dd <- tidyr::pivot_longer(dd, 1:4, names_to = 'index', values_to = 'boot_values')
  dd <- tidyr::chop(dd, boot_values)
  dd <- dplyr::mutate(dd,
                      conf.low = purrr::map_dbl(boot_values, ~quantile(.,
                                                              probs = ci.low)),
                      conf.high = purrr::map_dbl(boot_values, ~quantile(.,
                                                              probs = ci.high))
                      )
  dd
}

smoother <- function(data, formula, span) {
  loess.calibrate <- loess(formula, data = data, span = span)
  p.calibrate <- predict(loess.calibrate, newdata = data)
  p.calibrate
}
mattwarkentin/precogs documentation built on Jan. 12, 2020, 6:24 p.m.