R/g_auc_ci.R

Defines functions auc_ci.aucs auc_ci.default auc_ci

Documented in auc_ci auc_ci.aucs

#' Calculate CIs of ROC and precision-recall AUCs
#'
#' The \code{auc_ci} function takes an \code{S3} object generated by
#'   \code{\link{evalmod}} and calculates CIs of AUCs when multiple data sets
#'   are specified.
#'
#' @param curves An \code{S3} object generated by \code{\link{evalmod}}.
#'   The \code{auc_ci} function accepts the following S3 objects.
#'
#'   \tabular{lll}{
#'     \strong{\code{S3} object}
#'     \tab \strong{# of models}
#'     \tab \strong{# of test datasets} \cr
#'
#'     smcurves \tab single   \tab multiple \cr
#'     mmcurves \tab multiple \tab multiple
#'   }
#'
#'    See the \strong{Value} section of \code{\link{evalmod}} for more details.
#'
#' @param alpha A numeric value of the significant level (default: 0.05)
#'
#' @param dtype A string to specify the distribution used for CI calculation.
#'   \tabular{ll}{
#'     \strong{dtype} \tab \strong{distribution} \cr
#'     normal (default) \tab Normal distribution \cr
#'     z \tab Normal distribution \cr
#'     t \tab t-distribution
#'   }
#'
#' @return The \code{auc_ci} function returns a dataframe of AUC CIs.
#'
#' @seealso \code{\link{evalmod}} for generating \code{S3} objects with
#'   performance evaluation measures. \code{\link{auc}} for retrieving a dataset
#'   of AUCs.
#'
#' @examples
#'
#' ##################################################
#' ### Single model & multiple test datasets
#' ###
#'
#' ## Create sample datasets with 100 positives and 100 negatives
#' samps <- create_sim_samples(4, 100, 100, "good_er")
#' mdat <- mmdata(samps[["scores"]], samps[["labels"]],
#'   modnames = samps[["modnames"]],
#'   dsids = samps[["dsids"]]
#' )
#'
#' ## Generate an smcurve object that contains ROC and Precision-Recall curves
#' smcurves <- evalmod(mdat)
#'
#' ## Calculate CI of AUCs
#' sm_auc_cis <- auc_ci(smcurves)
#'
#' ## Shows the result
#' sm_auc_cis
#'
#' ##################################################
#' ### Multiple models & multiple test datasets
#' ###
#'
#' ## Create sample datasets with 100 positives and 100 negatives
#' samps <- create_sim_samples(4, 100, 100, "all")
#' mdat <- mmdata(samps[["scores"]], samps[["labels"]],
#'   modnames = samps[["modnames"]],
#'   dsids = samps[["dsids"]]
#' )
#'
#' ## Generate an mscurve object that contains ROC and Precision-Recall curves
#' mmcurves <- evalmod(mdat)
#'
#' ## Calculate CI of AUCs
#' mm_auc_ci <- auc_ci(mmcurves)
#'
#' ## Shows the result
#' mm_auc_ci
#'
#' @export
auc_ci <- function(curves, alpha = NULL, dtype = NULL) {
  UseMethod("auc_ci", curves)
}

#' @export
auc_ci.default <- function(curves, alpha = NULL, dtype = NULL) {
  stop("An object of unknown class is specified")
}

#
# Calculate an AUC CI
#
#' @rdname auc_ci
#' @export
auc_ci.aucs <- function(curves, alpha = 0.05, dtype = "normal") {
  # Validation
  .validate(curves)
  assertthat::assert_that(attr(curves, "dataset_type") == "multiple",
    msg = "'curves' must contain multiple datasets."
  )
  assertthat::assert_that(
    assertthat::is.number(alpha),
    alpha >= 0 && alpha <= 1
  )
  assertthat::assert_that(assertthat::is.string(dtype))

  # Check type of distribution
  dtype_tab <- c("normal", "z", "t")
  dype_match <- pmatch(tolower(dtype), dtype_tab)
  if (!is.na(dype_match)) {
    dtype <- dtype_tab[dype_match]
  }
  err_msg <- paste0(
    "'dtype' must be one of ",
    paste(dtype_tab, collapse = ", ")
  )
  assertthat::assert_that(dtype %in% dtype_tab, msg = err_msg)

  # Get AUC scores
  aucs <- attr(curves, "aucs")

  # Get unique model names, data set IDs and curve types
  uniq_modnames <- attr(curves, "uniq_modnames")
  uniq_curvetype <- unique(aucs$curvetypes)

  ci_df <- NULL
  for (modname in uniq_modnames) {
    auc_mod <- aucs[aucs$modnames == modname, ]
    for (curvetype in uniq_curvetype) {
      aucs_subset <- auc_mod[auc_mod$curvetypes == curvetype, ]

      # Prepare for CI calculation
      aucs_mean <- mean(aucs_subset$aucs)
      aucs_n <- length(aucs_subset$aucs)
      if (aucs_n < 2) {
        ci_df <- rbind(
          ci_df,
          data.frame(
            modnames = modname,
            curvetypes = curvetype,
            mean = aucs_mean,
            error = 0,
            lower_bound = aucs_mean,
            upper_bound = aucs_mean,
            n = aucs_n
          )
        )
        next
      }
      aucs_sd <- sd(aucs_subset$aucs)

      # Calculate CI
      if (dtype == "normal" || dtype == "z") {
        aucs_q <- qnorm(1 - (alpha / 2))
      } else if (dtype == "t") {
        aucs_q <- qt(1 - (alpha / 2), df = aucs_n - 1)
      }

      aucs_error <- aucs_q * aucs_sd / sqrt(aucs_n)
      acus_lower <- max(aucs_mean - aucs_error, 0.0)
      acus_upper <- min(aucs_mean + aucs_error, 1.0)

      ci_df <- rbind(
        ci_df,
        data.frame(
          modnames = modname,
          curvetypes = curvetype,
          mean = aucs_mean,
          error = aucs_error,
          lower_bound = acus_lower,
          upper_bound = acus_upper,
          n = aucs_n
        )
      )
    }
  }

  ci_df
}

Try the precrec package in your browser

Any scripts or data that you put into this service are public.

precrec documentation built on Oct. 12, 2023, 1:06 a.m.