Nothing
#' Calculate CIs of ROC and precision-recall AUCs
#'
#' The \code{auc_ci} function takes an \code{S3} object generated by
#' \code{\link{evalmod}} and calculates CIs of AUCs when multiple data sets
#' are specified.
#'
#' @param curves An \code{S3} object generated by \code{\link{evalmod}}.
#' The \code{auc_ci} function accepts the following S3 objects.
#'
#' \tabular{lll}{
#' \strong{\code{S3} object}
#' \tab \strong{# of models}
#' \tab \strong{# of test datasets} \cr
#'
#' smcurves \tab single \tab multiple \cr
#' mmcurves \tab multiple \tab multiple
#' }
#'
#' See the \strong{Value} section of \code{\link{evalmod}} for more details.
#'
#' @param alpha A numeric value of the significant level (default: 0.05)
#'
#' @param dtype A string to specify the distribution used for CI calculation.
#' \tabular{ll}{
#' \strong{dtype} \tab \strong{distribution} \cr
#' normal (default) \tab Normal distribution \cr
#' z \tab Normal distribution \cr
#' t \tab t-distribution
#' }
#'
#' @return The \code{auc_ci} function returns a dataframe of AUC CIs.
#'
#' @seealso \code{\link{evalmod}} for generating \code{S3} objects with
#' performance evaluation measures. \code{\link{auc}} for retrieving a dataset
#' of AUCs.
#'
#' @examples
#'
#' ##################################################
#' ### Single model & multiple test datasets
#' ###
#'
#' ## Create sample datasets with 100 positives and 100 negatives
#' samps <- create_sim_samples(4, 100, 100, "good_er")
#' mdat <- mmdata(samps[["scores"]], samps[["labels"]],
#' modnames = samps[["modnames"]],
#' dsids = samps[["dsids"]]
#' )
#'
#' ## Generate an smcurve object that contains ROC and Precision-Recall curves
#' smcurves <- evalmod(mdat)
#'
#' ## Calculate CI of AUCs
#' sm_auc_cis <- auc_ci(smcurves)
#'
#' ## Shows the result
#' sm_auc_cis
#'
#' ##################################################
#' ### Multiple models & multiple test datasets
#' ###
#'
#' ## Create sample datasets with 100 positives and 100 negatives
#' samps <- create_sim_samples(4, 100, 100, "all")
#' mdat <- mmdata(samps[["scores"]], samps[["labels"]],
#' modnames = samps[["modnames"]],
#' dsids = samps[["dsids"]]
#' )
#'
#' ## Generate an mscurve object that contains ROC and Precision-Recall curves
#' mmcurves <- evalmod(mdat)
#'
#' ## Calculate CI of AUCs
#' mm_auc_ci <- auc_ci(mmcurves)
#'
#' ## Shows the result
#' mm_auc_ci
#'
#' @export
auc_ci <- function(curves, alpha = NULL, dtype = NULL) {
UseMethod("auc_ci", curves)
}
#' @export
auc_ci.default <- function(curves, alpha = NULL, dtype = NULL) {
stop("An object of unknown class is specified")
}
#
# Calculate an AUC CI
#
#' @rdname auc_ci
#' @export
auc_ci.aucs <- function(curves, alpha = 0.05, dtype = "normal") {
# Validation
.validate(curves)
assertthat::assert_that(attr(curves, "dataset_type") == "multiple",
msg = "'curves' must contain multiple datasets."
)
assertthat::assert_that(
assertthat::is.number(alpha),
alpha >= 0 && alpha <= 1
)
assertthat::assert_that(assertthat::is.string(dtype))
# Check type of distribution
dtype_tab <- c("normal", "z", "t")
dype_match <- pmatch(tolower(dtype), dtype_tab)
if (!is.na(dype_match)) {
dtype <- dtype_tab[dype_match]
}
err_msg <- paste0(
"'dtype' must be one of ",
paste(dtype_tab, collapse = ", ")
)
assertthat::assert_that(dtype %in% dtype_tab, msg = err_msg)
# Get AUC scores
aucs <- attr(curves, "aucs")
# Get unique model names, data set IDs and curve types
uniq_modnames <- attr(curves, "uniq_modnames")
uniq_curvetype <- unique(aucs$curvetypes)
ci_df <- NULL
for (modname in uniq_modnames) {
auc_mod <- aucs[aucs$modnames == modname, ]
for (curvetype in uniq_curvetype) {
aucs_subset <- auc_mod[auc_mod$curvetypes == curvetype, ]
# Prepare for CI calculation
aucs_mean <- mean(aucs_subset$aucs)
aucs_n <- length(aucs_subset$aucs)
if (aucs_n < 2) {
ci_df <- rbind(
ci_df,
data.frame(
modnames = modname,
curvetypes = curvetype,
mean = aucs_mean,
error = 0,
lower_bound = aucs_mean,
upper_bound = aucs_mean,
n = aucs_n
)
)
next
}
aucs_sd <- sd(aucs_subset$aucs)
# Calculate CI
if (dtype == "normal" || dtype == "z") {
aucs_q <- qnorm(1 - (alpha / 2))
} else if (dtype == "t") {
aucs_q <- qt(1 - (alpha / 2), df = aucs_n - 1)
}
aucs_error <- aucs_q * aucs_sd / sqrt(aucs_n)
acus_lower <- max(aucs_mean - aucs_error, 0.0)
acus_upper <- min(aucs_mean + aucs_error, 1.0)
ci_df <- rbind(
ci_df,
data.frame(
modnames = modname,
curvetypes = curvetype,
mean = aucs_mean,
error = aucs_error,
lower_bound = acus_lower,
upper_bound = acus_upper,
n = aucs_n
)
)
}
}
ci_df
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.