Nothing
#' @title Plot a ROC curve from cross-validated AUC from CV.SuperLearner
#' @description
#' Based on initial code by Alan Hubbard.
#'
#' @param x CV.SuperLearner object
#' @param y Outcome vector if not already included in the SL object.
#' @param learner Which learner to plot - defaults to highest AUC learner.
#' @param title Title to use in the plot.
#' @param subtitle Optional plot subtitle.
#' @param digits Digits to use when rounding AUC and CI for plot.
#' @param show_plot If TRUE print the ggplot, otherwise just return in list.
#' @param ... Any additional unused arguments, due to the auc_table generic.
#'
#' @return List with the AUC plus standard error and confidence interval.
#'
#' @examples
#'
#' library(SuperLearner)
#' library(ck37r)
#'
#' data(Boston, package = "MASS")
#'
#' set.seed(1, "L'Ecuyer-CMRG")
#'
#' # Subset rows to speed up example computation.
#' row_subset = sample(nrow(Boston), 100)
#'
#' Boston = Boston[row_subset, ]
#' X = subset(Boston, select = -chas)
#'
#' cvsl = CV.SuperLearner(Boston$chas, X[, 1:2], family = binomial(),
#' cvControl = list(V = 2, stratifyCV = TRUE),
#' SL.library = c("SL.mean", "SL.glm"))
#' plot_roc(cvsl)
#'
#' @references
#'
#' LeDell, E., Petersen, M., & van der Laan, M. (2015). Computationally
#' efficient confidence intervals for cross-validated area under the ROC curve
#' estimates. Electronic journal of statistics, 9(1), 1583.
#'
#' Polley EC, van der Laan MJ (2010) Super Learner in Prediction. U.C. Berkeley
#' Division of Biostatistics Working Paper Series. Paper 226.
#' http://biostats.bepress.com/ucbbiostat/paper266/
#'
#' Sing, T., Sander, O., Beerenwinkel, N., & Lengauer, T. (2005). ROCR:
#' visualizing classifier performance in R. Bioinformatics, 21(20), 3940-3941.
#'
#' van der Laan, M. J., Polley, E. C. and Hubbard, A. E. (2007) Super Learner.
#' Statistical Applications of Genetics and Molecular Biology, 6, article 25.
#' http://www.degruyter.com/view/j/sagmb.2007.6.issue-1/sagmb.2007.6.1.1309/sagmb.2007.6.1.1309.xml
#'
#' @seealso \code{\link{cvsl_auc}} \code{\link{sl_plot_roc}}
#' \code{\link[cvAUC]{ci.cvAUC}}
#'
#' @method plot_roc CV.SuperLearner
#'
#' @export
plot_roc.CV.SuperLearner =
function(x, y = x$Y,
learner = NULL,
title = "CV-SuperLearner cross-validated ROC",
subtitle = NULL,
digits = 4,
show_plot = TRUE,
...) {
# Change to a better object name.
cvsl = x
auc_table = ck37r::auc_table(cvsl, y)
cvsl_table = summary(cvsl)$Table
# Choose the learner with the highest AUC.
# TODO: support passing in strings "SuperLearner" or "DiscreteSL" rather than
# an index.
if (is.null(learner)) {
# Note: multiple learners may have the best AUC.
best_auc = max(auc_table$auc, na.rm = TRUE)
# First extract the learner name, then later get the index.
learner_name = rownames(auc_table)[auc_table$auc == best_auc]
if (length(learner_name) > 1) {
# TODO: sort by SE first, so that we take the learner with the lower SE.
# (Although generally any AUC ties will have the same SE.)
# If there are multiple winners and SuperLearner is in there, use SL.
# Otherwise use DiscreteSL, otherwise use the first learner name.
if ("SuperLearner" %in% learner_name) {
learner_name = "SuperLearner"
} else if ("DiscreteSL" %in% learner_name) {
learner_name = "DiscreteSL"
} else {
learner_name = learner_name[1]
}
}
# Extract the original learner index based on learner name in the AUC table.
learner = which(cvsl_table$Algorithm == learner_name)
} else {
learner_name = as.character(cvsl_table$Algorithm[learner])
}
if (learner_name == "SuperLearner") {
preds = cvsl$SL.predict
} else if (learner_name == "DiscreteSL") {
preds = cvsl$discreteSL.predict
} else {
# library.predict contains only the raw learners, not SL or DiscreteSL.
preds = cvsl$library.predict[, learner - 2]
}
# Set learner name as the subtitle if no subtitle was specified.
if (is.null(subtitle)) {
subtitle = paste("Best learner:", learner_name)
}
auc = auc_table[learner_name, "auc"]
ci_upper = auc_table[learner_name, "ci_upper"]
ci_lower = auc_table[learner_name, "ci_lower"]
txt = paste0("AUC = ",
sprintf(paste0("%0.", digits, "f"), round(auc, digits)),
"\n95% CI = ", sprintf("%0.3f", round(ci_lower, digits)),
" - ", sprintf("%0.3f", round(ci_upper, digits)))
pred = ROCR::prediction(preds, y)
perf1 = ROCR::performance(pred, "sens", "spec")
# ggplot version.
p = ggplot2::qplot(1 - methods::slot(perf1, "x.values")[[1]],
methods::slot(perf1, "y.values")[[1]],
geom = "line",
main = title) +
ggplot2::labs(subtitle = subtitle,
x = "False positive % (1 - specificity)",
y = "True positive % (sensitivity)") +
ggplot2::theme_bw() +
ggplot2::annotate("text", x = 0.63, y = 0.15, label = txt, size = 6) +
ggplot2::annotate("segment", x = 0, xend = 1, y = 0, yend = 1)
if (show_plot) {
print(p)
}
# Return AUC, AUC CI, and ggplot object.
results = list(auc = auc,
auc_se = auc_table[learner_name, "se"],
auc_ci = c(ci_lower, ci_upper),
plot = p)
# Return results invisibly.
invisible(results)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.