#' Calculate precision/recall/enrichment as a function of threshold
#' Based on:
#' https://blog.revolutionanalytics.com/2016/08/roc-curves-in-two-lines-of-code.html
#'
#' See also https://github.com/WinVector/sigr
#'
#' @param modelPredictions numeric predictions (not empty, zero assumed to be "all")
#' @param yValues logical truth (not empty, same length as model predictions)
#' @return data frame of precision, enrichment, and recall as a function of threshold
#'
#' @noRd
#'
calcPRT <- function(modelPredictions, yValues) {
prevalence = mean(yValues)
ord <- order(modelPredictions, decreasing=TRUE)
yValues <- yValues[ord]
modelPredictions <- modelPredictions[ord]
precision <- cumsum(yValues)/seq_len(length(yValues)) # Precision
recall <- cumsum(yValues)/max(1,sum(yValues)) # TPR/Recall
notY <- 1 - yValues # falses
fpr <- cumsum(notY)/max(1, sum(notY)) # (pred T AND actually F)/(all F)
specificity <- 1 - fpr
data.frame(threshold = modelPredictions,
precision = precision,
enrichment = precision/prevalence,
recall = recall,
sensitivity = recall,
false_positive_rate = fpr,
specificity = specificity)
}
#' Plot Precision-Recall or Enrichment-Recall as a function of threshold.
#'
#' Plot classifier performance metrics as a function of threshold.
#'
#'@details
#' For a classifier, the precision is what fraction of predicted positives
#' are true positives; the recall is what fraction of true positives the
#' classifier finds, and the enrichment is the ratio of classifier precision to
#' the average rate of positives. Plotting precision-recall or enrichment-recall
#' as a function of classifier score helps identify a score threshold that achieves
#' an acceptable tradeoff between precision and recall, or enrichment and recall.
#'
#' In addition to precision/recall, \code{PRTPlot} can plot a number of other metrics:
#'
#' \itemize{
#' \item{precision: fraction of predicted positives that are true positives}
#' \item{recall: fraction of true positives that were predicted to be true}
#' \item{enrichment: ratio of classifier precision to prevalence of positive class}
#' \item{sensitivity: the same as recall (also known as the true positive rate)}
#' \item{specificity: fraction of true negatives to all negatives (or 1 - false_positive_rate)}
#' \item{false_positive_rate: fraction of negatives predicted to be true over all negatives}
#' }
#'
#' For example, plotting sensitivity/false_positive_rate as functions of threshold will "unroll" an ROC Plot.
#'
#' Plots are in a single column, in the order specified by \code{plotvars}.
#'
#'
#' @param frame data frame to get values from
#' @param predVar name of the column of predicted scores
#' @param truthVar name of the column of actual outcomes in frame
#' @param truthTarget value we consider to be positive
#' @param title title to place on plot
#' @param ... no unnamed argument, added to force named binding of later arguments.
#' @param plotvars variables to plot, must be at least one of the measures listed below. Defaults to c("precision", "recall")
#' @param thresholdrange range of thresholds to plot.
#' @param linecolor line color for the plot
#'
#' @seealso \code{\link{ThresholdPlot}}, \code{\link{ROCPlot}}
#'
#' @examples
#'
#' if (requireNamespace('data.table', quietly = TRUE)) {
#' # don't multi-thread during CRAN checks
#' data.table::setDTthreads(1)
#' }
#'
#' df <- iris
#' df$isVersicolor <- with(df, Species=='versicolor')
#' model = glm(isVersicolor ~ Petal.Length + Petal.Width + Sepal.Length + Sepal.Width,
#' data=df, family=binomial)
#' df$pred = predict(model, newdata=df, type="response")
#'
#' WVPlots::PRTPlot(df, "pred", "isVersicolor", TRUE, title="Example Precision-Recall threshold plot")
#'
#' if (FALSE) {
#' WVPlots::PRTPlot(df, "pred", "isVersicolor", TRUE,
#' plotvars = c("sensitivity", "specificity", "false_positive_rate"),
#' title="Sensitivity/specificity/FPR as functions of threshold")
#' }
#'
#' @export
PRTPlot <- function(frame, predVar, truthVar, truthTarget, title,
...,
plotvars = c("precision", "recall"),
thresholdrange = c(-Inf, Inf),
linecolor = 'black'
) {
frame <- check_frame_args_list(...,
frame = frame,
name_var_list = list(predVar = predVar, truthVar = truthVar),
title = title,
funname = "WVPlots::PRPlot")
outcol <- frame[[truthVar]]==truthTarget
if(length(unique(outcol))!=2) {
return(NULL)
}
predcol <- frame[[predVar]]
prtFrame <- calcPRT(predcol,outcol)
# mark not unbound
threshold <- value <- NULL
thresholds = with(prtFrame, thresholdrange[1] <= threshold & threshold <= thresholdrange[2])
toPlot = prtFrame[thresholds, c("threshold", plotvars)]
prtlong = cdata::unpivot_to_blocks(toPlot,
nameForNewKeyColumn = "measure",
nameForNewValueColumn = "value",
columnsToTakeFrom = plotvars)
# set the factor order to be the same as given in plotvars
prtlong$measure = factor(prtlong$measure, levels=plotvars)
ggplot2::ggplot(prtlong, ggplot2::aes(x=threshold, y=value)) +
ggplot2::geom_line(color=linecolor) +
ggplot2::facet_wrap(~measure,
scale = "free_y",
ncol=1) +
ggplot2::ggtitle(title)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.