R/visualizations.R

Defines functions slice_plot roc_plot

Documented in roc_plot slice_plot

#' Create a 'slice plot' of two roc curves with area between them (the ABROCA region) shaded.
#' @param majority_roc list with attributes 'x' and 'y' defining points of roc curve
#' @param minority_roc list with attributes 'x' and 'y' defining points of roc curve
#' @param majority_group_name optional label for majority group (character)
#' @param minority_group_name optional label for minority group (character)
#' @param fout path to output file
#' @return No return value; file is saved to disk.
#' @seealso \code{\link{compute_abroca}}
#' @references
#' Josh Gardner, Christopher Brooks, and Ryan Baker. (2019). Evaluating
#' the Fairness of Predictive Student Models Through Slicing Analysis.
#' Proceedings of the 9th International Conference on Learning
#' Analytics and Knowledge (LAK19).
#' @export
#' @examples  
#' # This function is not currently intended for calling directly, but
#' # this is possible. For example usage, see \code{\link{compute_abroca}}.
slice_plot <- function(majority_roc, minority_roc, majority_group_name = NULL,
                       minority_group_name = NULL, fout = NULL) {
    # check that number of points are the same
    stopifnot(length(majority_roc$x) == length(majority_roc$y),
              length(majority_roc$x) == length(minority_roc$x),
              length(majority_roc$x) == length(minority_roc$y))
    if (!is.null(fout)) {
        grDevices::png(fout, width = 720, height = 720)
    }
    # set some graph parameters
    majority_color <- "red"
    minority_color <- "blue"
    majority_group_label <- "Majority Group"
    minority_group_label <- "Minority Group"
    plot_title <- "ROC Slice Plot"
    if (!is.null(majority_group_name)) {
        majority_group_label <- glue::glue(
            "{majority_group_label} ({majority_group_name})")
    }
    if (!is.null(minority_group_name)) {
        minority_group_label <- glue::glue(
            "{minority_group_label} ({minority_group_name})")
    }
    # add labels, if given
    graphics::plot(majority_roc$x, majority_roc$y, col = majority_color,
                   type = "l", lwd = 1.5, main = plot_title,
                   xlab = "False Positive Rate", ylab = "True Positive Rate")
    # draw polygon; reverse ordering used to close polygon by ending near start
    # point
    graphics::polygon(x = c(majority_roc$x, rev(minority_roc$x)),
                      y = c(majority_roc$y, rev(minority_roc$y)),
                      col = "grey", border = NA)
    graphics::lines(majority_roc$x, majority_roc$y, col = majority_color,
                    type = "l", lwd = 1.5)
    graphics::lines(minority_roc$x, minority_roc$y, col = minority_color,
                    type = "l", lwd = 1.5)
    graphics::legend("bottomright",
                     legend = c(majority_group_label, minority_group_label),
                     col = c(majority_color, minority_color), lty = 1)
    if (!is.null(fout)) {
        grDevices::dev.off()
    }
}

#' Plot the Receiver Operating Characteristic curve for preds and labs.
#' @param preds vector of predictions
#' @param labs vector of true class labels
#' @param plot_type if set to "majority" or "minority", coloring is set to match
#' plots generated by slice_plot (blue for majority class, red for minority);
#' if NULL then neutral coloring is used
#' @param show_diag boolean indicator for whether a diagonal line indicating the
#' random chance ROC curve should be plotted
#' @param fout path to an output file (png)
#' @return No return value; file is saved to disk.
#' @export
#' @examples  
#' # First, we load data, train a model, and generate predictions to evaluate.
#' data("recidivism")
#' recidivism$returned = as.factor(recidivism$Return.Status != "Not Returned")
#' in_train = caret::createDataPartition(recidivism$returned, 
#'     p = 0.75, list = FALSE)
#' traindata = recidivism[in_train,c("Release.Year", "County.of.Indictment", 
#'     "Gender", "Age.at.Release", "returned")]
#' testdata = recidivism[-in_train,c("Release.Year", "County.of.Indictment", 
#'     "Gender", "Age.at.Release", "returned")]
#' lr = glm(returned ~ ., data=traindata, family="binomial")
#' testdata$pred = predict(lr, testdata, type = "response")
#' 
#' # Now, we apply roc_plot() to the labels and predictions 
#' # (note that this writes a file to fout):
#' roc_plot(testdata$pred, testdata$returned, plot_type = "majority", 
#'     show_diag=TRUE, fout="roc.png")
roc_plot <- function(preds, labs, plot_type = NULL, show_diag = FALSE,
                     fout = NULL){
    if (!is.null(fout)) {
        grDevices::png(fout, width = 720, height = 720)
    }
    if (plot_type == "majority"){
        plot_color <- "red"
    } else if (plot_type == "minority"){
        plot_color <- "blue"
    } else{
        plot_color <- "black"
    }
    roc <- compute_roc(preds, labs)
    auc <- round(compute_auc(preds, labs), 3)
    coords <- interpolate_roc_fun(roc)
    graphics::plot(coords[["x"]], coords[["y"]], type = "l", lwd = 1.5,
         xlab = "False Positive Rate", ylab = "True Positive Rate",
         col = plot_color,
         main = glue::glue(
             "Receiver Operating Characteristic Curve\nAUC = {auc}"))
    graphics::polygon(x = c(coords[["x"]], 1.0, 0.0),
                      y = c(coords[["y"]], 0.0, 0.0),
                      col = "grey", border = NA)
    graphics::lines(coords[["x"]], coords[["y"]], type = "l", lwd = 1.5,
                    col = plot_color)
    if (show_diag == TRUE){
        graphics::abline(a = 0, b = 1, col = "black", lwd = 0.5, lty = "dashed")
    }
    if (!is.null(fout)) {
        grDevices::dev.off()
    }

}
jpgard/abroca documentation built on May 25, 2019, 11:31 p.m.