e_plot_roc: A function to calculate the ROC curve, determine the optimal threshold, plot the curve, and provide classification statistics

View source: R/e_plot_roc.R

e_plot_roc    R Documentation

A function to calculate the ROC curve, determine the optimal threshold, plot the curve, and provide classification statistics

Description

A function to calculate the ROC curve, determine the optimal threshold, plot the curve, and provide classification statistics

Usage

e_plot_roc(
  labels_true = NULL,
  pred_values_pos = NULL,
  label_neg_pos = NULL,
  sw_plot = TRUE,
  cm_mode = c("sens_spec", "prec_recall", "everything")[1],
  threshold_to_use = NULL,
  sw_confusion_matrix = TRUE,
  pos_conf_mat = c(0, 0.75),
  sw_caption_desc = TRUE,
  sw_class_labels = TRUE,
  sw_thresh_bounds = TRUE,
  sw_val_AUC = TRUE,
  sw_val_BA = TRUE,
  sw_val_Sens = TRUE,
  sw_val_Spec = TRUE,
  sw_val_PPV = TRUE,
  sw_val_NPV = TRUE,
  sw_val_Thresh = TRUE,
  label_AUC = c("AUC", "Area Under Curve")[1],
  label_BA = c("BA", "Balanced Accuracy")[1],
  label_Sens = c("Sens", "Sensitivity")[1],
  label_Spec = c("Spec", "Specificity")[1],
  label_PPV = c("PPV", "Pos Pred Value")[1],
  label_NPV = c("NPV", "Neg Pred Value")[1],
  label_Thresh = c("Thresh", "Pos Threshold")[1]
)

Arguments

labels_true

true labels of the binary observations; these should be the same labels (and not a proxy) used to build the prediction model

pred_values_pos

either predicted labels or numeric values (such as probabilities) associated with the positive ("success") label

label_neg_pos

labels in order c("negative", "positive")

sw_plot

T/F to return a ROC curve ggplot object

cm_mode

statistics mode passed to caret::confusionMatrix(): one of "sens_spec", "prec_recall", or "everything" (see the sketch following this list)

threshold_to_use

a threshold value to use for the plot and the "best" ROC point; NULL to determine it automatically

sw_confusion_matrix

T/F include confusion matrix table inset in plot

pos_conf_mat

c(x, y) position of the confusion-matrix inset within the plot

sw_caption_desc

T/F define statistics in caption

sw_class_labels

T/F to indicate the classification labels in the caption

sw_thresh_bounds

T/F to print the threshold bounds at the ends of the ROC curve

sw_val_AUC

T/F report statistic

sw_val_BA

T/F report statistic

sw_val_Sens

T/F report statistic

sw_val_Spec

T/F report statistic

sw_val_PPV

T/F report statistic

sw_val_NPV

T/F report statistic

sw_val_Thresh

T/F report statistic

label_AUC

label for statistic

label_BA

label for statistic

label_Sens

label for statistic

label_Spec

label for statistic

label_PPV

label for statistic

label_NPV

label for statistic

label_Thresh

label for statistic
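
cm_mode is passed through to caret::confusionMatrix(). A minimal sketch comparing what the three modes report, on hypothetical two-class factors (illustration only, not part of e_plot_roc; assumes the caret package is installed):

cm_truth <- factor(c("neg", "neg", "neg", "pos", "pos", "pos"), levels = c("neg", "pos"))
cm_pred  <- factor(c("neg", "pos", "neg", "pos", "pos", "neg"), levels = c("neg", "pos"))
caret::confusionMatrix(cm_pred, cm_truth, mode = "sens_spec")   # Sensitivity/Specificity block
caret::confusionMatrix(cm_pred, cm_truth, mode = "prec_recall") # Precision/Recall block
caret::confusionMatrix(cm_pred, cm_truth, mode = "everything")  # both sets of statistics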

Value

A list including:
  • roc_curve_best - a one-row tibble of classification statistics for the best Sensitivity and Specificity (the ROC point closest to the upper-left corner of the ROC curve; see the sketch following this list)

  • pred_positive - the pred_values_pos input, returned as numeric 1 or 0

  • confusion_matrix - confusion matrix statistics

  • plot_roc - ROC curve ggplot object

  • roc_curve - ROC curve data
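
The "best" point reported in roc_curve_best is the ROC point closest to the upper-left corner (Sensitivity = 1, Specificity = 1). A minimal base-R sketch of that distance criterion on hypothetical data (illustration only, not the package's internal implementation):

set.seed(42)
sk_labels <- sample(0:1, size = 50, replace = TRUE)  # hypothetical true labels
sk_scores <- runif(n = 50)                           # hypothetical predicted probabilities
sk_thresh <- sort(unique(sk_scores))
sk_sens   <- sapply(sk_thresh, function(th) mean(sk_scores[sk_labels == 1] >= th))
sk_spec   <- sapply(sk_thresh, function(th) mean(sk_scores[sk_labels == 0] <  th))
# distance from each ROC point (1 - Spec, Sens) to the corner (0, 1)
sk_dist   <- sqrt((1 - sk_spec)^2 + (1 - sk_sens)^2)
i_best    <- which.min(sk_dist)
c(threshold = sk_thresh[i_best], Sens = sk_sens[i_best], Spec = sk_spec[i_best])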

Examples

## Categorical prediction-value example (from ?caret::confusionMatrix)
ex_lvs    <- c("normal", "abnormal")
ex_truth  <- factor(rep(ex_lvs, times = c(86, 258)), levels = rev(ex_lvs))
ex_pred   <- factor(c(rep(ex_lvs, times = c(54,  32))
                    , rep(ex_lvs, times = c(27, 231)))
                  , levels = ex_lvs)
out <-
  e_plot_roc(
    labels_true     = ex_truth
  , pred_values_pos = ex_pred
  , label_neg_pos   = ex_lvs
  , sw_plot         = TRUE
  , sw_caption_desc = c(TRUE, FALSE)[2]
  )
out$roc_curve_best |> print(width = Inf)
out$plot_roc
out$confusion_matrix


## Numeric prediction-value example
out <-
  e_plot_roc(
    labels_true     = sample(c("a", "b"), size = 50, replace = TRUE)
  , pred_values_pos = runif(n = 50)
  , label_neg_pos   = c("a", "b")
  , sw_plot         = TRUE
  , sw_caption_desc = c(TRUE, FALSE)[1]
  )
out$roc_curve_best |> print(width = Inf)
out$plot_roc
out$confusion_matrix


## Logistic regression
data(dat_mtcars_e)

dat_mtcars_e <-
  dat_mtcars_e |>
  dplyr::mutate(
    vs_V = ifelse(vs == "V-shaped", 1, 0) # 0-1 binary for logistic regression
  )

# Predict engine type `vs` ("V-shaped" vs "straight") from other features.
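# Note: cbind(successes, failures) is glm()'s two-column binomial response form;
# with one trial per row it is equivalent to the 0/1 response `vs_V ~ ...`.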
fit_glm_vs <-
  glm(
    cbind(vs_V, 1 - vs_V) ~ disp + wt + carb
  , family = binomial
  , data = dat_mtcars_e
  )
cat("Test residual deviance for lack-of-fit (if > 0.10, little-to-no lack-of-fit)\n")
dev_p_val <- 1 - pchisq(fit_glm_vs$deviance, fit_glm_vs$df.residual)
dev_p_val |> print()
car::Anova(fit_glm_vs, type = 3)
#summary(fit_glm_vs)

glm_roc <-
  e_plot_roc(
    labels_true         = dat_mtcars_e$vs_V
  , pred_values_pos     = fit_glm_vs$fitted.values
  , label_neg_pos       = c(0, 1)
  , sw_plot             = TRUE
  , cm_mode             = c("sens_spec", "prec_recall", "everything")[3]
  , threshold_to_use    = NULL
  )
glm_roc$roc_curve_best |> print(width = Inf)
glm_roc$plot_roc
glm_roc$confusion_matrix

# specified threshold
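# a very low threshold classifies nearly every observation as positive,
# pushing Sensitivity toward 1 and Specificity toward 0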
glm_roc <-
  e_plot_roc(
    labels_true         = dat_mtcars_e$vs_V
  , pred_values_pos     = fit_glm_vs$fitted.values
  , label_neg_pos       = c(0, 1)
  , sw_plot             = TRUE
  , cm_mode             = c("sens_spec", "prec_recall", "everything")[3]
  , threshold_to_use    = 0.001
  )
glm_roc$roc_curve_best |> print(width = Inf)
glm_roc$plot_roc
glm_roc$confusion_matrix


# Illustrate labels and caption definitions
glm_roc <-
  e_plot_roc(
    labels_true         = dat_mtcars_e$vs_V
  , pred_values_pos     = fit_glm_vs$fitted.values
  , label_neg_pos       = c(0, 1)
  , sw_plot             = TRUE
  , cm_mode             = c("sens_spec", "prec_recall", "everything")[3]
  , sw_confusion_matrix = TRUE
  , pos_conf_mat        = c(0.9, 0)
  , sw_caption_desc     = TRUE
  , sw_class_labels     = TRUE
  , sw_thresh_bounds    = TRUE
  , sw_val_AUC          = TRUE
  , sw_val_BA           = TRUE
  , sw_val_Sens         = FALSE
  , sw_val_Spec         = FALSE
  , sw_val_PPV          = TRUE
  , sw_val_NPV          = TRUE
  , sw_val_Thresh       = TRUE
  , label_AUC           = c("AUC"   , "Area Under Curve"  )[1]
  , label_BA            = c("BA"    , "Balanced Accuracy" )[1]
  , label_Sens          = c("Sens"  , "Sensitivity"       )[1]
  , label_Spec          = c("Spec"  , "Specificity"       )[1]
  , label_PPV           = c("PPV"   , "Pos Pred Value"    )[2]
  , label_NPV           = c("NPV"   , "Neg Pred Value"    )[2]
  , label_Thresh        = c("Thresh", "Pos Threshold"     )[1]
  )
glm_roc$roc_curve_best |> print(width = Inf)
glm_roc$plot_roc
glm_roc$confusion_matrix


## Not run: 

## Categorical prediction-value example (from ?caret::confusionMatrix)
ex_lvs    <- c("normal", "abnormal")
ex_truth  <- factor(rep(ex_lvs, times = c(86, 258)), levels = rev(ex_lvs))
ex_pred   <- factor(c(rep(ex_lvs, times = c(54,  32))
                    , rep(ex_lvs, times = c(27, 231)))
                  , levels = ex_lvs)
# List of ROC curves for each target
out_roc_temp <- list()
for (n_target in levels(ex_truth)) {  # use levels() for factors, unique() for character labels
  out_roc <-
    e_plot_roc(
      labels_true     = ex_truth |> relevel(ref = n_target)
    , pred_values_pos = ex_pred
    , label_neg_pos   = ex_lvs
    , sw_plot         = TRUE
    , sw_caption_desc = c(TRUE, FALSE)[2]
    )

  out_roc$plot_roc <- out_roc$plot_roc + ggplot2::labs(title = paste0("ROC Curve, Target:  ", n_target))
  out_roc$plot_roc <- out_roc$plot_roc + ggplot2::coord_fixed(ratio = 1) # equal axes

  out_roc_temp[[ n_target ]] <- out_roc
} # n_target

# to display list tree hierarchy
out_roc_temp |> e_print_list_tree_hierarchy()

# Reorder ROC objects by type (rather than target)
out_roc_reordered <-
  out_roc_temp |>
  e_plot_roc_reorder_hierarchy()

out_roc_reordered |> e_print_list_tree_hierarchy()


## End(Not run)

