classification_case.R
In metrica: Prediction Performance Metrics

## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----warning=FALSE, message=FALSE---------------------------------------------
library(metrica)
library(dplyr)
library(purrr)
library(tidyr)

## ----load binary data---------------------------------------------------------
# Load
binary_landCover <- metrica::land_cover

# Printing first observations
head(binary_landCover)

## ----load multiclass data-----------------------------------------------------
# Load
multi_maize_phen <- metrica::maize_phenology

# Printing first observations
head(multi_maize_phen)

## ----confusion_matrix binary, fig.width=6, fig.height=5, dpi=60---------------
# a. Print
binary_landCover %>% confusion_matrix(obs = actual, pred = predicted, 
                                      plot = FALSE,
                                      unit = "count")

# b. Plot
binary_landCover %>% confusion_matrix(obs = actual, pred = predicted, 
                                      plot = TRUE,
                                      colors = c(low="#ffe8d6" , high="#892b64"), 
                                      unit = "count")

# c. Unit = proportion
binary_landCover %>% confusion_matrix(obs = actual, pred = predicted, 
                                      plot = TRUE,
                                      colors = c(low="#f9dbbd" , high="#892b64"), 
                                      unit = "proportion")


## ----confusion_matrix multiclass, fig.width=6, fig.height=5, dpi=60-----------
# a. Print
multi_maize_phen %>% confusion_matrix(obs = actual, pred = predicted, 
                                      plot = FALSE, 
                                      unit = "count")

# b. Plot
multi_maize_phen %>% confusion_matrix(obs = actual, pred = predicted, 
                                      plot = TRUE, 
                                      colors = c(low="grey85" , high="steelblue"), 
                                      unit = "count")


## ----accuracy-----------------------------------------------------------------
# Binary
binary_landCover %>% accuracy(data = ., obs = actual, pred = predicted, tidy = TRUE)

# Multiclass
maize_phenology %>% accuracy(data = ., obs = actual, pred = predicted, tidy = TRUE)

## ----balanced_accuracy--------------------------------------------------------
# Binary
binary_landCover %>% balacc(data = ., obs = actual, pred = predicted, tidy = TRUE)

# Multiclass
maize_phenology %>% balacc(data = ., obs = actual, pred = predicted, tidy = TRUE)

## ----precision----------------------------------------------------------------
# Binary
binary_landCover %>% precision(data = ., obs = actual, pred = predicted, tidy = TRUE)

# Multiclass
maize_phenology %>% precision(data = ., obs = actual, pred = predicted, tidy = TRUE)

## ----metrics_summary----------------------------------------------------------

# Get all at once with metrics_summary()
# Binary
binary_landCover %>% metrics_summary(data = ., obs = actual, pred = predicted, type = "classification")

# Multiclass
multi_maize_phen %>% metrics_summary(data = ., obs = actual, pred = predicted, type = "classification")

## ----metrics_summary_selected-------------------------------------------------
# Get a selected list at once with metrics_summary()
selected_class_metrics <- c("accuracy", "precision", "recall", "fscore")

# Binary
bin_sum <- binary_landCover %>% 
  metrics_summary(data = ., 
                  obs = actual, pred = predicted, 
                  type = "classification",
                  metrics_list = selected_class_metrics, pos_level = 1) 

# Multiclass
multi_maize_phen %>% 
  metrics_summary(data = ., 
                  obs = actual, pred = predicted, 
                  type = "classification",
                  metrics_list = selected_class_metrics)


## ----atom argument------------------------------------------------------------
# Precision
maize_phenology %>% metrica::precision(obs = actual, pred = predicted, atom = TRUE, tidy = TRUE)
# Recall
maize_phenology %>% metrica::recall(obs = actual, pred = predicted, atom = TRUE, tidy = TRUE)
# Specificity
maize_phenology %>% metrica::specificity(obs = actual, pred = predicted, atom = TRUE, tidy = TRUE)

# atom = TRUE available for more functions available (remove #)
# F-score
# maize_phenology %>% metrica::fscore(obs = actual, pred = predicted, atom = TRUE, tidy = TRUE)
# # Adjusted F-score
# maize_phenology %>% metrica::agf(obs = actual, pred = predicted, atom = TRUE, tidy = TRUE)
# # G-mean
# maize_phenology %>% metrica::gmean(obs = actual, pred = predicted, atom = TRUE, tidy = TRUE)
# # Negative predictive value
# maize_phenology %>% metrica::npv(obs = actual, pred = predicted, atom = TRUE, tidy = TRUE)
# # Prevalence
# maize_phenology %>% metrica::preval(obs = actual, pred = predicted, atom = TRUE, tidy = TRUE)
# # Prevalence threshold
# maize_phenology %>% metrica::preval_t(obs = actual, pred = predicted, atom = TRUE, tidy = TRUE)
# # False omission rate
# maize_phenology %>% metrica::FOR(obs = actual, pred = predicted, atom = TRUE, tidy = TRUE)
# # False detection rate
# maize_phenology %>% metrica::FDR(obs = actual, pred = predicted, atom = TRUE, tidy = TRUE)
# # False positive rate
# maize_phenology %>% metrica::FPR(obs = actual, pred = predicted, atom = TRUE, tidy = TRUE)
# # Falase negative rate
# maize_phenology %>% metrica::FNR(obs = actual, pred = predicted, atom = TRUE, tidy = TRUE)
# # Delta-p
# maize_phenology %>% metrica::deltap(obs = actual, pred = predicted, atom = TRUE, tidy = TRUE)
# # Critical Success Index
# maize_phenology %>% metrica::csi(obs = actual, pred = predicted, atom = TRUE, tidy = TRUE)
# # Bookmaker Informedness
# maize_phenology %>% metrica::bmi(obs = actual, pred = predicted, atom = TRUE, tidy = TRUE)
# # Positive likelihood ratio
# maize_phenology %>% metrica::posLr(obs = actual, pred = predicted, atom = TRUE, tidy = TRUE)
# # Negative likelihood ratio
# maize_phenology %>% metrica::negLr(obs = actual, pred = predicted, atom = TRUE, tidy = TRUE)
# # Diagnostic odds ratio
# maize_phenology %>% metrica::dor(obs = actual, pred = predicted, atom = TRUE, tidy = TRUE)


## ----multiple_models nested---------------------------------------------------

set.seed(15)
# Let's simulated two extra runs of the same model for Land Cover
fold_2 <- data.frame(actual = sample(c(0,1), 285, replace = TRUE),
                     predicted = sample(c(0,1), 285, replace = TRUE))
fold_3 <- data.frame(actual = sample(c(0,1), 285, replace = TRUE),
                     predicted = sample(c(0,1), 285, replace = TRUE))

# a. Create nested df with the folds
binary_nested_folds <- bind_rows(list(fold_1 = binary_landCover, 
                                      fold_2 = fold_2,
                                      fold_3 = fold_3),
                             .id = "id") %>%
  dplyr::group_by(id) %>% tidyr::nest()

head(binary_nested_folds %>% group_by(id) %>% dplyr::slice_head(n=2))

# b. Run 
binary_folds_summary <- binary_nested_folds %>% 
  # Store metrics in new.column "performance"
  dplyr::mutate(performance = 
                  purrr::map(data,
                             ~metrica::metrics_summary(data = ., 
                                                       obs = actual, pred = predicted, 
                                                       type = "classification"))) %>% 
  dplyr::select(-data) %>% 
  tidyr::unnest(cols = performance) %>% 
  dplyr::arrange(Metric)

head(binary_folds_summary)


## ----multiple_models unnested group_map---------------------------------------
non_nested_folds <- binary_nested_folds %>% unnest(cols = "data") 

# Using group_map()
binary_folds_summary_2 <- non_nested_folds %>% 
  dplyr::group_by(id) %>% 
  dplyr::group_map(~metrics_summary(data = ., obs = actual, pred = predicted, type = "classification"))

binary_folds_summary_2

## ----multiple_models unnested summarise---------------------------------------

# Using summarise()
binary_folds_summary_3 <- non_nested_folds %>% 
  dplyr::group_by(id) %>% 
  dplyr::summarise(metrics_summary(obs = actual, pred = predicted, type = "classification")) %>%
  dplyr::arrange(Metric)


binary_folds_summary_3


## ----scatter_plot print_metrics, fig.width=6, fig.height=5, dpi=60------------

selected_metrics <- c("accuracy", "precision", "recall", "khat",
                   "mcc", "fscore", "agf", "npv", "FPR", "FNR")

binary_matrix_metrics <- 
  binary_landCover %>% 
  confusion_matrix(obs = actual, pred = predicted, 
                   plot = TRUE,
                   colors = c(low="#ffe8d6" , high="#892b64"), 
                   unit = "count",  
                   # Print metrics_summary
                   print_metrics = TRUE,
                   # List of performance metrics
                   metrics_list = selected_metrics,
                   # Position (bottom or top)
                   position_metrics = "bottom")

binary_matrix_metrics

multinomial_matrix_metrics <-
  maize_phenology %>% 
  confusion_matrix(obs = actual, pred = predicted, 
                   plot = TRUE, 
                   colors = c(low="grey85" , high="steelblue"), 
                   unit = "count",
                   # Print metrics_summary
                   print_metrics = TRUE,
                   # List of performance metrics
                   metrics_list = selected_metrics,
                   # Position (bottom or top)
                   position_metrics = "bottom")
  
multinomial_matrix_metrics


## ----scatter_plot.edit, fig.width=6, fig.height=5, dpi=60---------------------

binary_matrix_metrics +
  # Modify labels
  ggplot2::labs(x = "Observed Vegetation", 
       y = "Predicted Vegetation",
       title = "Binary Confusion Matrix")
  

multinomial_matrix_metrics +
  # Modify labels
  ggplot2::labs(x = "Observed Corn Phenology", 
       y = "Predicted Corn Phenology",
       title = "Multinomial Confusion Matrix")+
  # Modify theme
  ggplot2::theme_light()



## ----export metrics_summary, eval=F-------------------------------------------
#  metrics_summary(data = binary_landCover,
#                  obs = obs,
#                  pred = pred,
#                  type = "classification") %>%
#    write.csv("binary_landcover_metrics_summary.csv")
#  

## ----export plot, eval=F------------------------------------------------------
#  
#  ggsave(plot = multinomial_matrix_metrics,
#         "multinomial_matrix_metrics.png",
#         width = 8,
#         height = 7)