knitr::opts_chunk$set(
    collapse = TRUE,
    comment = "#>"
)
experiment_set <- list(few_cloud         = c("rep_prodes_40", "rep_prodes_41", "rep_prodes_42"),
                       few_masked_clouds = c("rep_prodes_50", "rep_prodes_51", "rep_prodes_52"))

library(dplyr)
library(ggplot2)
library(knitr)
library(magrittr)
library(sits.prodes)
library(xtable)

accuracy_ls <- list()
accuracy_tb <- tibble::tibble()

warning("Run get_experiment_accuracy.R and devtools::build() to update the data in this package!")

validation_data_file <- "extdata/validation_tb.Rdata" %>%
  system.file(package = "sits.prodes") %>% 
  ensurer::ensure_that(file.exists(.))
load(validation_data_file) %>% 
  ensurer::ensure_that(exists("validation_tb"))
accuracy_ls <- validation_tb %>% 
  dplyr::pull(validation_data)
names(accuracy_ls) <- validation_tb %>% 
  dplyr::pull(out_res)

# Get data for confusion matrices.
accuracy_tb <- accuracy_ls %>% 
  names() %>%
    tibble::enframe(name = NULL) %>%
    dplyr::rename("file_path" = "value") %>%
    dplyr::mutate(acc_data = accuracy_ls,
                  experiment = stringr::str_extract(file_path, "rep_prodes_[0-9]+"),
                  algorithm = purrr::map_chr(.$file_path, function(x){
                      unlist(stringr::str_split(stringr::str_extract(x, "results_[a-z]+"), '_'))[2]
                  }),
                  smooth = stringr::str_extract(file_path, "smooth_[0-9]+x[0-9]+_n[0-9]{2}"),
                  scene = stringr::str_extract(basename(file_path), "[0-9]{6}"),
                  pyear = purrr::map_chr(file_path, function(x){
                      as.integer(dplyr::last(unlist(stringr::str_extract_all(basename(x), "[0-9]{4}"))))
                  })) %>%
    dplyr::mutate(smooth = ifelse(is.na(smooth), "no_smooth", smooth)) %>%
    dplyr::select(experiment, algorithm, scene, pyear, smooth, #overall, overall2, up_acc, up_acc2,
                  acc_data, file_path) %>%
    dplyr::mutate(cm = purrr::map(.$acc_data, function(x){return(x$error_matrix)})) %>%
    dplyr::mutate(up_acc2 = purrr::map(accuracy_ls, function(x){
        res <- sits.prodes::asses_accuracy_simple(x$error_matrix)
        c(res$user, res$producer) %>%
            tibble::enframe(name = NULL) %>%
            dplyr::bind_cols(cname =  c(paste0("ua_", names(res$user)), paste0("pa_", names(res$producer)))) %>%
            tidyr::spread("cname", "value") %>%
            return()
    })) %>%
    dplyr::mutate(up_acc3 = purrr::map(.$cm, add_upacc)) %>%
    dplyr::arrange(experiment, algorithm, scene, pyear, smooth)


# get data for graphs
acc_tb <- accuracy_tb %>%
    dplyr::select(experiment, algorithm, scene, pyear, smooth, up_acc2) %>%
    tidyr::unnest() %>%
    tidyr::gather(tidyselect::starts_with("pa_"), tidyselect::starts_with("ua_"),
                  key = "variable", value = "value") %>%
    dplyr::mutate(acc_type = ifelse(stringr::str_sub(variable, 1, 3) == "ua_",
                                    "user", "producer"),
                  variable = stringr::str_sub(variable, 4))
acc_pr <- acc_tb %>% dplyr::filter(acc_type == "producer") %>%
    dplyr::rename(prod_acc = value) %>%
    dplyr::select(-acc_type)
acc_ur <- acc_tb %>% dplyr::filter(acc_type == "user") %>%
    dplyr::rename(user_acc = value) %>%
    dplyr::select(-acc_type)
pa_acc_tb <- acc_pr %>%
    dplyr::inner_join(acc_ur, by = c("experiment", "algorithm", "scene", "pyear",
                                     "smooth", "variable"))
suppressWarnings(
   brick_description %>% 
     knitr::kable(digits = 2, row.names = TRUE, full_width = FALSE) %>% 
     kableExtra::kable_styling()
 )

Accuracy

It is a decomposition of the accuracy, originally proposed by Story and Congalton [@Story:1986]. Producers' accuracy (omission error) is how well a specific area on the Earth is mapped. It is the probability that a reference sample is correctly classified. User's accuracy (or reliability) is how well the map represents what is really on the ground.

knitr::include_graphics("./img/user_producer_accuracy_example.png")

[TODO] Olafson extended this definition by pointig the need of normalizing the results using the area of on class

Classification results

Overall accuracy

Accuracy by algorithm and smooth

plot_acc_by_alg_smt <- function(data_tb, my_algorithm, my_smooth){
    print(
        data_tb %>%
            dplyr::filter(algorithm == !!my_algorithm, smooth == !!my_smooth) %>%
            ggplot2::ggplot(aes(x = prod_acc, y = user_acc,
                                color = experiment,
                                shape = variable
            )) +
            ggplot2::geom_point() +
            ggplot2::coord_fixed(xlim = c(0, 1), ylim = c(0, 1)) +
            ggplot2::facet_wrap(scene ~ pyear) +
            ggplot2::labs(title = sprintf("%s %s", my_algorithm, my_smooth),
                          x = "Producer accuracy",
                          y = "User accuracy") +
        ggplot2::theme(axis.text.x = element_text(angle = 90, hjust = 1))
    )
}


for (my_algorithm in c("dl", "rf", "svm", "vote")) {
    cat("\n\n")
    cat(paste0("#### ", my_algorithm), "\n")
    for (my_smooth in c("smooth_3x3_n10", "smooth_5x5_n10", "smooth_7x7_n10",
                  "smooth_9x9_n10","smooth_11x11_n10") ) {
        suppressWarnings(pa_acc_tb %>%
                             plot_acc_by_alg_smt(my_algorithm, my_smooth))
    }
}

Accuracy by label and experiment

plot_acc_by_alg_xp <- function(data_tb, my_label, my_experiment){
  algorithm <- prod_acc <- user_acc <- NULL
  plot(
    data_tb %>%
      dplyr::filter(variable ==  !!my_label,
                    experiment == !!my_experiment) %>%
      ggplot2::ggplot(aes(x = prod_acc, y = user_acc,
                          color = smooth,
                          shape = algorithm)) +
      ggplot2::geom_point() +
      ggplot2::coord_fixed() +
      ggplot2::facet_wrap(scene ~ pyear) +
      ggplot2::labs(title = sprintf("Whole validation %s %s", my_label, my_experiment),
           x = "Producer accuracy",
           y = "User accuracy") +
      ggplot2::theme(axis.text.x = element_text(angle = 90, hjust = 1))
  )
}

for (my_label in c("deforestation", "forest")) {
    cat("\n\n")
    cat(paste0("#### ", my_label), "\n")
    for (my_experiment in c("rep_prodes_40", "rep_prodes_41", "rep_prodes_42",
                    "rep_prodes_50", "rep_prodes_51", "rep_prodes_52")) {
        suppressWarnings(plot_acc_by_alg_xp(pa_acc_tb, my_label, my_experiment))
    }
}

Confusion matrices

# if (nrow(accuracy_tb) > 0) {
#     for (i in 1:nrow(accuracy_tb)) {
#         cap <- sprintf("Confusion matrix %s %s %s %s %s ",
#                        unlist(accuracy_tb[i, "experiment"]),
#                        unlist(accuracy_tb[i, "scene"]),
#                        unlist(accuracy_tb[i, "pyear"]),
#                        unlist(accuracy_tb[i, "algorithm"]),
#                        unlist(accuracy_tb[i, "smooth"]))
#         cat(paste0("### ", cap), "\n")
#         p_conmat <- suppressMessages(suppressWarnings(accuracy_tb$up_acc3[[i]] %>%
#                                                           apply(2, function(x){x/1}) %>%
#                                                           tibble::as_tibble(rownames = NA, caption = cap) %>%
#                                                           knitr::kable(digits = 2, row.names = TRUE, full_width = FALSE) %>%
#                                                           kableExtra::kable_styling()))
#         print(p_conmat)
#         cat("\n\n")
#     }
# }

Accuracy and amount of clouds

Overall accuracy

#  Helper function to get the overall accuracy from a list column.
get_overall <- function(x, qt_n){
  if (any(is.na(x)))
    return(NA)
  if (!(qt_n %in% paste("quantile", 1:4, sep = '_')))
    stop("Quantile not found.")
  return(x[[qt_n]][["overall"]])
}

algorithm <- q1_overall <- q4_overall <- NULL
validation_tb %>%
  dplyr::select(experiment, algorithm, smooth, scene, pyear, r_quantile) %>%
  dplyr::filter(is.na(smooth)) %>% 
  dplyr::mutate(q1_overall = purrr::map_dbl(.$r_quantile, get_overall, qt_n = "quantile_1"),
                q4_overall = purrr::map_dbl(.$r_quantile, get_overall, qt_n = "quantile_4")) %>% 
  ggplot2::ggplot() + 
  ggplot2::geom_point(ggplot2::aes(x = q1_overall, y = q4_overall, 
                                  color = experiment, shape = algorithm)) +
  ggplot2::facet_wrap(scene ~ pyear) +
  ggplot2::coord_fixed(xlim = c(0.25, 1), ylim = c(0.25, 1)) + 
  ggplot2::geom_abline(slope = 1) + 
  ggplot2::labs(title = "Cloud effect on overall accuracy",
                subtitle = "No smoothing",
                x = "Overall accuracy Q1 clouded observations",
                y = "Overall accuracy Q4 clouded observations")

User accuracy

#  Helper function to get the user-producer accuracies from a list column.
get_accuracy <- function(x, qt_n, qt_type, c_names){
    if (!(qt_n %in% paste("quantile", 1:4, sep = '_')))
        stop("Quantile not found.")
    if (!(qt_type %in% c("user", "producer")))
        stop("Quantile type not found.")
    if (length(x) == 1 && is.na(x)) {
          res <- rep(NA, times = length(c_names))
          names(res) <- paste(qt_type, qt_n, c_names,  sep = '_')
          return(tibble::as_tibble(t(res)))
    }
    res <- as_tibble(t(x[[qt_n]][[qt_type]]))
    names(res) <- paste(qt_type, qt_n, names(res),  sep = '_')
    return(res)
}


c_names <- names(validation_tb$r_quantile[[1]]$quantile_1$user) %>% 
  ensurer::ensure_that(length(.) > 0, err_desc = "Missing names.")
up_14 <- validation_tb %>% 
  dplyr::mutate(user_q1 = purrr::map(.$r_quantile, get_accuracy, 
                                     qt_n = "quantile_1", 
                                     qt_type = "user", 
                                     c_names = c_names),
                user_q4 = purrr::map(.$r_quantile, get_accuracy, 
                                     qt_n = "quantile_4", 
                                     qt_type = "user", 
                                     c_names = c_names), 
                prod_q1 = purrr::map(.$r_quantile, get_accuracy, 
                                     qt_n = "quantile_1", 
                                     qt_type = "producer", 
                                     c_names = c_names),
                prod_q4 = purrr::map(.$r_quantile, get_accuracy, 
                                     qt_n = "quantile_4", 
                                     qt_type = "producer",
                                     c_names = c_names))

# up_14 %>%
#     dplyr::bind_cols(dplyr::bind_rows(up_14$user_q1),
#                      dplyr::bind_rows(up_14$user_q4),
#                      dplyr::bind_rows(up_14$prod_q1),
#                      dplyr::bind_rows(up_14$prod_q4)) %>%
#     dplyr::select(experiment, algorithm, smooth, scene, pyear,
#                   user_quantile_1_deforestation, user_quantile_1_forest,
#                   user_quantile_4_deforestation, user_quantile_4_forest,
#                   producer_quantile_1_deforestation, producer_quantile_1_forest,
#                   producer_quantile_4_deforestation, producer_quantile_4_forest) %>%
#   ggplot2::ggplot() +
#   ggplot2::geom_point(ggplot2::aes(x = q1_overall, y = q4_overall,
#                                   color = experiment, shape = algorithm)) +
#   ggplot2::facet_wrap(scene ~ pyear) +
#   ggplot2::coord_fixed(xlim = c(0.25, 1), ylim = c(0.25, 1)) +
#   ggplot2::geom_abline(slope = 1) +
#   ggplot2::labs(title = "Cloud effect on overall accuracy",
#                 subtitle = "No smoothing",
#                 x = "Overall accuracy Q1 clouded observations",
#                 y = "Overall accuracy Q4 clouded observations")

Producer accuracy

References



albhasan/sits.prodes documentation built on Sept. 3, 2020, 2:03 p.m.