In albhasan/sits.prodes: What the Package Does (One Line, Title Case)

    knitr::opts_chunk$set(
    collapse = TRUE,
    comment = "#>"
)

experiment_set <- list(few_cloud         = c("rep_prodes_40", "rep_prodes_41", "rep_prodes_42"),
                       few_masked_clouds = c("rep_prodes_50", "rep_prodes_51", "rep_prodes_52"))

library(dplyr)
library(ggplot2)
library(knitr)
library(magrittr)
library(sits.prodes)
library(xtable)

accuracy_ls <- list()
accuracy_tb <- tibble::tibble()

warning("Run get_experiment_accuracy.R and devtools::build() to update the data in this package!")

validation_data_file <- "extdata/validation_tb.Rdata" %>%
  system.file(package = "sits.prodes") %>% 
  ensurer::ensure_that(file.exists(.))
load(validation_data_file) %>% 
  ensurer::ensure_that(exists("validation_tb"))
accuracy_ls <- validation_tb %>% 
  dplyr::pull(validation_data)
names(accuracy_ls) <- validation_tb %>% 
  dplyr::pull(out_res)

# Get data for confusion matrices.
accuracy_tb <- accuracy_ls %>% 
  names() %>%
    tibble::enframe(name = NULL) %>%
    dplyr::rename("file_path" = "value") %>%
    dplyr::mutate(acc_data = accuracy_ls,
                  experiment = stringr::str_extract(file_path, "rep_prodes_[0-9]+"),
                  algorithm = purrr::map_chr(.$file_path, function(x){
                      unlist(stringr::str_split(stringr::str_extract(x, "results_[a-z]+"), '_'))[2]
                  }),
                  smooth = stringr::str_extract(file_path, "smooth_[0-9]+x[0-9]+_n[0-9]{2}"),
                  scene = stringr::str_extract(basename(file_path), "[0-9]{6}"),
                  pyear = purrr::map_chr(file_path, function(x){
                      as.integer(dplyr::last(unlist(stringr::str_extract_all(basename(x), "[0-9]{4}"))))
                  })) %>%
    dplyr::mutate(smooth = ifelse(is.na(smooth), "no_smooth", smooth)) %>%
    dplyr::select(experiment, algorithm, scene, pyear, smooth, #overall, overall2, up_acc, up_acc2,
                  acc_data, file_path) %>%
    dplyr::mutate(cm = purrr::map(.$acc_data, function(x){return(x$error_matrix)})) %>%
    dplyr::mutate(up_acc2 = purrr::map(accuracy_ls, function(x){
        res <- sits.prodes::asses_accuracy_simple(x$error_matrix)
        c(res$user, res$producer) %>%
            tibble::enframe(name = NULL) %>%
            dplyr::bind_cols(cname =  c(paste0("ua_", names(res$user)), paste0("pa_", names(res$producer)))) %>%
            tidyr::spread("cname", "value") %>%
            return()
    })) %>%
    dplyr::mutate(up_acc3 = purrr::map(.$cm, add_upacc)) %>%
    dplyr::arrange(experiment, algorithm, scene, pyear, smooth)


# get data for graphs
acc_tb <- accuracy_tb %>%
    dplyr::select(experiment, algorithm, scene, pyear, smooth, up_acc2) %>%
    tidyr::unnest() %>%
    tidyr::gather(tidyselect::starts_with("pa_"), tidyselect::starts_with("ua_"),
                  key = "variable", value = "value") %>%
    dplyr::mutate(acc_type = ifelse(stringr::str_sub(variable, 1, 3) == "ua_",
                                    "user", "producer"),
                  variable = stringr::str_sub(variable, 4))
acc_pr <- acc_tb %>% dplyr::filter(acc_type == "producer") %>%
    dplyr::rename(prod_acc = value) %>%
    dplyr::select(-acc_type)
acc_ur <- acc_tb %>% dplyr::filter(acc_type == "user") %>%
    dplyr::rename(user_acc = value) %>%
    dplyr::select(-acc_type)
pa_acc_tb <- acc_pr %>%
    dplyr::inner_join(acc_ur, by = c("experiment", "algorithm", "scene", "pyear",
                                     "smooth", "variable"))

suppressWarnings(
   brick_description %>% 
     knitr::kable(digits = 2, row.names = TRUE, full_width = FALSE) %>% 
     kableExtra::kable_styling()
 )

Accuracy

It is a decomposition of the accuracy, originally proposed by Story and Congalton [@Story:1986]. Producers' accuracy (omission error) is how well a specific area on the Earth is mapped. It is the probability that a reference sample is correctly classified. User's accuracy (or reliability) is how well the map represents what is really on the ground.

knitr::include_graphics("./img/user_producer_accuracy_example.png")

[TODO] Olafson extended this definition by pointig the need of normalizing the results using the area of on class

Classification results

Overall accuracy

Random forest bahaves homogenously accros experiments, smooths, and scenes.
Accuracy improvement due to smooth window size is marginal.
Filling the clouds using No Data decreses Deep Learning's accuracy.

Accuracy by algorithm and smooth

plot_acc_by_alg_smt <- function(data_tb, my_algorithm, my_smooth){
    print(
        data_tb %>%
            dplyr::filter(algorithm == !!my_algorithm, smooth == !!my_smooth) %>%
            ggplot2::ggplot(aes(x = prod_acc, y = user_acc,
                                color = experiment,
                                shape = variable
            )) +
            ggplot2::geom_point() +
            ggplot2::coord_fixed(xlim = c(0, 1), ylim = c(0, 1)) +
            ggplot2::facet_wrap(scene ~ pyear) +
            ggplot2::labs(title = sprintf("%s %s", my_algorithm, my_smooth),
                          x = "Producer accuracy",
                          y = "User accuracy") +
        ggplot2::theme(axis.text.x = element_text(angle = 90, hjust = 1))
    )
}


for (my_algorithm in c("dl", "rf", "svm", "vote")) {
    cat("\n\n")
    cat(paste0("#### ", my_algorithm), "\n")
    for (my_smooth in c("smooth_3x3_n10", "smooth_5x5_n10", "smooth_7x7_n10",
                  "smooth_9x9_n10","smooth_11x11_n10") ) {
        suppressWarnings(pa_acc_tb %>%
                             plot_acc_by_alg_smt(my_algorithm, my_smooth))
    }
}

Accuracy by label and experiment

plot_acc_by_alg_xp <- function(data_tb, my_label, my_experiment){
  algorithm <- prod_acc <- user_acc <- NULL
  plot(
    data_tb %>%
      dplyr::filter(variable ==  !!my_label,
                    experiment == !!my_experiment) %>%
      ggplot2::ggplot(aes(x = prod_acc, y = user_acc,
                          color = smooth,
                          shape = algorithm)) +
      ggplot2::geom_point() +
      ggplot2::coord_fixed() +
      ggplot2::facet_wrap(scene ~ pyear) +
      ggplot2::labs(title = sprintf("Whole validation %s %s", my_label, my_experiment),
           x = "Producer accuracy",
           y = "User accuracy") +
      ggplot2::theme(axis.text.x = element_text(angle = 90, hjust = 1))
  )
}

for (my_label in c("deforestation", "forest")) {
    cat("\n\n")
    cat(paste0("#### ", my_label), "\n")
    for (my_experiment in c("rep_prodes_40", "rep_prodes_41", "rep_prodes_42",
                    "rep_prodes_50", "rep_prodes_51", "rep_prodes_52")) {
        suppressWarnings(plot_acc_by_alg_xp(pa_acc_tb, my_label, my_experiment))
    }
}

Confusion matrices

# if (nrow(accuracy_tb) > 0) {
#     for (i in 1:nrow(accuracy_tb)) {
#         cap <- sprintf("Confusion matrix %s %s %s %s %s ",
#                        unlist(accuracy_tb[i, "experiment"]),
#                        unlist(accuracy_tb[i, "scene"]),
#                        unlist(accuracy_tb[i, "pyear"]),
#                        unlist(accuracy_tb[i, "algorithm"]),
#                        unlist(accuracy_tb[i, "smooth"]))
#         cat(paste0("### ", cap), "\n")
#         p_conmat <- suppressMessages(suppressWarnings(accuracy_tb$up_acc3[[i]] %>%
#                                                           apply(2, function(x){x/1}) %>%
#                                                           tibble::as_tibble(rownames = NA, caption = cap) %>%
#                                                           knitr::kable(digits = 2, row.names = TRUE, full_width = FALSE) %>%
#                                                           kableExtra::kable_styling()))
#         print(p_conmat)
#         cat("\n\n")
#     }
# }

Accuracy and amount of clouds

Overall accuracy

#  Helper function to get the overall accuracy from a list column.
get_overall <- function(x, qt_n){
  if (any(is.na(x)))
    return(NA)
  if (!(qt_n %in% paste("quantile", 1:4, sep = '_')))
    stop("Quantile not found.")
  return(x[[qt_n]][["overall"]])
}

algorithm <- q1_overall <- q4_overall <- NULL
validation_tb %>%
  dplyr::select(experiment, algorithm, smooth, scene, pyear, r_quantile) %>%
  dplyr::filter(is.na(smooth)) %>% 
  dplyr::mutate(q1_overall = purrr::map_dbl(.$r_quantile, get_overall, qt_n = "quantile_1"),
                q4_overall = purrr::map_dbl(.$r_quantile, get_overall, qt_n = "quantile_4")) %>% 
  ggplot2::ggplot() + 
  ggplot2::geom_point(ggplot2::aes(x = q1_overall, y = q4_overall, 
                                  color = experiment, shape = algorithm)) +
  ggplot2::facet_wrap(scene ~ pyear) +
  ggplot2::coord_fixed(xlim = c(0.25, 1), ylim = c(0.25, 1)) + 
  ggplot2::geom_abline(slope = 1) + 
  ggplot2::labs(title = "Cloud effect on overall accuracy",
                subtitle = "No smoothing",
                x = "Overall accuracy Q1 clouded observations",
                y = "Overall accuracy Q4 clouded observations")

User accuracy

#  Helper function to get the user-producer accuracies from a list column.
get_accuracy <- function(x, qt_n, qt_type, c_names){
    if (!(qt_n %in% paste("quantile", 1:4, sep = '_')))
        stop("Quantile not found.")
    if (!(qt_type %in% c("user", "producer")))
        stop("Quantile type not found.")
    if (length(x) == 1 && is.na(x)) {
          res <- rep(NA, times = length(c_names))
          names(res) <- paste(qt_type, qt_n, c_names,  sep = '_')
          return(tibble::as_tibble(t(res)))
    }
    res <- as_tibble(t(x[[qt_n]][[qt_type]]))
    names(res) <- paste(qt_type, qt_n, names(res),  sep = '_')
    return(res)
}


c_names <- names(validation_tb$r_quantile[[1]]$quantile_1$user) %>% 
  ensurer::ensure_that(length(.) > 0, err_desc = "Missing names.")
up_14 <- validation_tb %>% 
  dplyr::mutate(user_q1 = purrr::map(.$r_quantile, get_accuracy, 
                                     qt_n = "quantile_1", 
                                     qt_type = "user", 
                                     c_names = c_names),
                user_q4 = purrr::map(.$r_quantile, get_accuracy, 
                                     qt_n = "quantile_4", 
                                     qt_type = "user", 
                                     c_names = c_names), 
                prod_q1 = purrr::map(.$r_quantile, get_accuracy, 
                                     qt_n = "quantile_1", 
                                     qt_type = "producer", 
                                     c_names = c_names),
                prod_q4 = purrr::map(.$r_quantile, get_accuracy, 
                                     qt_n = "quantile_4", 
                                     qt_type = "producer",
                                     c_names = c_names))

# up_14 %>%
#     dplyr::bind_cols(dplyr::bind_rows(up_14$user_q1),
#                      dplyr::bind_rows(up_14$user_q4),
#                      dplyr::bind_rows(up_14$prod_q1),
#                      dplyr::bind_rows(up_14$prod_q4)) %>%
#     dplyr::select(experiment, algorithm, smooth, scene, pyear,
#                   user_quantile_1_deforestation, user_quantile_1_forest,
#                   user_quantile_4_deforestation, user_quantile_4_forest,
#                   producer_quantile_1_deforestation, producer_quantile_1_forest,
#                   producer_quantile_4_deforestation, producer_quantile_4_forest) %>%
#   ggplot2::ggplot() +
#   ggplot2::geom_point(ggplot2::aes(x = q1_overall, y = q4_overall,
#                                   color = experiment, shape = algorithm)) +
#   ggplot2::facet_wrap(scene ~ pyear) +
#   ggplot2::coord_fixed(xlim = c(0.25, 1), ylim = c(0.25, 1)) +
#   ggplot2::geom_abline(slope = 1) +
#   ggplot2::labs(title = "Cloud effect on overall accuracy",
#                 subtitle = "No smoothing",
#                 x = "Overall accuracy Q1 clouded observations",
#                 y = "Overall accuracy Q4 clouded observations")

Producer accuracy

References

albhasan/sits.prodes documentation built on Sept. 3, 2020, 2:03 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

albhasan/sits.prodes
What the Package Does (One Line, Title Case)

In albhasan/sits.prodes: What the Package Does (One Line, Title Case)

Accuracy

Classification results

Overall accuracy

Accuracy by algorithm and smooth

Accuracy by label and experiment

Confusion matrices

Accuracy and amount of clouds

Overall accuracy

User accuracy

Producer accuracy

References

R Package Documentation

Browse R Packages

We want your feedback!

albhasan/sits.prodes What the Package Does (One Line, Title Case)

In albhasan/sits.prodes: What the Package Does (One Line, Title Case)

Accuracy

Classification results

Overall accuracy

Accuracy by algorithm and smooth

Accuracy by label and experiment

Confusion matrices

Accuracy and amount of clouds

Overall accuracy

User accuracy

Producer accuracy

References

R Package Documentation

Browse R Packages

We want your feedback!

albhasan/sits.prodes
What the Package Does (One Line, Title Case)