knitr::opts_chunk$set( collapse = TRUE, comment = "#>" )
experiment_set <- list(few_cloud = c("rep_prodes_40", "rep_prodes_41", "rep_prodes_42"), few_masked_clouds = c("rep_prodes_50", "rep_prodes_51", "rep_prodes_52")) library(dplyr) library(ggplot2) library(knitr) library(magrittr) library(sits.prodes) library(xtable) accuracy_ls <- list() accuracy_tb <- tibble::tibble() warning("Run get_experiment_accuracy.R and devtools::build() to update the data in this package!") validation_data_file <- "extdata/validation_tb.Rdata" %>% system.file(package = "sits.prodes") %>% ensurer::ensure_that(file.exists(.)) load(validation_data_file) %>% ensurer::ensure_that(exists("validation_tb")) accuracy_ls <- validation_tb %>% dplyr::pull(validation_data) names(accuracy_ls) <- validation_tb %>% dplyr::pull(out_res) # Get data for confusion matrices. accuracy_tb <- accuracy_ls %>% names() %>% tibble::enframe(name = NULL) %>% dplyr::rename("file_path" = "value") %>% dplyr::mutate(acc_data = accuracy_ls, experiment = stringr::str_extract(file_path, "rep_prodes_[0-9]+"), algorithm = purrr::map_chr(.$file_path, function(x){ unlist(stringr::str_split(stringr::str_extract(x, "results_[a-z]+"), '_'))[2] }), smooth = stringr::str_extract(file_path, "smooth_[0-9]+x[0-9]+_n[0-9]{2}"), scene = stringr::str_extract(basename(file_path), "[0-9]{6}"), pyear = purrr::map_chr(file_path, function(x){ as.integer(dplyr::last(unlist(stringr::str_extract_all(basename(x), "[0-9]{4}")))) })) %>% dplyr::mutate(smooth = ifelse(is.na(smooth), "no_smooth", smooth)) %>% dplyr::select(experiment, algorithm, scene, pyear, smooth, #overall, overall2, up_acc, up_acc2, acc_data, file_path) %>% dplyr::mutate(cm = purrr::map(.$acc_data, function(x){return(x$error_matrix)})) %>% dplyr::mutate(up_acc2 = purrr::map(accuracy_ls, function(x){ res <- sits.prodes::asses_accuracy_simple(x$error_matrix) c(res$user, res$producer) %>% tibble::enframe(name = NULL) %>% dplyr::bind_cols(cname = c(paste0("ua_", names(res$user)), paste0("pa_", names(res$producer)))) %>% tidyr::spread("cname", "value") %>% return() })) %>% dplyr::mutate(up_acc3 = purrr::map(.$cm, add_upacc)) %>% dplyr::arrange(experiment, algorithm, scene, pyear, smooth) # get data for graphs acc_tb <- accuracy_tb %>% dplyr::select(experiment, algorithm, scene, pyear, smooth, up_acc2) %>% tidyr::unnest() %>% tidyr::gather(tidyselect::starts_with("pa_"), tidyselect::starts_with("ua_"), key = "variable", value = "value") %>% dplyr::mutate(acc_type = ifelse(stringr::str_sub(variable, 1, 3) == "ua_", "user", "producer"), variable = stringr::str_sub(variable, 4)) acc_pr <- acc_tb %>% dplyr::filter(acc_type == "producer") %>% dplyr::rename(prod_acc = value) %>% dplyr::select(-acc_type) acc_ur <- acc_tb %>% dplyr::filter(acc_type == "user") %>% dplyr::rename(user_acc = value) %>% dplyr::select(-acc_type) pa_acc_tb <- acc_pr %>% dplyr::inner_join(acc_ur, by = c("experiment", "algorithm", "scene", "pyear", "smooth", "variable"))
suppressWarnings( brick_description %>% knitr::kable(digits = 2, row.names = TRUE, full_width = FALSE) %>% kableExtra::kable_styling() )
It is a decomposition of the accuracy, originally proposed by Story and Congalton [@Story:1986]. Producers' accuracy (omission error) is how well a specific area on the Earth is mapped. It is the probability that a reference sample is correctly classified. User's accuracy (or reliability) is how well the map represents what is really on the ground.
knitr::include_graphics("./img/user_producer_accuracy_example.png")
[TODO] Olafson extended this definition by pointig the need of normalizing the results using the area of on class
plot_acc_by_alg_smt <- function(data_tb, my_algorithm, my_smooth){ print( data_tb %>% dplyr::filter(algorithm == !!my_algorithm, smooth == !!my_smooth) %>% ggplot2::ggplot(aes(x = prod_acc, y = user_acc, color = experiment, shape = variable )) + ggplot2::geom_point() + ggplot2::coord_fixed(xlim = c(0, 1), ylim = c(0, 1)) + ggplot2::facet_wrap(scene ~ pyear) + ggplot2::labs(title = sprintf("%s %s", my_algorithm, my_smooth), x = "Producer accuracy", y = "User accuracy") + ggplot2::theme(axis.text.x = element_text(angle = 90, hjust = 1)) ) } for (my_algorithm in c("dl", "rf", "svm", "vote")) { cat("\n\n") cat(paste0("#### ", my_algorithm), "\n") for (my_smooth in c("smooth_3x3_n10", "smooth_5x5_n10", "smooth_7x7_n10", "smooth_9x9_n10","smooth_11x11_n10") ) { suppressWarnings(pa_acc_tb %>% plot_acc_by_alg_smt(my_algorithm, my_smooth)) } }
plot_acc_by_alg_xp <- function(data_tb, my_label, my_experiment){ algorithm <- prod_acc <- user_acc <- NULL plot( data_tb %>% dplyr::filter(variable == !!my_label, experiment == !!my_experiment) %>% ggplot2::ggplot(aes(x = prod_acc, y = user_acc, color = smooth, shape = algorithm)) + ggplot2::geom_point() + ggplot2::coord_fixed() + ggplot2::facet_wrap(scene ~ pyear) + ggplot2::labs(title = sprintf("Whole validation %s %s", my_label, my_experiment), x = "Producer accuracy", y = "User accuracy") + ggplot2::theme(axis.text.x = element_text(angle = 90, hjust = 1)) ) } for (my_label in c("deforestation", "forest")) { cat("\n\n") cat(paste0("#### ", my_label), "\n") for (my_experiment in c("rep_prodes_40", "rep_prodes_41", "rep_prodes_42", "rep_prodes_50", "rep_prodes_51", "rep_prodes_52")) { suppressWarnings(plot_acc_by_alg_xp(pa_acc_tb, my_label, my_experiment)) } }
# if (nrow(accuracy_tb) > 0) { # for (i in 1:nrow(accuracy_tb)) { # cap <- sprintf("Confusion matrix %s %s %s %s %s ", # unlist(accuracy_tb[i, "experiment"]), # unlist(accuracy_tb[i, "scene"]), # unlist(accuracy_tb[i, "pyear"]), # unlist(accuracy_tb[i, "algorithm"]), # unlist(accuracy_tb[i, "smooth"])) # cat(paste0("### ", cap), "\n") # p_conmat <- suppressMessages(suppressWarnings(accuracy_tb$up_acc3[[i]] %>% # apply(2, function(x){x/1}) %>% # tibble::as_tibble(rownames = NA, caption = cap) %>% # knitr::kable(digits = 2, row.names = TRUE, full_width = FALSE) %>% # kableExtra::kable_styling())) # print(p_conmat) # cat("\n\n") # } # }
# Helper function to get the overall accuracy from a list column. get_overall <- function(x, qt_n){ if (any(is.na(x))) return(NA) if (!(qt_n %in% paste("quantile", 1:4, sep = '_'))) stop("Quantile not found.") return(x[[qt_n]][["overall"]]) } algorithm <- q1_overall <- q4_overall <- NULL validation_tb %>% dplyr::select(experiment, algorithm, smooth, scene, pyear, r_quantile) %>% dplyr::filter(is.na(smooth)) %>% dplyr::mutate(q1_overall = purrr::map_dbl(.$r_quantile, get_overall, qt_n = "quantile_1"), q4_overall = purrr::map_dbl(.$r_quantile, get_overall, qt_n = "quantile_4")) %>% ggplot2::ggplot() + ggplot2::geom_point(ggplot2::aes(x = q1_overall, y = q4_overall, color = experiment, shape = algorithm)) + ggplot2::facet_wrap(scene ~ pyear) + ggplot2::coord_fixed(xlim = c(0.25, 1), ylim = c(0.25, 1)) + ggplot2::geom_abline(slope = 1) + ggplot2::labs(title = "Cloud effect on overall accuracy", subtitle = "No smoothing", x = "Overall accuracy Q1 clouded observations", y = "Overall accuracy Q4 clouded observations")
# Helper function to get the user-producer accuracies from a list column. get_accuracy <- function(x, qt_n, qt_type, c_names){ if (!(qt_n %in% paste("quantile", 1:4, sep = '_'))) stop("Quantile not found.") if (!(qt_type %in% c("user", "producer"))) stop("Quantile type not found.") if (length(x) == 1 && is.na(x)) { res <- rep(NA, times = length(c_names)) names(res) <- paste(qt_type, qt_n, c_names, sep = '_') return(tibble::as_tibble(t(res))) } res <- as_tibble(t(x[[qt_n]][[qt_type]])) names(res) <- paste(qt_type, qt_n, names(res), sep = '_') return(res) } c_names <- names(validation_tb$r_quantile[[1]]$quantile_1$user) %>% ensurer::ensure_that(length(.) > 0, err_desc = "Missing names.") up_14 <- validation_tb %>% dplyr::mutate(user_q1 = purrr::map(.$r_quantile, get_accuracy, qt_n = "quantile_1", qt_type = "user", c_names = c_names), user_q4 = purrr::map(.$r_quantile, get_accuracy, qt_n = "quantile_4", qt_type = "user", c_names = c_names), prod_q1 = purrr::map(.$r_quantile, get_accuracy, qt_n = "quantile_1", qt_type = "producer", c_names = c_names), prod_q4 = purrr::map(.$r_quantile, get_accuracy, qt_n = "quantile_4", qt_type = "producer", c_names = c_names)) # up_14 %>% # dplyr::bind_cols(dplyr::bind_rows(up_14$user_q1), # dplyr::bind_rows(up_14$user_q4), # dplyr::bind_rows(up_14$prod_q1), # dplyr::bind_rows(up_14$prod_q4)) %>% # dplyr::select(experiment, algorithm, smooth, scene, pyear, # user_quantile_1_deforestation, user_quantile_1_forest, # user_quantile_4_deforestation, user_quantile_4_forest, # producer_quantile_1_deforestation, producer_quantile_1_forest, # producer_quantile_4_deforestation, producer_quantile_4_forest) %>% # ggplot2::ggplot() + # ggplot2::geom_point(ggplot2::aes(x = q1_overall, y = q4_overall, # color = experiment, shape = algorithm)) + # ggplot2::facet_wrap(scene ~ pyear) + # ggplot2::coord_fixed(xlim = c(0.25, 1), ylim = c(0.25, 1)) + # ggplot2::geom_abline(slope = 1) + # ggplot2::labs(title = "Cloud effect on overall accuracy", # subtitle = "No smoothing", # x = "Overall accuracy Q1 clouded observations", # y = "Overall accuracy Q4 clouded observations")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.