In albhasan/sits.prodes: What the Package Does (One Line, Title Case)

knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

suppressMessages(library(ggplot2))
suppressMessages(library(magrittr))
suppressMessages(library(raster))
suppressMessages(library(rasterVis))
suppressMessages(library(sits.prodes))
suppressMessages(library(tibble))

How clouded are the images?

cloud_cov <- id <- files <- neigh <- prodes_year <- sat_image <- scene <- tile <- NULL
sits.starfm::BRICK_IMAGES %>%
    dplyr::select(-c(files, tile, neigh)) %>%
    dplyr::group_by(scene, prodes_year) %>% 
    dplyr::mutate(id = seq_along(sat_image)) %>%
    ggplot2::ggplot() +
    ggplot2::geom_line(ggplot2::aes(x = id, y = cloud_cov, color = cloud_cov, 
                                    group = prodes_year)) +
    ggplot2::geom_point(ggplot2::aes(x = id, y = cloud_cov, color = cloud_cov)) +
    ggplot2::facet_grid(scene ~ prodes_year) +
    ggplot2::ggtitle("Images cloud cover", 
                     subtitle = "According to the image's metadata")

How clouded are the samples?

data(prodes_samples_raw)
cloud <- n_clouds <- NULL
prodes_samples_raw %>% 
  dplyr::sample_frac(0.1) %>% 
  dplyr::mutate(n_clouds = purrr::map_int(.$time_series, function(x){
    x %>% 
      dplyr::select(cloud) %>% 
      sum() %>% 
      as.integer() %>% 
      return()
  })) %>% 
  ggplot2::ggplot(ggplot2::aes(n_clouds)) + 
  ggplot2::geom_histogram(breaks = 0:23) + 
  ggplot2::labs(title = "Clouded samples (prodes_samples_raw)") +
  ggplot2::xlab("Number of clouded pixels") +
  ggplot2::ylab("Count")

What are the accuracies inside and outside the clouds?

There are no cloud-free areas in the images, so, it isn't feaible to compare the classification accuracies inside and outside clouded zones.

# 
# get_scene <- function(file_path){
#     file_path %>% 
#         basename() %>% 
#         stringr::str_extract(pattern = "_[0-9]{6}_") %>% 
#         stringr::str_sub(2, -2) %>% 
#         return()
# }
# 
# get_prodes_year <- function(file_path){
#     file_path %>%
#         basename() %>% 
#         stringr::str_extract_all(pattern = "_[0-9]{4}.") %>% 
#         purrr::map(dplyr::last) %>% 
#         unlist() %>% 
#         stringr::str_sub(2, -2) %>% 
#         return()
# }
# 
# 
# cloud_dir <- "/home/alber/Documents/data/experiments/prodes_reproduction/data/raster/cloud_count"
# file_path <- pyear <- r <- scene <- NULL
# clouds_sum_map <- cloud_dir %>% 
#     list.files(pattern = ".tif$", full.names = TRUE) %>% 
#     tibble::enframe(name = NULL) %>% 
#     dplyr::rename(file_path = "value") %>% 
#     dplyr::mutate(file_name = basename(file_path),
#                   scene = get_scene(file_path),
#                   pyear = get_prodes_year(file_path)) %>% 
#     dplyr::arrange(scene, pyear) %>% 
#     dplyr::filter(pyear == "2016") %>% 
#     dplyr::mutate(r = purrr::map(file_path, raster::raster),
#                   quant = purrr::map(r, function(r){
#                       r[] %>% 
#                           .[!is.na(.)] %>% 
#                           .[which(. > 0)] %>% 
#                           stats::quantile() %>% 
#                           return()
#                   }))
# 
# for (i in seq_along(clouds_sum_map$file_path)) {
#     cat(paste0("### ", sprintf("RAW brick %s %s", clouds_sum_map$scene[[i]], 
#                                clouds_sum_map$pyear[[i]])), "\n")
#     rasterVis::levelplot(clouds_sum_map$r[[i]], 
#                          par.settings = rasterVis::RdBuTheme(), 
#                          at = unique(clouds_sum_map$quant[[i]]), 
#                          margin = FALSE) %>% 
#         print()
#     cat("\n\n")
# }

Is smoothing improving the classification results?

# # Get data for confusion matrices.
# load(system.file("extdata/accuracy_ls.Rdata", package = "sits.prodes"))
# accuracy_tb <- accuracy_ls %>% names() %>% 
#     tibble::enframe(name = NULL) %>% 
#     dplyr::rename("file_path" = "value") %>% 
#     dplyr::mutate(acc_data = accuracy_ls,
#                   experiment = stringr::str_extract(file_path, "rep_prodes_[0-9]+"), 
#                   algorithm = purrr::map_chr(.$file_path, function(x){
#                       unlist(stringr::str_split(stringr::str_extract(x, "results_[a-z]+"), '_'))[2]
#                   }), 
#                   smooth = stringr::str_extract(file_path, "smooth_[0-9]+x[0-9]+_n[0-9]{2}"), 
#                   scene = stringr::str_extract(basename(file_path), "[0-9]{6}"),
#                   pyear = purrr::map_chr(file_path, function(x){
#                       as.integer(dplyr::last(unlist(stringr::str_extract_all(basename(x), "[0-9]{4}"))))
#                   })) %>%
#     dplyr::mutate(smooth = ifelse(is.na(smooth), "no_smooth", smooth)) %>% 
#     dplyr::select(experiment, algorithm, scene, pyear, smooth, #overall, overall2, up_acc, up_acc2, 
#                   acc_data, file_path) %>% 
#     dplyr::mutate(cm = purrr::map(.$acc_data, function(x){return(x$error_matrix)})) %>%
#     dplyr::mutate(up_acc2 = purrr::map(accuracy_ls, function(x){
#         res <- sits.prodes::asses_accuracy_simple(x$error_matrix)
#         c(res$user, res$producer) %>%
#             tibble::enframe(name = NULL) %>%
#             dplyr::bind_cols(cname =  c(paste0("ua_", names(res$user)), paste0("pa_", names(res$producer)))) %>%
#             tidyr::spread("cname", "value") %>%
#             return()
#     })) %>%
#     dplyr::mutate(up_acc3 = purrr::map(.$cm, add_upacc)) %>%
#     dplyr::arrange(experiment, algorithm, scene, pyear, smooth)
#

# 
# acc_tb <- accuracy_tb %>% 
#     dplyr::select(experiment, algorithm, scene, pyear, smooth, up_acc2) %>% 
#     tidyr::unnest() %>% 
#     tidyr::gather(tidyselect::starts_with("pa_"), tidyselect::starts_with("ua_"), 
#                   key = "variable", value = "value") %>% 
#     dplyr::mutate(acc_type = ifelse(stringr::str_sub(variable, 1, 3) == "ua_", 
#                                     "user", "producer"),
#                   variable = stringr::str_sub(variable, 4)) 
# acc_pr <- acc_tb %>% dplyr::filter(acc_type == "producer") %>% 
#     dplyr::rename(prod_acc = value) %>% 
#     dplyr::select(-acc_type)
# acc_ur <- acc_tb %>% dplyr::filter(acc_type == "user") %>% 
#     dplyr::rename(user_acc = value) %>% 
#     dplyr::select(-acc_type)
# pa_acc_tb <- acc_pr %>% 
#     dplyr::inner_join(acc_ur, by = c("experiment", "algorithm", "scene", "pyear", 
#                                      "smooth", "variable"))

# plot_acc_by_alg_xp <- function(data_tb, my_label, my_experiment){
#     plot(
#         data_tb %>% 
#             dplyr::filter(variable ==  !!my_label, 
#                           experiment == !!my_experiment) %>% 
#             ggplot2::ggplot(ggplot2::aes(x = prod_acc, y = user_acc, 
#                                          color = smooth, 
#                                          shape = algorithm)) +
#             ggplot2::geom_point() +
#             ggplot2::coord_fixed() +
#             #ggplot2::xlim(0, 1) +
#             #ggplot2::ylim(0, 1) +
#             ggplot2::facet_wrap(scene ~ pyear) +
#             labs(title = sprintf("Whole validation %s %s", my_label, my_experiment),
#                  x = "Producer accuracy", 
#                  y = "User accuracy")
#     )
# }

# for (my_label in c("deforestation")) {
#     cat("\n\n")
#     cat(paste0("#### ", my_label), "\n")
#     for (my_experiment in c("rep_prodes_40", "rep_prodes_50")) {
#         suppressWarnings(plot_acc_by_alg_xp(pa_acc_tb, my_label, my_experiment))
#     }
# }