knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
suppressMessages(library(dplyr))
suppressMessages(library(ggplot2))
suppressMessages(library(kableExtra))
suppressMessages(library(purrr))
suppressMessages(library(raster))
suppressMessages(library(rasterVis))
suppressMessages(library(rnaturalearth))
suppressMessages(library(rnaturalearthdata))
suppressMessages(library(sits.prodes))
suppressMessages(library(tidyr))

base_path <- "/home/alber/Documents/data/experiments/prodes_reproduction"
selected_scenes <- "226064"
selected_pyears <- 2014:2017
selected_experiments <- c("rep_prodes_40", "rep_prodes_41", "rep_prodes_42")
selected_algorithm  <- c("dl", "results_dl")
selected_smooth <- c("", "no_smooth", NA)
experiment_codes <- list("rep_prodes_40" = "Bands",  "rep_prodes_42" = "MM",  
                         "rep_prodes_41" = "Band_MM")
BRICK_IMAGES <- expert_validation <- prodes_mapbiomas <- validation_tb <- NULL

utils::data("BRICK_IMAGES", package = "sits.starfm")
BRICK_IMAGES <- BRICK_IMAGES %>% 
  dplyr::filter(scene %in% selected_scenes, prodes_year %in% selected_pyears) %>%
  dplyr::mutate(tier = stringr::str_sub(sat_image, -2), 
                scene_tier = paste(scene, tier, sep = '_')) %>%
  ensurer::ensure_that(nrow(.) > 0)

utils::data("expert_validation", package = "sits.prodes")
expert_validation <- expert_validation %>%
  dplyr::filter(experiment %in% selected_experiments, 
                scene %in% selected_scenes,
                pyear %in% selected_pyears,
                algorithm %in% selected_algorithm,
                smooth %in% selected_smooth) %>%
  ensurer::ensure_that(nrow(.) > 0)

validation_data_file <- "extdata/validation_tb.Rdata" %>%
  system.file(package = "sits.prodes") %>% 
  ensurer::ensure_that(file.exists(.))
load(validation_data_file) %>% 
  ensurer::ensure_that(exists("validation_tb"))
validation_tb <- validation_tb %>%
  dplyr::filter(experiment %in% selected_experiments, 
                algorithm %in% selected_algorithm,
                smooth %in% selected_smooth,
                scene %in% selected_scenes,
                pyear %in% selected_pyears) %>%
  ensurer::ensure_that(nrow(.) > 0)

utils::data("prodes_labels", package = "sits.prodes")
utils::data("mapbiomas_labels", package = "sits.prodes")
utils::data("prodes_mapbiomas", package = "sits.prodes")
ensurer::ensure_that(exists(c("prodes_labels", "mapbiomas_labels", 
                              "prodes_mapbiomas")), TRUE)
prodes_mapbiomas <- prodes_mapbiomas %>%
  dplyr::filter(tile %in% selected_scenes,
                year_pd %in% selected_pyears) %>%
  ensurer::ensure_that(nrow(.) > 0)

utils::data("training_logs", package = "sits.prodes")
# Get the accuracies out of a confusionMatrix object. 
# 
# @param  x A confusionMatrix object (see caret::confusionMatrix).
# @return A list of numeric. The overall, user, and producer accuracies.
get_accuracies <- function(x){
  if (length(x) == 1 && is.na(x)) 
    return(tibble::tibble(NA))
  if ("error_matrix" %in% names(x)) { 
    acc <- sits.prodes::asses_accuracy_simple(x$error_matrix)
  } else if ("table" %in% names(x)) { 
    acc <- sits.prodes::asses_accuracy_simple(x$table)
  } else {
    stop("Not found!")
  }
  c(acc$user, acc$producer) %>%
    tibble::enframe(name = NULL) %>%
    dplyr::bind_cols(cname =  c(paste0("ua_", names(acc$user)),
                                paste0("pa_", names(acc$producer)))) %>%
    tidyr::spread("cname", "value") %>%
    return()
}


# Remove NAs (either as value or character) from a vector.
# 
# @param  x A character.
# @return   A character.
remove_na <- function(x){
  return(trimws(gsub("NA", "", x[!is.na(x)])))
}


# Is the confusion matrix valid?
# 
# @param  x         A confusionMatrix object (see caret::confusionMatrix).
# @param  label_vec A chatacter. Names of the labels to consider.
# @return           A length-one logical.
is_matrix_valid <- function(x, label_vec){
    if (length(x) == 1 && is.na(x)) 
        return(FALSE)
    mt <- x$table %>% 
        as.matrix()
    if (any(colnames(mt) != rownames(mt)))
        return(FALSE)
    if (any(!(label_vec %in% colnames(mt))))
        return(FALSE)
    mt_lab <- mt[label_vec, label_vec]
    if (any(is.na(mt_lab)))
        return(FALSE)
    #if (any(apply(mt_lab, 2, sum) == 0))
    #    return(FALSE)
    return(TRUE)
}


# Get the confusion matrix from an object.
# 
# @param  x         A confusionMatrix object.
# @return           A length-one matrix.
get_confusion_matrix <- function(x){
  x %>% 
    .$table %>% 
    as.matrix() %>% 
    return()    
}
plot_expert_validation <- function(data_tb, my_algorithm, my_smooth){
  prod_acc <- user_acc <- variable <- NULL
  prodes_years <- data_tb %>% 
    dplyr::pull(pyear) %>% 
    unique() %>% 
    sort() 
  data_tb <- data_tb %>%
    dplyr::filter(algorithm == !!my_algorithm, smooth == !!my_smooth)
  if (nrow(data_tb) < 1) {
    warning("No data found!")
    ggplot(data.frame()) + geom_point() + xlim(0, 10) + ylim(0, 100)
    return()
  }
  plot(
    data_tb %>%
      dplyr::mutate(experiment = dplyr::recode(experiment,  !!!experiment_codes)) %>%
      ggplot2::ggplot(aes(x = prod_acc, y = user_acc,
                          shape = variable, color = experiment)) +
      ggplot2::geom_jitter(size = 3) +
      ggplot2::coord_fixed() +
      ggplot2::xlim(0.0, 1.01) +
      ggplot2::ylim(0.0, 1.01) +
      ggplot2::facet_wrap(scene ~ pyear, ncol = length(prodes_years)) +
      ggplot2::guides(shape = guide_legend(override.aes = list(size = 3))) +
      ggplot2::labs(#title = sprintf("Expert validation %s %s",  my_algorithm, my_smooth),
                    x = "Producer accuracy",
                    y = "User accuracy") + 
      theme(axis.text.x = element_text(angle = 90, hjust = 1)) + 
      theme(legend.position = "bottom") + 
      ggplot2::theme(#text = ggplot2::element_text(size = 22)
        #plot.title          = ggplot2::element_text(size = 22),
        #axis.title          = ggplot2::element_text(size = 22), 
        axis.title.x        = ggplot2::element_text(size = 16), 
        axis.title.y        = ggplot2::element_text(size = 16), 
        #axis.title.x.top    = ggplot2::element_text(size = 22), 
        #axis.title.x.bottom = ggplot2::element_text(size = 22), 
        #axis.title.y.left   = ggplot2::element_text(size = 22), 
        #axis.title.y.right  = ggplot2::element_text(size = 22),
        #axis.text           = ggplot2::element_text(size = 22), 
        axis.text.x         = ggplot2::element_text(size = 12), 
        axis.text.y         = ggplot2::element_text(size = 12), 
        #axis.text.x.top     = ggplot2::element_text(size = 22), 
        #axis.text.x.bottom  = ggplot2::element_text(size = 22), 
        #axis.text.y.left    = ggplot2::element_text(size = 22), 
        #axis.text.y.right   = ggplot2::element_text(size = 22),
        strip.text           = ggplot2::element_text(size = 14),
        strip.text.x         = ggplot2::element_text(size = 14),
        strip.text.y         = ggplot2::element_text(size = 14)
      )
  )
}

plot_prodes_validation <- function(data_tb, my_algorithm, my_smooth){
  prod_acc <- user_acc <- variable <- NULL
  prodes_years <- data_tb %>% 
    dplyr::pull(pyear) %>% 
    unique() %>% 
    sort() 
  data_tb <- data_tb %>%
    dplyr::filter(algorithm == !!my_algorithm, smooth == !!my_smooth)
  if (nrow(data_tb) < 1) {
    warning("No data found!")
    ggplot(data.frame()) + geom_point() + xlim(0, 10) + ylim(0, 100)
    return()
  }
  print(
    data_tb %>%
      dplyr::mutate(experiment = dplyr::recode(experiment, !!!experiment_codes)) %>%
      ggplot2::ggplot(aes(x = prod_acc, y = user_acc, color = experiment, 
                          shape = variable )) +
      ggplot2::geom_point(size = 3) +
      ggplot2::coord_fixed(xlim = c(0, 1), ylim = c(0, 1)) +
      ggplot2::facet_wrap(scene ~ pyear, ncol = length(prodes_years)) +
      ggplot2::guides(shape = guide_legend(override.aes = list(size = 4))) +
      ggplot2::labs(x = "Producer accuracy", 
                    # title = sprintf("%s %s", my_algorithm, my_smooth),
                    y = "User accuracy") +
      ggplot2::theme(axis.text.x = element_text(angle = 90, hjust = 1)) + 
      ggplot2::theme(legend.position = "bottom") + 
      ggplot2::theme(#text = ggplot2::element_text(size = 22)
        #plot.title          = ggplot2::element_text(size = 22),
        #axis.title          = ggplot2::element_text(size = 22), 
        axis.title.x        = ggplot2::element_text(size = 16), 
        axis.title.y        = ggplot2::element_text(size = 16), 
        #axis.title.x.top    = ggplot2::element_text(size = 22), 
        #axis.title.x.bottom = ggplot2::element_text(size = 22), 
        #axis.title.y.left   = ggplot2::element_text(size = 22), 
        #axis.title.y.right  = ggplot2::element_text(size = 22),
        #axis.text           = ggplot2::element_text(size = 22), 
        axis.text.x         = ggplot2::element_text(size = 12), 
        axis.text.y         = ggplot2::element_text(size = 12), 
        #axis.text.x.top     = ggplot2::element_text(size = 22), 
        #axis.text.x.bottom  = ggplot2::element_text(size = 22), 
        #axis.text.y.left    = ggplot2::element_text(size = 22), 
        #axis.text.y.right   = ggplot2::element_text(size = 22),
        strip.text           = ggplot2::element_text(size = 14),
        strip.text.x         = ggplot2::element_text(size = 14),
        strip.text.y         = ggplot2::element_text(size = 14)
      )
  )
}

plot_prodes_mapbiomas <- function(data_tb){
  prod_acc <- user_acc <- variable <- NULL
  prodes_years <- data_tb %>% 
    dplyr::pull(year_pd) %>% 
    unique() %>% 
    sort() 
  print(
    data_tb %>% 
      ggplot2::ggplot(aes(x = prod_acc, y = user_acc, 
                          shape = variable, color = variable)) +
      ggplot2::geom_point(size = 3) +
      ggplot2::coord_fixed(xlim = c(0, 1), ylim = c(0, 1)) +
      ggplot2::facet_wrap(tile ~ year_pd, ncol = length(prodes_years)) + 
      ggplot2::guides(shape = guide_legend(override.aes = list(size = 3))) +
      ggplot2::labs(x = "Producer accuracy", 
                    #title = "PRODES - Mapbiomas", 
                    y = "User accuracy") +
      ggplot2::theme(axis.text.x = element_text(angle = 90, hjust = 1)) + 
      ggplot2::theme(legend.position="bottom") + 
      ggplot2::theme(#text = ggplot2::element_text(size = 22)
        #plot.title          = ggplot2::element_text(size = 22),
        #axis.title          = ggplot2::element_text(size = 22), 
        axis.title.x        = ggplot2::element_text(size = 16), 
        axis.title.y        = ggplot2::element_text(size = 16), 
        #axis.title.x.top    = ggplot2::element_text(size = 22), 
        #axis.title.x.bottom = ggplot2::element_text(size = 22), 
        #axis.title.y.left   = ggplot2::element_text(size = 22), 
        #axis.title.y.right  = ggplot2::element_text(size = 22),
        #axis.text           = ggplot2::element_text(size = 22), 
        axis.text.x         = ggplot2::element_text(size = 12), 
        axis.text.y         = ggplot2::element_text(size = 12), 
        #axis.text.x.top     = ggplot2::element_text(size = 22), 
        #axis.text.x.bottom  = ggplot2::element_text(size = 22), 
        #axis.text.y.left    = ggplot2::element_text(size = 22), 
        #axis.text.y.right   = ggplot2::element_text(size = 22),
        strip.text           = ggplot2::element_text(size = 14),
        strip.text.x         = ggplot2::element_text(size = 14),
        strip.text.y         = ggplot2::element_text(size = 14)
      )
  )
}

Spatial coverage

scene <- NULL
map_bz <- rnaturalearth::ne_countries(scale = "small", returnclass = "sf") %>%
  ggplot2::ggplot() +
  ggplot2::geom_sf() +
  #ggplot2::labs(title = "Area of interest.") +
  ggplot2::coord_sf(xlim = c(-74, -34), ylim = c(-34, 6), expand = FALSE)
img_extent <- BRICK_IMAGES %>% 
  dplyr::select(scene, img_extent) %>% 
  dplyr::group_by(scene) %>% 
  dplyr::slice(dplyr::n()) %>% 
  dplyr::ungroup() %>% 
  dplyr::pull(img_extent)
for (i_ext in img_extent) {
  map_bz <- map_bz + ggplot2::geom_rect(xmin = i_ext["xmin"], 
                                        xmax = i_ext["xmax"], 
                                        ymin = i_ext["ymin"], 
                                        ymax = i_ext["ymax"], 
                                        fill = NA, colour = "black", size = 1.0)
}
print(map_bz)
sat_image <- img_date <- tier <- scene_tier <- scene_tier_id <- NULL

id_scene_tier <- BRICK_IMAGES %>% 
  dplyr::distinct(scene, tier) %>% 
  dplyr::arrange(scene, tier) %>% 
  dplyr::mutate(scene_tier_id = 1:n())

BRICK_IMAGES %>% 
  dplyr::select(sat_image, scene, img_date, tier, scene_tier) %>% 
  dplyr::left_join(id_scene_tier, by = c("scene", "tier")) %>% 
  ggplot2::ggplot(ggplot2::aes(x = img_date, y = scene_tier_id)) + 
  ggplot2::geom_line(ggplot2::aes(color = scene, group = scene)) +
  ggplot2::geom_point(ggplot2::aes(shape = tier), size = 2) +
  ggplot2::labs(x = "Image date", y = "") +
  #ggplot2::labs(title = "Image's spatial accuracy.") +
  ggplot2::theme(axis.title.y = ggplot2::element_blank(),
                 axis.text.y = ggplot2::element_blank(),
                 axis.ticks.y = ggplot2::element_blank())

Cloud coverage.

cloud_cov <- prodes_year <- img_date <- prodes_doy <- NULL

cloud_cover <- BRICK_IMAGES %>% 
  dplyr::select(scene, img_date, cloud_cov, prodes_year) %>% 
  dplyr::mutate(doy = lubridate::yday(img_date),
                prodes_doy = (doy + 365 - 214) %% 365 + 1) %>% 
  dplyr::arrange(scene, prodes_year, prodes_doy) %>%
  ggplot2::ggplot(ggplot2::aes(x = prodes_doy, y = cloud_cov)) + 
  ggplot2::ylim(0, 1) +
  ggplot2::geom_line() +
  ggplot2::facet_grid(prodes_year ~ scene) +
  ggplot2::geom_smooth(se = FALSE, method = 'loess', formula = 'y ~ x') + 
  #ggplot2::labs(title = "Cloud covearge", subtitle = "As reported by the images' metadata.") +   
  ggplot2::xlab("Image adquisition date ") + 
  ggplot2::scale_x_continuous(breaks = c(1, 91, 181, 271, 365),
                              label = c("August 1st", "November 1st", 
                                        "February 1st", "May 1st", 
                                        "July 31st")) + 
  ggplot2::ylab("Cloud coverage") + 
  ggplot2::theme(#text = ggplot2::element_text(size = 22)
                 #plot.title          = ggplot2::element_text(size = 22),
                 #axis.title          = ggplot2::element_text(size = 22), 
                 axis.title.x        = ggplot2::element_text(size = 20), 
                 axis.title.y        = ggplot2::element_text(size = 20), 
                 #axis.title.x.top    = ggplot2::element_text(size = 22), 
                 #axis.title.x.bottom = ggplot2::element_text(size = 22), 
                 #axis.title.y.left   = ggplot2::element_text(size = 22), 
                 #axis.title.y.right  = ggplot2::element_text(size = 22),
                 #axis.text           = ggplot2::element_text(size = 22), 
                 axis.text.x         = ggplot2::element_text(size = 12), 
                 axis.text.y         = ggplot2::element_text(size = 12), 
                 #axis.text.x.top     = ggplot2::element_text(size = 22), 
                 #axis.text.x.bottom  = ggplot2::element_text(size = 22), 
                 #axis.text.y.left    = ggplot2::element_text(size = 22), 
                 #axis.text.y.right   = ggplot2::element_text(size = 22),
                 strip.text           = ggplot2::element_text(size = 16),
                 strip.text.x         = ggplot2::element_text(size = 16),
                 strip.text.y         = ggplot2::element_text(size = 16)
                 )
print(cloud_cover)

Cloud quantity

cloud_maps <- base_path %>%
  file.path("data", "raster", "cloud_count") %>% 
  list.files(pattern = "*201[0-9].tif", full.names = TRUE) %>%
  tibble::enframe(name = NULL) %>%
  dplyr::rename(file_path = "value") %>%
  dplyr::mutate(scene = file_path %>%
                  basename() %>%
                  stringr::str_extract(pattern = "_[0-9]{6}_") %>%
                  stringr::str_sub(2, -2),
                pyear = file_path %>%
                  basename() %>%
                  stringr::str_extract(pattern = "_[0-9]{4}[.]") %>%
                  stringr::str_sub(2, -2)) %>%
  ensurer::ensure_that(nrow(.) > 0, err_desc = "Quantile maps not found.") %>% 
  dplyr::mutate(r = purrr::map(file_path, raster::raster)) %>% 
  dplyr::arrange(scene, pyear) %>%
  dplyr::filter(scene %in% selected_scenes, pyear %in% c(2013, selected_pyears))

colr <- suppressWarnings(grDevices::colorRampPalette(RColorBrewer::brewer.pal(24, "RdYlBu")))
for (rid in 1:nrow(cloud_maps)) {
  r <- cloud_maps$r[[rid]]
  rasterVis::levelplot(r, margin = FALSE,
                       colorkey = list(space = "bottom",
                                       labels = list(at = 0:23, font = 4)), 
                       par.settings = list(axis.line = list(col = "transparent")),
                       #scales = list(draw = FALSE),
                       col.regions = colr,
                       main = sprintf("%s ", 
                                      #cloud_maps$scene[[rid]], 
                                      cloud_maps$pyear[[rid]]),
                       at = seq(0, 23, len = 24),
                       xlab = NULL,
                       ylab = NULL,
                       scales=list(draw = FALSE),
                       labels = FALSE) %>% 
    plot()
}

Quantiles

quantile_maps <- base_path %>%
  file.path("data", "raster", "cloud_count") %>%
  list.files(pattern = "*quantiles.tif", full.names = TRUE) %>%
  tibble::enframe(name = NULL) %>%
  dplyr::rename(file_path = "value") %>%
  dplyr::mutate(scene = file_path %>%
                  basename() %>%
                  stringr::str_extract(pattern = "_[0-9]{6}_") %>%
                  stringr::str_sub(2, -2),
                pyear = file_path %>%
                  basename() %>%
                  stringr::str_extract(pattern = "_[0-9]{4}_") %>%
                  stringr::str_sub(2, -2)) %>%
  ensurer::ensure_that(nrow(.) > 0, err_desc = "Quantile maps not found.") %>% 
  dplyr::mutate(r = purrr::map(file_path, raster::raster)) %>% 
  dplyr::arrange(scene, pyear) %>%
  dplyr::filter(scene %in% selected_scenes, pyear %in% c(2013, selected_pyears))

colr <- grDevices::colorRampPalette(RColorBrewer::brewer.pal(5, "RdYlBu"))
for (rid in 1:nrow(quantile_maps)) {
  r <- quantile_maps$r[[rid]]
  rasterVis::levelplot(r, margin = FALSE,
                       colorkey = list(space = "bottom",
                                       labels = list(at = -5:5, font = 4)), 
                       par.settings = list(axis.line = list(col = "transparent")),
                       #scales = list(draw = FALSE),
                       col.regions = colr,
                       at = seq(0, 4, len = 5), 
                       main = sprintf("Cloud quantile %s %s", 
                                      quantile_maps$scene[[rid]], 
                                      quantile_maps$pyear[[rid]])) %>% 
    plot()
}

List of experiments.

Bands_experiment <- Bands__experiment <- Clasification_type <- Labels <- Labels_experiment <- setup <- Scenes_experiment <- NULL
training_logs %>% 
  tidyr::unnest(setup) %>%
  dplyr::filter(stringr::str_detect(.$experiment, "^rep_prodes_4")) %>%
  dplyr::mutate(bands = remove_na(paste(Bands_experiment, Bands__experiment))) %>%
  dplyr::mutate(labels = remove_na(paste(Labels, Labels_experiment))) %>%
  dplyr::select(experiment, Clasification_type, labels, bands, Scenes_experiment) %>%
  dplyr::mutate(experiment = paste0("rep_prodes_", stringr::str_extract(experiment, pattern = "[0-9]{2}"))) %>% 
  dplyr::rename(Experiment = "experiment", Brick = "Clasification_type", 
                Label = "labels", Scene = "Scenes_experiment", Band = "bands") %>% 
  knitr::kable() %>%
  kableExtra::kable_styling(full_width = F)

PRODES mapping to English

id_pd <- NULL
prodes_labels %>% 
    dplyr::select(-id_pd) %>% 
    knitr::kable() %>% 
    kableExtra::kable_styling(full_width = F)

Results

acc_type <- algorithm <- prod_acc <- pyear <- user_acc <- up_acc2 <- variable <- NULL

acc_tb <- expert_validation %>%
  dplyr::mutate( is_mt_complete = purrr::map_lgl(.$confusion_matrix, 
                                                 is_matrix_valid, 
                                                 label_vec = c("deforestation", 
                                                               "forest")),
                 up_acc2 = purrr::map(.$confusion_matrix, get_accuracies)) %>%
  dplyr::filter(is_mt_complete == TRUE) %>%
  tidyr::drop_na(confusion_matrix) %>% 
  dplyr::select(experiment, algorithm, smooth, scene, pyear, up_acc2) %>%
  tidyr::unnest() %>%
  dplyr::select(-tidyselect::ends_with(match = "no forest"), 
                -tidyselect::ends_with(match = "water")) %>%
  tidyr::gather(tidyselect::starts_with("pa_"), tidyselect::starts_with("ua_"),
                key = "variable", value = "value") %>%
  dplyr::mutate(acc_type = ifelse(stringr::str_sub(variable, 1, 3) == "ua_",
                                  "user", "producer"),
                variable = stringr::str_sub(variable, 4)) %>% 
  dplyr::arrange(experiment, algorithm, smooth, scene, pyear, variable, acc_type) %>% 
  ensurer::ensure_that(nrow(.) > 0)
acc_pr <- acc_tb %>% 
  dplyr::filter(acc_type == "producer") %>%
  dplyr::rename(prod_acc = value) %>%
  dplyr::select(-acc_type) %>%
  ensurer::ensure_that(nrow(.) > 0)
acc_ur <- acc_tb %>% 
  dplyr::filter(acc_type == "user") %>%
  dplyr::rename(user_acc = value) %>%
  dplyr::select(-acc_type) %>% 
  ensurer::ensure_that(nrow(.) > 0)
pa_acc_tb <- acc_pr %>%
  dplyr::inner_join(acc_ur, by = c("experiment", "algorithm", "smooth", 
                                   "scene", "pyear", "variable")) %>% 
  ensurer::ensure_that(nrow(.) > 0) 

for (my_algorithm in unique(dplyr::pull(pa_acc_tb, algorithm))) {
  cat("\n\n")
  for (my_smooth in unique(dplyr::pull(pa_acc_tb, smooth))) {
    cat(paste0("#### ", my_algorithm, " ", my_smooth), "\n")
    pa_acc_tb %>% 
      plot_expert_validation(my_algorithm, my_smooth)
  }
}


View(pa_acc_tb)
View(tidyr::drop_na(pa_acc_tb))



rm(acc_tb, acc_pr, acc_ur, pa_acc_tb)

Compare to PRODES - Accuracy by algorithm and smooth

accuracy_tb <- validation_tb %>%
  dplyr::mutate(smooth = ifelse(is.na(smooth), "no_smooth", smooth),
                up_acc2 = purrr::map(.$validation_data, get_accuracies)) %>%
  dplyr::select(experiment, algorithm, scene, pyear, smooth, up_acc2) %>%
  dplyr::arrange(experiment, algorithm, scene, pyear, smooth) %>%
  ensurer::ensure_that(nrow(.) > 0, err_desc = "No data found!") %>%
  tidyr::unnest() %>%
  tidyr::gather(tidyselect::starts_with("pa_"), tidyselect::starts_with("ua_"),
                key = "variable", value = "value") %>%
  dplyr::mutate(acc_type = ifelse(stringr::str_sub(variable, 1, 3) == "ua_",
                                  "user", "producer"),
                variable = stringr::str_sub(variable, 4))
accuracy_pr <- accuracy_tb %>% dplyr::filter(acc_type == "producer") %>%
    dplyr::rename(prod_acc = value) %>%
    dplyr::select(-acc_type)
accuracy_ur <- accuracy_tb %>% dplyr::filter(acc_type == "user") %>%
    dplyr::rename(user_acc = value) %>%
    dplyr::select(-acc_type)
pa_accuracy_tb <- accuracy_pr %>%
    dplyr::inner_join(accuracy_ur, by = c("experiment", "algorithm", "scene", "pyear",
                                     "smooth", "variable"))

for (my_algorithm in unique(dplyr::pull(pa_accuracy_tb, algorithm))) {
  cat("\n\n")
  cat(paste0("#### ", my_algorithm), "\n")
  for (my_smooth in unique(dplyr::pull(pa_accuracy_tb, smooth))) {
    pa_accuracy_tb %>% 
      plot_prodes_validation(my_algorithm, my_smooth)
  }
}

rm(accuracy_tb, accuracy_pr, accuracy_ur, pa_accuracy_tb)

PRODES - Mapbiomas

# ---- here ----

tile <- year_pd <- NULL

# sort by scene-year
# extract the confusion matrices
# compute user-producer accuracies
prodes_mapbiomas <-  prodes_mapbiomas %>% 
  dplyr::arrange(tile, year_pd) %>%
  dplyr::mutate(cm = purrr::map(.$conmat, get_confusion_matrix)) %>%
  dplyr::mutate(up_acc = add_upacc(cm))

# get data for plots

acc_tb <- prodes_mapbiomas %>% 
    dplyr::mutate(up_acc2 = purrr::map(.$conmat, function(x){
        res <- sits.prodes::asses_accuracy_simple(x$table)
        c(res$user, res$producer) %>%
            tibble::enframe(name = NULL) %>%
            dplyr::bind_cols(cname =  c(paste0("ua_", names(res$user)), 
                                        paste0("pa_", names(res$producer)))) %>%
            tidyr::spread("cname", "value") %>%
            return()
    })) %>% 
    dplyr::select(tile, year_pd, up_acc2) %>% 
    tidyr::unnest() %>% 
    tidyr::gather(tidyselect::starts_with("pa_"), tidyselect::starts_with("ua_"), 
                  key = "variable", value = "value") %>% 
    dplyr::mutate(acc_type = ifelse(stringr::str_sub(variable, 1, 3) == "ua_", 
                                    "user", "producer"),
                  variable = stringr::str_sub(variable, 4)) 
acc_pr <- acc_tb %>% dplyr::filter(acc_type == "producer") %>% 
    dplyr::rename(prod_acc = value) %>% 
    dplyr::select(-acc_type)
acc_ur <- acc_tb %>% dplyr::filter(acc_type == "user") %>% 
    dplyr::rename(user_acc = value) %>% 
    dplyr::select(-acc_type)
pa_acc_tb <- acc_pr %>% 
    dplyr::inner_join(acc_ur, by = c("tile", "year_pd", "variable"))


plot_prodes_mapbiomas(pa_acc_tb)

rm(acc_tb, acc_pr, acc_ur, pa_acc_tb)

RAW samples

A dataset containing a tibble with time series sampled on the brazilian Amazon. The time series come from Landsat 8 Collection images.

data(prodes_samples_raw)

suppressWarnings(
  prodes_samples_raw %>% 
    dplyr::mutate(pyear = lubridate::year(end_date)) %>% 
    dplyr::select(pyear, label) %>% 
    table() %>% 
    knitr::kable(digits = 0, row.names = TRUE, full_width = TRUE, 
                 caption = "Number of raw samples per PRODES year.") %>% 
    kableExtra::kable_styling()
)

prodes_samples_raw %>% 
  dplyr::sample_frac(0.01) %>% 
  sits::sits_select_bands(ndvi) %>% 
  sits::sits_plot()

cloud <- n_clouds <- NULL
prodes_samples_raw %>% 
    dplyr::sample_frac(0.1) %>% 
    dplyr::mutate(n_clouds = purrr::map_int(.$time_series, function(x){
        x %>% 
            dplyr::select(cloud) %>% 
            sum() %>% 
            as.integer() %>% 
            return()
    })) %>% 
    ggplot2::ggplot(ggplot2::aes(n_clouds)) + 
    ggplot2::geom_histogram(breaks = 0:23) + 
    ggplot2::labs(title = "Clouded samples (prodes_samples_raw)") +
    ggplot2::xlab("Number of clouded pixels") +
    ggplot2::ylab("Count")

print(sprintf("Number of samples %s", nrow(prodes_samples_raw)))

Experiment summary

data("training_logs", package = "sits.prodes")
training_logs %>% 
    tidyr::unnest(setup) %>%
    dplyr::filter(stringr::str_detect(experiment, pattern = "train_(4|5)")) %>%
    dplyr::mutate(bands = remove_na(paste(Bands_experiment, Bands__experiment))) %>%
    dplyr::mutate(labels = remove_na(paste(Labels, Labels_experiment))) %>%
    dplyr::select(experiment, Clasification_type, labels, bands, Scenes_experiment) %>%
    dplyr::filter(stringr::str_detect(experiment, "train_(4|5)")) %>%
    dplyr::rename_all(list(~stringr::str_replace_all(stringr::str_to_title(.), '_', ' '))) %>%
    knitr::kable() %>%
    kableExtra::kable_styling(full_width = F)
training_logs %>% 
    tidyr::unnest(trains) %>%
    dplyr::filter(model_name %in% c("train_40_model_17", "train_41_model_2",  
                                    "train_42_model_2")) %>%
    dplyr::select(experiment, activation, batch_size, dropout_rates, epochs, 
                  optimizer, units, validation_split, path) %>%
    dplyr::mutate(layers = purrr::map(.$dropout_rates, function(x){length(eval(x))}),
                  dropout_rates = unique(dropout_rates)) %>%
    dplyr::rename_all(list(~stringr::str_replace_all(stringr::str_to_title(.), '_', ' '))) %>%
    knitr::kable() %>% 
    kableExtra::kable_styling(full_width = F)

plot_tb <- training_logs %>% 
    tidyr::unnest(trains) %>% 
    dplyr::select(experiment, model_name, acc, loss, val_acc, val_loss) %>%
    dplyr::filter(model_name %in% c("train_40_model_17",
                                    "train_41_model_2",
                                    "train_42_model_2",
                                    "train_50_model_2",
                                    "train_51_model_14",
                                    "train_52_model_13"
                                    )) %>% 
    dplyr::mutate(acc  = purrr::map(acc,  function(x) eval(parse(text = x))),
                  loss = purrr::map(loss, function(x) eval(parse(text = x))),
                  val_acc  = purrr::map(val_acc, function(x) eval(parse(text = x))),
                  val_loss = purrr::map(val_loss, function(x) eval(parse(text = x))), 
                  val_acc_mean = purrr::map_dbl(val_acc, mean)) 

plot_ls <- lapply(unique(plot_tb$experiment), function(exp){
    plot_tb %>% 
        dplyr::filter(experiment %in% exp) %>% 
        tidyr::unnest() %>%
        dplyr::group_by(model_name) %>% 
        dplyr::mutate(epoch = row_number()) %>%
        ggplot2::ggplot() +
        ggplot2::geom_path(aes(y = loss, x = epoch), colour = "blue") +
        ggplot2::geom_path(ggplot2::aes(y = val_loss, x = epoch)) +
        ggplot2::ggtitle(exp) +
        ggplot2::ylim(0.0, 0.25) +
        #ggplot2::theme(plot.title = element_text(size = 8)) +
        ggplot2::labs(x = "Epochs", y = "Loss") +
        ggplot2::facet_wrap(~ model_name, nrow = 5) 

#    %>%
#        return()
})



#for(p in plot_ls){
for(i in 1:length(plot_ls)){
    print(i)
    p <- plot_ls[[i]]
    suppressWarnings(print(p))
}

selected trainings



albhasan/sits.prodes documentation built on Sept. 3, 2020, 2:03 p.m.