knitr::opts_chunk$set(echo = TRUE)
The following report summarizes the completeness of the forms submitted in Peskas. The data is filtered for boat landings.
library(magrittr) library(ggplot2) setwd("../..") pars <- peskas.timor.data.pipeline::read_config() metadata <- peskas.timor.data.pipeline::get_preprocessed_metadata(pars) landings <- peskas.timor.data.pipeline::get_merged_landings(pars, "_weight") %>% dplyr::rename(date = date, submitted_by = `_submitted_by`) %>% dplyr::select(-c(`_attachments`, species_group, dplyr::starts_with("_"))) %>% dplyr::rename_all(~ stringr::str_remove(., "trip_group/")) %>% dplyr::filter(!is.na(landing_site_name)) %>% dplyr::filter(date >= "2018-01-01") # Filter by landings with boats and form filled landings <- landings %>% dplyr::filter( has_boat == TRUE, Ita_koleta_dadus_husi_atividad == 1 | is.na(Ita_koleta_dadus_husi_atividad) & !is.na(total_catch_value) ) stations <- metadata$stations %>% dplyr::select( landing_site_name = .data$station_code, station = .data$station_name ) %>% dplyr::mutate(landing_site_name = as.character(landing_site_name)) landings <- dplyr::left_join(landings, stations, by = "landing_site_name") %>% dplyr::select(-landing_site_name) %>% dplyr::rename(landing_site_name = station) %>% dplyr::mutate( landing_site_name = trimws(.data$landing_site_name, which = "both"), landing_site_name = dplyr::case_when( landing_site_name == "Oenunu/Citrana/Bona/Baoknana" ~ "Oenunu/Citrana", landing_site_name == "Beto Tasi/Bebonuk/Fatuhada/Kampung Alor" ~ "Beto Tasi/Bebonuk", landing_site_name == "Sentru/Liarafa/Sika/Rau Moko" ~ "Sentru/Liarafa", landing_site_name == "Oebone/Sakato/Bausiu/Mahata/Oebau/Oenunu/Kmusa/Bokos/Posto Sika" ~ "Oebone/Sakato", landing_site_name == "Dolok Oan/Hera/Metinaro/Manleu" ~ "Dolok Oan/Hera", landing_site_name == "Fatu'u/Doru/Iliana/Arlo" ~ "Fatu'u/Doru", landing_site_name == "Seiçal/Buruma/Fatuk Bo'ot/Uai'ca" ~ "Seiçal/Buruma", landing_site_name == "Beacou/Sulilaran/Palaka" ~ "Beacou/Sulilaran", landing_site_name == "Bonuk/Fatumeta/Nunumera" ~ "Bonuk/Fatumeta", landing_site_name == "Bidau Santana/Metiaut" ~ "Bidau Santana", T ~ landing_site_name ) ) miss_plots <- function(landings = NULL, site = NULL) { landings_site <- landings %>% dplyr::filter(landing_site_name %in% site) missing.values <- landings_site %>% tidyr::gather(key = "key", value = "val") %>% dplyr::mutate(isna = is.na(val)) %>% dplyr::group_by(key) %>% dplyr::mutate(total = dplyr::n()) %>% dplyr::group_by(key, total, isna) %>% dplyr::summarise(num.isna = dplyr::n()) %>% dplyr::mutate(pct = num.isna / total * 100) %>% dplyr::ungroup() levels1 <- missing.values %>% dplyr::group_by(key) %>% dplyr::tally() %>% dplyr::arrange(n) %>% dplyr::filter(n == 1) %>% magrittr::extract2("key") levels2 <- (missing.values %>% dplyr::filter(isna == T) %>% dplyr::arrange(desc(pct)))$key levels3 <- c(levels2, levels1) percentage.plot <- missing.values %>% ggplot() + theme_minimal() + geom_bar( aes( x = reorder(key, dplyr::desc(pct)), y = pct, fill = isna ), stat = "identity", alpha = 0.8 ) + scale_x_discrete(limits = levels3) + scale_y_continuous(n.breaks = 6) + scale_fill_manual( name = "", values = c("#FDE725FF", "#440154FF"), labels = c("Present", "Missing") ) + coord_flip() + labs( title = "Percentage of missing values", x = "", y = "Missing values (%)" ) + theme( legend.position = "top", plot.margin = unit(c(1, 1, 1, 1), "pt") ) + guides(fill = "none") row.plot <- landings_site %>% dplyr::mutate(id = as.Date(date)) %>% tidyr::gather(-id, key = "key", value = "val") %>% dplyr::mutate(isna = is.na(val)) %>% ggplot(aes(key, id, fill = isna)) + theme_minimal() + geom_raster(alpha = 0.8) + scale_fill_manual( name = "", values = c("#FDE725FF", "#440154FF"), labels = c("Present", "Missing") ) + scale_x_discrete(limits = levels3) + scale_y_date( expand = c(0, 0) ) + labs( x = "", y = "Date", title = "Missing values temporal pattern" ) + coord_flip() + theme( legend.position = "right", axis.text.y = element_blank(), plot.margin = unit(c(1, 1, 1, -7), "pt") ) cowplot::plot_grid(percentage.plot, row.plot, ncol = 2, rel_widths = c(2, 1.25)) } # plots <- # purrr::set_names(unique(landings$landing_site_name)) %>% # purrr::map(miss_plots, # landings = landings)
missing.values <- landings %>% tidyr::gather(key = "key", value = "val") %>% dplyr::mutate(isna = is.na(val)) %>% dplyr::group_by(key) %>% dplyr::mutate(total = dplyr::n()) %>% dplyr::group_by(key, total, isna) %>% dplyr::summarise(num.isna = dplyr::n()) %>% dplyr::mutate(pct = num.isna / total * 100) %>% dplyr::ungroup() levels1 <- missing.values %>% dplyr::group_by(key) %>% dplyr::tally() %>% dplyr::arrange(n) %>% dplyr::filter(n == 1) %>% magrittr::extract2("key") levels2 <- (missing.values %>% dplyr::filter(isna == T) %>% dplyr::arrange(desc(pct)))$key levels3 <- c(levels2, levels1) percentage.plot <- missing.values %>% ggplot() + theme_minimal(13) + geom_bar( aes( x = reorder(key, dplyr::desc(pct)), y = pct, fill = isna ), stat = "identity", alpha = 0.8 ) + scale_x_discrete(limits = levels3) + scale_y_continuous( n.breaks = 6, expand = c(0, 0) ) + scale_fill_manual( name = "", values = c("#D2E21BFF", "#472F7DFF"), labels = c("Present", "Missing") ) + coord_flip() + labs( title = "Percentage of missing values", x = "", y = "Missing values (%)" ) + theme( legend.position = "top", plot.margin = unit(c(1, 1, 1, 1), "pt") ) + guides(fill = "none") row.plot <- landings %>% dplyr::mutate(id = as.Date(date)) %>% tidyr::gather(-id, key = "key", value = "val") %>% dplyr::mutate(isna = is.na(val)) %>% ggplot(aes(key, id, fill = isna)) + theme_minimal(13) + geom_raster(alpha = 0.8) + scale_fill_manual( name = "", values = c("#D2E21BFF", "#472F7DFF"), labels = c("Present", "Missing") ) + scale_x_discrete(limits = levels3) + scale_y_date( expand = c(0, 0) ) + labs( x = "", y = "Date", title = "Missing values temporal pattern" ) + coord_flip() + theme( legend.position = "right", axis.text.y = element_blank(), plot.margin = unit(c(1, 1, 1, -7), "pt") ) cowplot::plot_grid(percentage.plot, row.plot, ncol = 2, rel_widths = c(2, 1))
main_plot_dat <- landings %>% dplyr::filter(!is.na(landing_site_name)) %>% tidyr::gather(-landing_site_name, key = "key", value = "val") %>% dplyr::mutate(isna = is.na(val)) %>% dplyr::group_by(landing_site_name, key) %>% dplyr::mutate(total_obs = dplyr::n()) %>% dplyr::group_by(landing_site_name, key, total_obs, isna) %>% dplyr::summarise(num.isna = dplyr::n()) %>% dplyr::mutate(pct = num.isna / total_obs * 100) %>% dplyr::ungroup() %>% tidyr::complete(landing_site_name, key, isna) %>% dplyr::filter(isna == T) %>% dplyr::select(-c(isna, num.isna)) %>% dplyr::mutate(pct = ifelse(is.na(pct), 0, pct)) main_plot <- main_plot_dat %>% ggplot() + theme_bw(13) + geom_tile(aes( y = reorder(key, pct), x = reorder(landing_site_name, pct), fill = pct, color = pct )) + scale_fill_viridis_c(direction = -1, alpha = 0.75) + scale_color_viridis_c(direction = -1) + scale_x_discrete(expand = c(0, 0)) + scale_y_discrete(expand = c(0, 0)) + guides(color = "none") + labs( x = "", y = "", fill = "Missing values (%)" ) + theme( panel.grid = element_blank(), legend.position = "bottom", axis.text.x = element_text(angle = 45, hjust = 1) ) + guides(color = "none") obs_plot <- main_plot_dat %>% ggplot(aes( reorder(landing_site_name, pct), total_obs )) + theme_minimal(13) + geom_col(position = "identity", alpha = 0.75) + theme( panel.grid = element_blank(), axis.text.x = element_blank(), plot.margin = unit(c(-5, 1, -14, 1), "pt") ) + labs( x = "", y = "", title = "" ) cowplot::plot_grid(obs_plot, main_plot, ncol = 1, rel_heights = c(1, 4), align = "hv" )
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.