Nothing
# Coding convention # To make scripts readable and consistent, we adhere to the packages and # programming style of the tidyverse (www.tidyverse.org) knitr::opts_knit$set( progress = FALSE, verbose = FALSE ) # make sure all table columns are visible options(dplyr.width = Inf, warning.length = 8000) # attach packages to R's search path suppressPackageStartupMessages({ library(rlang) # tidyverse versions of base/core functions library(dplyr) # grammar of data manipulation library(purrr) # functional programming tools library(tidyr) # tidy data library(ggplot2) # grammar of graphics library(fs) # file system library(knitr) # dynamic report generation library(litteR) # litter analysis library(readr) # writing CSV-files library(stringr) # common string operations })
# figure quality FIGURE_QUALITY <- params %>% chuck("figure_quality") # global knitr-settings (defaults) opts_chunk$set( comment = NULL, echo = FALSE, dpi = c(low = 72, high = 300)[FIGURE_QUALITY] )
logger$info("Checking parameters in settings file") # files FILE_SETTINGS <- params %>% pluck("file_settings", .default = "") FILE_DATA <- params %>% pluck("file_data", .default = "") FILE_TYPES <- params %>% pluck("file_types", .default = "") FILE_STATS <- params %>% pluck("file_stats", .default = "") # period to analyse DATE_FROM <- params %>% chuck("date_min") DATE_TO <- params %>% chuck("date_max") # percentage of total count PERCENTAGE_TOTAL_COUNT <- params %>% chuck("percentage_total_count") # location_code, type name, and group code LOCATION_CODE <- params %>% pluck("location_code") REGION_CODE <- params %>% pluck("region_code") TYPE_NAME <- params %>% pluck("type_name") GROUP_CODE <- params %>% pluck("group_code") if ((length(LOCATION_CODE) == 1L) && (LOCATION_CODE == "")) { LOCATION_CODE <- NULL } if ((length(REGION_CODE) == 1L) && (REGION_CODE == "")) { REGION_CODE <- NULL } if ((length(TYPE_NAME) == 1L) && (TYPE_NAME == "")) { TYPE_NAME <- NULL } if ((length(GROUP_CODE) == 1L) && (GROUP_CODE == "")) { GROUP_CODE <- NULL } # cutoff count axis in plots CUTOFF_COUNT_AXIS <- params %>% pluck("cutoff_count_axis", .default = 100) if (!(CUTOFF_COUNT_AXIS %>% between(0, 100))) { logger$error("cutoff count axis in YAML-file should be between 0% and 100%") }
This tool performs the following types of data analysis on beach litter data, or any other type of litter data: data quality control, outlier analysis, descriptive statistics, and trend analysis.
This report can best be viewed with the latest versions of web browsers like Google Chrome, Mozilla Firefox, Chromium, or Safari. Its contents does not render well in some versions of Microsoft's Internet Explorer.
r DATE_FROM
to r DATE_TO
r PERCENTAGE_TOTAL_COUNT
%r sQuote(path_dir(FILE_DATA))
r sQuote(path_file(FILE_SETTINGS))
r sQuote(path_file(FILE_DATA))
r sQuote(path_file(FILE_TYPES))
r if (is_null(LOCATION_CODE)) {"not specified"} else {enumerate(sQuote(LOCATION_CODE))}
r if (is_null(REGION_CODE)) {"not specified"} else {enumerate(sQuote(REGION_CODE))}
r if (is_null(GROUP_CODE)) {"not specified"} else {enumerate(sQuote(GROUP_CODE))}
r if (is_null(TYPE_NAME)) {"not specified"} else {enumerate(sQuote(TYPE_NAME))}
r sQuote(FIGURE_QUALITY)
r CUTOFF_COUNT_AXIS
%
logger$info("Entering data quality control section")
In this section, litter data will be read and validated. Warnings will be printed if they occur. See also the log-file for more details.
# type_file d_type <- FILE_TYPES %>% read_litter_types(logger) # read and validate input file d_ltr <- FILE_DATA %>% read_litter(logger, type_names = d_type %>% pull("type_name")) %>% filter(date %>% between(DATE_FROM, DATE_TO)) if ((nrow(d_ltr) == 0L)) { logger$error("No data available in the period settings between {DATE_FROM} and {DATE_TO}") }
# regional analysis requested? REGIONAL_ANALYSIS <- "region_code" %in% names(d_ltr)
logger$info(str_glue("Computing group totals")) d_grp <- d_type %>% pull("group_code") %>% unique %>% map_df(function(x) { d_type %>% filter(group_code == x) %>% select(type_name) %>% left_join(d_ltr, by = "type_name") %>% group_by(location_code, date, .RECORD_ID) %>% summarise(count = sum(count), .groups = "drop") %>% filter(!are_na(count)) %>% mutate(type_name = x)}) logger$info(str_glue("Computing relative group totals (relative w.r.t. TC)")) d_grp <- d_grp %>% filter(type_name == "TC") %>% select(location_code, date, .RECORD_ID, TC = count) %>% right_join(d_grp, by = c("location_code", "date", ".RECORD_ID")) %>% mutate(rel_count = 100 * count / TC) %>% select(-TC) logger$info(str_glue("Determining top {PERCENTAGE_TOTAL_COUNT}% litter...")) d <- d_grp %>% filter(type_name == "TC") %>% select(location_code, date, TC = count) %>% group_by(location_code) %>% summarise(sum_TC = sum(TC), .groups = "drop") %>% left_join(d_ltr, by = "location_code") %>% group_by(location_code, sum_TC, type_name) %>% summarise(sum_count = sum(count), .groups = "drop") %>% arrange(location_code, desc(sum_count)) %>% group_by(location_code) %>% mutate(rel_count = 100 * cumsum(sum_count) / sum_TC) %>% mutate(topx = rel_count < (PERCENTAGE_TOTAL_COUNT + 0.001)) %>% ungroup %>% select(location_code, type_name, topx) # adding relative counts to litter types d_ltr <- d_grp %>% filter(type_name == "TC") %>% select(location_code, date, .RECORD_ID, TC = count) %>% right_join(d_ltr, by = c("location_code", "date", ".RECORD_ID")) %>% mutate(rel_count = 100 * count / TC) %>% select(-TC) %>% arrange(location_code, date, desc(rel_count)) # adding topx% to litter types d_ltr <- d_ltr %>% left_join(d, by = c("location_code", "type_name")) # adding litter types to litter groups d_ltr <- d_ltr %>% bind_rows(d_grp) # add region_code to litter groups lut <- d_ltr %>% distinct(location_code, region_code) %>% filter(!are_na(region_code)) d_ltr <- d_ltr %>% select(-region_code) %>% left_join(lut, by = "location_code")
logger$info("Adding session information.") sessionInfo()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.