|

# Coding convention
# To make scripts readable and consistent, we adhere to the packages and 
# programming style of the tidyverse (www.tidyverse.org)

knitr::opts_knit$set(
  progress = FALSE, 
  verbose = FALSE
)

# make sure all table columns are visible
options(dplyr.width = Inf, warning.length = 8000)

# attach packages to R's search path
suppressPackageStartupMessages({
  library(rlang)     # tidyverse versions of base/core functions
  library(dplyr)     # grammar of data manipulation
  library(purrr)     # functional programming tools
  library(tidyr)     # tidy data
  library(ggplot2)   # grammar of graphics
  library(fs)        # file system
  library(knitr)     # dynamic report generation
  library(litteR)    # litter analysis
  library(readr)     # writing CSV-files
  library(stringr)   # common string operations
})
# figure quality
FIGURE_QUALITY <- params %>%
  chuck("figure_quality")

# global knitr-settings (defaults)
opts_chunk$set(
    comment = NULL,
    echo = FALSE,
    dpi = c(low = 72, high = 300)[FIGURE_QUALITY]
)
logger$info("Checking parameters in settings file")

# files
FILE_SETTINGS <- params %>%
  pluck("file_settings", .default = "")
FILE_DATA <- params %>%
  pluck("file_data", .default = "")
FILE_TYPES <- params %>%
  pluck("file_types", .default = "")
FILE_STATS <- params %>%
  pluck("file_stats", .default = "")

# period to analyse
DATE_FROM <- params %>%
    chuck("date_min")
DATE_TO <- params %>%
    chuck("date_max")

# percentage of total count
PERCENTAGE_TOTAL_COUNT <- params %>%
  chuck("percentage_total_count")

# location_code, type name, and group code
LOCATION_CODE <- params %>%
  pluck("location_code")
REGION_CODE <- params %>%
  pluck("region_code")
TYPE_NAME <- params %>%
  pluck("type_name")
GROUP_CODE <- params %>%
  pluck("group_code")

if ((length(LOCATION_CODE) == 1L) && (LOCATION_CODE == "")) {
    LOCATION_CODE <- NULL
}
if ((length(REGION_CODE) == 1L) && (REGION_CODE == "")) {
    REGION_CODE <- NULL
}
if ((length(TYPE_NAME) == 1L) && (TYPE_NAME == "")) {
    TYPE_NAME <- NULL
}
if ((length(GROUP_CODE) == 1L) && (GROUP_CODE == "")) {
    GROUP_CODE <- NULL
}

# cutoff count axis in plots
CUTOFF_COUNT_AXIS <- params %>%
    pluck("cutoff_count_axis", .default = 100)

if (!(CUTOFF_COUNT_AXIS %>% between(0, 100))) {
    logger$error("cutoff count axis in YAML-file should be between 0% and 100%")
}

Introduction

This tool performs the following types of data analysis on beach litter data, or any other type of litter data: data quality control, outlier analysis, descriptive statistics, and trend analysis.

This report can best be viewed with the latest versions of web browsers like Google Chrome, Mozilla Firefox, Chromium, or Safari. Its contents does not render well in some versions of Microsoft's Internet Explorer.

Settings



logger$info("Entering data quality control section")

Data Quality Control

In this section, litter data will be read and validated. Warnings will be printed if they occur. See also the log-file for more details.

# type_file
d_type <- FILE_TYPES %>%
  read_litter_types(logger)

# read and validate input file
d_ltr <- FILE_DATA %>%
  read_litter(logger, type_names = d_type %>% pull("type_name")) %>%
  filter(date %>% between(DATE_FROM, DATE_TO))

if ((nrow(d_ltr) == 0L)) {
  logger$error("No data available in the period settings between {DATE_FROM} and {DATE_TO}")
}
# regional analysis requested?
REGIONAL_ANALYSIS <- "region_code" %in% names(d_ltr)
logger$info(str_glue("Computing group totals"))
d_grp <- d_type %>%
  pull("group_code") %>%
  unique %>%
  map_df(function(x) {
    d_type %>%
      filter(group_code == x) %>%
      select(type_name) %>%
      left_join(d_ltr, by = "type_name") %>%
      group_by(location_code, date, .RECORD_ID) %>%
      summarise(count = sum(count), .groups = "drop") %>%
      filter(!are_na(count)) %>%
      mutate(type_name = x)})

logger$info(str_glue("Computing relative group totals (relative w.r.t. TC)"))
d_grp <- d_grp %>% 
  filter(type_name == "TC") %>%
  select(location_code, date, .RECORD_ID, TC = count) %>%
  right_join(d_grp, by = c("location_code", "date", ".RECORD_ID")) %>%
  mutate(rel_count = 100 * count / TC) %>%
  select(-TC)

logger$info(str_glue("Determining top {PERCENTAGE_TOTAL_COUNT}% litter..."))
d <- d_grp %>%
  filter(type_name == "TC") %>%
  select(location_code, date, TC = count) %>%
  group_by(location_code) %>%
  summarise(sum_TC = sum(TC), .groups = "drop") %>%
  left_join(d_ltr, by = "location_code") %>%
  group_by(location_code, sum_TC, type_name) %>%
  summarise(sum_count = sum(count), .groups = "drop") %>%
  arrange(location_code, desc(sum_count)) %>%
  group_by(location_code) %>%
  mutate(rel_count = 100 * cumsum(sum_count) / sum_TC) %>%
  mutate(topx = rel_count < (PERCENTAGE_TOTAL_COUNT + 0.001)) %>%
  ungroup %>%
  select(location_code, type_name, topx)

# adding relative counts to litter types
d_ltr <- d_grp %>%
  filter(type_name == "TC") %>%
  select(location_code, date, .RECORD_ID, TC = count) %>%
  right_join(d_ltr, by = c("location_code", "date", ".RECORD_ID")) %>%
  mutate(rel_count = 100 * count / TC) %>%
  select(-TC) %>%
  arrange(location_code, date, desc(rel_count)) 

# adding topx% to litter types
d_ltr <- d_ltr %>%
  left_join(d, by = c("location_code", "type_name"))

# adding litter types to litter groups
d_ltr <- d_ltr %>%
  bind_rows(d_grp)

# add region_code to litter groups
lut <- d_ltr %>%
    distinct(location_code, region_code) %>%
    filter(!are_na(region_code))
d_ltr <- d_ltr %>%
    select(-region_code) %>%
    left_join(lut, by = "location_code")









Session Information

logger$info("Adding session information.")
sessionInfo()


Try the litteR package in your browser

Any scripts or data that you put into this service are public.

litteR documentation built on Aug. 27, 2022, 1:05 a.m.