|
In litteR: Litter Analysis

# Coding convention
# To make scripts readable and consistent, we adhere to the packages and 
# programming style of the tidyverse (www.tidyverse.org)

knitr::opts_knit$set(
  progress = FALSE, 
  verbose = FALSE
)

# make sure all table columns are visible
options(dplyr.width = Inf, warning.length = 8000)

# attach packages to R's search path
suppressPackageStartupMessages({
  library(rlang)     # tidyverse versions of base/core functions
  library(dplyr)     # grammar of data manipulation
  library(purrr)     # functional programming tools
  library(tidyr)     # tidy data
  library(ggplot2)   # grammar of graphics
  library(fs)        # file system
  library(knitr)     # dynamic report generation
  library(litteR)    # litter analysis
  library(readr)     # writing CSV-files
  library(stringr)   # common string operations
})

# figure quality
FIGURE_QUALITY <- params %>%
  chuck("figure_quality")

# global knitr-settings (defaults)
opts_chunk$set(
    comment = NULL,
    echo = FALSE,
    dpi = c(low = 72, high = 300)[FIGURE_QUALITY]
)

logger$info("Checking parameters in settings file")

# files
FILE_SETTINGS <- params %>%
  pluck("file_settings", .default = "")
FILE_DATA <- params %>%
  pluck("file_data", .default = "")
FILE_TYPES <- params %>%
  pluck("file_types", .default = "")
FILE_STATS <- params %>%
  pluck("file_stats", .default = "")

# period to analyse
DATE_FROM <- params %>%
    chuck("date_min")
DATE_TO <- params %>%
    chuck("date_max")

# percentage of total count
PERCENTAGE_TOTAL_COUNT <- params %>%
  chuck("percentage_total_count")

# location_code, type name, and group code
LOCATION_CODE <- params %>%
  pluck("location_code")
REGION_CODE <- params %>%
  pluck("region_code")
TYPE_NAME <- params %>%
  pluck("type_name")
GROUP_CODE <- params %>%
  pluck("group_code")

if ((length(LOCATION_CODE) == 1L) && (LOCATION_CODE == "")) {
    LOCATION_CODE <- NULL
}
if ((length(REGION_CODE) == 1L) && (REGION_CODE == "")) {
    REGION_CODE <- NULL
}
if ((length(TYPE_NAME) == 1L) && (TYPE_NAME == "")) {
    TYPE_NAME <- NULL
}
if ((length(GROUP_CODE) == 1L) && (GROUP_CODE == "")) {
    GROUP_CODE <- NULL
}

# cutoff count axis in plots
CUTOFF_COUNT_AXIS <- params %>%
    pluck("cutoff_count_axis", .default = 100)

if (!(CUTOFF_COUNT_AXIS %>% between(0, 100))) {
    logger$error("cutoff count axis in YAML-file should be between 0% and 100%")
}

Introduction

This tool performs the following types of data analysis on beach litter data, or any other type of litter data: data quality control, outlier analysis, descriptive statistics, and trend analysis.

This report can best be viewed with the latest versions of web browsers like Google Chrome, Mozilla Firefox, Chromium, or Safari. Its contents does not render well in some versions of Microsoft's Internet Explorer.

Settings

period: from r DATE_FROM to r DATE_TO
percentage of total count to analyse: r PERCENTAGE_TOTAL_COUNT%
files:
- project directory: r sQuote(path_dir(FILE_DATA))
- settings: r sQuote(path_file(FILE_SETTINGS))
- data: r sQuote(path_file(FILE_DATA))
- types: r sQuote(path_file(FILE_TYPES))
location codes: r if (is_null(LOCATION_CODE)) {"not specified"} else {enumerate(sQuote(LOCATION_CODE))}
region codes: r if (is_null(REGION_CODE)) {"not specified"} else {enumerate(sQuote(REGION_CODE))}
group codes: r if (is_null(GROUP_CODE)) {"not specified"} else {enumerate(sQuote(GROUP_CODE))}
type names: r if (is_null(TYPE_NAME)) {"not specified"} else {enumerate(sQuote(TYPE_NAME))}
figure quality: r sQuote(FIGURE_QUALITY)
cutoff count axis in plots: r CUTOFF_COUNT_AXIS%

logger$info("Entering data quality control section")

Data Quality Control

In this section, litter data will be read and validated. Warnings will be printed if they occur. See also the log-file for more details.

# type_file
d_type <- FILE_TYPES %>%
  read_litter_types(logger)

# read and validate input file
d_ltr <- FILE_DATA %>%
  read_litter(logger, type_names = d_type %>% pull("type_name")) %>%
  filter(date %>% between(DATE_FROM, DATE_TO))

if ((nrow(d_ltr) == 0L)) {
  logger$error("No data available in the period settings between {DATE_FROM} and {DATE_TO}")
}

# regional analysis requested?
REGIONAL_ANALYSIS <- "region_code" %in% names(d_ltr)

logger$info(str_glue("Computing group totals"))
d_grp <- d_type %>%
  pull("group_code") %>%
  unique %>%
  map_df(function(x) {
    d_type %>%
      filter(group_code == x) %>%
      select(type_name) %>%
      left_join(d_ltr, by = "type_name") %>%
      group_by(location_code, date, .RECORD_ID) %>%
      summarise(count = sum(count), .groups = "drop") %>%
      filter(!are_na(count)) %>%
      mutate(type_name = x)})

logger$info(str_glue("Computing relative group totals (relative w.r.t. TC)"))
d_grp <- d_grp %>% 
  filter(type_name == "TC") %>%
  select(location_code, date, .RECORD_ID, TC = count) %>%
  right_join(d_grp, by = c("location_code", "date", ".RECORD_ID")) %>%
  mutate(rel_count = 100 * count / TC) %>%
  select(-TC)

logger$info(str_glue("Determining top {PERCENTAGE_TOTAL_COUNT}% litter..."))
d <- d_grp %>%
  filter(type_name == "TC") %>%
  select(location_code, date, TC = count) %>%
  group_by(location_code) %>%
  summarise(sum_TC = sum(TC), .groups = "drop") %>%
  left_join(d_ltr, by = "location_code") %>%
  group_by(location_code, sum_TC, type_name) %>%
  summarise(sum_count = sum(count), .groups = "drop") %>%
  arrange(location_code, desc(sum_count)) %>%
  group_by(location_code) %>%
  mutate(rel_count = 100 * cumsum(sum_count) / sum_TC) %>%
  mutate(topx = rel_count < (PERCENTAGE_TOTAL_COUNT + 0.001)) %>%
  ungroup %>%
  select(location_code, type_name, topx)

# adding relative counts to litter types
d_ltr <- d_grp %>%
  filter(type_name == "TC") %>%
  select(location_code, date, .RECORD_ID, TC = count) %>%
  right_join(d_ltr, by = c("location_code", "date", ".RECORD_ID")) %>%
  mutate(rel_count = 100 * count / TC) %>%
  select(-TC) %>%
  arrange(location_code, date, desc(rel_count)) 

# adding topx% to litter types
d_ltr <- d_ltr %>%
  left_join(d, by = c("location_code", "type_name"))

# adding litter types to litter groups
d_ltr <- d_ltr %>%
  bind_rows(d_grp)

# add region_code to litter groups
lut <- d_ltr %>%
    distinct(location_code, region_code) %>%
    filter(!are_na(region_code))
d_ltr <- d_ltr %>%
    select(-region_code) %>%
    left_join(lut, by = "location_code")

Session Information

logger$info("Adding session information.")
sessionInfo()

Any scripts or data that you put into this service are public.

litteR documentation built on Nov. 5, 2025, 6:19 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

litteR
Litter Analysis

|
In litteR: Litter Analysis

Introduction

Settings

Data Quality Control

Session Information

Try the litteR package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

litteR Litter Analysis

| In litteR: Litter Analysis

Introduction

Settings

Data Quality Control

Session Information

Try the litteR package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

litteR
Litter Analysis

|
In litteR: Litter Analysis