knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  echo = FALSE
)
library(hera)
library(dplyr)
library(purrr)
library(tidyr)
library(tibble)
library(magrittr)
library(testthat)

Welcome

This document has been created following the generic assessment guidance.

Description

Basic details about the assessment. Update the 'response' values as required.

description <- tribble(
  ~question, ~response,
  "name_short", "DARLEQ3",
  "name_long", "UKTAG River Assessment Method Macrophytes and Phytobenthos",
  "parameter", "River Diatoms",
  "status", "prototype",
  "type", "metric"
)

knitr::kable(description)

Input

A list of questions required to run the assessment. Optional 'alkalinity' column can be included to provide a pre-calculated mean alkalinity. If 'alkalinity' column not included, alkalinity will be calculated from rows with 'alkalinity' question using the response column value.

input <- tibble(
  question = c(
    "location_id",
    "sample_id",
    "date_taken",
    "Taxon abundance",
    "parameter",
    "Alkalinity",
    "TDI4",
    "Predicted TDI4"
  ),
  response = c(
    "8175",
    "1",
    "2021-05-21",
    "12",
    "45",
    "River Diatoms",
    "45",
    "67"
  ),
  label = c("Gomphonema olivaceum", NA, NA, NA, NA, NA, NA, NA),
  parameter = c(NA, NA, NA, NA, NA, "River Diatoms", NA, NA),
  data_type = c(
    "character", "character",
    "character", "number",
    "number", "character",
    "number", "number"
  ),
  max = c(NA, NA, NA, NA, NA, NA, NA, NA),
  min = c(NA, NA, NA, NA, NA, NA, NA, NA),
  source = c(
    "sepa_ecology_results",
    "sepa_ecology_results",
    "sepa_ecology_results",
    "sepa_ecology_results",
    "location_attributes",
    "sepa_ecology_results",
    "sepa_metric_results",
    "sepa_metric_results"
  ),
  question_type = c(
    "reference",
    "reference",
    "reference",
    "observation",
    "reference",
    "predictor",
    "outcome",
    "outcome"
  )
)
input


input <- tibble(
  location_id = c("8175", "8175"),
  sample_id = c("12345", "12345"),
  date_taken = as.Date(c("2019-11-21", "2019-11-21")),
  question = c("Taxon abundance", "Alkalinity"),
  response = c("12", "45"),
  label = c("Gomphonema olivaceum", NA),
  parameter = c("River Diatoms", "Chemistry"),
  type = c("number", "number"),
  max = c(NA, NA),
  min = c(NA, NA),
  source = c("sepa_ecology_results", "location_attributes")
)
data <- input
data <- select(data, question, response)
data <- data[!duplicated(data[, c("question")]), ]

knitr::kable(data)

Assessment

Function code used to assess your input data and return an outcome.

Code

assessment_function <- function(data, metric = "TDI5LM") {
  require(dplyr)
  require(tidyr)
  require(magrittr)
  require(tibble)
  # Get alkalinity predictor if not present...
  if (!any(names(data) %in% "alkalinity")) {
    # predictor table for alkalinity if "chemistry_site" variable
    predictors <- utils::read.csv(
      system.file("extdat",
        "predictors.csv",
        package = "hera"
      ),
      stringsAsFactors = FALSE, check.names = FALSE
    )
    predictors$location_id <- as.character(predictors$location_id)
     predictors$date <- as.Date(predictors$date)
    predictors <- arrange(predictors, dplyr::desc(date))
    if (!any(names(data) %in% "chemistry_site")) {
      data <- left_join(data, predictors, by = c("location_id"),
                        multiple = "first")
    }


    if (!any(names(data) %in% c("alkalinity"))) {
      message("calculating alklainity...")
      alk <- hera:::mean_alkalinity(data)
      data$alkalinity <- NULL
      data <- inner_join(data, alk, by = join_by("sample_id" == "sample_number"))
    }
  }
  data <- filter(data, question == "Taxon abundance" &
    parameter == "River Diatoms")
  data$response <- as.numeric(data$response)
  data$alkalinity[is.na(data$alkalinity)] <- 75

  data$alkalinity <- as.numeric(data$alkalinity)
  # Combine mean alkalinity with other site headers
  header <- data %>%
    mutate(
      "SampleID" = as.factor(sample_id),
      "DATE_TAKEN" = as.Date(date_taken, tz = "GB")
    ) %>%
    select("SampleID",
      "SiteID" = "location_id",
      "SAMPLE_DATE" = "date_taken",
      "Alkalinity" = "alkalinity"
    ) %>%
    unique()

  # Loch samples also require an Alkalinity 'type';
  # 'HA' - High Alkalninty etc
  # This will be ignored if running river classification
  header$lake_TYPE <- NA
  header$lake_TYPE[header$Alkalinity > 50] <- "HA"
  header$lake_TYPE[header$Alkalinity >= 10 &
    header$Alkalinity <= 50] <- "MA"
  header$lake_TYPE[header$Alkalinity < 10] <- "LA"

  header$SiteID <- as.character(header$SiteID)
  ## Important: Arrange to match order of 'diatom_data' data frame.
  header <- arrange(header, SampleID)

  # Prepare dataframe of 'diatom_data' -------------------------------
  # Include columns for each diatom ID (from NEMS Dares table)
  # responses are abundances.
  # dataframe row.names are SAMPLE_NUMBER.

  # DARES table
  # - must use table from NEMS - this links TAXON to TAXONLD code
  dares_table <- darleq3::darleq3_taxa
  # Filter for taxon abundance only
  diatom_taxon_abundance <- data %>%
    filter(question == "taxon abundance" |
      question == "Taxon abundance")

  # Trim whitespace in Taxon name to help join.
  diatom_taxon_abundance$label <- trimws(diatom_taxon_abundance$label)
  dares_table$TaxonNameSEPA <- trimws(dares_table$TaxonNameSEPA)
  # Join to S_TAXON_DARES table using Taxon name.
  if (any(names(diatom_taxon_abundance) %in% c("taxon_ids"))) {
    diatom_taxonname <- diatom_taxon_abundance %>%
      select(
        "location_id",
        "sample_id",
        "taxon_ids",
        "label",
        "response",
        "date_taken"
      ) %>%
      inner_join(dares_table[, c("TaxonName", "TaxonId", "TaxonNameSEPA")],
        by = c("taxon_ids" = "TaxonId")
      )
    diatom_taxonname$TaxonId <- diatom_taxonname$taxon_ids
  } else {
    diatom_taxonname <- diatom_taxon_abundance %>%
      select(
        "location_id",
        "sample_id",
        "label",
        "response",
        "date_taken"
      ) %>%
      inner_join(dares_table[, c("TaxonName", "TaxonId", "TaxonNameSEPA")],
        by = c("label" = "TaxonNameSEPA")
      )
  }
  # Make sure numeric
  diatom_taxonname$response <-
    as.numeric(as.character(diatom_taxonname$response))
  # Sum response if duplicate taxon names entered within a single sample
  diatom_tidied <- diatom_taxonname %>%
    group_by(sample_id, TaxonId, label, date_taken) %>%
    summarise(response = sum(response, na.rm = TRUE), .groups = "drop")

  # Arrange to keep in same order as 'taxon_names' data.frame
  diatom_tidied <- diatom_tidied %>%
    ungroup() %>%
    arrange(label) %>%
    select(-"label")

  # DARLEQ3 requires Taxon IDs and responses pivoted into wide format
  diatom_data <- diatom_tidied %>% pivot_wider(
    names_from = TaxonId,
    values_from = response,
  )
  diatom_data[is.na(diatom_data)] <- 0


  # Arrange by sampled_date to match order of 'header' data frame.
  diatom_data <- arrange(diatom_data, sample_id)
  # darleq3 requires row.names equal SAMPLE_NUMBER. Must convert
  # to be data.frame first (row.names deprecated on tibble).
  diatom_data <- data.frame(diatom_data, check.names = FALSE)
  row.names(diatom_data) <- diatom_data$sample_id
  diatom_data <- select(diatom_data, -"sample_id", -"date_taken")

  # Prepare dataframe of 'taxon_names'  ------------------------------
  # include columns 'TaxonCode','TaxonName'
  taxon_names <- diatom_taxonname %>%
    select(
      "TaxonCode" = "TaxonId",
      "TaxonName" = "TaxonName"
    ) %>%
    unique()
  taxon_names <- arrange(taxon_names, TaxonName)

  # Combine dataframes into named list ------------------------
  header <- data.frame(header)
  header <- header[header$SampleID %in% row.names(diatom_data), ]
  header <- header[!duplicated(header$SampleID), ]
  row.names(header) <- header$SampleID
  header$SampleID <- as.character(header$SampleID)
  header$SAMPLE_DATE <- as.Date(header$SAMPLE_DATE)
  output <- darleq3::calc_Metric(diatom_data, metric)
  output <- darleq3::calc_EQR(output,
    header,
    truncate_EQR = TRUE,
    verbose = TRUE
  )

  sample <- output$EQR
  sample <- sample %>% mutate_all(as.character)
  sample <- pivot_longer(sample,
    cols = c(-SampleID, -SiteID, -SAMPLE_DATE),
    names_to = "question",
    values_to = "response"
  )
  sample <- select(sample, -"SAMPLE_DATE")
  names(sample) <- c("sample_id", "location_id", "question", "response")

  location <- output$Uncertainty
  location <- location %>% mutate_all(as.character)
  location <- pivot_longer(location,
    cols = c(-SiteID),
    names_to = "question",
    values_to = "response"
  )
  names(location) <- c("location_id", "question", "response")
  results <- bind_rows(sample, location)
  results$parameter <- "Phytobenthos (diatoms)"
  results <- mutate(results,
    question = ifelse(question == "WFDClass",
      "Class",
      question
    )
  )

  years <- data %>%
    mutate("year" = lubridate::year(date_taken)) %>%
    filter(parameter == "River Diatoms") %>%
    group_by(location_id) %>%
    summarise("response" = paste(unique(.$year), collapse = ","))

  years$question <- "Years included"
  years$parameter <- "Phytobenthos (diatoms)"
  results <- bind_rows(results, years)
  return(results)
}

Outcome

The outcome of your assessment.

outcome <- assessment_function(input)
outcome_table <- select(outcome, "question", "response")
outcome_table <- outcome_table[!duplicated(outcome_table[, c("question")]), ]
knitr::kable(outcome_table)

Check

Run checks on the assessment.

# No need to edit this code
# Format description
standard_format <- hera:::hera_format(description = description)
# Check description
check_list <- hera:::hera_test(description = description)
knitr::kable(check_list$standard_check)

Update

Update the catalogue of assessments to make them available.

# No need to edit this code
hera:::update_catalogue(
  description = description,
  input = input,
  assessment_function = assessment_function,
  output = outcome
)

Test

This section tests if this assessment is usable using assessment function.

# No need to edit this code
assess(
  data = hera::demo_data,
  name = description$response[description$question == "name_short"]
)

Launch app

Below is an interactive application displaying the results of your assessment.

# No need to edit this code
# launch_app(new_catalogue = catalogue, data = data)


ecodata1/hera documentation built on April 5, 2025, 1:48 a.m.