knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  echo = FALSE
)
library(hera)
library(purrr)
library(dplyr)
library(tidyr)
library(magrittr)

Details

This classification method enables the assessment of macrophytes in rivers according to the requirements of the Water Framework Directive (WFD). Macrophytes are larger plants of fresh water which are easily seen with the naked eye, including all vascular plants, bryophytes, stoneworts (Characeae) and macro-algal growths. River LEAFPACS2 uses a species nutrient index together with measures of diversity and abundance. This method replaces the classification tool River LEAFPACS used in the first river basin planning cycle in the UK.

standard <- tibble(
  standard_short = "leafpacs",
  quality_element = "Ecology",
  parameter = "River Macrophytes",
  standard_long = "UKTAG River Assessment Method Macrophytes and Phytobenthos",
  standard_reference = "http://wfduk.org/sites/default/files/Media/Characterisation%20of%20the%20water%20environment/Biological%20Method%20Statements/River%20Macrophytes%20UKTAG%20Method%20Statement.pdf",
  aggregation = "waterbody",
  status = "testing"
)

Data

If available, add a dataframe containing all the data variables including the questions, predictors, and aggregations required to complete the assessment process.

Example:

# Web
# demo_data <- get_data(location_id = c(92751, 100))
# demo_data <- filter(demo_data , question == "percentagecoverband")
# demo_data$year <- lubridate::year(demo_data$date_taken)

# Bulk Web
# all_data <- get_bulk_data()
# all_data$label <- NA
# latlon <- rict::osg_parse(all_data$grid_reference, coord_system = "WGS84")
# all_data$latitude <- latlon$lat
# all_data$longitude <- latlon$lon
# all_data$year <- lubridate::year(all_data$date_taken)
# Sepa


 all_data <- hera::demo_data
# demo_data$label <- demo_data$taxon
# demo_data$dist_from_source <- 32
# demo_data$alkalinity <- demo_data$mean_alkalinity
# demo_data$slope <- 2
# demo_data$source_altitude <- 280
# demo_data$parameter <- NA
# demo_data$parameter[demo_data$analysis_name == "MAC_R_TST"] <- "River Macrophytes"
# demo_data$parameter[demo_data$analysis_name == "DIAT_R_TST"] <- "River Diatoms"
# demo_data$question <- as.character(demo_data$question)
# demo_data$question[demo_data$question == "Taxon Cover Value"] <- "percentagecoverband"
# demo_data$sample_id <- as.character(demo_data$sample_id)


data <- all_data 
# %>% filter(parameter == "River Macrophytes")
head(data)

Locate

Metadata required about the location

# A list of metadata for location of sampling
location <- data %>% select(
  location_id,
  location_description,
  grid_reference
)
location <- location %>% unique()
head(location)

Sample

Metadata required to assess each sample. (Excluding direct observations)

# A list of questions and responses collected through observation
sample <- data %>% select(
  parameter,
  sample_id,
  date_taken
)
sample <- sample %>% unique()
head(sample)

Observe

The survey method used should conform to CEN 14184 : 2003 Water quality -- Guidance standard for the surveying of aquatic macrophytes in running waters.

A list of question and example responses that are collected one or more times per sample.

# A list of questions and responses collected through observation
questions <- data %>% select(
  question,
  response,
  label # optional (usually for taxonomic/species name)
)
questions <- questions %>% filter(question == "percentagecoverband")
questions

Metrics

Create metrics from observed data. Metrics are statistical summaries of the observed data or indices derived from the observed data.

indices_function <- function(data) {
  # Some calculated index e.g. summary of sample responses:
  # index = sum(data$response, na.rm = TRUE)
  message("Calculating indices...")
  # require(dplyr)
  # require(tidyr)
  # require(magrittr)
  # require(tibble)
  if (!is.null(nrow(data$question[data$question == "rmni"]))) {
    return(NULL)
  }

  if (!"label" %in% colnames(data)) {
    return(NULL)
  }
  if (all(is.na(data$label))) {
    return(NULL)
  }

  # Join data to taxa macrophyte scores ------------------------------------
  macrophyte_scores <- utils::read.csv(system.file("extdat",
    "macrophyte-taxa.csv",
    package = "hera"
  ))

  data$label[data$label == "Cladophora"] <-
    "Cladophora glomerata/Rhizoclonium hieroglyphicum"
  data <- inner_join(data, macrophyte_scores, by = c("label" = "Taxon.Name"))
  names(data) <- tolower(names(data))

  # Calculate scores -------------------------------------------------------
  data <- group_by(data, sample_id)
  data$response <- as.numeric(data$response)

  # Calculate algae
  data$cover <- NA
  data$cover[data$response == 1] <- 0.05
  data$cover[data$response == 2] <- 0.5
  data$cover[data$response == 3] <- 1.7
  data$cover[data$response == 4] <- 3.8
  data$cover[data$response == 5] <- 7.5
  data$cover[data$response == 6] <- 17.5
  data$cover[data$response == 7] <- 37.5
  data$cover[data$response == 9] <- 62.5
  data$cover[data$response == 10] <- 87.5

  algae <- data %>%
    group_by(cover) %>%
    summarise(
      n = n_distinct(label[algal.taxon == "Y"])
    )
  algae_cover <- sum(algae$cover * algae$n, na.rm = TRUE)

  # Calculate indices
  data <- data %>% summarise(
    rmni = sum(rmni * response, na.rm = TRUE) /
      sum(response[!is.na(rmni)]),
    rn_a_taxa = n_distinct(label[aquatic.taxon == "Y"]),
    n_rfg = n_distinct(plant.fn.gp[plant.fn.gp != 0 &
      !is.na(plant.fn.gp)]),
    rfa_pc = algae_cover
  )

  data <- pivot_longer(data, -sample_id,
    names_to = "question",
    values_to = "response"
  )
  data$sample_id <- as.character(data$sample_id)
  data$response <- as.character(data$response)
  data <- ungroup(data)
  return(data)
}
indexes <- indices_function(data)
head(indexes)

Predictors

Predictors or expected values used in this assessment.

predictors <- data %>%
  select(
    dist_from_source,
    alkalinity,
    source_altitude,
    slope,
    location_id
  ) %>%
  unique()
knitr::kable(head(predictors))

Predict

Create predicted or expected responses to questions.

prediction_function <- function(predictors, reference_algae = 0.05) {
  message("Calculating predictions...")
  # require(dplyr)
  # require(tidyr)
  # require(magrittr)
  # require(tibble)
  # Predict reference RMNI
  prediction <-
    transmute(predictors,
      ref_rmni =
        (5.239 + (1.384 * log10(alkalinity + 1)) +
          (-0.68 * log10(slope + 1)) +
          (0.711 * log10(dist_from_source + 1)) +
          (-1.074 * log10(source_altitude + 1))
        )
    )

  # Calculate Reference Taxa
  prediction$ref_taxa <- (10.026 * exp(log10(predictors$slope + 1) * -0.426))

  # Calculate Reference Number of Functional Groups
  prediction$ref_nfg <- (6.304 * exp(log10(predictors$slope + 1) * -0.377))

  # Add Reference Algae
  prediction$ref_algae <- reference_algae

  # Sample ID
  prediction$location_id <- as.character(predictors$location_id)

  # Transform to standard format --------------------------------------------
  prediction <- prediction %>%
    distinct() %>%
    pivot_longer(
      cols = c(
        ref_taxa,
        ref_algae,
        ref_nfg,
        ref_rmni
      ),
      names_to = "question",
      values_to = "response"
    )

  prediction$response <- as.character(prediction$response)
  return(prediction)
}
predictions <- prediction_function(predictors)
# predictions <- combine(predictions) # combines predictions with additional
# columns found in data
head(predictions)
# All Together Now
sample_info <- data %>%
  select(names(sample), names(location)) %>%
  unique()

if (!is.null(indexes)) {
  indexes <- right_join(sample_info, indexes,
    by = c("sample_id" = "sample_id")
  )
} else {
  indexes <- data
}

predictions <- right_join(sample_info, predictions,
  by = c("location_id" = "location_id")
)

combined_data <- bind_rows(questions, predictions, indexes)

Assessment

We now have observed and expected values, let's assess them!

A table of categories to assess your observed against expected (or predicted) values.

assessment_table <- tibble(
  assesssment = c("high", "good", "moderate", "poor", "bad"),
  value = c(0.80, 0.60, 0.40, 0.20, 0),
  level = c(1:5)
)

assessment_table

Assess

A function to compare observed against expected.

assessment_function <- function(data, assessment_table = NULL) {
  message("Calculating Assessment")
  # require(dplyr)
  # require(tidyr)
  # require(magrittr)
  # require(tibble)
  if (nrow(data %>% filter(question == "rmni")) == 0) {
    return(NULL)
  }

  # Transform data -----------------------------------------------------------
  data <- data %>%
    select(sample_id, question, response) %>%
    filter(question %in% c(
      "rmni",
      "rn_a_taxa",
      "n_rfg",
      "ref_taxa",
      "ref_algae",
      "ref_nfg",
      "ref_rmni",
      "rfa_pc"
    ))
  data$response <- as.numeric(data$response)

  data <- data %>%
    distinct() %>%
    group_by(sample_id) %>%
    pivot_wider(names_from = question, values_from = response) %>%
    ungroup()
  data[is.na(data)] <- 0

  # Check if classifiable

  data$status <- NA
  data$status[data$rn_a_taxa < 1] <- "unclassifiable - no aquatic taxa present"
  data$status[data$rn_a_taxa > 0] <- "classified"

  # Calculate EQRs ------------------------------------------------------------
  # RMNI EQR
  data <- data %>% mutate(rmni_eqr = (rmni - 10) / (ref_rmni - 10))

  # NTAXA EQR
  data <- data %>%
    mutate(ntaxa_eqr = (rn_a_taxa / ref_taxa)) %>%
    mutate(rmni_eqr_adj = rmni_eqr)

  # NFG EQR
  data <- data %>% mutate(nfg_eqr = (n_rfg / ref_nfg))

  # Algal EQR
  data <- data %>%
    mutate(alg_eqr = (rfa_pc - 100) / (ref_algae - 100))

  # Adjust RMNI EQR------------------------------------------------
  data$rmni_eqr_adj[data$rmni_eqr >= 0.85] <-
    (data$rmni_eqr[data$rmni_eqr >= 0.85]
    - 0.85) / (1 - 0.85) * 0.2 + 0.8

  data$rmni_eqr_adj[data$rmni_eqr < 0.85] <-
    (data$rmni_eqr[data$rmni_eqr < 0.85]
    - 0.7) / (0.85 - 0.7) * 0.2 + 0.6

  data$rmni_eqr_adj[data$rmni_eqr < 0.7] <-
    (data$rmni_eqr[data$rmni_eqr < 0.7]
    - 0.52) / (0.7 - 0.52) * 0.2 + 0.4

  data$rmni_eqr_adj[data$rmni_eqr < 0.52] <-
    (data$rmni_eqr[data$rmni_eqr < 0.52]
    - 0.34) / (0.52 - 0.34) * 0.2 + 0.2

  data$rmni_eqr_adj[data$rmni_eqr < 0.34] <-
    (data$rmni_eqr[data$rmni_eqr < 0.34]
    - 0.16) / (0.34 - 0.16) * 0.2

  # Adjust Ntaxa or NFG EQR ---------------------------------------------------
  data$min_eqr <- pmin(data$ntaxa_eqr, data$nfg_eqr)
  data$diversity_eqr_adj <- NA
  data$diversity_eqr_adj[data$min_eqr >= 0.83] <-
    ((data$min_eqr[data$min_eqr >= 0.83]
    - 0.83) / (1 - 0.83)) * 0.2 + 0.8

  data$diversity_eqr_adj[data$min_eqr < 0.83] <-
    ((data$min_eqr[data$min_eqr < 0.83]
    - 0.66) / (0.83 - 0.66)) * 0.2 + 0.6

  data$diversity_eqr_adj[data$min_eqr < 0.66] <-
    ((data$min_eqr[data$min_eqr < 0.66]
    - 0.49) / (0.66 - 0.49)) * 0.2 + 0.4

  data$diversity_eqr_adj[data$min_eqr < 0.49] <-
    ((data$min_eqr[data$min_eqr < 0.49]
    - 0.32) / (0.49 - 0.32)) * 0.2 + 0.2

  data$diversity_eqr_adj[data$min_eqr < 0.32] <-
    ((data$min_eqr[data$min_eqr < 0.32]
    - 0.15) / (0.32 - 0.15)) * 0.2

  # Adjust ALGAL EQR ----------------------------------------------------------
  data$alg_eqr_adj <- NA
  data$alg_eqr_adj[data$alg_eqr >= 0.975] <-
    ((data$alg_eqr[data$alg_eqr >= 0.975]
    - 0.975) / (1 - 0.975)) * 0.2 + 0.8

  data$alg_eqr_adj[data$alg_eqr < 0.975] <-
    ((data$alg_eqr[data$alg_eqr < 0.975]
    - 0.925) / (0.975 - 0.925)) * 0.2 + 0.6

  data$alg_eqr_adj[data$alg_eqr < 0.925] <-
    ((data$alg_eqr[data$alg_eqr < 0.925]
    - 0.825) / (0.925 - 0.825)) * 0.2 + 0.4

  data$alg_eqr_adj[data$alg_eqr < 0.825] <-
    ((data$alg_eqr[data$alg_eqr < 0.825]
    - 0.625) / (0.825 - 0.625)) * 0.2 + 0.2

  data$alg_eqr_adj[data$alg_eqr < 0.625] <-
    (data$alg_eqr[data$alg_eqr < 0.625]
    / 0.625) * 0.2

  # Combine EQR for each metric ---------------------------
  data$composition_diversity <- data$rmni_eqr_adj
  data$composition_diversity[data$diversity_eqr_adj < data$rmni_eqr_adj] <-
    ((0.5 * data$diversity_eqr_adj[data$diversity_eqr_adj < data$rmni_eqr_adj] +
      data$rmni_eqr_adj[data$diversity_eqr_adj < data$rmni_eqr_adj])) / 1.5

  data$z <-
    (2 * (1 / (exp(log(2600000000) + data$ref_rmni * log(0.0166)) + 1 / 0.5)))

  data$eqr_leafpacs <-
    (data$z * data$alg_eqr_adj + data$composition_diversity) / (data$z + 1)

  data$eqr_leafpacs[data$composition_diversity < data$alg_eqr_adj] <-
    data$composition_diversity[data$composition_diversity < data$alg_eqr_adj]

  # Cap final EQR -------------------------------------------------------------
  data$eqr <- data$eqr_leafpacs
  data$eqr[data$eqr_leafpacs > 1] <- 1
  data$eqr[data$rn_a_taxa < 1] <- NA

  # Final EQR cut! ------------------------------------------------------------
  class <- cut(data$eqr,
    breaks = c(1, assessment_table$value),
    labels = assessment_table$assesssment
  )

  assessments <- data.frame(
    sample_id = data$sample_id,
    class = class,
    eqr = data$eqr,
    status = data$status
  )

  assessments$class <- as.character(assessments$class)
  assessments$level <- NA
  assessments$level[assessments$class == "high"] <-
    assessment_table$level[assessment_table$assesssment == "high"]
  assessments$level[assessments$class == "good"] <-
    assessment_table$level[assessment_table$assesssment == "good"]
  assessments$level[assessments$class == "moderate"] <-
    assessment_table$level[assessment_table$assesssment == "moderate"]
  assessments$level[assessments$class == "poor"] <-
    assessment_table$level[assessment_table$assesssment == "poor"]
  assessments$level[assessments$class == "bad"] <-
    assessment_table$level[assessment_table$assesssment == "bad"]

  assessments <- as_tibble(assessments)

  assessments$eqr <- as.character(assessments$eqr)
  assessments$class <- as.character(assessments$class)
  assessments$level <- as.character(assessments$level)
  assessments <- pivot_longer(assessments, -sample_id,
    names_to = "question", values_to = "response"
  )

  return(assessments)
}
# assessments <- assessment_function(combined_data, assessment_table)
# assessments

Confidence

Confidence of assessment

confidence_function <- function(data, aggregates = "sample_id") {
  message("Calculating confidence...")
  if (!any(names(data) %in% "n_sample")) {
    data$n_sample <- 1
  }
  data <- data %>% filter(question == "eqr")
  data <- pivot_wider(data, names_from = "question", values_from = "response")
  data$eqr <- as.numeric(data$eqr)

  # Add confidence columns ---------------------------------------------------
  data <- data %>% mutate(
    se = NA,
    trsfd_mean = NA,
    trsfd_error = NA,
    norm_dist_1 = NA,
    norm_dist_2 = NA,
    norm_dist_3 = NA,
    norm_dist_4 = NA,
    bad = NA,
    poor = NA,
    moderate = NA,
    good = NA,
    high = NA
  )
  # SE value ------------------------------------------------------------------
  data$se <- (0.04 + -2.98 * data$eqr + 2.96 * data$eqr^0.95) / sqrt(data$n_sample)
  data$trsfd_mean <- log(data$eqr / (1 - data$eqr))
  data$trsfd_error <- (data$se) / (data$eqr * (1 - data$eqr))
  data$norm_dist_1 <- pnorm((-1.386 - data$trsfd_mean) / (data$trsfd_error))
  data$norm_dist_2 <- pnorm((-0.405 - data$trsfd_mean) / (data$trsfd_error))
  data$norm_dist_3 <- pnorm((0.405 - data$trsfd_mean) / (data$trsfd_error))
  data$norm_dist_4 <- pnorm((1.386 - data$trsfd_mean) / (data$trsfd_error))

  data$bad <- round((100 * data$norm_dist_1), 1)
  data$bad[data$eqr > 0.95] <- 0
  data$bad[data$eqr <= 0.056] <- 88.1

  data$poor <- round((100 * (data$norm_dist_2 - data$norm_dist_1)), 1)
  data$poor[data$eqr > 0.95] <- 0
  data$poor[data$eqr <= 0.056] <- 9.6

  data$moderate <- round((100 * (data$norm_dist_3 - data$norm_dist_2)), 1)
  data$moderate[data$eqr > 0.95] <- 0
  data$moderate[data$eqr <= 0.056] <- 2

  data$good <- round((100 * (data$norm_dist_4 - data$norm_dist_3)), 1)
  data$good[data$eqr > 0.95] <- 0
  data$good[data$eqr <= 0.056] <- 0.4

  data$high <- round((100 * (1 - data$norm_dist_4)), 1)
  data$high[data$eqr > 0.95] <- 100
  data$high[data$eqr <= 0.056] <- 0

  data <- data %>% select(all_of(aggregates), high, good, moderate, poor, bad)

  data <- pivot_longer(data,
    cols = (-all_of(aggregates)),
    names_to = "question",
    values_to = "response"
  )
  data$response <- as.character(data$response)
  return(data)
}


# confidences <- confidence_function(data = assessments)
# confidences

Checklist

# check_list <- hera:::hera_test(standard = standard)
# knitr::kable(check_list$standard_check)

Update hera

# catalogue <- hera::catalogue
# 
# model <- tibble(
#   analysis_name = standard$parameter,
#   assessment = standard$standard_long,
#   standard = list(standard),
#   location = list(location[1, ]),
#   sample = list(sample[1, ]),
#   validation_function = NA,
#   indices_function = list(indices_function),
#   prediction_function = list(prediction_function),
#   assessment_function = list(assessment_function),
#   confidence_function = list(confidence_function),
#   indices = list(indexes[indexes$sample_id == indexes$sample_id[1], ]),
#   assessment_table = list(assessment_table),
#   questions = list(questions[1, ]),
#   predictors = list(predictors[1, ]),
#   predictions = list(predictions[predictions$location_id == predictions$location_id[1], ]),
#   assessments =  list(assessments[1, ])
# )
# 
# catalogue <- catalogue[catalogue$assessment != standard$standard_long, ]
# 
# catalogue <- bind_rows(catalogue, model)
# new_catalogue <- catalogue
# new_catalogue
# 
# usethis::use_data(catalogue, overwrite = TRUE)

Launch app

# No need to edit this code
# launch_app(new_catalogue = catalogue, data = all_data)


ecodata1/hera documentation built on April 5, 2025, 1:48 a.m.