Introduction

This provides an overview of how to apply to utilize stayCALM to automate waterbody assessments according to the New York State Department of Environmental Conservation's (NYSDEC) Consolidated Assessment and Listing Methodology (CALM).

knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)

stayCALM Package Installation

You can install the development version from GitHub with:

# install.packages("devtools")
devtools::install_github("BWAM/stayCALM")

You must have rtools installed on your machine for the stayCALM to be built upon installation from GitHub. rtools is NOT an R package. On Windows machines, rtools can be installed from the following CRAN repository: https://cran.r-project.org/bin/windows/Rtools/. Be sure to follow the instructions under the header "Putting Rtools on the PATH."

Prepare the R Environment

Load the necessary R packages into the global environment, including the stayCALM package.

library(tidyverse)
library(stayCALM)

Establish File Output Directory

root.dir <- gsub("(stayCALM)(.*$)", "\\1", getwd())

export.logical <- FALSE

Preprocess Data

Water Quality Standards

The table includes all of the necessary information to compare sampled data to NYSDEC's water quality standards (WQS).

wqs.df <- stayCALM::nysdec_wqs

WI/PWL

Data was preprocessed to resemble the expected output from the authoritative databases that will become available as part of the Data Modernization effort [Data Modernization].

data("wipwl.df")

Extract the necessary columns from WI/PWL table (wipwl.df) and join this information with the WQS table (wqs.df).

wqs_wipwl.df <- wipwl.df %>%
  select(seg_id, class, spatial_extent, water_type) %>% 
  distinct() %>%
  left_join(wqs.df, by = c("class", "spatial_extent", "water_type"))

Export the original data as a CSV file.

write.csv(wqs_wipwl.df, 
          file = file.path(root.dir,
                           "data",
                           "output",
                           paste0(Sys.Date(),
                                  "_stay-calm_wqs-wipwl",
                                  ".csv")),
          row.names = FALSE)

Observed Data

Data was preprocessed to resemble the expected output from the authoritative databases.

data(smas.df) 
data(lmas.df)

org.df <- rbind(smas.df, lmas.df) %>% 
  filter(date >= as.Date("2018-01-01"))

Export the original data as a CSV file.

write.csv(org.df, 
          file = file.path(root.dir,
                           "data",
                           "output",
                           paste0(Sys.Date(),
                                  "_stay-calm_original-data",
                                  ".csv")),
          row.names = FALSE)
chem_extract.df <- org.df %>% 
  filter(parameter %in% c("hardness", "ph", "temperature")) %>% 
  tidyr::pivot_wider(
    id_cols = sample_id,
    names_from = "parameter",
    values_from = c("value", "units"),
    names_sep = "_",
    values_fn = list(value = mean,
                     units = unique,
                     na.rm = TRUE)
  ) %>% 
  right_join(org.df, by = "sample_id")

# .data <- org.df
# params.vec <- c("hardness", "ph", "temperature")
# 
# test <- function(...) {
#   unlist(list(...))
# }
# 
# test1 <- test("hardness", "ph", "temperature")
# 
# extract_params <- function(.data , ..., .by_col) {
#   sub.df <- subset(.data, select = unlist(list(...)))
#   
#   chem_extract.df <- 
#   tidyr::pivot_wider(
#     id_cols = sample_id,
#     names_from = "parameter",
#     values_from = c("value", "units"),
#     names_sep = "_",
#     values_fn = list(value = mean,
#                      units = unique,
#                      na.rm = TRUE)
#   ) %>% 
#   right_join(org.df, by = "sample_id")
#   
#   final.df <- merge(.data, chem_extract.df, by = .by_col)
# }
chem.df <- chem_extract.df %>% 
  inner_join(wqs_wipwl.df, by = c("seg_id", "parameter",
                                  "fraction", "units"))

# chem.df2 <- merge(chem_extract.df, wqs_wipwl.df,
#                   by = c("seg_id", "parameter", "fraction"),
#                   )

A number of NYSDEC's WQS (e.g., total ammonia) are determined by other observed environmental variables, such as hardness, water-temperature, or pH. thresh_determination applies the appropriate formula for determining

chem.df <- thresh_determination(chem.df)
chem.df <- date_standard_cols(.df = chem.df, .date_col = date)

Add an assessment ID to simplifying grouping by key fields to perform summary calculations.

chem.df$assessment_id <- group_id(.data = chem.df,
                                  .keep = c("seg_id",
                                            # "lab_anl_method_name",
                                            # "cas_rn",
                                            "parameter",
                                            "fraction"),
                                  .numeric = TRUE)
chem.df$within_period <- assessment_period(.date_vec = chem.df$date,
                                           .n_years_ago = 10)
prepped.df <- prep_values(.data = chem.df,
                          .block_col = "block",
                          .value_col = "value",
                          .statistic_col = "statistic",
                          .new_value_col = "result",
                          .min_n_col = "min_n")


# prepped.df2 <- prepped.df[lapply(prepped.df$result, length) > 1, ]


selected.df <- subset(prepped.df, select = c("assessment_id", "seg_id",
                                             "site_id",
                                             "water_type",
                                             "type", "use",
                                             "group", "block", "statistic",
                                             "parameter", "fraction", "units",
                                             "result",
                                             "date",
                                             "year", "within_period",
                                             "direction", "threshold",
                                             "summarize_rows",
                                             "summarize_rows_operator",
                                             "wqs_75p_threshold",
                                             "data_provider"))
selected.df$attaining_wqs <- with(selected.df, 
                                  attaining(result,
                                            direction,
                                            threshold))

selected.df$attaining_75 <-  with(selected.df, 
                                  attaining(result,
                                            direction,
                                            wqs_75p_threshold))
selected.df$survey_id <- group_id(.data = selected.df,
                                  .keep = c("seg_id",
                                            "use", "type",
                                            "parameter",
                                            "within_period"),
                                  .numeric = TRUE)
surveyed.df <- with(selected.df,
                    survey(.id = survey_id,
                           .attaining_wqs = attaining_wqs,
                           .attaining_75 = attaining_75,
                           .year = year,
                           .water_type = water_type,
                           .tmdl = FALSE,
                           .ltco = FALSE,
                           .pollutant = FALSE,
                           .id_col_name = "survey_id"))
merged.df <- merge(surveyed.df, selected.df, by = "survey_id", all = TRUE)
status.df <- assign_status(merged.df, .eval_colname = "parameter_evaluation") %>% 
  separate_rows(use, sep = "; ")
error.df <- status.df[status.df$assessment %in% 'ERROR', ]
# table(status.df$assessment)
uses.vec <- c("fishing",
              "primary contact recreation",
              "secondary contact recreation",
              "shellfishing",
              "source of water supply")

wipwl_expanded.df <- expand_df(.data = wipwl.df,
                               .key_col = "seg_id",
                               .expand_vec = uses.vec)
wipwl_expanded.df <- subset(x = wipwl_expanded.df,
                            subset = seg_id %in% unique(status.df$seg_id))

names(wipwl_expanded.df)[names(wipwl_expanded.df) %in% "wbcatgry"] <- "previous_wb_category"

# wipwl_expanded.df <- wipwl_expanded.df[!names(wipwl_expanded.df) %in% "use"]

report_master.df <- merge(status.df, wipwl_expanded.df,
                          by = c("seg_id", "water_type", "use"),
                          all = TRUE)

report_master.df$rev_date <- Sys.Date()
report_master.df <- report_master.df %>% 
  assign_assess_hier(.col = "ir_category",
                     .type = "ir",
                     .assign_unassessed = TRUE) %>% 
  assign_assess_hier(.col = "use_assessment", 
                     .type = "assess",
                     .assign_unassessed = TRUE) %>% 
  assign_assess_hier(.col = "use_assessment_confirmation",
                     .type = "confir",
                     .assign_unassessed = TRUE)
report_master.df <- summarize_seg_assessment(.data = report_master.df,
                                             .seg_id_col = "seg_id",
                                             .ir_col = "ir_category",
                                             .assess_col = "use_assessment",
                                             .confir_col = "use_assessment_confirmation")
report_master.df <- assign_wb_category(.data = report_master.df,
                           .assess_col = "segment_assessment",
                           .wb_colname = "wb_category",
                           .seg_id_col = "seg_id",
                           .param_eval_col ="parameter_evaluation")
master_sub_cols.vec <- c("seg_id",
                         "wb_category",
                         "segment_assessment",
                         "class",
                         "use", 
                         "type",
                         "site_id",
                         "date",
                         "parameter", #pollutant
                         "fraction",
                         "units",
                         "result",
                         "direction",
                         "threshold",
                         "wqs_75p_threshold",
                         "parameter_evaluation",
                         "wqs_violation",
                         "wqs_75_violation",
                         "win", "previous_wb_category", "rev_date",
                         "type_name", "lgth_area", "lgth_area_units",
                         "basin_name", "huc_10", "co_name",
                         "seg_desc",
                         "x303dlist",
                         "data_provider")

# report_cols.vec[!report_cols.vec %in% names(report_master.df)]

report_simple.df <- unique(subset(report_master.df, select = master_sub_cols.vec))
write.csv(report_simple.df, 
          file = file.path(root.dir,
                           "data",
                           "output",
                           paste0(Sys.Date(),
                                  "_stay-calm_assessment",
                                  ".csv")),
          row.names = FALSE)

Plot the number (count) of each assessment category per waterbody type. This plot provides a quick visual summary of the output of the assessment performed.

report_simple.df %>%
  select(seg_id, wb_category, type_name) %>%
  distinct() %>%
  ggplot(aes(wb_category, fill = wb_category)) +
  scale_fill_viridis_d() +
  geom_bar() +
  theme_bw()  +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    legend.title = element_blank()
  ) +
  facet_wrap( ~ type_name, ncol = 4) +
  guides(fill = guide_legend(nrow = 3)) +
  theme(legend.position = "bottom",
        legend.direction = "horizontal")

Plot the number (count) of each assessment category per waterbody type. This plot provides a quick visual summary of the output of the assessment performed.

report_simple.df %>%
  select(seg_id, segment_assessment, type_name) %>%
  distinct() %>%
  ggplot(aes(segment_assessment, fill = segment_assessment)) +
  scale_fill_viridis_d() +
  geom_bar() +
  theme_bw()  +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    legend.title = element_blank()
  ) +
  facet_wrap( ~ type_name, ncol = 4) +
  guides(fill = guide_legend(nrow = 3)) +
  theme(legend.position = "bottom",
        legend.direction = "horizontal")


BWAM/stayCALM documentation built on May 21, 2020, 3:24 p.m.