Prepare the R Environment

Load the necessary R packages into the global environment, including the stayCALM package.

library(tidyverse)
library(stayCALM)

Preprocess Data

Water Quality Standards

Data was preprocessed to resemble the expected output from the authoritative databases that will become available as part of the Data Modernization effort [Data Modernization].

# Load the nysdec_wqs data set into the workspace; its parameter, fraction,
# and units columns are used below to align the LMAS data with the standards.
# NOTE(review): presumably shipped with the stayCALM package -- confirm.
data(nysdec_wqs)

LMAS Data

Define the file path to LMAS data.

# Directory holding the raw LMAS files: <project root>/data-raw/lmas.
data.path <- file.path(here::here(), "data-raw", "lmas")

Define what values should be read as NA.

# Strings that read.csv() should treat as missing; passed as na.strings when
# importing the LMAS files.
na.vec <- c("NA", "na", "", " ")

Import the water column data.

# Read the water column file, keeping strings as character and mapping the
# na.vec entries to NA.
hypo_epi.df <- read.csv(
  file = file.path(data.path, "hypoepi.for.zach.csv"),
  na.strings = na.vec,
  stringsAsFactors = FALSE
)

Import the profile data.

# Read the profile file with the same import settings as the water column
# data (strings kept as character, na.vec entries mapped to NA).
profile.df <- read.csv(
  file = file.path(data.path, "profiles.for.zach.csv"),
  na.strings = na.vec,
  stringsAsFactors = FALSE
)

Append the two data sets and use clean_strings() to change all names to lowercase and all non-alphanumeric characters to underscores. The profile Result.Value must be changed to a character value so that it can be bound to hypo_epi.df. All results are converted to numeric values later in this document.

# Cast the profile results to character so both tables share a column type
# before they are stacked.
profile.df$Result.Value <- as.character(profile.df$Result.Value)

# Stack the water column rows on top of the profile rows.
lmas.df <- hypo_epi.df %>%
  dplyr::bind_rows(profile.df)

# Standardize the column names with clean_strings().
lmas.df <- setNames(lmas.df, clean_strings(names(lmas.df)))

Convert all character values to lowercase to make them easier to manipulate.

# Lowercase every character column; all later string comparisons in this
# document are written against lowercase values.
lmas.df <- lmas.df %>%
  mutate_if(is.character, tolower)

Format sample_date as a date type.

# Parse the sample date strings (expected as YYYY-MM-DD) into Date values.
lmas.df$date <- as.Date(x = lmas.df$sample_date, format = "%Y-%m-%d")

# Exploratory check: the distinct parameter/unit combinations where the result
# unit differs from the quantitation-limit unit.
test <- filter(
  distinct(
    select(lmas.df,
           characteristic_name,
           result_unit,
           result_detection_quantitation_limit_unit)
  ),
  result_unit != result_detection_quantitation_limit_unit
)

# Numeric copy of the reported result; entries that cannot be parsed as a
# number become NA.
lmas.df$value <- as.numeric(lmas.df$result_value)

# Express the quantitation limit in the same unit as the result (mg/l <-> ug/l)
# and relabel the limit's unit column to match. Other unit combinations are
# left untouched.
lmas.df <- lmas.df %>%
  mutate(quantitation_limit = as.numeric(quantitation_limit)) %>%
  mutate(
    quantitation_limit = case_when(
      result_unit %in% "ug/l" &
        result_detection_quantitation_limit_unit %in% "mg/l" ~
        quantitation_limit * 1000,
      result_unit %in% "mg/l" &
        result_detection_quantitation_limit_unit %in% "ug/l" ~
        quantitation_limit / 1000,
      TRUE ~ quantitation_limit
    ),
    result_detection_quantitation_limit_unit = case_when(
      result_unit %in% "ug/l" &
        result_detection_quantitation_limit_unit %in% "mg/l" ~ "ug/l",
      result_unit %in% "mg/l" &
        result_detection_quantitation_limit_unit %in% "ug/l" ~ "mg/l",
      TRUE ~ result_detection_quantitation_limit_unit
    )
  )
# Retained (disabled) alternatives from the original analysis:
# lmas.df$quantitation_limit <- as.numeric(gsub("[^0-9.-]", "", lmas.df$quantitation_limit))
# test <- lmas.df[is.na(lmas.df$value) & !is.na(lmas.df$result_value), 
#                 c("result_value", "value")]

Make the fraction column more explicit in its representation by supplying "total" instead of "t" and "dissolved" instead of "d".

# Recode the sample-fraction codes into explicit labels:
#   "t" -> "total", "d" -> "dissolved", NA -> NA.
# The recode is a single vectorized lookup. Missing codes are handled
# explicitly instead of depending on how switch() treats NA_character_, and
# any other code stops the script so unexpected values are reviewed rather
# than silently passed through.
lmas.df$fraction <- local({
  fraction_codes <- lmas.df$result_sample_fraction
  fraction_labels <- c(t = "total", d = "dissolved")

  unexpected <- !is.na(fraction_codes) &
    !fraction_codes %in% names(fraction_labels)
  if (any(unexpected)) {
    stop("No match found. Expecting 'T', 'D', or NA.")
  }

  # match() returns NA for missing codes, which indexes to NA_character_.
  unname(fraction_labels[match(fraction_codes, names(fraction_labels))])
})

Add fraction values for the following parameters to match the values in the water quality standards table.

# pH is always assigned the "total" fraction.
lmas.df$fraction <- with(lmas.df, ifelse(characteristic_name %in% "ph",
                                         "total",
                                         fraction))
# Dissolved oxygen is always assigned the "dissolved" fraction.
# characteristic_name still holds the raw (lowercased) name at this point --
# the standardized name is only written to the parameter column further down --
# so the raw spelling "dissolved oxygen (do)" is matched as well as the
# standardized "dissolved_oxygen".
lmas.df$fraction <- with(
  lmas.df,
  ifelse(characteristic_name %in% c("dissolved_oxygen",
                                    "dissolved oxygen (do)"),
         "dissolved",
         fraction)
)

Standardize the parameter names to match the names in the water quality standards table.

# Translate raw (lowercased) LMAS characteristic names into the parameter
# names used by the water quality standards table.
#
# .param: vector of characteristic names; each element is handled on its own.
# Returns one name per element. Names without an entry below are returned
# unchanged.
lmas_param_switch <- Vectorize(
  FUN = function(.param) {
    switch(
      .param,
      "dissolved oxygen (do)"     = "dissolved_oxygen",
      "nitrogen, nitrate (as n)"  = "nitrate",
      "nitrogen, nitrate-nitrite" = "nitrate_nitrite",
      "sulfate (as so4)"          = "sulfate",
      "temperature, water"        = "temperature",
      "total dissolved solids"    = "total_dissolved_solids",
      "total hardness"            = "hardness",
      # Fallback: keep the supplied name as-is.
      .param
    )
  },
  vectorize.args = ".param"
)
# Standardized parameter name for every row.
lmas.df$parameter <- lmas_param_switch(lmas.df$characteristic_name)

# Print the standards parameters that have no counterpart in the LMAS data.
wqs_param.vec <- unique(nysdec_wqs$parameter)
sort(wqs_param.vec[!(wqs_param.vec %in% lmas.df$parameter)])

# pH rows get the unit label "ph_units"; every other row keeps its reported
# result unit.
lmas.df$units <- ifelse(lmas.df$parameter %in% "ph",
                        "ph_units",
                        lmas.df$result_unit)

# Identifier columns: seg_id copies pwlid, sample_id joins lake_id and
# sample_name with "_", and location_id is renamed to site_id.
lmas.df$seg_id <- lmas.df$pwlid
lmas.df$sample_id <- with(lmas.df, paste(lake_id, sample_name, sep = "_"))
names(lmas.df) <- replace(names(lmas.df),
                          names(lmas.df) %in% "location_id",
                          "site_id")
# Columns that identify a parameter/fraction/unit combination.
keep.vec <- c("parameter", "fraction", "units")

# Pair every distinct LMAS combination with the standards' combination for the
# same parameter and fraction. units.x holds the LMAS unit and units.y the
# standards' unit.
merged.df <- merge(
  x = unique(lmas.df[keep.vec]),
  y = unique(nysdec_wqs[keep.vec]),
  by = c("parameter", "fraction")
)

# "supplied:required" unit label, e.g. "mg/l:ug/l".
merged.df$units_comp <- with(merged.df, paste(units.x, units.y, sep = ":"))

# Rename units.x back to units so it can serve as a join key again.
names(merged.df) <- replace(names(merged.df),
                            names(merged.df) %in% "units.x",
                            "units")

# Attach the unit comparison to every LMAS row; rows without a matching
# standard are kept and labelled "no_match".
units_comp.df <- merge(
  x = lmas.df,
  y = merged.df,
  by = keep.vec,
  all.x = TRUE
)
units_comp.df$units_comp <- with(
  units_comp.df,
  ifelse(is.na(units_comp), "no_match", units_comp)
)
# Convert each "supplied:required" unit group to the units required by the
# water quality standards.
#   "mg/l:ug/l" -> multiply by 1000 and relabel as ug/l
#   "ug/l:mg/l" -> divide by 1000 and relabel as mg/l
#   anything else is left unchanged; a warning is raised when the supplied and
#   required units differ so the combination can be reviewed by hand.
# quantitation_limit is rescaled together with value. It was expressed in the
# result's unit earlier in this document and is later substituted into value
# for "u"-qualified rows, so leaving it unconverted would put those rows off
# by a factor of 1000 relative to the units column.
split.list <- by(units_comp.df, units_comp.df$units_comp,
                 function(i) {

                   units_comp.scalar <- unique(i$units_comp)

                   if (units_comp.scalar %in% "mg/l:ug/l") {
                     i$value <- i$value * 1000
                     i$quantitation_limit <- i$quantitation_limit * 1000
                     i$units <- "ug/l"
                   } else if (units_comp.scalar %in% "ug/l:mg/l") {
                     i$value <- i$value / 1000
                     i$quantitation_limit <- i$quantitation_limit / 1000
                     i$units <- "mg/l"
                   } else {
                     # "no_match" and identical units split to a single unique
                     # entry, so only true mismatches trigger the warning.
                     break.vec <- unlist(strsplit(units_comp.scalar, ":"))
                     if (length(unique(break.vec)) != 1) {
                       warning(paste0("Review required...",
                                      "\n",
                                      "\t Supplied: ", break.vec[1],
                                      "\n",
                                      "\t Required: ", break.vec[2]))
                     }
                   }

                   return(i)

                 })

# Re-assemble the per-group data frames into a single table.
prepped.df <- do.call(rbind, split.list)
prepped.df$water_type <- "pond"
# Columns retained in the exported data set (water_type is deliberately left
# out for now).
keep.vec <- c(
  "seg_id", "site_id", "sample_id",
  # "water_type",
  "depth", "date",
  "fraction", "parameter", "value", "units",
  "quantitation_limit",
  "validator_qualifiers", "interpreted_qualifiers",
  "data_provider"
)

# Keep only the export columns.
final_lmas.df <- prepped.df[, keep.vec, drop = FALSE]

# Drop rows whose validator qualifier is "r".
final_lmas.df <- final_lmas.df[!(final_lmas.df$validator_qualifiers %in% "r"), ]

# For rows whose validator qualifier is "u", report the quantitation limit in
# place of the measured value.
final_lmas.df$value <- with(
  final_lmas.df,
  ifelse(validator_qualifiers %in% "u",
         as.numeric(quantitation_limit),
         value)
)
# Sanity checks against wipwl.df.
# NOTE(review): wipwl.df is not created anywhere in the visible part of this
# document -- presumably it is provided by an attached package; confirm.

# LMAS rows whose seg_id has no match in wipwl.df ...
test.df <- final_lmas.df %>%
  anti_join(wipwl.df, by = "seg_id")

# ... and how many of those rows actually carry a seg_id (printed).
sum(!is.na(test.df$seg_id))

# LMAS rows whose seg_id does match wipwl.df.
test.df <- final_lmas.df %>%
  inner_join(wipwl.df, by = "seg_id")

Export LMAS Data

With the usethis package, the LMAS chemistry data is exported as a .rda file, making it easily accessible during the development and testing of the stayCALM package.

# Store the finished table under the name lmas.df, because use_data() names
# the saved data set after the object it is given.
lmas.df <- final_lmas.df
# Save lmas.df as package data, replacing any previously saved copy.
usethis::use_data(lmas.df, overwrite = TRUE)


BWAM/stayCALM documentation built on May 21, 2020, 3:24 p.m.