This documentation describes how water chemistry data collected by the New York State Department of Environmental Conservation's (NYSDEC's) Stream Monitoring and Assessment Section (SMAS) is prepared for assessment using the stayCALM R-package. Much of this work will not be necessary once the database is finalized. There will still be some amount of data preparation, but it should not be as extensive.
Load the necessary R packages into the global environment.
library(tidyverse) library(readxl)
data(nysdec_wqs)
standard_dt <- function(x) { DT::datatable(x, options = list( columnDefs = list(list( className = 'dt-center' )), scrollY = 300, scroller = TRUE, scrollX = TRUE ) ) }
The code below defines the file path to the SMAS chemistry data historically stored in an EQuIS database.
smas.path <- file.path(here::here(), "data-raw", "smas")
Import EQuIS sample information and result data.
# location.df <- vroom::vroom(file.path(equis.path, "equis_location_history.csv")) sample.df <- vroom::vroom(file.path(smas.path, "equis_sample_history.csv")) %>% select(SYS_SAMPLE_CODE, SAMPLE_DELIVERY_GROUP, SAMPLE_NAME, SAMPLE_TYPE_CODE, SAMPLE_DATE, DATA_PROVIDER) result.df <- vroom::vroom(file.path(smas.path, "equis_results_history.csv")) %>% select(SYS_SAMPLE_CODE, LAB_SDG, LAB_ANL_METHOD_NAME, CAS_RN, FRACTION, CHEMICAL_NAME, RESULT_NUMERIC, RESULT_UNIT, METHOD_DETECTION_LIMIT, REPORTING_DETECTION_LIMIT, VALIDATION_LEVEL, VALIDATOR_QUALIFIERS, INTERPRETED_QUALIFIERS, QUANTITATION_LIMIT, ANALYSIS_DATE) %>% mutate(VALIDATOR_QUALIFIERS = as.character(VALIDATOR_QUALIFIERS))
Join the EQuIS sample information and result data by "SYS_SAMPLE_CODE".
streams_chem.df <- inner_join(sample.df, result.df, by = c("SYS_SAMPLE_CODE")#, # "SAMPLE_DELIVERY_GROUP" = "LAB_SDG") ) %>% mutate(ADJUST_RIBS_ID = str_remove_all(SYS_SAMPLE_CODE, "\\-.*|\\_.*")) test <- streams_chem.df %>% group_by(SYS_SAMPLE_CODE, SAMPLE_DATE, CHEMICAL_NAME, FRACTION, RESULT_NUMERIC) %>% summarize(n = n()) %>% filter(n > 1)
To join the SMAS chemistry data with the Assessment database, each SMAS sample must have a Waterbody Inventory and Priority Waterbody Listing (WI/PWL) ID. SMAS has associated all sampling locations with a WI/PWL ID. However to be more descriptive, SMAS's sample naming convention changed recently, and therefore the WI/PWL ID cannot be directly linked to the sample IDs in the EQuIS database. The new sample names must be cross-walked with the EQuIS sample names to correctly assign the WI/PWL ID.
Import the data with the new sample IDs and the associated WI/PWL IDs.
streams_sites.df <- file.path(smas.path, "cleaned", "20191224_Site_Field_cleaned_final.xlsx") %>% readxl::read_excel(sheet = "Sheet1") %>% select(SH_SITE_ID, SH_PWL_ID) %>% distinct()
Import the data that provides the necessary information to perform the cross-walk between the EQuIS database and the new sample IDs.
streams_crosswalk.df <- file.path(smas.path, "RIBSchem_historic_BioSiteJoin_2019-12-18.xlsx" ) %>% readxl::read_excel(sheet = "Sheet1") %>% select(RIBS_ID_FOR_SITE_TABLE:Station_RIBS, SBU_ID) %>% distinct()
Perform the cross-walk to assign the WI/PWL ID to the EQuIS data.
crosswalk.df <- inner_join(streams_chem.df, streams_crosswalk.df, by = "ADJUST_RIBS_ID") %>% inner_join(streams_sites.df, by = c("SBU_ID" = "SH_SITE_ID"))
Confirm that the appropriate chemical name is assigned and that date columns are properly formatted.
clean_up.df <- crosswalk.df %>% mutate(CHEMICAL_NAME = case_when( CHEMICAL_NAME %in% "Fecal Coliform" ~ "Fecal Coliforms", CHEMICAL_NAME %in% "Coliform" ~ "Total Coliform", CHEMICAL_NAME %in% "Nitrogen, Ammonia (As N)" ~ "Ammonia", CHEMICAL_NAME %in% "Nitrogen, Nitrate (As N)" ~ "Nitrate", CHEMICAL_NAME %in% "Chloride (As Cl)" ~ "Chloride", CHEMICAL_NAME %in% "Total Dissolved Solids (Residue, Filterable)" ~ " Total Dissolved Solids", TRUE ~ CHEMICAL_NAME )) %>% mutate(SAMPLE_DATE = if_else(nchar(SAMPLE_DATE) > 10, lubridate::mdy_hms(SAMPLE_DATE, quiet = TRUE, tz = "America/New_York"), as.POSIXct(lubridate::mdy(SAMPLE_DATE, quiet = TRUE, tz = "America/New_York"))), ANALYSIS_DATE = if_else(nchar(ANALYSIS_DATE) > 10, lubridate::mdy_hms(ANALYSIS_DATE, quiet = TRUE, tz = "America/New_York"), as.POSIXct(lubridate::mdy(ANALYSIS_DATE, quiet = TRUE, tz = "America/New_York"))), date = as.Date(SAMPLE_DATE))
Drop the ANALYSIS_DATE
column because it is creating duplicates.
stream_chem.df <- clean_up.df %>% select(-ANALYSIS_DATE) %>% distinct()
Confirm that there are no duplicate chemicals reported for the same sample. REQUIRES ADDITIONAL ATTENTION.
dups.df <- stream_chem.df %>% group_by_at(vars(-RESULT_NUMERIC, -RESULT_UNIT)) %>% mutate(n = n()) %>% ungroup() %>% filter(n > 1) %>% select(n, everything()) %>% arrange(CHEMICAL_NAME, SYS_SAMPLE_CODE) vroom::vroom_write(dups.df, file.path(here::here(), "data-raw", "dups.csv"), delim = ",")
#test <- inner_join(dups.df, stream_chem.df)
standard_dt(dups.df)
smas.df <- stream_chem.df %>% rename_all(tolower) %>% rename(seg_id = sh_pwl_id, units = result_unit) %>% mutate(parameter = chemical_name %>% tolower() %>% trimws() %>% gsub(" ", "_", .), parameter = case_when( parameter %in% "hardness_(as_caco3)" ~ "hardness", parameter %in% c("nitrogen,_nitrate-nitrite", "nitrogen,_nitrite_+_nitrate") ~ "nitrate_nitrite", parameter %in% "sulfate_(as_so4)" ~ "sulfate", TRUE ~ parameter )) %>% mutate(fraction = case_when( fraction %in% "T" ~ "total", fraction %in% "D" ~ "dissolved", is.na(fraction) & parameter %in% c("ph", "total_coliforms", "fecal_coliforms") ~ "total", is.na(fraction) & parameter %in% c("dissolved_oxygen", "total_dissolved_solids") ~ "dissolved", TRUE ~ "ERROR" ), value = if_else(is.na(result_numeric) & !is.na(validation_level), reporting_detection_limit, result_numeric)) %>% mutate(sample_id = gsub(pattern = "_[^_]+$", replacement = "", x = sys_sample_code)) %>% select(sample_id, everything())
smas.df <- mutate_if(smas.df, is.character, tolower)
smas.df$units <- with(smas.df, ifelse(parameter %in% "ammonia" & is.na(units), "mg/l", units))
keep.vec <- c("parameter", "fraction", "units") merged.df <- merge(unique(smas.df[keep.vec]), unique(nysdec_wqs[keep.vec]), by = c("parameter", "fraction")) merged.df$units_comp <- paste(merged.df$units.x, merged.df$units.y, sep = ":") names(merged.df)[names(merged.df) %in% "units.x"] <- "units" units_comp.df <- merge(smas.df, merged.df, by = keep.vec, all.x = TRUE) units_comp.df$units_comp <- ifelse(is.na(units_comp.df$units_comp), "no_match", units_comp.df$units_comp) split.list <- by(units_comp.df, units_comp.df$units_comp, function(i) { units_comp.scalar <- unique(i$units_comp) if (units_comp.scalar %in% "mg/l:ug/l") { i$value <- i$value * 1000 i$units <- "ug/l" } if (units_comp.scalar %in% "ug/l:mg/l") { i$value <- i$value / 1000 i$units <- "mg/l" } if (units_comp.scalar %in% "ng/l:ug/l") { i$value <- i$value / 1000 i$units <- "ug/l" } if (units_comp.scalar %in% "ppt:ug/l") { i$units <- "ug/l" } if (units_comp.scalar %in% "ph units:ph_units") { i$units <- "ph_units" } if (units_comp.scalar %in% "NA:ug/l") { i$units <- "ug/l" } if (units_comp.scalar %in% "NA:cfu/100ml") { i$units <- "cfu/100ml" } if (!units_comp.scalar %in% c("mg/l:ug/l", "ug/l:mg/l", "ng/l:ug/l", "ppt:ug/l", "ph units:ph_units")) { break.vec <- unlist(strsplit(units_comp.scalar, ":")) if (length(unique(break.vec)) != 1) { warning(paste0("Review required...", "\n", "\t Supplied: ", break.vec[1], "\n", "\t Required: ", break.vec[2], "\n")) } } return(i) }) prepped.df <- do.call(rbind, split.list)
no_wqs_match.df <- prepped.df %>% select(parameter, everything()) %>% anti_join(stayCALM::nysdec_wqs, ., by = c("parameter", "fraction", "units")) %>% select(parameter, fraction, units) %>% distinct()
# Adding depth to match LMAS columns prepped.df$depth <- NA_real_ prepped.df$water_type <- "flow" keep.vec <- c("seg_id", # "water_type", "sample_id", "depth", "date", "fraction", "parameter", "value", "units", "quantitation_limit", "validator_qualifiers", "interpreted_qualifiers", "data_provider" ) final_smas.df <- subset(prepped.df, select = keep.vec) final_smas.df <- final_smas.df[!final_smas.df$interpreted_qualifiers %in% "r", ] final_smas.df$value <- ifelse(final_smas.df$interpreted_qualifiers %in% "u", as.numeric(final_smas.df$quantitation_limit), final_smas.df$value)
With the usethis package, the SMAS chemistry data is exported as a .rda file making it easily accessible during the development and testing of the stayCALM package.
smas.df <- final_smas.df usethis::use_data(smas.df, overwrite = TRUE)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.