Load the necessary R packages into the global environment, including the stayCALM package.
library(tidyverse) library(stayCALM)
Data was preprocessed to resemble the expected output from the authoritative databases that will become available as part of the Data Modernization effort [Data Modernization].
# Load the nysdec_wqs data set; it is used further down as the water quality
# standards table that parameter names, fractions, and units are matched to.
data("nysdec_wqs")
Define the file path to LMAS data.
# Directory containing the raw LMAS files: <here::here()>/data-raw/lmas.
data.path <- file.path(
  here::here(),
  "data-raw",
  "lmas"
)
Define which values should be read as NA.
# Strings that read.csv() should treat as missing values.
na.vec <- c(
  "NA",
  "na",
  "",   # empty field
  " "   # single blank
)
Import the water column data.
# Read the water column file, keeping strings as character and mapping the
# tokens in na.vec to NA.
hypo_epi.df <- read.csv(
  file = file.path(data.path, "hypoepi.for.zach.csv"),
  stringsAsFactors = FALSE,
  na.strings = na.vec
)
Import the profile data.
# Read the profile file, keeping strings as character and mapping the tokens
# in na.vec to NA.
profile.df <- read.csv(
  file = file.path(data.path, "profiles.for.zach.csv"),
  stringsAsFactors = FALSE,
  na.strings = na.vec
)
Append the two data sets and use clean_strings() to change all names to lowercase and all non-alphanumeric characters to underscores. The profile Result.Value column must be changed to a character value so that it can be combined with the matching column in hypo_epi.df. All results are converted to numeric values later in this document.
# Result.Value is coerced to character first so both tables can be stacked
# in a single column.
profile.df$Result.Value <- as.character(profile.df$Result.Value)

# Stack the water column rows and the profile rows into one table.
lmas.df <- dplyr::bind_rows(hypo_epi.df, profile.df)

# Standardize the column names with clean_strings().
names(lmas.df) <- clean_strings(names(lmas.df))
Convert all character values to lowercase to make them easier to manipulate.
# Lower-case every character column; columns of any other type are returned
# unchanged.
lmas.df[] <- lapply(lmas.df, function(column) {
  if (is.character(column)) tolower(column) else column
})
Parse sample_date into a Date-typed column named date.
# Parse the sample date (year-month-day) into a Date column named `date`.
lmas.df$date <- as.Date(
  lmas.df$sample_date,
  format = "%Y-%m-%d"
)
# Diagnostic: the distinct parameter/unit combinations where the result unit
# and the quantitation-limit unit disagree.
test <- lmas.df %>%
  select(
    characteristic_name,
    result_unit,
    result_detection_quantitation_limit_unit
  ) %>%
  distinct() %>%
  filter(result_unit != result_detection_quantitation_limit_unit)

# Numeric copy of the reported result.
lmas.df$value <- as.numeric(lmas.df$result_value)

# Express the quantitation limit in the same unit as the result when the two
# differ only by the mg/l <-> ug/l scale. The limit is rescaled first, while
# the unit column still holds the original label, and relabelled afterwards.
lmas.df <- lmas.df %>%
  mutate(quantitation_limit = as.numeric(quantitation_limit)) %>%
  mutate(
    quantitation_limit = case_when(
      result_unit %in% "ug/l" &
        result_detection_quantitation_limit_unit %in% "mg/l" ~
        quantitation_limit * 1000,
      result_unit %in% "mg/l" &
        result_detection_quantitation_limit_unit %in% "ug/l" ~
        quantitation_limit / 1000,
      TRUE ~ quantitation_limit
    ),
    result_detection_quantitation_limit_unit = case_when(
      result_unit %in% "ug/l" &
        result_detection_quantitation_limit_unit %in% "mg/l" ~ "ug/l",
      result_unit %in% "mg/l" &
        result_detection_quantitation_limit_unit %in% "ug/l" ~ "mg/l",
      TRUE ~ result_detection_quantitation_limit_unit
    )
  )
# lmas.df$quantitation_limit <- as.numeric(gsub("[^0-9.-]", "", lmas.df$quantitation_limit))
# test <- lmas.df[is.na(lmas.df$value) & !is.na(lmas.df$result_value),
#                 c("result_value", "value")]
Make the fraction column more explicit in its representation by supplying "total" instead of "t" and "dissolved" instead of "d".
# Recode the sample fraction: "t" -> "total", "d" -> "dissolved", NA stays NA.
# The codes are already lower case because all character columns were
# lower-cased earlier. Any other code stops the script.
fraction_lookup <- c("t" = "total", "d" = "dissolved")

# as.character() keeps the lookup name-based even if the column was read as
# an all-NA logical vector.
raw_fraction <- as.character(lmas.df$result_sample_fraction)

unexpected_fraction <- setdiff(
  unique(raw_fraction),
  c(names(fraction_lookup), NA_character_)
)
if (length(unexpected_fraction) > 0) {
  stop("No match found. Expecting 'T', 'D', or NA.")
}

# Indexing by name returns NA for NA codes; unname() keeps the column free of
# element names.
lmas.df$fraction <- unname(fraction_lookup[raw_fraction])
Add fraction values for the following parameters to match the values in the water quality standards table.
# pH rows are always given the "total" fraction.
lmas.df$fraction[lmas.df$characteristic_name %in% "ph"] <- "total"

# Dissolved oxygen rows are always given the "dissolved" fraction.
# characteristic_name has not been standardized at this point, so the raw
# spelling "dissolved oxygen (do)" is matched as well as the standardized
# "dissolved_oxygen"; matching only the standardized spelling would skip the
# raw rows.
# NOTE(review): confirm the raw spelling against the source files.
lmas.df$fraction[
  lmas.df$characteristic_name %in%
    c("dissolved_oxygen", "dissolved oxygen (do)")
] <- "dissolved"
Standardize the parameter names to match the names in the water quality standards table.
# Map raw LMAS characteristic names to standardized parameter names.
#
# .param: vector of (lower-case) characteristic names; coerced to character.
# Returns an unnamed character vector the same length as `.param`. Names that
# have no entry in the lookup table, including NA, are returned unchanged.
# Zero-length input returns character(0).
lmas_param_switch <- function(.param) {
  standard_names <- c(
    "dissolved oxygen (do)" = "dissolved_oxygen",
    "nitrogen, nitrate (as n)" = "nitrate",
    "nitrogen, nitrate-nitrite" = "nitrate_nitrite",
    "sulfate (as so4)" = "sulfate",
    "temperature, water" = "temperature",
    "total dissolved solids" = "total_dissolved_solids",
    "total hardness" = "hardness"
  )

  renamed <- as.character(.param)
  # %in% is FALSE for NA, so missing names pass through untouched.
  has_standard <- renamed %in% names(standard_names)
  renamed[has_standard] <- unname(standard_names[renamed[has_standard]])
  renamed
}
# Standardized parameter name for every row.
lmas.df$parameter <- lmas_param_switch(lmas.df$characteristic_name)

# Diagnostic: parameters listed in nysdec_wqs that never occur in the LMAS
# data, printed in sorted order.
wqs_param.vec <- unique(nysdec_wqs$parameter)
sort(wqs_param.vec[!(wqs_param.vec %in% lmas.df$parameter)])
# Units: pH rows are labelled "ph_units"; all other rows keep the reported
# result unit.
lmas.df$units <- ifelse(
  lmas.df$parameter %in% "ph",
  "ph_units",
  lmas.df$result_unit
)

# The segment ID is a copy of pwlid.
lmas.df$seg_id <- lmas.df$pwlid

# The sample ID is "<lake_id>_<sample_name>".
lmas.df$sample_id <- with(lmas.df, paste(lake_id, sample_name, sep = "_"))

# Rename location_id to site_id (no-op when the column is absent).
names(lmas.df)[names(lmas.df) == "location_id"] <- "site_id"
keep.vec <- c("parameter", "fraction", "units")

# Pair every parameter/fraction/units combination found in the LMAS data with
# the units listed in nysdec_wqs for the same parameter and fraction.
# units.x is the supplied (LMAS) unit and units.y is the required (WQS) unit.
merged.df <- merge(
  unique(lmas.df[keep.vec]),
  unique(nysdec_wqs[keep.vec]),
  by = c("parameter", "fraction")
)
merged.df$units_comp <- paste(merged.df$units.x, merged.df$units.y, sep = ":")
names(merged.df)[names(merged.df) %in% "units.x"] <- "units"

# Attach the "supplied:required" label to every LMAS row; rows without a
# counterpart in nysdec_wqs are labelled "no_match".
units_comp.df <- merge(lmas.df, merged.df, by = keep.vec, all.x = TRUE)
units_comp.df$units_comp <- ifelse(
  is.na(units_comp.df$units_comp),
  "no_match",
  units_comp.df$units_comp
)

# Convert each group to the required unit. Only mg/l <-> ug/l is converted;
# any other mismatch raises a warning and the rows are returned unchanged.
split.list <- by(units_comp.df, units_comp.df$units_comp, function(i) {
  units_comp.scalar <- unique(i$units_comp)

  if (units_comp.scalar %in% "mg/l:ug/l") {
    i$value <- i$value * 1000
    # The quantitation limit is later substituted for `value`, so it must be
    # rescaled together with `value` to stay consistent with `units`.
    # NOTE(review): assumes the limit is expressed in the result unit.
    i$quantitation_limit <- i$quantitation_limit * 1000
    i$result_detection_quantitation_limit_unit[
      i$result_detection_quantitation_limit_unit %in% "mg/l"
    ] <- "ug/l"
    i$units <- "ug/l"
  } else if (units_comp.scalar %in% "ug/l:mg/l") {
    i$value <- i$value / 1000
    i$quantitation_limit <- i$quantitation_limit / 1000
    i$result_detection_quantitation_limit_unit[
      i$result_detection_quantitation_limit_unit %in% "ug/l"
    ] <- "mg/l"
    i$units <- "mg/l"
  } else {
    # "no_match" splits into a single element, so it never warns.
    break.vec <- unlist(strsplit(units_comp.scalar, ":"))
    if (length(unique(break.vec)) != 1) {
      warning(paste0("Review required...", "\n",
                     "\t Supplied: ", break.vec[1], "\n",
                     "\t Required: ", break.vec[2]))
    }
  }

  i
})

# Stack the per-group tables back into one data frame.
prepped.df <- do.call(rbind, split.list)
prepped.df$water_type <- "pond"

# Columns carried into the final table (water_type is currently excluded).
keep.vec <- c(
  "seg_id", "site_id", "sample_id",
  # "water_type",
  "depth", "date", "fraction", "parameter", "value", "units",
  "quantitation_limit", "validator_qualifiers", "interpreted_qualifiers",
  "data_provider"
)
final_lmas.df <- prepped.df[, keep.vec, drop = FALSE]

# Drop rows whose validator qualifier is "r".
final_lmas.df <- final_lmas.df[!(final_lmas.df$validator_qualifiers %in% "r"), ]

# Rows whose validator qualifier is "u" take the quantitation limit as their
# value; every other row keeps its value.
final_lmas.df$value <- with(
  final_lmas.df,
  ifelse(validator_qualifiers %in% "u",
         as.numeric(quantitation_limit),
         value)
)
# Diagnostic: rows whose seg_id has no match in wipwl.df, followed by the
# number of those rows with a non-missing seg_id.
test.df <- anti_join(final_lmas.df, wipwl.df, by = "seg_id")
sum(!is.na(test.df$seg_id))

# Diagnostic: rows whose seg_id does have a match in wipwl.df.
test.df <- inner_join(final_lmas.df, wipwl.df, by = "seg_id")
With the usethis package, the LMAS chemistry data is exported as a .rda file, making it easily accessible during the development and testing of the stayCALM package.
# Store the finished table under the name lmas.df and save it with
# usethis::use_data(), replacing any previously saved copy.
lmas.df <- final_lmas.df
usethis::use_data(
  lmas.df,
  overwrite = TRUE
)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.