#' @title compileNETNdata
#'
#' @description Compile flat files from NETN Access Database for R package
#'
#' @importFrom tidyr pivot_longer separate gather spread
#' @importFrom dplyr distinct case_when contains filter select mutate rename arrange left_join summarize group_by
#' @importFrom purrr map
#'
#' @param path Quoted path to export csv files to. Defaults to "Data" folder within current project.
#' @param export \code{TRUE} or \code{FALSE}. Export csv files to specified path. Defaults to \code{TRUE}.
#' @param surface \code{TRUE} or \code{FALSE}. Return only measurements representing the stream surface or lake epilimnion.
#' If \code{TRUE}, the median of the surface measurements from the top 2m of sampling are returned. Defaults to \code{TRUE}.
#' @param active_site \code{TRUE} or \code{FALSE}. If \code{TRUE} only compiles data for sites that are actively being monitored.
#' If \code{FALSE} compiles all sites stored in the database. Defaults to \code{TRUE}.
#' @param active_metric \code{TRUE} or \code{FALSE}. If \code{TRUE} only compiles metrics that are actively collected.
#' If \code{FALSE} compiles all metrics stored in the database. Defaults to \code{TRUE}.
#' @param restricted \code{TRUE} or \code{FALSE}. If \code{TRUE}, metrics within a site that have 4 or fewer
#' measurements since monitoring began in 2006 are dropped.
#' @param cleanEnv \code{TRUE} or \code{FALSE}. Allows you to clean the global environment so that
#' only waterDat and MD are kept. Defaults to \code{TRUE}.
#'
#' @return Returns a list of two dataframes (WaterData and MetaData) formatted for import into NCRNwater R package.Exports data to folder by default.
#' @details This function create flat files from NETN's water database. The function returns 2 data frames used for the
#' NCRNWater R package (Water Data and Meta Data).
#' Currently the Water Data list element can include (1) measurements at all sampled depths (for plotting profiles) AND the
#' the median value of measurements taken within the top 2m of the water surface when \code{surface == FALSE} or
#' (2) just the median values of surface measurements within the top 2m of the water surface \code{surface == TRUE}.
#' The latter is to enable examining patterns in the stream surface or lake epilimnion over time and is currently (as of 11/1/2019) the data working
#' in the NCRNwater package.
#'
#' @examples
#' # compile NETN water data for surface measurements and active metrics only,
#' # and export to C:/Data.
#' compileNETNdata(path = "C:/Data/", export = TRUE, surface = TRUE, active = TRUE,
#' restricted = TRUE)
#'
#' @export
compileNETNdata <- function(path = "./Data/", export = TRUE, surface = TRUE, active_site = TRUE,
active_metric = TRUE, restricted = TRUE, cleanEnv = TRUE) {
if(!requireNamespace("RODBC", quietly = TRUE)){
stop("Package 'RODBC' is needed for this function to work. Please install it.", call. = FALSE)
}
if(!requireNamespace("stringr", quietly = TRUE)){
stop("Package 'stringr' is needed for this function to work. Please install it.", call. = FALSE)
}
# Error handling for specified path
path <- if (substr(path, nchar(path), nchar(path)) != "/") {
paste0(path, "/")
} else {(paste0(path))}
# Check that specified path exists on computer
if(export == TRUE & dir.exists(path) == FALSE){
stop(paste0("The specified path: ", path, " does not exist"))
}
# Check that NETNWQ is a named user or system DSN
if(nrow(data.frame(driver=(RODBC::odbcDataSources())) %>%
mutate(dsn_name = row.names(.)) %>%
filter(dsn_name == "NETNWQ"))==0)
stop ('Compile function failed. There is no DSN named "NETNWQ".')
#########################
#Import water data from Access database:
# Connect to database BE ti bring in tables
con <- RODBC::odbcConnect("NETNWQ")
# grab all tables name from DB
tableList <- RODBC::sqlTables(con)$TABLE_NAME %>% as.vector()
# BUILD LIST OF tables from DB connection
dfList <- map(tableList, function(x) RODBC::sqlFetch(con, sqtable = x, rows_at_time = 1))
dfList <- setNames(dfList, tableList) # name each of the list tables
RODBC::odbcClose(con) # close ODBC connection
#put a dataframe for each DB table from the list object in the Global Environment:
list2env(dfList, envir=.GlobalEnv) # extract separate df's to environment
##### create Water Data.csv ----
#create data frame in the template 'Water Data.csv' for the NCRN Water R package and Rshiny WaterViz:
#prep ancillary data for merging:
Samples <- Sample %>%
select(c('PK_Sample', 'FK_Event'))
Events <- Event %>%
select(c('PK_Event', 'FK_Location', 'StartDate'))
Locations <- Location %>%
select(c('PK_Location', 'NPStoretSiteCode', 'LocationType'))
#set column order for 'Water data.csv', long form:
col_order <- c("NPSTORET.Org.ID.Code", "StationID", "Visit.Start.Date",
"SampleDepth", "Depth.Units", "Local.Characteristic.Name",
"Result.Value.Text", "Lower.Quantification.Limit",
"Upper.Quantification.Limit")
#### Build Chemistry table ----
#select variables of interest from Chemistry table:
Chem <- Chemistry %>%
select(-c('SEC2016', 'LabCode', 'SampleTime', 'SampleStation',
'SampleType', 'FK_WaterQualityMethod', 'Project', 'pH_Lab',
'eqPH', 'AppColorFlag', 'AppColor_PCU', 'TrueColor_PCU',
'TColor_Flag', 'pH_Lab_Method', 'CONDMETH', 'COLORMETH',
'ALKMETH', 'ChemComments'))
#merge in ancillary info on date and site:
Chem <- merge(Chem, Samples, by.x="FK_Sample", by.y = "PK_Sample")
Chem <- merge(Chem, Events, by.x="FK_Event", by.y = "PK_Event")
Chem <- merge(Chem, Locations, by.x="FK_Location", by.y = "PK_Location")
#format Date
Chem$StartDate <- as.POSIXct(Chem$StartDate, format = "%Y-%m-%d")
#filter out everything but QCtype = "ENV", rename columns, re-code Sample Depth:
Chem <- Chem %>%
filter(QCtype == "ENV") %>%
rename(StationID=NPStoretSiteCode,
Visit.Start.Date=StartDate,
SampleDepth = SampleDepth_m) %>%
mutate(NPSTORET.Org.ID.Code = "NETN") %>%
mutate(SampleDepth = case_when(LocationType == 'Stream' ~ 'stream',
LocationType == 'Pond' ~ 'epilimnion',
LocationType == 'Lake' ~ 'epilimnion')) %>%
arrange(StationID, Visit.Start.Date)
Chem <- Chem[order(Chem$StationID, Chem$Visit.Start.Date),]
#check to be sure there is only measurement (representing the epilimnion) per visit. This takes about 15 seconds to run.
### function to count the number of unique levels of a character variable:
how.many <- function(x){length(unique(x))}
try(if(max(as.vector(tapply(
Chem$SampleDepth,
list(Chem$Visit.Start.Date, Chem$StationID),
how.many)), na.rm = T) > 1)
stop ('More than one sampling depth per site visit')
)
#split into two dataframes - measurments and quality flags.
Chem_dat <- select(Chem, NPSTORET.Org.ID.Code, StationID,
Visit.Start.Date, SampleDepth, -contains("Flag"),
contains("mgL"), contains("ugL"), contains("eqL"))
Chem_flag <- select(Chem, StationID, Visit.Start.Date, contains("Flag"))
#convert both dataframes to long form and remove rows with NA for observation value
Chem_dat_long <- Chem_dat %>%
pivot_longer(cols= c(-StationID, -NPSTORET.Org.ID.Code, -SampleDepth, -Visit.Start.Date),
names_to = "Local.Characteristic.Name", values_to = "Result.Value.Text") %>%
mutate(Depth.Units = NA) %>%
arrange(StationID, Visit.Start.Date, Local.Characteristic.Name )
Chem_flag_long <- Chem_flag %>%
pivot_longer(cols = c( -StationID,-Visit.Start.Date), names_to = "Flag.Characteristic.Name",
values_to = "Flag.Value")%>%
rename(Visit.Start.Date.F = Visit.Start.Date, StationID.F = StationID) %>%
arrange(StationID.F, Visit.Start.Date.F, Flag.Characteristic.Name )
#merge together:
Chem_long <- cbind(Chem_dat_long, Chem_flag_long)
#remove NA and create Upper and Lower Quantification Limit columns:
test <- Chem_long %>%
mutate(Lower.Quantification.Limit = NA) %>%
mutate(Upper.Quantification.Limit = NA) %>%
mutate(Flag.Value = as.character(Flag.Value))
#extract Lower and Upper limits from Flag.Value column and replace Result.Value.Text with character string if value was outside of qunatification limits:
test <- suppressWarnings(test %>% mutate(limit = stringr::str_extract(Flag.Value, "\\-*\\d+\\.*\\d*"), #extracts the numbers
Lower.Quantification.Limit = as.numeric(ifelse(stringr::str_detect(Flag.Value, "<"), paste(limit), NA)),
Upper.Quantification.Limit = as.numeric(ifelse(stringr::str_detect(Flag.Value, ">"), paste(limit), NA)),
Result.Value.Text = case_when(!is.na(Lower.Quantification.Limit) ~ paste0("*Present <QL"),
!is.na(Upper.Quantification.Limit) ~ paste0("*Present >QL"),
TRUE ~ paste0(Result.Value.Text))) %>%
select(-limit))
#clean up
Chem_long <- test %>%
dplyr::filter(!is.na(Result.Value.Text)) %>% select(col_order)
#### Build WQ Insitu table ----
#select variables of interest from WQInSitu table:
WQ <- WQInSitu %>%
select(PK_WQInSitu, FK_Sample, SampleDepth = Depth_m, BP_mmHg,
DOsat_pct, DO_mgL, pH, SpCond_uScm,Temp_C, QCType) %>%
filter(QCType == "0") %>% #Remove calibratin samples.
select(-QCType) %>%
left_join(., Samples, by=c("FK_Sample"= "PK_Sample"))%>% # add in ancillary data
left_join(.,Events, by=c("FK_Event" = "PK_Event")) %>%
left_join(.,Locations, by=c("FK_Location" = "PK_Location")) %>%
mutate(StartDate = as.POSIXct(StartDate, format = "%Y-%m-%d")) %>%
rename(StationID=NPStoretSiteCode, Visit.Start.Date=StartDate) %>%#clean up column names, remove rows with SampleDepth = NA
mutate(NPSTORET.Org.ID.Code = "NETN") %>%
filter(!is.na(SampleDepth)) %>%
select(-c(FK_Location, FK_Event, FK_Sample, PK_WQInSitu)) %>%
arrange(StationID, Visit.Start.Date)
#make long, remove rows with NA for observation value, record Depth units
WQ_long <- WQ %>%
select(-LocationType) %>%
pivot_longer(names_to = "Local.Characteristic.Name",
values_to = "Result.Value.Text",
cols= -c(SampleDepth, Visit.Start.Date, StationID, NPSTORET.Org.ID.Code)) %>%
dplyr::filter(!is.na(Result.Value.Text)) %>%
mutate(Upper.Quantification.Limit = NA) %>%
mutate(Lower.Quantification.Limit = NA) %>%
mutate(Depth.Units = "m") %>%
select(col_order) %>% filter(!is.na(StationID)) #Removed records missing primary key
# 3 sites in WQInSitu table missing site data.
#calculate median value for all depths 2m or less to represent the stream/epilimnion value.
temp <- suppressWarnings(
WQ %>% filter(SampleDepth <= 2) %>%
gather(key = Local.Characteristic.Name, value = Result.Value.Text,
-c(SampleDepth, Visit.Start.Date, StationID, NPSTORET.Org.ID.Code)) %>%
mutate(Result.Value.Text = as.numeric(Result.Value.Text)) %>%
dplyr::filter(!is.na(Result.Value.Text)) %>%
group_by(StationID, Visit.Start.Date, Local.Characteristic.Name) %>%
dplyr::summarize(Result.Value.Text = median(as.numeric(Result.Value.Text))) %>%
ungroup() %>%
spread(key = Local.Characteristic.Name, value = Result.Value.Text) %>%
mutate(NPSTORET.Org.ID.Code = "NETN") %>%
left_join(., Locations, by =c("StationID" = "NPStoretSiteCode")) # bind on location type (stream or Lake)
)
temp <- temp %>%
mutate(SampleDepth = case_when(LocationType == 'Stream' ~ 'stream',
LocationType == 'Pond' ~ 'epilimnion',
LocationType == 'Lake' ~ 'epilimnion')) %>%
select(-c(PK_Location, LocationType))
#make long, remove rows with NA for observation value, add column for Depth.Units:
temp_long <- temp %>%
gather(key = Local.Characteristic.Name, value = Result.Value.Text,
-c(SampleDepth, Visit.Start.Date, StationID, NPSTORET.Org.ID.Code)) %>%
dplyr::filter(!is.na(Result.Value.Text)) %>%
mutate(Upper.Quantification.Limit = NA) %>%
mutate(Lower.Quantification.Limit = NA) %>%
mutate(Depth.Units = NA) %>%
select(col_order)
#add summaries to WQ_long df:
WQ_long <- rbind(WQ_long, temp_long)
#### Handle Discharge data ----
#select variables of interest from StreamDischarge table:
SD <- StreamDischarge %>%
select(c('PK_StreamDischarge', 'FK_Sample', 'Discharge_cfs')) %>%
left_join(., Samples, by=c("FK_Sample" = "PK_Sample")) %>%
left_join(., Events, by =c("FK_Event"= "PK_Event")) %>%
left_join(., Locations, by=c("FK_Location"= "PK_Location")) %>%
mutate(StartDate= as.POSIXct(StartDate, format = "%Y-%m-%d")) %>%
rename(StationID=NPStoretSiteCode, Visit.Start.Date=StartDate) %>%
mutate(NPSTORET.Org.ID.Code = "NETN") %>%
mutate(SampleDepth = "stream") %>%
select(-c(FK_Location, FK_Event, FK_Sample, PK_StreamDischarge, LocationType)) %>%
arrange(StationID, Visit.Start.Date)
#make long and remove rows with NA for observation value
SD_long <- SD %>%
pivot_longer(names_to = "Local.Characteristic.Name",
values_to = "Result.Value.Text",
cols=-c(SampleDepth, Visit.Start.Date, StationID, NPSTORET.Org.ID.Code)) %>%
dplyr::filter(!is.na(Result.Value.Text)) %>%
mutate(Upper.Quantification.Limit = NA) %>%
mutate(Lower.Quantification.Limit = NA) %>%
mutate(Depth.Units = NA) %>%
filter(StationID != "NETN_MIMA_SC00") %>% # Discharge not measured in Concord River
select(col_order)
##### Turbidity table ----
#select variables of interest from Turbidity table:
Turb <- Turbidity %>%
select(c('PK_Turbidity', 'FK_Sample', 'Turbidity_NTU')) %>%
left_join(.,Samples, by =c("FK_Sample"= "PK_Sample")) %>% #merge in ancillary info on date and site:
left_join(., Events, by=c("FK_Event" = "PK_Event")) %>%
left_join(., Locations, by=c("FK_Location" = "PK_Location")) %>%
mutate(StartDate = as.POSIXct(StartDate, format = "%Y-%m-%d")) %>% #format Date
rename(StationID=NPStoretSiteCode, Visit.Start.Date=StartDate) %>%
mutate(NPSTORET.Org.ID.Code = "NETN") %>%
mutate(SampleDepth = "stream") %>%
select(-c(FK_Location, FK_Event, FK_Sample, PK_Turbidity, LocationType)) %>%
arrange(StationID, Visit.Start.Date)
#make long and remove rows with NA for observation value
Turb_long <- Turb %>%
pivot_longer(names_to = "Local.Characteristic.Name",
values_to = "Result.Value.Text",
cols= -c(SampleDepth, Visit.Start.Date, StationID, NPSTORET.Org.ID.Code)) %>%
dplyr::filter(!is.na(Result.Value.Text)) %>%
mutate(Upper.Quantification.Limit = NA) %>%
mutate(Lower.Quantification.Limit = NA) %>%
mutate(Depth.Units = NA) %>%
select(col_order)
#### Secchi Depth ----
#select variables of interest from Secchi table:
Sec <- Secchi %>%
select(c('PK_Secchi', 'FK_Sample','SDepth1_m', 'Bot_SD1')) %>%
left_join(., Samples, by = c("FK_Sample" = "PK_Sample")) %>% #merge in ancillary info on date and site:
left_join(., Events, by = c("FK_Event" = "PK_Event")) %>%
left_join(., Locations, by = c("FK_Location" = "PK_Location")) %>%
mutate(StartDate = as.POSIXct(StartDate, format = "%Y-%m-%d")) %>% #format Date
mutate(Lower.Quantification.Limit = ifelse(Bot_SD1 == 'B', paste(SDepth1_m), NA)) %>%
mutate(SDepth1_m = case_when(Bot_SD1 == 'B' ~ paste("*Present <QL"),
is.na(Bot_SD1) ~ paste(SDepth1_m))) %>%
rename(StationID= NPStoretSiteCode, Visit.Start.Date=StartDate) %>%
mutate(NPSTORET.Org.ID.Code = "NETN") %>%
mutate(Upper.Quantification.Limit = NA) %>%
mutate(SampleDepth = case_when(LocationType == 'Stream' ~ 'stream',
LocationType == 'Pond' ~ 'epilimnion',
LocationType == 'Lake' ~ 'epilimnion',
LocationType == 'Bottom' ~ 'bottom')) %>%
select(-c(FK_Location, FK_Event, FK_Sample, PK_Secchi, LocationType, Bot_SD1)) %>%
arrange(StationID, Visit.Start.Date)
#make long and remove rows with NA for observation value
Sec_long <- Sec %>%
pivot_longer(names_to = "Local.Characteristic.Name", values_to = "Result.Value.Text",
cols= -c(SampleDepth, Visit.Start.Date, StationID,
NPSTORET.Org.ID.Code, Upper.Quantification.Limit,
Lower.Quantification.Limit)) %>%
dplyr::filter(!is.na(Result.Value.Text)) %>%
mutate(Depth.Units = NA) %>%
select(col_order)
#### Light Penetration ----
#select variables of interest from LightPenetration table:
Light <- LightPenetration %>%
select(c('PK_LightPenetration', 'FK_Sample', 'Depth_m', 'PenetrationRatio')) %>%
left_join(.,Samples, by =c("FK_Sample"= "PK_Sample")) %>% #merge in ancillary info on date and site:
left_join(., Events, by=c("FK_Event" = "PK_Event")) %>%
left_join(., Locations, by=c("FK_Location" = "PK_Location")) %>%
mutate(StartDate = as.POSIXct(StartDate, format = "%Y-%m-%d")) %>% #format Date
rename(StationID=NPStoretSiteCode, Visit.Start.Date=StartDate, SampleDepth = Depth_m) %>%
mutate(NPSTORET.Org.ID.Code = "NETN") %>%
select(-c(FK_Location, FK_Event, FK_Sample, PK_LightPenetration)) %>%
arrange(StationID, Visit.Start.Date)
#make long and remove rows with NA for observation value
Light_long <- Light %>%
select(-LocationType) %>%
pivot_longer(names_to = "Local.Characteristic.Name",
values_to = "Result.Value.Text",
cols= -c(SampleDepth, Visit.Start.Date, StationID, NPSTORET.Org.ID.Code)) %>%
dplyr::filter(!is.na(Result.Value.Text)) %>%
mutate(Upper.Quantification.Limit = NA) %>%
mutate(Lower.Quantification.Limit = NA) %>%
mutate(Depth.Units = "m") %>%
select(col_order)
#calc median light penetration in top 2m:
## Note : using gather since multiple data types in the value field (for pivot_longer these need to be specified by value ptypes))
temp2 <- suppressWarnings(
Light %>%
filter(SampleDepth <= 2) %>%
gather(key = Local.Characteristic.Name,
value = Result.Value.Text,
-c(SampleDepth, Visit.Start.Date, StationID, NPSTORET.Org.ID.Code)) %>%
mutate(Result.Value.Text = as.numeric(Result.Value.Text)) %>%
dplyr::filter(!is.na(Result.Value.Text)) %>%
group_by(StationID, Visit.Start.Date, Local.Characteristic.Name) %>%
dplyr::summarize(Result.Value.Text = median(as.numeric(Result.Value.Text)), .groups = 'keep') %>%
ungroup() %>%
spread(key = Local.Characteristic.Name, value = Result.Value.Text) %>%
mutate(NPSTORET.Org.ID.Code = "NETN") %>%
left_join(., Locations, by= c("StationID"= "NPStoretSiteCode")) %>% # bind liocation type
mutate(SampleDepth = case_when(LocationType == 'Stream' ~ 'stream',
LocationType == 'Pond' ~ 'epilimnion',
LocationType == 'Lake' ~ 'epilimnion')) %>%
select(-c(PK_Location, LocationType))
)
#make long, remove rows with NA for observation value, add column for Depth.Units:
temp2_long <- temp2 %>%
pivot_longer(names_to = "Local.Characteristic.Name",
values_to = "Result.Value.Text",
cols= -c(SampleDepth, Visit.Start.Date, StationID,
NPSTORET.Org.ID.Code)) %>%
dplyr::filter(!is.na(Result.Value.Text)) %>%
mutate(Upper.Quantification.Limit = NA) %>%
mutate(Lower.Quantification.Limit = NA) %>%
mutate(Depth.Units = NA) %>%
select(col_order)
#add median to Light_long df:
Light_long <- rbind(Light_long, temp2_long)
#### Final binding and clean-up of data to create NETN_Water_Data.csv -----
#merge primary observations together
#First, make 'NETN_Water_Data.csv' (full datset with all depths and aggregations).
#Next, make 'Water Data.csv' (with data for depth profiles removed so that only samples that represent the stream/epilimnion remain) for the NCRN Water data and WaterViz packages.
waterDat <- Reduce(function(x,y) rbind(x,y),
list(Chem_long, WQ_long, SD_long, Turb_long, Sec_long, Light_long))
waterDat <- waterDat %>% filter(!is.na(Result.Value.Text) & Result.Value.Text!="NA") # Dataset created a lot of site/param combinations that don't have data
### Retain or exclude measurements by depth? If TRUE, only the median surface value from the top 2m are returned.
# write data to new object to use for constructing metadata file below so that subsetting by surface measurements won't affect output and colname changing isn't a problem.
#waterDatBkup <- waterDat
if(surface == TRUE){
#select only 'stream', 'epilimnion', NA Sample Depths, or depths <2m.
waterDat <- waterDat %>%
filter(SampleDepth == 'stream' | SampleDepth == 'epilimnion' ) #| is.na(SampleDepth) | as.numeric(SampleDepth) <2)
} # MIMA sites have number recorded instead of factor level
#replace "." with " " or "/" in column names to match what importNCRNWater expects:
names(waterDat) <- c('Network', 'StationID', 'Visit Start Date',
'SampleDepth', 'Depth Units','Local Characteristic Name',
'Result Value/Text', 'Lower Quantification Limit',
'Upper Quantification Limit')
active_sites <- c("NETN_ACAD_ABIN", "NETN_ACAD_ANTB", "NETN_ACAD_BRBK", "NETN_ACAD_BRKB",
"NETN_ACAD_BRWN", "NETN_ACAD_BUBL", "NETN_ACAD_CADS", "NETN_ACAD_DKLI", "NETN_ACAD_DUCK",
"NETN_ACAD_EAGL", "NETN_ACAD_ECHO", "NETN_ACAD_HADB", "NETN_ACAD_HNTR", "NETN_ACAD_HODG",
"NETN_ACAD_HTHB", "NETN_ACAD_JORD", "NETN_ACAD_KEBO", "NETN_ACAD_LBRK", "NETN_ACAD_LHAD",
"NETN_ACAD_LKWO", "NETN_ACAD_LONG", "NETN_ACAD_LSIE", "NETN_ACAD_LVYB", "NETN_ACAD_MRSL",
"NETN_ACAD_OTRC", "NETN_ACAD_ROUN", "NETN_ACAD_SEAL", "NETN_ACAD_SEAW",
"NETN_ACAD_SGTB", "NETN_ACAD_STNL", "NETN_ACAD_UBRK", "NETN_ACAD_UHAD", "NETN_ACAD_WHOL",
"NETN_ACAD_WOOD",
"NETN_MABI_PA00", "NETN_MABI_SA00",
"NETN_MIMA_SA00", "NETN_MIMA_SB00", "NETN_MIMA_SC00",
"NETN_MORR_SB00", "NETN_MORR_SD00", "NETN_MORR_SE00",
"NETN_ROVA_SA00", "NETN_ROVA_SB00", "NETN_ROVA_SD00", "NETN_ROVA_SE00", "NETN_ROVA_SF00",
"NETN_SAGA_PA00", "NETN_SAGA_SA00", "NETN_SAGA_SB00",
"NETN_SAIR_SA00", "NETN_SAIR_SB00",
"NETN_SARA_SA00", "NETN_SARA_SC00", "NETN_SARA_SD00",
"NETN_WEFA_PA00"
)
if(active_site == TRUE){
waterDat <- waterDat %>% filter(StationID %in% active_sites) %>% droplevels()
} else {waterDat}
active_metrics <- c('AL_ugL', 'ANC_ueqL', 'BP_mmHg', 'Ca_ueqL', 'ChlA_ugL',
'Cl_ueqL', 'Discharge_cfs', 'DO_mgL', 'DOC_mgL',
'DOsat_pct', 'K_ueqL', 'Mg_ueqL', 'Na_ueqL', 'NH4_mgL',
'NO3_ueqL', 'PenetrationRatio', 'pH', 'SDepth1_m',
'SO4_ueqL', 'SpCond_uScm', 'Temp_C','TN_mgL',
'TP_ugL', 'Turbidity_NTU')
if(active_metric == TRUE){
waterDat <- waterDat %>% filter(`Local Characteristic Name` %in% active_metrics) %>% droplevels()
} else {waterDat}
if(restricted == TRUE){
waterDat <- waterDat %>% group_by(StationID, `Local Characteristic Name`) %>%
mutate(tot_meas = length(!is.na(`Result Value/Text`))) %>%
filter(tot_meas >= 4) %>% ungroup() %>% droplevels()
} else {waterDat}
waterDatBkup <- waterDat
#### Create Metadata.csv ----
#Each row represents a variable (characteristic) at a location.
## create look-up table of sites and their location metadata
sites <- unique(waterDatBkup$StationID)
sites.temp <- merge(Location, tluParkCode, by = "ParkCode", all.x=T, all.y=F)
sites_tlu <- sites.temp %>%
rename(ShortName = PARKNAME, Type = LocationType,
SiteCode = NPStoretSiteCode, SiteName = ShortSiteName,
Lat = StartLat_DD, Long = StartLon_DD) %>%
mutate(Network = "NETN") %>%
mutate(LongName = paste(ShortName, PARKTYPE, sep = " ")) %>%
mutate(Type = case_when(Type == "Pond" ~ "Lake", ##change Pond to Lake for further handling; have to change all values
Type == "Lake" ~ "Lake",
Type == "Stream" ~ "Stream")) %>%
dplyr::filter(SiteCode %in% sites) %>%
select(c(Network, ParkCode, ShortName, LongName, SiteCode, SiteName, Lat, Long, Type))
sites_tlu$LongName <- ifelse(sites_tlu$ParkCode == "SAGA",
paste0("Saint-Gaudens National Historical Park"),
paste0(sites_tlu$LongName))
#create rows only for the variables measured at each site, extract units, and add in site names
MD <- suppressWarnings(
waterDatBkup %>% select(SiteCode = StationID,
CharacteristicName = `Local Characteristic Name`) %>%
mutate(SiteCode = as.character(SiteCode)) %>%
group_by(SiteCode) %>%
distinct() %>%
separate(CharacteristicName, c("CategoryDisplay","Units"), sep="_", remove= FALSE) %>%
left_join(sites_tlu,.,by= "SiteCode") # join in site metadata
)
#create Display names
MD <- MD %>%
mutate(DisplayName = case_when(
CharacteristicName == "Al_ugL" ~ "Aluminium",
CharacteristicName == "ANC_ueqL" ~ "Acid Neutralizing Capacity",
CharacteristicName == "Ca_ueqL" ~ "Calcium",
CharacteristicName == "Cl_ueqL" ~ "Chloride",
CharacteristicName == "DOC_mgL" ~ "Dissolved Organic Carbon",
CharacteristicName == "K_ueqL" ~ "Potassium",
CharacteristicName == "Mg_ueqL" ~ "Magnesium",
CharacteristicName == "Na_ueqL" ~ "Sodium",
CharacteristicName == "NH3_mgL" ~ "Ammonia",
CharacteristicName == "NH4_mgL" ~ "Ammonium",
CharacteristicName == "NO3_ueqL" ~ "Nitrate",
CharacteristicName == "SO4_ueqL" ~ "Sulfate",
CharacteristicName == "TN_mgL" ~ "Total Nitrogen",
CharacteristicName == "BP_mmHg" ~ "Air Pressure",
CharacteristicName == "DOsat_pct" ~ "Dissolved Oxygen Saturation",
CharacteristicName == "DO_mgL" ~ "Dissolved Oxygen",
CharacteristicName == "pH" ~ "pH",
CharacteristicName == "SpCond_uScm" ~ "Specific Conductance",
CharacteristicName == "Temp_C" ~ "Water Temperature",
CharacteristicName == "SDepth1_m" ~ "Secchi Depth",
CharacteristicName == "ChlA_ugL" ~ "Chlorophyll A",
CharacteristicName == "PO4_ugL" ~ "Phosphate",
CharacteristicName == "TotDissN_mgL" ~ "Total Dissolved Nitrogen",
CharacteristicName == "TotDissP_ugL" ~ "Total Dissolved Phosphorus",
CharacteristicName == "TP_ugL" ~ "Total Phosphorus",
CharacteristicName == "PenetrationRatio" ~ "Light Penetration Ratio",
CharacteristicName == "NO2+NO3_mgL" ~ "Nitrate + Nitrite",
CharacteristicName == "Discharge_cfs" ~ "Discharge",
CharacteristicName == "Turbidity_NTU" ~ "Turbidity",
CharacteristicName == "NO2_mgL" ~ "Nitrite"
),
CategoryDisplay = case_when(
CharacteristicName == "Al_ugL" ~ "Al",
CharacteristicName == "ANC_ueqL" ~ "ANC",
CharacteristicName == "Ca_ueqL" ~ "Ca",
CharacteristicName == "Cl_ueqL" ~ "Cl",
CharacteristicName == "DOC_mgL" ~ "DOC",
CharacteristicName == "K_ueqL" ~ "K",
CharacteristicName == "Mg_ueqL" ~ "Mg",
CharacteristicName == "Na_ueqL" ~ "Na",
CharacteristicName == "NH3_mgL" ~ "NH3",
CharacteristicName == "NH4_mgL" ~ "NH4",
CharacteristicName == "NO3_ueqL" ~ "NO3",
CharacteristicName == "SO4_ueqL" ~ "SO4",
CharacteristicName == "TN_mgL" ~ "TN",
CharacteristicName == "BP_mmHg" ~ "Air Press.",
CharacteristicName == "DOsat_pct" ~ "DO % Sat.",
CharacteristicName == "DO_mgL" ~ "DO",
CharacteristicName == "pH" ~ "pH",
CharacteristicName == "SpCond_uScm" ~ "Spec. Cond.",
CharacteristicName == "Temp_C" ~ "Water Temp.",
CharacteristicName == "SDepth1_m" ~ "Secchi Depth",
CharacteristicName == "ChlA_ugL" ~ "Chl. A",
CharacteristicName == "PO4_ugL" ~ "PO4",
CharacteristicName == "TotDissN_mgL" ~ "Total Diss. N",
CharacteristicName == "TotDissP_ugL" ~ "Total Diss. P",
CharacteristicName == "TP_ugL" ~ "TP",
CharacteristicName == "PenetrationRatio" ~ "Light Pen. Ratio",
CharacteristicName == "NO2+NO3_mgL" ~ "NO2+NO3",
CharacteristicName == "Discharge_cfs" ~ "Discharge",
CharacteristicName == "Turbidity_NTU" ~ "Turbidity",
CharacteristicName == "NO2_mgL" ~ "NO2"))
#create extra columns. Column for Data Type: these are all numeric, so I took a shortcut. Will need to be changed if factor or ordinal data are added.
MD$DataType <- "numeric"
MD$LowerPoint <- as.numeric(NA) #needs to be Num
MD$UpperPoint <- as.numeric(NA) #needs to be Num
MD$DataName <- MD$CharacteristicName
physical <- c("Discharge_cfs", "DO_mgL", "pH", "SpCond_uScm", "Temp_C")
nutrients <- c("ANC_ueqL", "ChlA_ugL", "DOC_mgL", "TN_mgL", "TP_ugL")
other <- c("Ca_ueqL", "Cl_ueqL", "K_ueqL", "Mg_ueqL", "Na_ueqL", "NH4_mgL",
"NO3_ueqL", "SO4_ueqL", "Turbidity_NTU", "DOsat_pct", "SDepth1_m", "PenetrationRatio")
MD <- MD %>% mutate(Category = case_when(CharacteristicName %in% physical ~ paste0("physical"),
CharacteristicName %in% nutrients ~ paste0("nutrients"),
CharacteristicName %in% other ~ paste0("other_chem"),
TRUE ~ "other_chem")
# CategoryDisplay = case_when(Category == "physical" ~ paste0("In situ physical parameter"),
# Category == "nutrients" ~ paste0("Lab-derived nutrient"),
# Category == "other chemistry" ~ paste0("Other lab-derived parameter"))
)
#-------
# Lower and upper assessment points based on Tables in 2015 reports
# Create matrix that is joined to MD
MD <- MD %>% mutate(
LowerPoint = case_when(ParkCode == "MABI" & Type == "Stream" & CharacteristicName == "DO_mgL" ~ 7,
ParkCode == "MABI" & Type == "Stream" & CharacteristicName == "pH" ~ 6.5,
ParkCode == "MABI" & Type == "Lake" & CharacteristicName == "pH" ~ 6.5,
ParkCode == "MABI" & Type == "Lake" & CharacteristicName == "DO_mgL" ~ 5,
ParkCode == "SAGA" & Type == "Stream" & CharacteristicName == "DO_mgL" ~ 6,
ParkCode == "SAGA" & Type == "Lake" & CharacteristicName == "DO_mgL" ~ 5,
ParkCode == "SAGA" & Type == "Stream" & CharacteristicName == "pH" ~ 6.5,
ParkCode == "SAGA" & Type == "Lake" & CharacteristicName == "pH" ~ 6.5,
ParkCode == "MIMA" & SiteName == "Concord River" &
CharacteristicName == "DO_mgL" ~ 6.0,
ParkCode == "MIMA" & SiteName != "Concord River" &
CharacteristicName == "DO_mgL" ~ 5.0,
ParkCode == "MIMA" & CharacteristicName == "pH" ~ 6.5,
ParkCode == "SAIR" & CharacteristicName == "pH" ~ 6.5,
#ParkCode == "SAIR" & CharacteristicName == "DO_mgL" ~ 6.0,
ParkCode == "SARA" & CharacteristicName == "DO_mgL" ~ 4.0,
ParkCode == "SARA" & CharacteristicName == "pH" ~ 6.5,
ParkCode == "ROVA" & CharacteristicName == "DO_mgL" ~ 4.0,
ParkCode == "ROVA" & CharacteristicName == "pH" ~ 6.5,
ParkCode == "WEFA" & CharacteristicName == "DO_mgL" ~ 5.0,
ParkCode == "WEFA" & CharacteristicName == "pH" ~ 6.5,
ParkCode == "WEFA" & CharacteristicName == "SDepth1_m" ~ 4.5,
ParkCode == "MORR" & CharacteristicName == "DO_mgL" ~ 7.0,
ParkCode == "MORR" & CharacteristicName == "pH" ~ 6.5),
# Thresholds based on state thresholds and NARS EPA 2016/2017 thresholds for most disturbed
# I'll figure out a better way to do this after these thresholds settle down
UpperPoint = case_when(ParkCode == "ACAD" & Type == "Stream" & CharacteristicName == "TP_ugL" ~ 32.6, #EPA new
ParkCode == "ACAD" & Type == "Stream" & CharacteristicName == "TN_mgL" ~ 0.482,#EPA new
#ParkCode == "ACAD" & Type == "Stream" & CharacteristicName == "SpCond_uScm" ~ 1000,#EPA new
ParkCode == "ACAD" & Type == "Lake" & CharacteristicName == "TP_ugL" ~ 27.9,#EPA new
ParkCode == "ACAD" & Type == "Lake" & CharacteristicName == "TN_mgL" ~ 0.655,#EPA new
ParkCode == "ACAD" & Type == "Lake" & CharacteristicName == "ChlA_ugL" ~ 8.43,#EPA new
ParkCode == "MABI" & Type == "Stream" & CharacteristicName == "pH" ~ 8.5,
ParkCode == "MABI" & Type == "Stream" & CharacteristicName == "Turbidity_NTU" ~ 10,
ParkCode == "MABI" & Type == "Stream" & CharacteristicName == "TP_ugL" ~ 32.6, #EPA new
ParkCode == "MABI" & Type == "Stream" & CharacteristicName == "TN_mgL" ~ 0.482,#EPA new
#ParkCode == "MABI" & Type == "Stream" & CharacteristicName == "SpCond_uScm" ~ 1000,#EPA new
ParkCode == "MABI" & Type == "Lake" & CharacteristicName == "pH" ~ 8.5,
ParkCode == "MABI" & Type == "Lake" & CharacteristicName == "TN_mgL" ~ 0.655, #EPA new
ParkCode == "MABI" & Type == "Lake" & CharacteristicName == "TP_ugL" ~ 27.9, #EPA new
#ParkCode == "MABI" & Type == "Lake" & CharacteristicName == "TN_mgL" ~ 0.6, #EPA new; off b/c state thresh exists
ParkCode == "MABI" & Type == "Lake" & CharacteristicName == "ChlA_ugL" ~ 8.43, #EPA new
ParkCode == "SAGA" & Type == "Stream" & CharacteristicName == "pH" ~ 8.0,
ParkCode == "SAGA" & Type == "Stream" & CharacteristicName == "TP_ugL" ~ 32.6, #EPA new
ParkCode == "SAGA" & Type == "Stream" & CharacteristicName == "TN_mgL" ~ 0.482,#EPA new
#ParkCode == "SAGA" & Type == "Stream" & CharacteristicName == "SpCond_uScm" ~ 1000,#EPA new
ParkCode == "SAGA" & Type == "Lake" & CharacteristicName == "pH" ~ 8.5,
ParkCode == "SAGA" & Type == "Lake" & CharacteristicName == "TP_ugL" ~ 27.9,#EPA new
ParkCode == "SAGA" & Type == "Lake" & CharacteristicName == "TN_mgL" ~ 0.655,#EPA new
ParkCode == "SAGA" & Type == "Lake" & CharacteristicName == "ChlA_ugL" ~ 8.43,#EPA new
ParkCode == "MIMA" & SiteName == "Concord River" &
CharacteristicName == "Temp_C" ~ 28.3,
ParkCode == "MIMA" & SiteName != "Concord River" &
CharacteristicName == "Temp_C" ~ 20.3,
ParkCode == "MIMA" & CharacteristicName == "pH" ~ 8.3,
ParkCode == "MIMA" & CharacteristicName == "TP_ugL" ~ 32.6, #EPA new
ParkCode == "MIMA" & CharacteristicName == "TN_mgL" ~ 0.482, #EPA new
#ParkCode == "MIMA" & CharacteristicName == "SpCond_uScm" ~ 1000, #EPA new
ParkCode == "SAIR" & CharacteristicName == "pH" ~ 8.5,
ParkCode == "SAIR" & CharacteristicName == "TP_ugL" ~ 32.6,
ParkCode == "SAIR" & CharacteristicName == "TN_mgL" ~ 0.482,
ParkCode == "SAIR" & CharacteristicName == "Temp_C" ~ 28.3,
ParkCode == "SARA" & CharacteristicName == "pH" ~ 8.5,
ParkCode == "SARA" & CharacteristicName == "TP_ugL" ~ 32.6, #EPA new
ParkCode == "SARA" & CharacteristicName == "TN_mgL" ~ 0.482,#EPA new
#ParkCode == "SARA" & CharacteristicName == "SpCond_uScm" ~ 1000,#EPA new
ParkCode == "ROVA" & CharacteristicName == "pH" ~ 8.5,
ParkCode == "ROVA" & CharacteristicName == "TP_ugL" ~ 32.6, #EPA new
ParkCode == "ROVA" & CharacteristicName == "TN_mgL" ~ 0.482,#EPA new
#ParkCode == "ROVA" & CharacteristicName == "SpCond_uScm" ~ 1000,#EPA new
ParkCode == "WEFA" & CharacteristicName == "Temp_C" ~ 29.4,
ParkCode == "WEFA" & CharacteristicName == "pH" ~ 8.0,
ParkCode == "WEFA" & CharacteristicName == "TP_ugL" ~ 27.9, #EPA new
ParkCode == "WEFA" & CharacteristicName == "TN_mgL" ~ 0.655, #EPA new
ParkCode == "WEFA" & CharacteristicName == "ChlA_ugL" ~ 8.43, #EPA new
#ParkCode == "WEFA" & CharacteristicName == "TP_ugL" ~ 22,#EPA new; off b/c state thresh exists
#ParkCode == "WEFA" & CharacteristicName == "TN_mgL" ~ 0.6,#EPA new; off b/c state thresh exists
#ParkCode == "WEFA" & CharacteristicName == "ChlA_ugL" ~ 7.76,#EPA new; off b/c state thresh exists
ParkCode == "MORR" & CharacteristicName == "Temp_C" ~ 22.0,
ParkCode == "MORR" & CharacteristicName == "pH" ~ 8.5,
ParkCode == "MORR" & CharacteristicName == "TP_ugL" ~ 32.6, #EPA new
#ParkCode == "MORR" & CharacteristicName == "TP_ugL" ~ 32.6, #EPA new; off b/c state thresh exists
ParkCode == "MORR" & CharacteristicName == "TN_mgL" ~ 0.482,#EPA new;
#ParkCode == "MORR" & CharacteristicName == "SpCond_uScm" ~ 1000,#EPA new
ParkCode == "MORR" & CharacteristicName == "Turbidity_NTU" ~ 50,
ParkCode == "MORR" & CharacteristicName == "SO4_ueqL" ~ 5208), #EPA new
Units = ifelse(CharacteristicName == "pH", paste0("pH units"),
ifelse(CharacteristicName == "PenetrationRatio", paste0("pct"),
paste0(Units)))
)
MD$LowerDescription <- ifelse(!is.na(MD$LowerPoint),
paste0("Acceptable ", MD$DisplayName,
" is above ", MD$LowerPoint, " ",
ifelse(MD$DisplayName != "pH", paste0(MD$Units,"."), paste0("."))
), paste0(NA))
# paste0("Acceptable lower limits have not been established for this parameter."))
MD$UpperDescription <- ifelse(!is.na(MD$UpperPoint),
paste0("Acceptable ", MD$DisplayName,
" is below ", MD$UpperPoint, " ",
ifelse(MD$DisplayName != "pH", paste0(MD$Units,"."), paste0("."))
), paste0(NA))
# paste0("Acceptable upper limits have not been established for this parameter."))
MD$AssessmentDetails <- ifelse(!is.na(MD$LowerPoint) | !is.na(MD$UpperPoint),
paste0("See Table 3 of Gawley et al. 2016 for more details."),
NA)
#------
MD <- MD %>% select(Network,
ParkCode,
ShortName,
LongName,
SiteCode,
SiteName,
Lat,
Long,
Type,
CharacteristicName,
DisplayName,
DataName,
Category,
CategoryDisplay,
Units,
LowerPoint,
UpperPoint,
DataType,
LowerDescription,
UpperDescription,
AssessmentDetails)
# Identify and handle metrics within a park that have censored data
waterDat <- waterDat %>% mutate(
`STORET Characteristic Name` = `Local Characteristic Name`,
`Visit Start Date` = format(`Visit Start Date`, "%m/%d/%Y")) %>%
select(
StationID,
`Visit Start Date`,
`Local Characteristic Name`,
SampleDepth,
`Depth Units`,
`Result Value/Text`,
`Lower Quantification Limit`,
`Upper Quantification Limit`) %>%
filter(!is.na(`Result Value/Text`) & `Result Value/Text`!="NA") %>%
droplevels() %>%
rename(MQL = `Lower Quantification Limit`,
UQL = `Upper Quantification Limit`)
waterDat <- waterDat %>%
mutate(Censored = ifelse(grepl("\\*", `Result Value/Text`) &
(!is.na(MQL) | !is.na(UQL)), TRUE, FALSE),
ValueCen = case_when(!grepl("\\*", `Result Value/Text`) ~ paste(`Result Value/Text`),
grepl("\\*", `Result Value/Text`) & !is.na(MQL) ~ paste(MQL),
grepl("\\*", `Result Value/Text`) & !is.na(UQL) ~ paste(UQL)))
assign("waterDat", waterDat, .GlobalEnv)
assign("MD", MD, .GlobalEnv)
if (export == TRUE) {
#write out data as 'Water Data.csv':
write.csv(waterDat, paste0(path, "Water Data.csv"),
row.names = FALSE)
#write out Metadata in wide form...
write.csv(MD, paste0(path, "VizMetaData.csv"),
row.names = FALSE)
cat(paste0("Water Data.csv and VizMetaData.csv successfully saved to: ",
path))
} else {
cat("NETN water data successfully compiled as
waterDat and MD in global environment.")
}
if(cleanEnv == TRUE){
objs <- ls(pos = ".GlobalEnv")
objs <- objs[!objs %in% c("waterDat","MD")]
rm(list = objs, pos = ".GlobalEnv")
}
#return(list(waterDat, MD))
} #end of function
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.