#' Run the WRTDS model through EGRET for MacroSheds data
#'
#' ms_run_egret prepares data for the EGRET package and runs the Weighted Regression
#' on Time, Discharge, and Season (WRTDS) model.
#'
#' @keywords internal
#' @author Spencer Rhea
#' @author Mike Vlah, \email{vlahm13@@gmail.com}
#' @author Wes Slaughter
#' @param stream_chemistry \code{data.frame} in MacroSheds format (see [MacroSheds EDI](https://portal.edirepository.org/nis/mapbrowse?scope=edi&identifier=1262)
#' Must contain only one stream chemistry variable and one site_code to comply with WRTDS.
#' A \code{tibble} of stream chemistry, in
#' MacroSheds format, can be generated by [ms_load_product()].
#' @param discharge \code{data.frame} in MacroSheds format (see [MacroSheds EDI](https://portal.edirepository.org/nis/mapbrowse?scope=edi&identifier=1262)
#' Must contain discharge for only one site_code to comply with WRTDS.
#' A \code{tibble} of discharge, in
#' MacroSheds format, can be generated by [ms_load_product()].
#' @param prep_data logical. Should data be thinned/altered to follow EGRET recommendations?
#' See details for more information.
#' @param run_egret logical. If FALSE, the EGRET model will not be run and an EGRET
#' eList will be returned. This argument is intended for those who want to change
#' default parameters in the EGRET model but need data in the eList format.
#' @param Kalman logical. Should EGRET run the Kalman filter on WRTDS results? See
#' [EGRET::WRTDSKalman()] for more information.
#' @param site_data tibble. Default NULL, if you are using a non-MacroSheds site you must
#' supply a tibble with the columns: site_code, ws_area_ha, latitude, longitude. If
#' using a MacroSheds site you can leave this argument NULL and the MacroSheds site_data
#' table will be retrieved.
#' @param quiet logical. Should warnings be printed to console?
#' @return returns a \code{list} in the EGRET format with model results.
#' @details
#' WRTDS is a model to predict stream chemistry at a daily time step
#' based on the season, discharge, and time since last grab sample. This model is
#' useful for calculating flux when sampling is sparse (greater than a weekly
#' sampling frequency). For more information on the WRTDS and EGRET package
#' see <https://github.com/USGS-R/EGRET>.
#'
#' \code{prep_data} will remove NAs from the dataset, set all 0 values in stream
#' chemistry dataset to the minimum value in the dataset (this is required because EGRET will
#' fail if there are 0 values in a dataset), remove years with fewer than 6 chemistry samples,
#' and remove datetimes when there is chemistry but no discharge data reported.
#' @seealso [ms_load_product()], [ms_conversions()]
ms_run_egret <- function(stream_chemistry, discharge, prep_data = TRUE,
run_egret = TRUE, kalman = FALSE, quiet = FALSE,
site_data = NULL){
library("dplyr", quietly = TRUE)
check_suggested_pkgs(c('EGRET'))
# Checks
if(any(! c('date', 'site_code', 'var', 'val') %in% names(stream_chemistry))){
stop('stream_chemistry must be in MacroSheds format (required columns: date, site_code, var, val)')
}
if(any(! c('site_code', 'var', 'val', 'date') %in% names(discharge))){
stop('discharge must be in MacroSheds format (required columns: date, site_code, var, val)')
}
if(! length(unique(macrosheds::ms_drop_var_prefix_(stream_chemistry$var))) == 1){
stop('Only one chemistry variable can be run at a time.')
}
if((! length(unique(stream_chemistry$site_code)) == 1) || (! length(unique(discharge$site_code)) == 1)){
stop('Only one site can be run in EGRET at a time')
}
if(! unique(stream_chemistry$site_code) == unique(discharge$site_code)){
stop('stream_chemistry and discharge must contain the same site_code')
}
requireNamespace('macrosheds', quietly = TRUE)
# Get var and site info
if(is.null(site_data)){
site_data <- macrosheds::ms_site_data
if(! unique(stream_chemistry$site_code) %in% site_data$site_code){
stop('This site is not in the MacroSheds dataset, provide a site_data table with the names: site_code, ws_area_ha, latitude, longitude')
}
} else{
if(!all(names(site_data) %in% c('site_code', 'ws_area_ha', 'latitude', 'longitude'))){
stop('If you are not using a macrosheds site, you must supply site_data with a tibble with the names: site_code, ws_area_ha, latitude, longitude')
}
site_data <- site_data %>%
mutate(site_type = 'stream_gauge')
}
ms_vars <- macrosheds::ms_vars_ts %>%
filter(unit != 'kg/ha/d') %>%
dplyr::select(variable_code, unit) %>%
distinct()
site_code <- unique(stream_chemistry$site_code)
#### Prep Files ####
if(prep_data){
# EGRET does not like NAs
stream_chemistry <- stream_chemistry %>%
filter(!is.na(val))
discharge <- discharge %>%
filter(!is.na(val))
# Egret can't accept 0s in the column for min val (either hack egret or do this or
# look for detection limits)
min_chem <- stream_chemistry %>%
filter(val > 0) %>%
pull(val) %>%
min()
if(!min_chem == Inf){
stream_chemistry <- stream_chemistry %>%
mutate(val = ifelse(val == 0, !!min_chem, val))
}
# Filter so there is only Q going into the model that also has chem
stream_chemistry <- stream_chemistry %>%
mutate(year = lubridate::year(datetime),
month = lubridate::month(datetime)) %>%
mutate(waterYear = ifelse(month %in% c(10, 11, 12), year+1, year))
# Get years with at least 6 chemistry samples (bi-monthly sampling is a
# reasonable requirement?)
years_with_data <- stream_chemistry %>%
group_by(waterYear) %>%
summarise(n = n()) %>%
filter(n >= 6) %>%
pull(waterYear)
# Filter Q and chem to overlap in a water-year
chem_dates <- get_start_end(stream_chemistry)
q_dates <- get_start_end(discharge)
start_date <- if(chem_dates[1] > q_dates[1]) { chem_dates[1] } else{ q_dates[1] }
end_date <- if(chem_dates[2] < q_dates[2]) { chem_dates[2] } else{ q_dates[2] }
discharge <- discharge %>%
filter(datetime >= !!start_date,
datetime <= !!end_date)
stream_chemistry <- stream_chemistry %>%
filter(datetime >= !!start_date,
datetime <= !!end_date)
# Filter discharge to only include water years with chemistry sampling
discharge <- discharge %>%
mutate(year = lubridate::year(datetime),
month = lubridate::month(datetime)) %>%
mutate(waterYear = ifelse(month %in% c(10, 11, 12), year+1, year)) %>%
filter(waterYear %in% !!years_with_data)
# Remove times when there is a chem sample and no Q reported
samples_to_remove <- left_join(stream_chemistry, discharge, by = 'datetime') %>%
filter(is.na(val.y)) %>%
pull(datetime)
stream_chemistry <- stream_chemistry %>%
filter(!datetime %in% samples_to_remove)
}
# Set up EGRET Sample file
Sample_file <- tibble(Name = site_code,
Date = as.Date(stream_chemistry$datetime),
ConcLow = stream_chemistry$val,
ConcHigh = stream_chemistry$val,
Uncen = 1,
ConcAve = stream_chemistry$val,
Julian = as.numeric(julian(lubridate::ymd(stream_chemistry$datetime),origin=as.Date("1850-01-01"))),
Month = lubridate::month(stream_chemistry$datetime),
Day = lubridate::yday(stream_chemistry$datetime),
DecYear = decimalDate(stream_chemistry$datetime),
MonthSeq = get_MonthSeq(stream_chemistry$datetime)) %>%
mutate(SinDY = sin(2*pi*DecYear),
CosDY = cos(2*pi*DecYear)) %>%
mutate(waterYear = ifelse(Month %in% c(10, 11, 12), lubridate::year(Date) + 1, lubridate::year(Date))) %>%
dplyr::select(Name, Date, ConcLow, ConcHigh, Uncen, ConcAve, Julian, Month, Day,
DecYear, MonthSeq, waterYear, SinDY, CosDY)
# Set up EGRET Daily file
Daily_file <- tibble(Name = site_code,
Date = as.Date(discharge$datetime),
Q = discharge$val/1000,
Julian = as.numeric(julian(lubridate::ymd(discharge$datetime),origin=as.Date("1850-01-01"))),
Month = lubridate::month(discharge$datetime),
Day = lubridate::yday(discharge$datetime),
DecYear = decimalDate(discharge$datetime),
MonthSeq = get_MonthSeq(discharge$datetime),
Qualifier = discharge$ms_status)
if(prep_data){
# Egret can't handle 0 in Q, setting 0 to the minimum Q ever reported seem reasonable
min_flow <- min(Daily_file$Q[Daily_file$Q > 0], na.rm = TRUE)
no_flow_days <- Daily_file %>%
filter(Q == 0) %>%
pull(Date)
Daily_file <- Daily_file %>%
mutate(Q = ifelse(Q <= 0, !!min_flow, Q))
}
Daily_file <- Daily_file %>%
mutate(Q7 = zoo::rollmean(Q, 7, fill = NA, align = 'right'),
Q30 = zoo::rollmean(Q, 30, fill = NA, align = 'right'),
LogQ = log(Q))
Daily_file <- tibble::rowid_to_column(Daily_file, 'i') %>%
dplyr::select(Name, Date, Q, Julian, Month, Day, DecYear, MonthSeq, Qualifier,
i, LogQ, Q7, Q30)
# Set up INFO table
var <- macrosheds::ms_drop_var_prefix_(unique(stream_chemistry$var))
var_unit <- ms_vars %>%
filter(variable_code == !!var) %>%
pull(unit)
site_lat <- site_data %>%
filter(site_code == !!site_code) %>%
pull('latitude')
site_lon <- site_data %>%
filter(site_code == !!site_code) %>%
pull('longitude')
site_ws_area <- site_data %>%
filter(site_code == !!site_code,
site_type == 'stream_gauge') %>%
pull('ws_area_ha')
site_ws_area <- site_ws_area * 100
new_point <- sf::st_sfc(sf::st_point(c(site_lon, site_lat)), crs = 4326) %>%
sf::st_transform(., crs = 4267)
INFO_file <- tibble(agency_cd = 'macrosheds',
site_no = site_code,
station_nm = site_code,
site_tp_code = 'ST',
lat_va = site_lat,
long_va = site_lon,
dec_lat_va = site_lat,
dec_long_va = site_lon,
coord_meth_cd = NA,
coord_acy_cd = NA,
coord_datum_cd = NA,
dec_coord_datum_cd = NA,
district_cd = NA,
state_cd = NA,
county_cd = NA,
country_cd = NA,
land_net_ds = NA,
map_nm = NA,
map_scale_fc = NA,
alt_va = NA,
alt_meth_cd = NA,
alt_acy_va = NA,
alt_datum_cd = NA,
huc_cd = NA,
basin_cd = NA,
topo_cd = NA,
instruments_cd = NA,
construction_dt = NA,
inventory_dt = NA,
drain_area_va = site_ws_area*2.59,
contrib_drain_area_va = NA,
tz_cd = 'UTC',
local_time_fg = 'Y',
reliability_cd = NA,
gw_file_cd = NA,
nat_aqfr_cd = NA,
aqfr_cd = NA,
aqfr_type_cd = NA,
well_depth_va = NA,
hole_depth_va = NA,
depth_src_cd = NA,
project_no = NA,
shortName = site_code,
drainSqKm = site_ws_area,
staAbbrev = site_code,
param.nm = var,
param.units = var_unit,
paramShortName = var,
paramNumber = NA,
constitAbbrev = var,
paStart = 10,
paLong = 12)
eList <- EGRET::mergeReport(INFO_file, Daily_file, Sample_file,
verbose = !quiet)
if(! run_egret){
return(eList)
}
eList <- try(EGRET::modelEstimation(eList, verbose = !quiet))
if(inherits(eList, 'try-error')){
stop('EGRET failed while running WRTDS. See https://github.com/USGS-R/EGRET for reasons data may not be compatible with the WRTDS model.')
}
if(kalman){
eList <- EGRET::WRTDSKalman(eList, verbose = !quiet)
if(prep_data){
# Set flux and Q to 0 and conc to NA on no flow days
eList$Daily <- eList$Daily %>%
mutate(Q = ifelse(Date %in% !!no_flow_days, 0, Q),
LogQ = ifelse(Date %in% !!no_flow_days, 0, LogQ),
Q7 = ifelse(Date %in% !!no_flow_days, 0, Q7),
Q30 = ifelse(Date %in% !!no_flow_days, 0, Q30),
ConcDay = ifelse(Date %in% !!no_flow_days, NA, ConcDay),
FluxDay = ifelse(Date %in% !!no_flow_days, 0, FluxDay),
FNConc = ifelse(Date %in% !!no_flow_days, NA, FNConc),
FNFlux = ifelse(Date %in% !!no_flow_days, 0, FNFlux),
GenFlux = ifelse(Date %in% !!no_flow_days, 0, GenFlux),
GenConc = ifelse(Date %in% !!no_flow_days, 0, GenConc))
}
} else if(prep_data){
# Set flux and Q to 0 and conc to NA on no flow days
eList$Daily <- eList$Daily %>%
mutate(Q = ifelse(Date %in% !!no_flow_days, 0, Q),
LogQ = ifelse(Date %in% !!no_flow_days, 0, LogQ),
Q7 = ifelse(Date %in% !!no_flow_days, 0, Q7),
Q30 = ifelse(Date %in% !!no_flow_days, 0, Q30),
ConcDay = ifelse(Date %in% !!no_flow_days, NA, ConcDay),
FluxDay = ifelse(Date %in% !!no_flow_days, 0, FluxDay),
FNConc = ifelse(Date %in% !!no_flow_days, NA, FNConc),
FNFlux = ifelse(Date %in% !!no_flow_days, 0, FNFlux))
}
return(eList)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.