#' Cleaning Data Frame
#'
#' @description This function helps clean the data from the districts
#' master flow data as well as perform some interpolation for discharge.
#' @param data A data.frame from `get_master_flow()`.
#' @param wy_month A numeric vector for water year month.
#'
#' @return A data.frame
#' @export
#' @importFrom dplyr across mutate group_by select left_join ungroup if_else filter pull
#' @importFrom tidyr nest unnest
#' @importFrom purrr map pluck safely keep
#' @examples
#' \dontrun{
#' master_flow <- get_master_flow()
#' cleaning_update <- discharge_cleaning(master_flow)
#'
#' }
discharge_cleaning <- function(data, wy_month = 10){
master_flow <- data %>%
mutate(year = lubridate::year(Date),
month = lubridate::month(Date),
day = lubridate::day(Date)) %>%
mutate(across(c('Q', 'TSS'), as.numeric)) %>%
mutate(Stream = dplyr::if_else(Stream %in% c('Young 303', 'Young 303 Bridge'), 'Young 303', Stream))
linear_results <- master_flow %>%
group_by(Stream, year) %>%
nest() %>%
mutate(model = map(data, safely(~lm(log10(.$Q)~log10(.$GH), data = .)))) %>%
purrr::keep(~length(.) != 0) %>%
mutate(result = map(model, ~.x[['result']]),
result_null = map(result, ~!is.null(.))) %>%
dplyr::filter(result_null == TRUE) %>%
mutate(intercept = as.numeric(map(result, ~pluck(.$coefficients[1]))),
beta = as.numeric(map(result, ~pluck(.$coefficients[2])))) %>%
dplyr::select(Stream,year, intercept, beta) %>% unnest()
master_flow_update <- master_flow %>%
left_join(linear_results, by = c('Stream', 'year')) %>%
group_by(Stream, year) %>%
mutate(Q_flag = if_else(is.na(Q), 'Not Measured', 'Measured'),
Q = as.numeric(if_else(is.na(Q), 10^((log10(GH)*beta)+intercept), Q))) %>%
ungroup %>% mutate(type = 'Observed Q',
dt = lubridate::as_datetime(as.character(Date, format="%Y-%m-%d 12:00:00")))
clean_up(master_flow_update, wy_month = wy_month)
}
#' Water Year Stats
#' @description This function uses the results of the \link[resourceviz]{discharge_cleaning} object to
#' generate mean, maximum, median, standard deviation and some normalization methods (drainage
#' area, scaled by log and standard deviation) per water year.
#' @param procDV A previously created \link[resourceviz]{discharge_cleaning}.
#' @return A \code{tibble} filtered by water year with added meta-data.
#' @export
#'
#' @examples \dontrun{
#'
#' library(resourceviz)
#'
#' master_flow <- get_master_flow()
#' cleaning_update <- discharge_cleaning(master_flow)
#'
#' wy_stats <- get_wy_stats(cleaning_update)
#' }
#'
#' @importFrom lubridate year month day
#' @importFrom dplyr mutate filter group_by summarise slice_head ungroup everything row_number n
#' @importFrom stringr str_c str_remove_all
#' @importFrom stats median sd
#'
get_wy_stats <- function(procDV) {
proc_raw <- procDV %>%
pad_zero_for_logging()
cols <- c('Q', 'TSS')
proc_raw %>%
group_by(Stream, wy) %>%
summarise(across(dplyr::any_of(cols),
list(
max = ~max(.x, na.rm = TRUE),
min = ~min(.x, na.rm = TRUE),
mean = ~mean(.x, na.rm = TRUE),
median = ~median(.x, na.rm = TRUE),
stdev = ~sd(.x, na.rm = TRUE),
coef_var = ~sd(.x, na.rm = TRUE)/mean(.x, na.rm = TRUE),
max_sdnorm = ~log(max(.x, na.rm = TRUE))/sd(log(.x), na.rm = TRUE),
min_sdnorm = ~log(min(.x, na.rm = TRUE))/sd(log(.x), na.rm = TRUE),
mean_sdnorm = ~log(mean(.x, na.rm = TRUE))/sd(log(.x), na.rm = TRUE),
med_sdnorm = ~log(median(.x, na.rm = TRUE))/sd(log(.x), na.rm = TRUE),
sd_norm = ~sd(log(.x), na.rm = TRUE)))) %>%
ungroup() %>%
dplyr::left_join(procDV %>%
dplyr::select(
Stream,
wy,
obs_per_wy) %>%
dplyr::group_by(Stream,wy) %>%
dplyr::slice(n = 1) %>%
dplyr::ungroup(), by = c('Stream', 'wy')) %>%
dplyr::group_by(Stream) %>%
dplyr::add_count(name = 'wy_count') %>%
dplyr::ungroup() %>%
dplyr::relocate(Stream, obs_per_wy, wy_count, dplyr::everything())
}
#' Get HUC12 Info
#'
#' @param path character; typically the T:Drive
#' @param sf A sf object.
#' @param layer If using a gdb or gpkg indicate the layer name you want.
#' @param ... additional arguments
#' @return A data.frame
#' @export
#' @note This function assumes that there is a gdb with a layer name 'PotentialUnits'. If not
#' (missing) then it defaults to the sf object of the units.
get_huc12_information <- function(path = NULL, sf = NULL, layer = 'PotentialUnits', ...){
# get units for project within the T: drive
if(!is.null(sf)){pa <- sf %>% sf::st_cast('MULTIPOLYGON')}
if(!is.null(path)){
if(stringr::str_detect(path, '.gdb')) {
pa <- sf::st_read(path, layer = layer, quiet = T) %>%
sf::st_cast('MULTIPOLYGON')
} else {
pa <- sf::st_read(path, quiet = T) %>%
sf::st_cast('MULTIPOLYGON')
}
}
stopifnot(inherits(pa, 'sf'))
pa <- pa[!sf::st_is_empty(pa),]
## Get HUC 12's that intersect the units
get_centroid <- sf::st_centroid(pa)
# huc12s <- nhdplusTools::get_huc12(sf::st_as_sfc(sf::st_bbox(pa)))
vars <- c('max', 'mean', 'median', 'min', 'stdDev')
huc12s <- districth12 %>%
sf::st_transform(sf::st_crs(pa)) %>%
dplyr::mutate(dplyr::across(dplyr::any_of(vars), ~.*0.0393701))
huc12_intersect <- sf::st_intersects(huc12s, get_centroid)
huc12_intersect_logic <- lengths(huc12_intersect) > 0
huc12s <- huc12s[huc12_intersect_logic,]
huc12s <- huc12s %>% dplyr::bind_rows()
huc12s <- huc12s %>% dplyr::distinct(geometry, .keep_all = T)
huc12s_df <- huc12s %>% sf::st_drop_geometry()
hucs_prop <- huc12s %>%
sf::st_intersection(pa) %>%
dplyr::mutate(unit_area = as.numeric(units::set_units(st_area(.), 'acres'))) %>%
sf::st_drop_geometry() %>%
dplyr::group_by(name) %>%
dplyr::summarise(sum_of_units = sum(unit_area),
proportion = 100*(sum_of_units/areaacres)) %>%
dplyr::slice(n = 1) %>%
dplyr::left_join(huc12s_df %>% dplyr::select(dplyr::all_of(vars), name), by = 'name' )
huc12s_df <- huc12s_df %>%
dplyr::select(huc12, areaacres, name, dplyr::all_of(vars)) %>%
dplyr::left_join(hucs_prop) %>%
dplyr::arrange(proportion)
}
#' Finalize Q Data
#' @description This function will combine all the previously
#' created lists (`wl_sites_daily` and `wl_sites_hourly`) and perform
#' the log10(Q)~log10(height) regression parameters to daily and hourly data. It will
#' then add to the original `data` for a collated final dataset.
#'
#'
#' @param data Previously created data.frame using \link{get_master_flow}.
#' @param wl_sites_daily Previously created list with daily hobo data.
#' @param wl_sites_hourly Previously created list with hourly hobo data.
#' @param year A numeric vector determining what year to regress the `wl_sites_*` on.
#' @param wy_month A numeric vector determining the water year month.
#' @return A data.frame
#' @export
#'
#' @examples
finalize_q_data <- function(data, wl_sites_daily, wl_sites_hourly, year, wy_month = 10) {
mf_mod <- data %>%
filter(year %in% {{year}},
!is.na(GH),
!is.na(Q),
Q > 0,
GH > 0,
Stream %in% names(wl_sites_daily)) %>%
group_by(Stream) %>%
nest() %>%
mutate(model = map(data, safely(~lm(log10(.$Q)~log10(.$GH), data = .)))) %>%
purrr::keep(~length(.) != 0) %>%
mutate(result = map(model, ~.x[['result']]),
result_null = map(result, ~!is.null(.))) %>%
dplyr::filter(result_null == TRUE) %>%
mutate(intercept = map(result, ~pluck(.$coefficients[1])),
beta = map(result, ~pluck(.$coefficients[2]))) %>%
dplyr::select(Stream,intercept, beta) %>% unnest() %>% ungroup()
mf_mod_final_daily <- map(wl_sites_daily, ~merge(., mf_mod, by = c('Stream')))
mod_results_final_daily <- map(mf_mod_final_daily, ~.x %>%
mutate(Q = 10^((log10(.$GH)*.$beta)+.$intercept),
type = 'Daily Q',
year = lubridate::year(dt),
month = lubridate::month(dt),
day = lubridate::day(dt))) %>% bind_rows() %>%
clean_up(wy_month = wy_month)
mf_mod_final_hourly <- map(wl_sites, ~merge(., mf_mod, by = 'Stream'))
mod_results_final_hourly <- map(mf_mod_final_hourly, ~.x %>%
mutate(Q = 10^((log10(.$GH)*.$beta)+.$intercept),
type = 'Hourly Q',
year = lubridate::year(dt),
month = lubridate::month(dt),
day = lubridate::day(dt))) %>% bind_rows() %>%
clean_up(wy_month = wy_month)
final_data <- list(final_daily = mod_results_final_daily,final_hourly = mod_results_final_hourly)
}
#' Getting Treatment Proportions
#'
#' @param path character; typically the T:Drive
#' @param sf An sf object.
#' @param layer If using a gdb or gpkg indicate the layer name you want.
#' @param ... Additional arguments.
#' @return A data.frame
#' @export
#'
get_treatment_proportion <- function(path = NULL,sf = NULL, layer = 'PotentialUnits', ...) {
if(missing(path) && !is.null(sf)){pa <- sf %>% sf::st_cast('MULTIPOLYGON')}
if(!is.null(path)){
if(stringr::str_detect(path, '.gdb')) {
pa <- sf::st_read(path, layer = layer, quiet = T) %>%
sf::st_cast('MULTIPOLYGON')
} else {
pa <- sf::st_read(path, quiet = T) %>%
sf::st_cast('MULTIPOLYGON')
}
}
stopifnot(inherits(pa, 'sf'))
pa <- pa[!sf::st_is_empty(pa),]
possible_treatment_names <- c('Treatment', 'treatment','Harvest_Tr', 'activity', 'Activity')
possible_col_names <- c('unit', 'Unit', 'Units', 'units', 'acres', 'Acres')
df <- pa %>%
dplyr::mutate(acres_unit = as.numeric(units::set_units(sf::st_area(.), 'acres'))) %>%
sf::st_drop_geometry() %>%
dplyr::select(dplyr::any_of(c(possible_treatment_names, possible_col_names, 'acres_unit')))
group_name <- possible_treatment_names[match(colnames(df), possible_treatment_names, nomatch = 0)]
df %>%
dplyr::group_by(!!rlang::sym(group_name)) %>%
dplyr::summarise(acres = sum(acres_unit),
proportion = 100*(sum(acres_unit)/sum(df$acres_unit))) %>%
dplyr::mutate(across(dplyr::any_of(group_name), ~stringr::str_to_title(.))) %>%
dplyr::rename_with(.cols = dplyr::any_of(group_name), ~stringr::str_to_title(.)) %>%
dplyr::arrange(proportion)
}
#' Get TSS Boxplot
#'
#' @param data A data.frame with Stream, year, month and TSS cols.
#' @param stream A character vector of stream names
#' @param remove_outliers logical. Whether to remove outliers or not.
#' @param ... Arguments to pass to appedix functions.
#' @return A ggplot.
#' @export
#' @importFrom lubridate year
#' @importFrom ggplot2 ggplot aes stat_boxplot geom_boxplot stat_summary scale_y_continuous
#' @importFrom rlang .data
#'
get_tss_boxplot <- function(data, stream,remove_outliers = FALSE, ...) {
# master_flow <- get_master_flow()
#
# cleaning_update <- discharge_cleaning(master_flow)
site <- data %>%
filter(Stream == dplyr::all_of(stream)) %>%
filter(month %in% c(4,5,6,7,8,9)) %>%
mutate(season = dplyr::if_else(month %in% c(4,5,6), 'Runoff (April-June)', 'Lowflow (July-Aug)'),
season = factor(season, levels = c('Runoff (April-June)', 'Lowflow (July-Aug)')))
if(length(stream) == 1){
bx <- ggplot(data = site,
aes(x = year, y = TSS, group = year)) +
stat_boxplot(geom ='errorbar', width = 0.6) +
geom_boxplot(width = 0.6, fill = "lightgrey",
outlier.shape = ifelse(remove_outliers, NA, 20)) +
stat_summary(fun.data = ~n_fun(., site %>% dplyr::slice_max(TSS) %>% pull(TSS)),
geom = "text",
hjust = 0.5) +
expand_limits(y = 0) +
geom_hline(yintercept = 20 ) +
scale_y_continuous(sec.axis = dup_axis(label = NULL,
name = NULL),
expand = expansion(mult = c(0, 0))) +
labs(x = 'Year', y = 'Total Suspended Sediment (TSS) mg/l',
title = paste0('Figure 1: ', stream,' Creek Longitudinal TSS Monitoring'),
subtitle = paste0(site %>%
pull(year) %>%
min(na.rm = T), ' - ',
site %>%
pull(year) %>%
max(na.rm = T))
) +
custom_theme()+
facet_wrap(~season)
legend_plot <- ggplot_box_legend()
cowplot::plot_grid(bx,
legend_plot,
nrow = 1,
rel_widths = c(1, .5))
} else {
bx <- ggplot(data = site,
aes(x = year, y = TSS, group = year)) +
stat_boxplot(geom ='errorbar', width = 0.6) +
geom_boxplot(width = 0.6, fill = "lightgrey",
outlier.alpha = ifelse(remove_outliers, 0, 1)) +
stat_summary(fun.data = ~n_fun(., site %>% dplyr::slice_max(TSS) %>% dplyr::pull(TSS)),
geom = "text", hjust = 0.5) +
expand_limits(y = 0) +
scale_y_continuous(sec.axis = dup_axis(label = NULL,
name = NULL),
expand = expansion(mult = c(0, 0))) +
geom_hline(yintercept = 20 ) +
labs(x = 'Year', y = 'Total Suspended Sediment (TSS) mg/l') +
custom_theme()+
facet_wrap(Stream~season,...)
bx
}
}
#' Get TSS Timeseries
#'
#' @param stream A character vector of stream names
#' @param year A numeric vector of years, e.g. 2012:2015, 2015, etc.
#' @param ... Arguments to pass to appedix functions.
#' @return A ggplot.
#'
#' @export
#'
get_tss_timeseries <- function(stream, year, ...){
master_flow <- get_master_flow()
cleaning_update <- discharge_cleaning(master_flow)
if(missing(year)){
site <- cleaning_update %>%
filter(Stream == dplyr::all_of(stream))
} else {
site <- cleaning_update %>%
filter(Stream == dplyr::all_of(stream),
year %in% {{year}})
}
bx <- ggplot(data = site,
aes(x = Date, y = TSS, group = year)) +
expand_limits(y = 0) +
geom_point(size = 0.25) +
geom_line(size = .5) +
scale_y_continuous(sec.axis = dup_axis(label = NULL,
name = NULL),
expand = expansion(mult = c(0, 0))) +
labs(x = 'Year', y = 'Total Suspended Sediment (TSS) mg/l') +
custom_theme()+
facet_wrap(dplyr::vars(Stream),...)
bx
}
#' Get Flow Data
#'
#' @return A data.frame of local discharge and TSS data
#' @export
#' @noRd
get_master_flow <- function() {
master_flow <- readxl::read_xlsx('C:/Users/joshualerickson/Box/2500WatershedAirMgmt/EUR/stream/master_flow_data.xlsx')
}
#'Water Year These functions are hi-jacked from smwrBase package.
#'
#'Create an ordered factor or numeric values from a vector of dates based on
#'the water year.
#' @noRd
#' @param x an object of class "Date" or "POSIXt." Missing values are permitted and
#'result in corresponding missing values in the output.
#' @param wy_month A numeric indicating the month the water year begins.
#' @param numeric a logical value that indicates whether the returned values
#'should be numeric \code{TRUE} or an ordered factor \code{FALSE}. The default
#'value is \code{FALSE}.
#' @return An ordered factor or numeric vector corresponding to the water year.
#' @note The water year is defined as the period from October 1 to September 30.
#'The water year is designated by the calendar year in which it ends. Thus, the
#'year ending September 30, 1999, is the "1999 water year."
#' @seealso
#Flip for production/manual
#'\code{\link[lubridate]{year}}
#\code{year} (in lubridate package)
waterYear <- function(x, wy_month = 10, numeric=FALSE) {
## Coding history:
## 2005Jul14 DLLorenz Initial dated verion
## 2010Feb17 DLLorenz Added option to return numerics
## 2011Jun07 DLLorenz Conversion to R
## 2012Aug11 DLLorenz Integer fixes
## 2013Feb15 DLLorenz Prep for gitHub
##
x <- as.POSIXlt(x)
yr <- x$year + 1900L
mn <- x$mon + 1L
## adjust for water year
yr <- yr + ifelse(mn < as.integer(wy_month), 0L, 1L)
if(numeric)
return(yr)
ordered(yr)
}
#' Add Counts
#' @description Adds counts of observation per water and month
#'
#' @param data A daily value df
#'
#' @return counts within df
#' @noRd
add_date_counts <- function(data) {
dplyr::group_by(data, Stream, wy) %>%
filter(!is.na(Q)) %>%
dplyr::add_count(name = 'obs_per_wy') %>%
dplyr::ungroup() %>%
dplyr::group_by(Stream, wy, month) %>%
dplyr::add_count(name = 'obs_per_month') %>%
dplyr::ungroup()
}
#' water year to months
#' @description Change wy_month to doy.
#' @param wy_month A numeric
#' @param leap Logical
#' @return A numeric value
#' @noRd
month_to_doy <- function(wy_month, leap = FALSE) {
ifelse(isTRUE(leap),
dplyr::case_when(wy_month == 1 ~ 1,
wy_month == 2 ~ 32,
wy_month == 3 ~ 61,
wy_month == 4 ~ 92,
wy_month == 5 ~ 122,
wy_month == 6 ~ 153,
wy_month == 7 ~ 183,
wy_month == 8 ~ 214,
wy_month == 9 ~ 245,
wy_month == 10 ~ 275,
wy_month == 11 ~ 306,
wy_month == 12 ~ 336,
TRUE ~ NA_real_)
,
dplyr::case_when(wy_month == 1 ~ 1,
wy_month == 2 ~ 32,
wy_month == 3 ~ 60,
wy_month == 4 ~ 91,
wy_month == 5 ~ 122,
wy_month == 6 ~ 152,
wy_month == 7 ~ 182,
wy_month == 8 ~ 213,
wy_month == 9 ~ 244,
wy_month == 10 ~ 274,
wy_month == 11 ~ 305,
wy_month == 12 ~ 335,
TRUE ~ NA_real_)
)
}
#' Prepping for loggin
#'
#' @param data data.frame
#'
#' @return values are padded if zero for parameter of interest
#' @noRd
#'
pad_zero_for_logging <- function(data){
param_names <- c("Q","TSS")
cols_to_update <- names(data[which(names(data) %in% param_names)])
data %>%
mutate(across(dplyr::all_of(cols_to_update), ~ifelse(.x <= 0 , .x + 0.000001, .x)))
}
#' Clean-Up helper
#'
#' @param data data.frame
#' @param wy_month A numeric vector determining the wy month.
#' @return A data.frame
#' @noRd
#'
clean_up <- function(data, wy_month = 10) {
leap_years <- seq(1832,3048, 4)
data %>%
mutate(Date = lubridate::as_date(Date),
doy=lubridate::yday(Date),
wy_doy = ifelse(!(year %in% leap_years),ifelse(doy >= month_to_doy(wy_month, leap = F),
doy-month_to_doy(wy_month, leap = F)+1,
(365-month_to_doy(wy_month, leap = F)+1+doy)),
ifelse(doy >= month_to_doy(wy_month, leap = T),
doy-month_to_doy(wy_month, leap = T)+1,
(366-month_to_doy(wy_month, leap = T)+1+doy))),
month_day = str_c(month, day, sep = "-"),
wy = waterYear(Date, wy_month, TRUE),
month_abb = factor(month.abb[month], levels = month.abb),
month_day = str_c(month, day, sep = "-")) %>%
add_date_counts()
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.