knitr::opts_chunk$set(echo = TRUE)

# mandatory libraries
library(tidyverse)
library(tidyr)
# speed-enhancing libraries
library(data.table)
# report-enhancing libraries
library(DT)
# libraries just for introduction/study site sections
library(leaflet)
library(dataRetrieval)
library(cowplot)
library(reshape)

# default values
input_folder <- paste0(system.file("extdata", package = "HyMETT"), "/")
sites <- c("01013500", "01021470", "01022500", "01030500", "01031500", "01047000")
# Decimal latitude and longitude
lat  <- c(47.2375, 44.8008333, 44.608056, 45.50111, 45.175, 44.86916667)
long <- c(-68.58278, -67.725, -67.935278, -68.3058333, -69.3147222, -69.955)
# .drop = c('V1') # extra column of row numbers to drop during CSV import
# Set up some default values.
# GOF metrics to plot
metric_list <- c("percent_bias", "volumetric_efficiency", "spearman_rho", "NRMSE")
# Categories of annual statistics to plot for general GOF and monthly GOF
category_list <- c("Mean", "Min", "Max", "Percent Annual", "Percentile", "Q")
category_list_monthly <- c("Mean", "Min", "Max", "Percent Annual")
# Annual statistics to plot for comparisons and trends
stat_list <- c("annual_mean", "high_q3", "low_q7", "annual_1_percentile", "Aug_mean")
# TODO: Add a catch-all category for statistics that don't fit into another category.
# TODO: Make stat vs. metric names consistent.
# TODO: Add low-flow statistics to a category.
#------- Import all necessary data

gof <- purrr::map(sites, function(site){
  # import as data frame
  paste0(input_folder, site, '_GOF.csv') %>%
    data.table::fread(input = ., data.table = F) %>%
    tibble::as_tibble() %>%
    # drop the file's site column and add the site number
    dplyr::select(-one_of('site_no')) %>%
    dplyr::mutate(site = site) %>%
    dplyr::select(site, dplyr::everything()) %>%
    dplyr::rename(`Annual Stat` = annual_stat) %>%
    dplyr::rename(name = metric) %>%
    # add statistic categories (later matches override earlier ones)
    dplyr::mutate(category = NA) %>%
    dplyr::mutate(category = ifelse(grepl('mean', `Annual Stat`, ignore.case = T), 'Mean', category)) %>%
    dplyr::mutate(category = ifelse(grepl('max', `Annual Stat`, ignore.case = T), 'Max', category)) %>%
    dplyr::mutate(category = ifelse(grepl('min', `Annual Stat`, ignore.case = T), 'Min', category)) %>%
    dplyr::mutate(category = ifelse(grepl('percent_annual', `Annual Stat`, ignore.case = T), 'Percent Annual', category)) %>%
    dplyr::mutate(category = ifelse(grepl('CVD', `Annual Stat`, ignore.case = T), 'CVD', category)) %>%
    dplyr::mutate(category = ifelse(grepl('percentile', `Annual Stat`, ignore.case = T), 'Percentile', category)) %>%
    dplyr::mutate(category = ifelse(grepl('sd', `Annual Stat`, ignore.case = T), 'SD', category)) %>%
    dplyr::mutate(category = ifelse(grepl('q', `Annual Stat`, ignore.case = T), 'Q', category)) %>%
    dplyr::mutate_if(is.numeric, round, digits = 2)
}) %>%
  dplyr::bind_rows()

## Streamflow - Daily
daily_sim <- purrr::map(sites, function(site){
  paste0(input_folder, site, '_mod_daily.csv') %>%
    data.table::fread(input = ., data.table = F) %>%
    dplyr::mutate(date = as.Date(Date)) %>%
    dplyr::mutate(site = site) %>%
    dplyr::select(site, dplyr::everything()) %>%
    dplyr::select(-one_of('site_no')) %>%
    dplyr::select(-one_of('Date')) %>%
    dplyr::rename(Q = value) %>%
    tibble::as_tibble()
})
daily_sim_long <- dplyr::bind_rows(daily_sim)

daily_obs <- purrr::map(sites, function(site){
  paste0(input_folder, site, '_obs_daily.csv') %>%
    data.table::fread(input = ., data.table = F) %>%
    dplyr::mutate(date = as.Date(Date)) %>%
    dplyr::mutate(site = site) %>%
    dplyr::select(site, dplyr::everything()) %>%
    dplyr::select(-one_of('site_no')) %>%
    dplyr::select(-one_of('Date')) %>%
    dplyr::rename(Q = value) %>%
    tibble::as_tibble()
})
daily_obs_long <- dplyr::bind_rows(daily_obs)

## Streamflow - Annual
annual_sim <- purrr::map(sites, function(site){
  paste0(input_folder, site, '_mod_annual.csv') %>%
    data.table::fread(input = ., data.table = F) %>%
    dplyr::mutate(site = site) %>%
    dplyr::select(-one_of('site_no')) %>%
    tibble::as_tibble()
})

annual_obs <- purrr::map(sites, function(site){
  paste0(input_folder, site, '_obs_annual.csv') %>%
    data.table::fread(input = ., data.table = F) %>%
    dplyr::mutate(site = site) %>%
    dplyr::select(-one_of('site_no')) %>%
    tibble::as_tibble()
})

## Trends - Annual
annual_trend_comp <- purrr::map(sites, function(site){
  paste0(input_folder, site, '_com_trend.csv') %>%
    data.table::fread(input = ., data.table = F) %>%
    dplyr::mutate(site = site) %>%
    dplyr::select(-one_of('site_no')) %>%
    tibble::as_tibble()
})

annual_trend_sim <- purrr::map(sites, function(site){
  paste0(input_folder, site, '_mod_trend.csv') %>%
    data.table::fread(input = ., data.table = F) %>%
    dplyr::mutate(site = site) %>%
    dplyr::select(-one_of('site_no')) %>%
    tibble::as_tibble()
})

annual_trend_obs <- purrr::map(sites, function(site){
  paste0(input_folder, site, '_obs_trend.csv') %>%
    data.table::fread(input = ., data.table = F) %>%
    dplyr::mutate(site = site) %>%
    dplyr::select(-one_of('site_no')) %>%
    tibble::as_tibble()
})
# Import POR
POR_annual_max <- purrr::map(sites, function(site){
  paste0(input_folder, site, '_POR_annual_max.csv') %>%
    data.table::fread(input = ., data.table = F) %>%
    dplyr::mutate(site = site) %>%
    dplyr::select(-one_of('site_no')) %>%
    tibble::as_tibble()
})
POR_annual_max <- dplyr::bind_rows(POR_annual_max)

POR_annual_min <- purrr::map(sites, function(site){
  paste0(input_folder, site, '_POR_annual_min.csv') %>%
    data.table::fread(input = ., data.table = F) %>%
    dplyr::mutate(site = site) %>%
    dplyr::select(-one_of('site_no')) %>%
    tibble::as_tibble()
})
POR_annual_min <- dplyr::bind_rows(POR_annual_min)

POR_daily <- purrr::map(sites, function(site){
  paste0(input_folder, site, '_POR_daily_metrics.csv') %>%
    data.table::fread(input = ., data.table = F) %>%
    dplyr::mutate(site = site) %>%
    dplyr::select(-one_of('site_no')) %>%
    tibble::as_tibble()
})
This is an automated report for a set of gauged sites, generated with the HyMETT tool in R. The sites included in this report are:
cat(sites)
This report provides goodness-of-fit (GOF) measurements between modeled and observed streamflow at these gauged sites. The measures of model skill applied here are percent bias, volumetric efficiency (VE), Spearman's rho correlation, and normalized root mean square error (NRMSE). These metrics are applied to numerous statistics summarizing streamflow at daily, monthly, annual, and period-of-record time steps. The streamflow statistics include means, minima, maxima, percent of annual, standard deviations, percentiles, and multi-day high and low flows (Q).
## Map site locations via leaflet

# Get site coordinates
site_coord <- tryCatch({
  NULL
  # dataRetrieval::readNWISsite(siteNumbers = sites)
}, error = function(e){
  NULL
})

# dataRetrieval may be unavailable behind some firewalls, so example
# coordinates are included here.
site_coord <- data.frame(site_no = sites,
                         dec_lat_va = lat,
                         dec_long_va = long)

# Create map object if coordinates are available
if (!is.null(site_coord)){
  leaflet::leaflet(width = "100%") %>%
    leaflet::addTiles() %>%
    leaflet::addMarkers(data = site_coord,
                        lng = ~dec_long_va,
                        lat = ~dec_lat_va,
                        popup = ~site_no)
}

# Create a text warning if site coordinates were not available
coord_warn <- ifelse(is.null(site_coord),
                     "Warning: Study site coordinates were not retrievable through the 'dataRetrieval' R package.",
                     "")
# (the warning is inserted in the text below)
`r coord_warn`
# Collect info on daily observed and simulated time series
daily_ts_sim <- daily_sim_long %>%
  dplyr::select(site, date, Q) %>%
  dplyr::group_by(site) %>%
  dplyr::summarise(start = min(date, na.rm = T),
                   end = max(date, na.rm = T)) %>%
  dplyr::rename('Simulated Start Date' = start,
                'Simulated End Date' = end)

daily_ts_obs <- daily_obs_long %>%
  dplyr::select(site, date, Q) %>%
  dplyr::group_by(site) %>%
  dplyr::summarise(start = min(date, na.rm = T),
                   end = max(date, na.rm = T)) %>%
  dplyr::rename('Observed Start Date' = start,
                'Observed End Date' = end)

daily_info <- dplyr::full_join(daily_ts_obs, daily_ts_sim, by = 'site')
DT::datatable(daily_info)
# Calculate mean daily Q for each site
daily_mean_sim <- daily_sim_long %>%
  dplyr::select(site, Q) %>%
  dplyr::group_by(site) %>%
  dplyr::summarise(meanQ = mean(Q, na.rm = T)) %>%
  dplyr::rename(`Mean Daily Q - Simulated [cfs]` = meanQ)

daily_mean_obs <- daily_obs_long %>%
  dplyr::select(site, Q) %>%
  dplyr::group_by(site) %>%
  dplyr::summarise(meanQ = mean(Q, na.rm = T)) %>%
  dplyr::rename(`Mean Daily Q - Observed [cfs]` = meanQ)

daily_mean <- dplyr::full_join(daily_mean_obs, daily_mean_sim, by = 'site') %>%
  dplyr::mutate_if(is.numeric, round, digits = 2)

# Generate table of mean values over the POR
DT::datatable(daily_mean)
# Calculate minimum daily Q for each site
daily_min_sim <- daily_sim_long %>%
  dplyr::select(site, Q) %>%
  dplyr::group_by(site) %>%
  dplyr::summarise(minQ = min(Q, na.rm = T)) %>%
  dplyr::rename(`Minimum Daily Q - Simulated [cfs]` = minQ)

daily_min_obs <- daily_obs_long %>%
  dplyr::select(site, Q) %>%
  dplyr::group_by(site) %>%
  dplyr::summarise(minQ = min(Q, na.rm = T)) %>%
  dplyr::rename(`Minimum Daily Q - Observed [cfs]` = minQ)

daily_min <- dplyr::full_join(daily_min_obs, daily_min_sim, by = 'site') %>%
  dplyr::mutate_if(is.numeric, round, digits = 2)
DT::datatable(daily_min)
# Calculate maximum daily Q for each site
daily_max_sim <- daily_sim_long %>%
  dplyr::select(site, Q) %>%
  dplyr::group_by(site) %>%
  dplyr::summarise(maxQ = max(Q, na.rm = T)) %>%
  dplyr::rename(`Maximum Daily Q - Simulated [cfs]` = maxQ)

daily_max_obs <- daily_obs_long %>%
  dplyr::select(site, Q) %>%
  dplyr::group_by(site) %>%
  dplyr::summarise(maxQ = max(Q, na.rm = T)) %>%
  dplyr::rename(`Maximum Daily Q - Observed [cfs]` = maxQ)

daily_max <- dplyr::full_join(daily_max_obs, daily_max_sim, by = 'site') %>%
  dplyr::mutate_if(is.numeric, round, digits = 2)
DT::datatable(daily_max)
Period of Record 10% Annual Exceedance Probability for High Flows (Q1)
POR_annual_max_10 <- POR_annual_max %>%
  dplyr::filter(metric == 'high_q1') %>%
  dplyr::filter(percentile == 0.9) %>%
  dplyr::select(data, value, site) %>%
  dplyr::mutate_if(is.numeric, round, digits = 1)
POR_annual_max_10 <- tidyr::pivot_wider(POR_annual_max_10,
                                        names_from = data,
                                        values_from = value)
DT::datatable(POR_annual_max_10)
Period of Record 50% Annual Exceedance Probability for High Flows (Q1)
POR_annual_max_50 <- POR_annual_max %>%
  dplyr::filter(metric == 'high_q1') %>%
  dplyr::filter(percentile == 0.5) %>%
  dplyr::select(data, value, site) %>%
  dplyr::mutate_if(is.numeric, round, digits = 1)
POR_annual_max_50 <- tidyr::pivot_wider(POR_annual_max_50,
                                        names_from = data,
                                        values_from = value)
DT::datatable(POR_annual_max_50)
Period of Record 10% Annual Exceedance Probability for Low Flows (Q1)
POR_annual_min_10 <- POR_annual_min %>%
  dplyr::filter(metric == 'low_q1') %>%
  dplyr::filter(return_period == '10-yr') %>%
  dplyr::select(data, value, site) %>%
  dplyr::mutate_if(is.numeric, round, digits = 1)
POR_annual_min_10 <- tidyr::pivot_wider(POR_annual_min_10,
                                        names_from = data,
                                        values_from = value)
DT::datatable(POR_annual_min_10)
# TODO: Make column names uniform between min and max POR
Period of Record 50% Annual Exceedance Probability for Low Flows (Q1)
POR_annual_min_50 <- POR_annual_min %>%
  dplyr::filter(metric == 'low_q1') %>%
  dplyr::filter(return_period == '2-yr') %>%
  dplyr::select(data, value, site) %>%
  dplyr::mutate_if(is.numeric, round, digits = 1)
POR_annual_min_50 <- tidyr::pivot_wider(POR_annual_min_50,
                                        names_from = data,
                                        values_from = value)
DT::datatable(POR_annual_min_50)
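The filters above rely on the relationship between annual exceedance probability (AEP), percentiles of the annual series, and return periods: a 10-percent AEP for high flows corresponds to the 0.90 percentile of the annual-maximum series, and for low flows the return period is the reciprocal of the AEP (10-percent AEP is the 10-yr event, 50-percent AEP the 2-yr event). A minimal sketch of these conversions, with illustrative helper names that are not part of HyMETT:

```r
# Illustrative helpers for the AEP conversions used in the filters above.
# For annual maxima, a p-percent AEP corresponds to the (1 - p/100) percentile;
# for annual minima, the return period is the reciprocal of the AEP.
aep_to_max_percentile <- function(aep_percent) 1 - aep_percent / 100
aep_to_return_period <- function(aep_percent) paste0(round(100 / aep_percent), "-yr")

aep_to_max_percentile(10)  # 0.9, matching the high-flow filter
aep_to_return_period(10)   # "10-yr", matching the low-flow filter
aep_to_return_period(50)   # "2-yr"
```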
# Widen each site's POR daily metrics (one column per data source)
for (i in 1:length(POR_daily)){
  POR_daily[[i]] <- tidyr::pivot_wider(POR_daily[[i]],
                                       names_from = data,
                                       values_from = value)
}
# Generate a datatable for each site
htmltools::tagList(
  lapply(POR_daily, DT::datatable)
)
# TODO: Be consistent about column names.
# TODO: add other categories to the list
for (categ in category_list){
  cat("### ", categ, "{.tabset .tabset-dropdown} \n")

  cat("#### By Site \n")
  ## Boxplots of metrics (x = site, y = value)
  box_plot <- gof %>%
    dplyr::filter(category == categ) %>%
    dplyr::filter(name %in% metric_list) %>%
    ggplot2::ggplot(., aes(x = site, y = value)) +
    ggplot2::geom_boxplot() +
    ggplot2::facet_wrap(facets = vars(name), nrow = 1, scales = 'free') +
    ggplot2::theme_bw() +
    ggplot2::labs(x = 'Site', y = 'Value', title = 'Goodness-of-fit boxplots') +
    ggplot2::theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
  print(box_plot)
  cat("\n\n")

  # TODO: Extend the monthly heatmaps beyond the "Mean" category.
  if (categ == "Mean"){
    cat("#### By Site and Month \n")
    ## Heatmaps of metrics by month
    df_heat <- gof %>%
      dplyr::filter(category == "Mean") %>%
      dplyr::filter(name %in% metric_list) %>%
      dplyr::mutate(Month = gsub('_mean', '', `Annual Stat`)) %>%
      dplyr::mutate(Month = paste0(toupper(substr(Month, 1, 1)),
                                   substr(Month, 2, 3))) %>%
      dplyr::mutate(Month = factor(Month, levels = rev(unique(Month))))
    stats_heat <- unique(df_heat$name)
    # Build one heatmap per metric, collected in a list
    heat_plots <- vector("list", length(stats_heat))
    for (i in 1:length(stats_heat)){
      heat_plots[[i]] <- df_heat %>%
        dplyr::filter(name == stats_heat[i]) %>%
        ggplot2::ggplot(., aes(x = site, y = Month, fill = value)) +
        ggplot2::geom_tile() +
        ggplot2::theme_bw() +
        ggplot2::labs(x = 'Site', title = stats_heat[i]) +
        ggplot2::theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
        ggplot2::ylab(NULL)
    }
    print(cowplot::plot_grid(plotlist = heat_plots, nrow = 2, ncol = 2))
    cat("\n\n")
  }
  cat("\n\n")
}
Percent bias: the average percent bias at each basin, computed as the sum of the differences between modeled and observed values for each year (the sum of residuals), divided by the sum of the observed values and converted to a percentage.

Volumetric efficiency (VE): an overall goodness-of-fit metric similar to the more commonly used Nash-Sutcliffe efficiency (NSE). VE is used here because it balances the effect of small and large values evenly, whereas NSE puts more weight on larger values. A value of 1 is a perfect match between the observed and simulated values. Lighter colors are closer to one and represent a better fit between simulated and observed values.

Spearman's rank correlation coefficient (rho): a non-parametric measure of statistical dependence between the rankings of two variables. Lighter colors represent higher Spearman's values, or better correlation between the observed and simulated values; darker colors represent lower Spearman's values, or poorer correlation.

Normalized root mean square error (NRMSE): lighter colors are closer to zero, reflecting less difference between simulated and observed values, while darker colors represent higher NRMSE values and more difference between simulated and observed values.

Other metrics that the tool calculates, but that are not included in this report, are Pearson's r, RMSE, Nash-Sutcliffe efficiency, mean error, absolute error, and Kendall's tau.
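As a concrete reference for these definitions, here is a minimal sketch of the four metrics in R, assuming equal-length numeric vectors `obs` and `sim` with no missing values. These helpers are illustrative only (the report itself uses the values computed by HyMETT), and the normalization chosen for NRMSE is an assumption, since conventions vary.

```r
# Percent bias: sum of residuals over sum of observations, as a percentage
percent_bias <- function(obs, sim) 100 * sum(sim - obs) / sum(obs)

# Volumetric efficiency (Criss and Winston, 2008)
volumetric_efficiency <- function(obs, sim) 1 - sum(abs(sim - obs)) / sum(obs)

# Spearman's rank correlation
spearman_rho <- function(obs, sim) cor(obs, sim, method = "spearman")

# NRMSE; normalizing by sd(obs) is one common choice, but conventions vary
nrmse <- function(obs, sim) sqrt(mean((sim - obs)^2)) / sd(obs)

obs <- c(10, 20, 30); sim <- c(12, 18, 33)
percent_bias(obs, sim)  # 5
```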
for (stat in metric_list){
  cat("### ", stat, "{.tabset .tabset-dropdown} \n")

  gof_stat <- gof %>%
    dplyr::filter(name == stat) %>%
    dplyr::mutate(Stat = `Annual Stat`) %>%
    dplyr::mutate(Stat = ifelse(grepl('annual', Stat), 'Annual', Stat))
  # Relabel month-specific statistics (e.g., 'Aug_mean') with their month abbreviation
  for (m in month.abb){
    gof_stat$Stat <- ifelse(grepl(m, gof_stat$Stat), m, gof_stat$Stat)
  }

  for (stat_cat in category_list_monthly){
    cat("#### ", stat_cat, "\n")
    gof_category <- gof_stat %>%
      dplyr::filter(category == stat_cat)
    color_plot <- ggplot2::ggplot(gof_category, aes(y = value, x = Stat, fill = site)) +
      ggplot2::geom_bar(stat = 'identity', position = position_dodge()) +
      ggplot2::theme_bw() +
      ggplot2::geom_hline(yintercept = 0) +
      ggplot2::facet_grid(rows = ggplot2::vars(category),
                          cols = ggplot2::vars(Stat),
                          scales = 'free') +
      ggplot2::theme(panel.spacing.x = unit(0, "lines"),
                     axis.text.x = element_blank(),
                     axis.ticks.x = element_blank(),
                     legend.position = 'bottom',
                     legend.title = element_blank()) +
      ggplot2::labs(title = stat, x = "", y = stat)
    print(color_plot)
    cat("\n\n")
  }
  cat("\n\n")
}
Percent bias: the average percent bias at each basin, computed as the sum of the differences between modeled and observed values for each year (the sum of residuals), divided by the sum of the observed values and converted to a percentage.

Volumetric efficiency (VE): an overall goodness-of-fit metric similar to the more commonly used Nash-Sutcliffe efficiency (NSE). VE is used here because it balances the effect of small and large values evenly, whereas NSE puts more weight on larger values. A value of 1 is a perfect match between the observed and simulated values. Lighter colors are closer to one and represent a better fit between simulated and observed values.

Spearman's rank correlation coefficient (rho): a non-parametric measure of statistical dependence between the rankings of two variables. Lighter colors represent higher Spearman's values, or better correlation between the observed and simulated values; darker colors represent lower Spearman's values, or poorer correlation.

Normalized root mean square error (NRMSE): lighter colors are closer to zero, reflecting less difference between simulated and observed values, while darker colors represent higher NRMSE values and more difference between simulated and observed values.

Other metrics that the tool calculates, but that are not included in this report, are Pearson's r, RMSE, Nash-Sutcliffe efficiency, mean error, absolute error, and Kendall's tau.
for (i in 1:length(daily_obs)){
  site <- sites[i]
  cat("#### ", site, "\n")

  #----- Daily data
  # Get daily Q
  dailyQ <- dplyr::full_join(daily_sim[[i]][, c('date', 'Q')],
                             daily_obs[[i]][, c('date', 'Q')],
                             by = 'date') %>%
    dplyr::rename(Simulated = Q.x) %>%
    dplyr::rename(Observed = Q.y)

  # Floor values at 0.001 to avoid taking the log of zero
  dailyQ$Observed <- ifelse(dailyQ$Observed <= 0.001, 0.001, dailyQ$Observed)
  dailyQ$Simulated <- ifelse(dailyQ$Simulated <= 0.001, 0.001, dailyQ$Simulated)
  dailyQ$Observed <- log10(dailyQ$Observed)
  dailyQ$Simulated <- log10(dailyQ$Simulated)

  # Plot daily observed vs. simulated
  comparison_plot <- ggplot2::ggplot(dailyQ, aes(x = Observed, y = Simulated)) +
    ggplot2::geom_point(alpha = 0.10) +
    ggplot2::theme_bw() +
    ggplot2::coord_cartesian(xlim = c(min(dailyQ[,-1], na.rm = T),
                                      max(dailyQ[,-1], na.rm = T)),
                             ylim = c(min(dailyQ[,-1], na.rm = T),
                                      max(dailyQ[,-1], na.rm = T))) +
    ggplot2::geom_abline(slope = 1, col = 'red') +
    ggplot2::labs(x = 'Log Observed [cfs]',
                  y = 'Log Simulated [cfs]',
                  title = 'Daily Streamflow',
                  subtitle = paste0('Site #', site),
                  caption = paste0('n = ', formatC(nrow(dailyQ), big.mark = ',')))
  print(comparison_plot)
  cat("\n\n")
}
#----- Monthly data
# Average daily values to monthly
for (i in 1:length(daily_obs)){
  site <- sites[i]
  cat("#### ", site, "\n")

  # Get daily Q
  dailyQ <- dplyr::full_join(daily_sim[[i]][, c('date', 'Q')],
                             daily_obs[[i]][, c('date', 'Q')],
                             by = 'date') %>%
    dplyr::rename(Simulated = Q.x) %>%
    dplyr::rename(Observed = Q.y)

  # Average to monthly values
  monthlyQ <- dailyQ %>%
    dplyr::mutate(yearmon = substr(date, 1, 7)) %>%
    dplyr::group_by(yearmon) %>%
    dplyr::summarise(Simulated = mean(Simulated, na.rm = T),
                     Observed = mean(Observed, na.rm = T)) %>%
    dplyr::ungroup()

  # Floor values at 0.001 to avoid taking the log of zero
  monthlyQ$Observed <- ifelse(monthlyQ$Observed <= 0.001, 0.001, monthlyQ$Observed)
  monthlyQ$Simulated <- ifelse(monthlyQ$Simulated <= 0.001, 0.001, monthlyQ$Simulated)
  monthlyQ$Observed <- log10(monthlyQ$Observed)
  monthlyQ$Simulated <- log10(monthlyQ$Simulated)

  # Plot monthly observed vs. simulated
  plot_out <- ggplot2::ggplot(monthlyQ, aes(x = Observed, y = Simulated)) +
    ggplot2::geom_point(alpha = 0.40) +
    ggplot2::theme_bw() +
    ggplot2::coord_cartesian(xlim = c(min(monthlyQ[,-1], na.rm = T),
                                      max(monthlyQ[,-1], na.rm = T)),
                             ylim = c(min(monthlyQ[,-1], na.rm = T),
                                      max(monthlyQ[,-1], na.rm = T))) +
    ggplot2::geom_abline(slope = 1, col = 'red') +
    ggplot2::labs(x = 'Log Observed [cfs]',
                  y = 'Log Simulated [cfs]',
                  title = 'Monthly Streamflow',
                  subtitle = paste0('Site #', site),
                  caption = paste0('n = ', formatC(nrow(monthlyQ), big.mark = ',')))
  print(plot_out)
  cat("\n\n")
}
# Run comparison plots for annual statistics.
for (stat in stat_list){
  cat("### ", stat, "{.tabset} \n")
  for (i in 1:length(annual_obs)){
    site <- sites[i]
    cat("#### ", site, "\n")

    # Get annual values of the statistic
    df_Q <- dplyr::full_join(annual_sim[[i]][, c('WY', stat)],
                             annual_obs[[i]][, c('WY', stat)],
                             by = 'WY') %>%
      dplyr::rename(Simulated = dplyr::all_of(paste0(stat, ".x"))) %>%
      dplyr::rename(Observed = dplyr::all_of(paste0(stat, ".y")))

    # Plot annual observed vs. simulated
    comparison_plot <- ggplot2::ggplot(df_Q, aes(x = Observed, y = Simulated)) +
      ggplot2::geom_point(alpha = 0.50) +
      ggplot2::theme_bw() +
      ggplot2::coord_cartesian(xlim = c(min(df_Q[,-1], na.rm = T),
                                        max(df_Q[,-1], na.rm = T)),
                               ylim = c(min(df_Q[,-1], na.rm = T),
                                        max(df_Q[,-1], na.rm = T))) +
      ggplot2::geom_abline(slope = 1, col = 'red') +
      ggplot2::labs(x = 'Observed [cfs]',
                    y = 'Simulated [cfs]',
                    title = paste(stat, "for Site #", site),
                    caption = paste0('n = ', formatC(nrow(df_Q), big.mark = ',')))
    print(comparison_plot)
    cat("\n\n")
  }
  cat("\n\n")
}
These plots show the observed versus simulated values for the given variable. The red line is the 1:1 line, where simulated and observed values match exactly. For some of the plots, the log of the values is shown to increase the visibility of low values. For daily comparisons, the daily values are plotted against each other for each day in the period; for monthly comparisons, the average flow for each month is plotted. The other statistics plotted have one value per year of available data. Before the log transform of monthly and daily data, any values less than 0.001 are set to 0.001 to avoid taking the log of zero.
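The floor-then-log transform described above can be expressed compactly; a minimal sketch, where `floor_log10` is an illustrative helper name rather than a function used by this report:

```r
# Floor values at 0.001 before the log10 transform, as described above
floor_log10 <- function(q, floor = 0.001) log10(pmax(q, floor))

floor_log10(c(0, 0.5, 120))  # 0 is floored to 0.001 before log10
```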
trend_comp <- purrr::map(sites, function(site){
  paste0(input_folder, site, '_com_trend.csv') %>%
    data.table::fread(input = ., data.table = F) %>%
    tibble::as_tibble() %>%
    # add site number
    dplyr::mutate(site = site) %>%
    dplyr::select(site, dplyr::everything()) %>%
    # add statistic categories (later matches override earlier ones)
    dplyr::mutate(category = NA) %>%
    dplyr::mutate(category = ifelse(grepl('mean', annual_stat, ignore.case = T), 'Mean', category)) %>%
    dplyr::mutate(category = ifelse(grepl('max', annual_stat, ignore.case = T), 'Max', category)) %>%
    dplyr::mutate(category = ifelse(grepl('min', annual_stat, ignore.case = T), 'Min', category)) %>%
    dplyr::mutate(category = ifelse(grepl('percent_annual', annual_stat, ignore.case = T), 'Percent Annual', category)) %>%
    dplyr::mutate(category = ifelse(grepl('CVD', annual_stat, ignore.case = T), 'CVD', category)) %>%
    dplyr::mutate(category = ifelse(grepl('percentile', annual_stat, ignore.case = T), 'Percentile', category)) %>%
    dplyr::mutate(category = ifelse(grepl('sd', annual_stat, ignore.case = T), 'SD', category)) %>%
    dplyr::mutate(category = ifelse(grepl('q', annual_stat, ignore.case = T), 'Q', category)) %>%
    dplyr::mutate_if(is.numeric, round, digits = 2) %>%
    # transform to long format
    tidyr::pivot_longer(cols = sen_slope_diff:val_perc_change_diff)
}) %>%
  dplyr::bind_rows()

trend_difference <- trend_comp %>%
  dplyr::filter(name == "val_perc_change_diff")

# Clip extreme percent-change differences to +/- 100 for display
trend_difference$value[trend_difference$value > 100] <- 100
trend_difference$value[trend_difference$value < -100] <- -100

trend_plot <- ggplot2::ggplot(trend_difference, aes(x = value, y = annual_stat)) +
  ggplot2::geom_point(aes(color = site)) +
  ggplot2::theme_bw() +
  ggplot2::facet_grid(rows = ggplot2::vars(category), scales = 'free_y') +
  ggplot2::labs(title = 'Trend Differences') +
  ggplot2::xlim(-100, 100)
print(trend_plot)
# Build plots of annual statistics with fitted trend lines
nCol <- 2
plot_list <- list()
for (stat in stat_list){
  subplot_list <- list()
  for (i in 1:length(annual_obs)){
    # Slice observed trend and discharge data
    trend_data_obs <- dplyr::filter(annual_trend_obs[[i]], annual_stat == stat)
    discharge_data_obs <- annual_obs[[i]] %>%
      dplyr::select('WY', dplyr::all_of(stat)) %>%
      dplyr::rename(Obs = dplyr::all_of(stat))
    # Slice simulated trend and discharge data
    trend_data_sim <- dplyr::filter(annual_trend_sim[[i]], annual_stat == stat)
    discharge_data_sim <- annual_sim[[i]] %>%
      dplyr::select('WY', dplyr::all_of(stat)) %>%
      dplyr::rename(Sim = dplyr::all_of(stat))
    # Slice trend-difference data
    trend_data_diff <- dplyr::filter(annual_trend_comp[[i]], annual_stat == stat)

    df <- merge(discharge_data_obs, discharge_data_sim, by = "WY")
    df <- reshape::melt(df, id = "WY")

    p <- ggplot2::ggplot(df, aes(x = WY, y = value, colour = variable)) +
      geom_point() +
      scale_color_manual(values = c("blue", "red")) +
      geom_abline(intercept = trend_data_obs$intercept,
                  slope = trend_data_obs$sen_slope, color = "blue") +
      geom_abline(intercept = trend_data_sim$intercept,
                  slope = trend_data_sim$sen_slope, color = "red") +
      labs(title = paste0("Site ", sites[i]),
           # subtitle = paste0("Difference in Percent Change: ", trend_data_diff$percent_change_q_difference),
           y = stat) +
      theme(axis.title.y = element_blank(), legend.title = element_blank())
    subplot_list[[i]] <- p
  }
  plot_list[[paste0(stat, '_plot')]] <- subplot_list
}
# Build trend summary tables
observation_list <- list()
simulation_list <- list()
comparison_list <- list()
for (stat in stat_list){
  obs_list <- list()
  sim_list <- list()
  comp_list <- list()
  for (i in 1:length(annual_obs)){
    # Slice and transpose the trend data so each site becomes a column
    obs_list[[i]] <- t(dplyr::filter(annual_trend_obs[[i]], annual_stat == stat))
    sim_list[[i]] <- t(dplyr::filter(annual_trend_sim[[i]], annual_stat == stat))
    comp_list[[i]] <- t(dplyr::filter(annual_trend_comp[[i]], annual_stat == stat))
  }

  # Keep the last 9 rows (the trend statistics), then drop the first and last of those
  obs_df <- dplyr::bind_cols(obs_list)
  colnames(obs_df) <- sites
  obs_df$Statistics <- colnames(annual_trend_obs[[i]])
  obs_df <- tail(obs_df, 9)
  obs_df <- obs_df %>% dplyr::relocate(Statistics)
  obs_df <- obs_df[-c(1, 9), ]
  observation_list[[paste0(stat, '_table')]] <- obs_df

  sim_df <- dplyr::bind_cols(sim_list)
  colnames(sim_df) <- sites
  sim_df$Statistics <- colnames(annual_trend_sim[[i]])
  sim_df <- tail(sim_df, 9)
  sim_df <- sim_df %>% dplyr::relocate(Statistics)
  sim_df <- sim_df[-c(1, 9), ]
  simulation_list[[paste0(stat, '_table')]] <- sim_df

  # The comparison files have fewer rows of statistics; keep the last 4 and drop the final one
  comp_df <- dplyr::bind_cols(comp_list)
  colnames(comp_df) <- sites
  comp_df$Statistics <- colnames(annual_trend_comp[[i]])
  comp_df <- tail(comp_df, 4)
  comp_df <- comp_df[-c(4), ]
  comp_df <- comp_df %>% dplyr::relocate(Statistics)
  comparison_list[[paste0(stat, '_table')]] <- comp_df
}
# Set up plots and tables for each tab.
for (i in 1:length(plot_list)){
  cat("#### ", stat_list[[i]], "\n")
  plot <- cowplot::plot_grid(plotlist = plot_list[[i]], ncol = nCol)
  print(plot)
  cat('\n\n Observed Trends \n')
  print(htmltools::tagList(DT::datatable(observation_list[[i]])))
  cat('\n Simulated Trends \n')
  print(htmltools::tagList(DT::datatable(simulation_list[[i]])))
  cat('\n Simulated minus Observed Trends \n')
  print(htmltools::tagList(DT::datatable(comparison_list[[i]])))
  cat('\n\n')
}
Trend tests were completed using Sen's slope [40] for magnitude, except in cases when the annual series had "zero-flow" status for any observed or modeled time series at a given basin; in those cases, the time series was converted to a binomial series and a logistic regression [41] was fit with year as the explanatory variable. The predicted probabilities from the model (the dependent variable) were used to quantify the change in the probability of a zero-flow year over time. Sen's slope was computed using the R function "GeneralMannKendall.R", authored by Benjamin Renard of Irstea, UR Riverly, Lyon, France, and available for download from Dudley et al. [42]. Logistic regression was completed using the "glm" function in the R "stats" package with family = binomial(link = "logit"). Correlations between basin characteristics for study basins and project results were computed with Kendall's tau (Hodgkins et al., 2020).
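For reference, a minimal sketch of both approaches in R. The Sen's slope below is the standard median-of-pairwise-slopes estimator, shown as a stand-in for "GeneralMannKendall.R" (which is not reproduced here); the `glm` call follows the text, and the toy data are illustrative.

```r
# Sen's slope: median of all pairwise slopes (standard estimator; a stand-in
# for GeneralMannKendall.R, which is not reproduced here)
sens_slope <- function(year, value) {
  idx <- combn(seq_along(value), 2)
  slopes <- (value[idx[2, ]] - value[idx[1, ]]) / (year[idx[2, ]] - year[idx[1, ]])
  median(slopes, na.rm = TRUE)
}

# Toy annual series with a declining trend and occasional zero-flow years
set.seed(1)
year <- 1980:2019
annual_q <- pmax(rnorm(length(year), mean = 2 - 0.04 * (year - 1980), sd = 0.5), 0)

sens_slope(year, annual_q)

# Zero-flow case: logistic regression of a binomial zero-flow indicator on year
zero_flow <- as.integer(annual_q == 0)
fit <- glm(zero_flow ~ year, family = binomial(link = "logit"))
head(predict(fit, type = "response"))  # probability of a zero-flow year over time
```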