Report for Few Sites'

knitr::opts_chunk$set(echo = TRUE)
# mandatory libraries
library(tidyverse)
library(tidyr)
# speed-enhancing libraries
library(data.table)
# report-enhancing libraries
library(DT)
# libraries just for introduction/study site sections
library(leaflet)
library(dataRetrieval)
library(cowplot)
library(reshape)
# default values
input_folder <- paste0(system.file("extdata", package = "HyMETT"),"/")
sites <- c("01013500", "01021470","01022500","01030500","01031500","01047000")
# Decimal Lat and long. 
lat <- c(47.2375,  44.8008333, 44.608056, 45.50111, 45.175, 44.86916667)
long <- c(-68.58278,  -67.725, -67.935278, -68.3058333, -69.3147222, -69.955)
# .drop = c('V1')     # extra column of row numbers to drop during CSV import
# Setup some default values. 

# GOF Metrics you want to plot
metric_list <- c("percent_bias", "volumetric_efficiency", "spearman_rho", "NRMSE")

# Categories of annual stats to plot for general GOF and monthly GOF
category_list <- c("Mean", "Min", "Max", "Percent Annual", "Percentile", "Q")
category_list_monthly <- c('Mean', 'Min', 'Max', 'Percent Annual')

# Annual statistics to plot for comparisons and trends.
stat_list <- c('annual_mean', 'high_q3', 'low_q7', "annual_1_percentile", "Aug_mean")
# TODO: Add catch all category for stats that don't fit into another category. 
# TODO: Make stat vs metric names consistent
# TODO: add low flow stats to a category
#------- Import all necessary data
gof <- purrr::map(sites, function(site){
  # import as dataframe
  paste0(input_folder, site, '_GOF.csv') %>%
    data.table::fread(input = ., data.table = F) %>%
    tibble::as_tibble() %>%
    # add site number
    dplyr::select(-one_of('site_no'))%>%
    dplyr::mutate(site = site)  %>%
    dplyr::select(site, dplyr::everything()) %>%
    dplyr::rename(`Annual Stat` = annual_stat) %>%
    dplyr::rename(name = metric) %>%
    # add statistic categories
    dplyr::mutate(category = NA) %>%
    dplyr::mutate(category = ifelse(grepl('mean', `Annual Stat`, ignore.case = T), 'Mean', category)) %>%
    dplyr::mutate(category = ifelse(grepl('max', `Annual Stat`, ignore.case = T), 'Max', category)) %>%
    dplyr::mutate(category = ifelse(grepl('min', `Annual Stat`, ignore.case = T), 'Min', category)) %>%
    dplyr::mutate(category = ifelse(grepl('percent_annual', `Annual Stat`, ignore.case = T), 'Percent Annual',
                                    category)) %>%
    dplyr::mutate(category = ifelse(grepl('CVD', `Annual Stat`, ignore.case = T), 'CVD', category)) %>%
    dplyr::mutate(category = ifelse(grepl('percentile', `Annual Stat`, ignore.case = T), 'Percentile',
                                    category)) %>%
    dplyr::mutate(category = ifelse(grepl('sd', `Annual Stat`, ignore.case = T), 'SD', category)) %>%
    dplyr::mutate(category = ifelse(grepl('q', `Annual Stat`, ignore.case = T), 'Q', category)) %>%
    dplyr::mutate_if(is.numeric, round, digits = 2)
  }) %>%
  dplyr::bind_rows()

## Streamflow - Daily
daily_sim <- purrr::map(sites, function(site){
  daily_sim <- paste0(input_folder, site, '_mod_daily.csv') %>%
    data.table::fread(input = ., data.table = F) %>%
    dplyr::mutate(date = as.Date(Date)) %>%
    dplyr::mutate(site = site) %>%
    dplyr::select(site, dplyr::everything()) %>%
    dplyr::select(-one_of('site_no'))%>%
    dplyr::select(-one_of('Date'))%>%
    dplyr::rename(Q = value) %>%
    tibble::as_tibble()
})
daily_sim_long <- dplyr::bind_rows(daily_sim)

daily_obs <- purrr::map(sites, function(site){
  daily_sim <- paste0(input_folder, site, '_obs_daily.csv') %>%
    data.table::fread(input = ., data.table = F) %>%
    dplyr::mutate(date = as.Date(Date)) %>%
    dplyr::mutate(site = site) %>%
    dplyr::select(site, dplyr::everything()) %>%
    dplyr::select(-one_of('site_no'))%>%
    dplyr::select(-one_of('Date'))%>%
    dplyr::rename(Q = value) %>%
    tibble::as_tibble()
})
daily_obs_long <- dplyr::bind_rows(daily_obs)


## Streamflow - Annual
annual_sim <- purrr::map(sites, function(site){
  paste0(input_folder, site, '_mod_annual.csv') %>%
    data.table::fread(input = ., data.table = F) %>%
    dplyr::mutate(site = site) %>%
    dplyr::select(-one_of('site_no'))%>%
    tibble::as_tibble()
})

annual_obs <- purrr::map(sites, function(site){
  paste0(input_folder, site, '_obs_annual.csv') %>%
    data.table::fread(input = ., data.table = F) %>%
    dplyr::mutate(site = site) %>%
    dplyr::select(-one_of('site_no'))%>%
    tibble::as_tibble()
})


## Trends - Annual
annual_trend_comp <- purrr::map(sites, function(site){
  paste0(input_folder, site, '_com_trend.csv') %>%
    data.table::fread(input = ., data.table = F) %>%
    dplyr::mutate(site = site) %>%
    dplyr::select(-one_of('site_no'))%>%
    tibble::as_tibble()
})
annual_trend_sim <- purrr::map(sites, function(site){
  paste0(input_folder, site, '_mod_trend.csv') %>%
    data.table::fread(input = ., data.table = F) %>%
    dplyr::mutate(site = site) %>%
    dplyr::select(-one_of('site_no'))%>%
    tibble::as_tibble()
})
annual_trend_obs <- purrr::map(sites, function(site){
  paste0(input_folder, site, '_obs_trend.csv') %>%
    data.table::fread(input = ., data.table = F) %>%
    dplyr::mutate(site = site) %>%
    dplyr::select(-one_of('site_no'))%>%
    tibble::as_tibble()
})

# Import POR

POR_annual_max <- purrr::map(sites, function(site){
  paste0(input_folder, site, '_POR_annual_max.csv') %>%
    data.table::fread(input = ., data.table = F) %>%
    dplyr::mutate(site = site) %>%
    dplyr::select(-one_of('site_no'))%>%
    tibble::as_tibble()
})
POR_annual_max <- dplyr::bind_rows(POR_annual_max)

POR_annual_min <- purrr::map(sites, function(site){
  paste0(input_folder, site, '_POR_annual_min.csv') %>%
    data.table::fread(input = ., data.table = F) %>%
    dplyr::mutate(site = site) %>%
    dplyr::select(-one_of('site_no'))%>%
    tibble::as_tibble()
})
POR_annual_min <- dplyr::bind_rows(POR_annual_min)

POR_daily <- purrr::map(sites, function(site){
  paste0(input_folder, site, '_POR_daily_metrics.csv') %>%
    data.table::fread(input = ., data.table = F) %>%
    dplyr::mutate(site = site) %>%
    dplyr::select(-one_of('site_no'))%>%
    tibble::as_tibble()
})

Introduction

This is a an automated report for 1-5 sites generated from thy HyMETT tool in R. The sites included in this report are:

cat( sites)

This report provides goodness-of-fit measurements between modeled and observational streamflow at these gauged sites. Measures of model skill applied here are percent bias, volumetric efficiency (VE), Spearman's rho correlation, and normalized root mean square error (NRMSE). These metrics are applied to numerous streamflow statistics summarizing streamflow at daily, monthly, annual, and period-of-record timesteps. Streamflow statistics include mean, minima, maxima, percent of annual, standard deviation, percentiles, and multi-day high flows (Q).


Study sites

## Map site location via leaflet
# Get site coordinates
site_coord <- tryCatch({
  NULL # dataRetrieval::readNWISsite(siteNumbers = site)
}, error = function(e){ NULL })

# The firewall on my laptop doesn't let me use dataRetrieval package, so including
# example output here
site_coord <- data.frame(site_no = sites,
                         dec_lat_va = lat,
                         dec_long_va = long)
# Create map object if coordinates are available
if (!is.null(site_coord)){
  leaflet::leaflet(width = "100%") %>%
    leaflet::addTiles() %>%
    leaflet::addMarkers(data = site_coord, lng = ~dec_long_va, lat = ~dec_lat_va, popup = ~site_no)
}
# Create text warning if site coordinates were not available
coord_warn <- ifelse(is.null(site_coord),
  "Warning: Study site coordinates were not retrievable through the 'dataRetrieval' R package.", "")

# (add warning in text below)

r coord_warn

Timeseries Tables {.tabset}

General Information {.tabset}

Dates {.tabset .tabset-dropdown}

# Collect info on daily obs and sim timeseries
daily_ts_sim <- daily_sim_long %>%
  dplyr::select(site, date, Q) %>%
  dplyr::group_by(site) %>%
  dplyr::summarise(start = min(date, na.rm = T), end = max(date, na.rm = T)) %>%
  dplyr::rename('Simulated Start Date' = start, 'Simulated End Date' = end)
daily_ts_obs <- daily_obs_long %>%
  dplyr::select(site, date, Q) %>%
  dplyr::group_by(site) %>%
  dplyr::summarise(start = min(date, na.rm = T), end = max(date, na.rm = T)) %>%
  dplyr::rename('Observed Start Date' = start, 'Observed End Date' = end)
daily_info <- dplyr::full_join(daily_ts_obs, daily_ts_sim, by = 'site')
DT::datatable(daily_info)

Mean {.tabset .tabset-dropdown}

# Calculate mean daily Q for each site
daily_mean_sim <- daily_sim_long %>%
  dplyr::select(site, Q) %>%
  dplyr::group_by(site) %>%
  dplyr::summarise(meanQ = mean(Q, na.rm = T)) %>%
  dplyr::rename(`Mean Daily Q - Simulated [cfs]` = meanQ)
daily_mean_obs <- daily_obs_long %>%
  dplyr::select(site, Q) %>%
  dplyr::group_by(site) %>%
  dplyr::summarise(meanQ = mean(Q, na.rm = T)) %>%
  dplyr::rename(`Mean Daily Q - Observed [cfs]` = meanQ)
daily_mean <- dplyr::full_join(daily_mean_obs, daily_mean_sim, by = 'site') %>%
  dplyr::mutate_if(is.numeric, round, digits = 2)

# Generate table of mean values over POR
DT::datatable(daily_mean)

Min {.tabset .tabset-dropdown}

# Calculate min daily Q for each site
daily_min_sim <- daily_sim_long %>%
  dplyr::select(site, Q) %>%
  dplyr::group_by(site) %>%
  dplyr::summarise(minQ = min(Q, na.rm = T)) %>%
  dplyr::rename(`Minimum Daily Q - Simulated [cfs]` = minQ)
daily_min_obs <- daily_obs_long %>%
  dplyr::select(site, Q) %>%
  dplyr::group_by(site) %>%
  dplyr::summarise(minQ = min(Q, na.rm = T)) %>%
  dplyr::rename(`Minimum Daily Q - Observed [cfs]` = minQ)
daily_min <- dplyr::full_join(daily_min_obs, daily_min_sim, by = 'site') %>%
  dplyr::mutate_if(is.numeric, round, digits = 2)
DT::datatable(daily_min)

Max {.tabset .tabset-dropdown}

# Calculate max daily Q for each site
daily_max_sim <- daily_sim_long %>%
  dplyr::select(site, Q) %>%
  dplyr::group_by(site) %>%
  dplyr::summarise(maxQ = max(Q, na.rm = T)) %>%
  dplyr::rename(`Maximum Daily Q - Simulated [cfs]` = maxQ)
daily_max_obs <- daily_obs_long %>%
  dplyr::select(site, Q) %>%
  dplyr::group_by(site) %>%
  dplyr::summarise(maxQ = max(Q, na.rm = T)) %>%
  dplyr::rename(`Maximum Daily Q - Observed [cfs]` = maxQ)
daily_max <- dplyr::full_join(daily_max_obs, daily_max_sim, by = 'site') %>%
  dplyr::mutate_if(is.numeric, round, digits = 2)
DT::datatable(daily_max)

Period of Record {.tabset}

10% AEP High

Period of Record 10% Annual Exceedance Probability for High Flows (Q1)

POR_annual_max_10 <- POR_annual_max %>%
    dplyr::filter(metric == 'high_q1') %>%
    dplyr::filter(percentile == 0.9) %>%
    dplyr::select(data,value,site) %>%
    dplyr::mutate_if(is.numeric, round, digits = 1)
POR_annual_max_10 <- tidyr::pivot_wider(POR_annual_max_10, names_from = data, values_from = value)
DT::datatable(POR_annual_max_10)

50% AEP High

Period of Record 50% Annual Exceedance Probability for High Flows (Q1)

POR_annual_max_50 <- POR_annual_max %>%
    dplyr::filter(metric == 'high_q1') %>%
    dplyr::filter(percentile == 0.5) %>%
    dplyr::select(data,value,site) %>%
    dplyr::mutate_if(is.numeric, round, digits = 1)
POR_annual_max_50 <- tidyr::pivot_wider(POR_annual_max_50, names_from = data, values_from = value)
DT::datatable(POR_annual_max_50)

10% AEP Low

Period of Record 10% Annual Exceedance Probability for Low Flows (Q1)

POR_annual_min_10 <- POR_annual_min %>%
    dplyr::filter(metric == 'low_q1') %>%
    dplyr::filter(return_period == '10-yr') %>%
    dplyr::select(data,value,site) %>%
    dplyr::mutate_if(is.numeric, round, digits = 1)
POR_annual_min_10 <- tidyr::pivot_wider(POR_annual_min_10, names_from = data, values_from = value)
DT::datatable(POR_annual_min_10)
# TODO: Make column names uniform between min and max POR 

50% AEP Low

Period of Record 50% Annual Exceedance Probability for Low Flows (Q1)

POR_annual_min_50 <- POR_annual_min %>%
    dplyr::filter(metric == 'low_q1') %>%
    dplyr::filter(return_period == '2-yr') %>%
    dplyr::select(data,value,site) %>%
    dplyr::mutate_if(is.numeric, round, digits = 1)
POR_annual_min_50 <- tidyr::pivot_wider(POR_annual_min_50, names_from = data, values_from = value)
DT::datatable(POR_annual_min_50)

POR Daily {.tabset .tabset-dropdown}

for (i in 1:length(POR_daily)){
  POR_daily[[i]] <- tidyr::pivot_wider(POR_daily[[i]], names_from = data, values_from = value)
}

# split iris by Species and generate a datatable for each species
htmltools::tagList(
  lapply(POR_daily, datatable)
)

# TODO: be consistent about column names.

Goodness-of-fit metrics {.tabset}

# Non updated version of this code is at bottom of page.
# TODO: add other categories to list

for (categ in category_list){
  cat("### ",categ, "{.tabset .tabset-dropdown} \n")

  # cat("#### All  \n")
  #   ## Barplot of metrics (x = site, y = value)
  # box_plot <- gof %>%
  #   dplyr::filter(category == categ) %>%
  #   ggplot2::ggplot(., aes(x = name, y = value)) +
  #     ggplot2::geom_boxplot()
  # 
  # print(box_plot)
  # cat("\n\n")

  cat("#### By Site  \n")
  ## Barplot of metrics (x = site, y = value)
  bar_plot <- gof %>%
    dplyr::filter(category == categ) %>%
    dplyr::filter(name %in% metric_list) %>%
    # dplyr::mutate(Month = gsub('_mean', '', `Annual Stat`)) %>%
    # dplyr::mutate(Month = paste0(toupper(substr(Month, 1, 1)),
    #                              substr(Month, 2, 3))) %>%
    ggplot2::ggplot(., aes(x = site, y = value)) +
      ggplot2::geom_boxplot() +
      ggplot2::facet_wrap(facets = vars(name),
                          nrow = 1,
                          scales = 'free') +
      ggplot2::theme_bw() +
      ggplot2::labs(x = 'Site',
                    y = 'Value',
                    title = 'Goodness-of-fit boxplots.') +
      ggplot2::theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
  print(bar_plot)
  cat("\n\n")

  # TODO: Make this so that it can plot other sites.
  if (categ == "Mean"){
    cat("#### By Site and Month  \n")
    ## Heatmap of metrics by month
    df_heat <- gof %>%
      dplyr::filter(category == "Mean") %>%
      dplyr::filter(name %in% metric_list) %>%
      # dplyr::filter(name == 'Bias') %>%
      dplyr::mutate(Month = gsub('_mean', '', `Annual Stat`)) %>%
      dplyr::mutate(Month = paste0(toupper(substr(Month, 1, 1)),
                                   substr(Month, 2, 3))) %>%
      dplyr::mutate(Month = factor(Month, levels = rev(unique(Month))))
    stats_heat <- unique(df_heat$name)
    # Loop plot creation (assign to objects)
    for (i in 1:length(stats_heat)){
      assign(x = paste0('p', i),
             value = df_heat %>%
               dplyr::filter(name == stats_heat[i]) %>%
               ggplot2::ggplot(., aes(x = site, y = Month, fill = value)) +
               ggplot2::geom_tile() +
               ggplot2::theme_bw() +
               ggplot2::labs(x = 'Site',
                             y = 'Value',
                             title = stats_heat[i]) +
               ggplot2::theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
               ylab(NULL))

    }

    print(cowplot::plot_grid(p1,p2,p3,p4, nrow = 2, ncol = 2))
    cat("\n\n")
  }
  cat("\n\n")
}

GOF Methods

Percent Bias: Average percent bias at each basin was computed as the sum of the difference between modeled and observed values for each year (the sum of residuals), divided by the sum of the observed values and converted to percent.

Volumetric Efficiency (VE): An overall goodness of fit metric, that is similar to the more commonly used Nash Sutcliffe model efficiency (NSE). VE was used here as it balances the effect of small and large values evenly whereas NSE puts more weight on larger values. A value of 1 is a perfect match between the observed and simulated values. Lighter colors are closer to one and represent a better fit between simulated and observed values.

Spearman’s rank correlation coefficient is a non-parametric measure of statistical dependence between the ranking of two variables. Lighter colors represent higher Spearman’s values or better correlation between the observed and simulated values. Darker colors represent lower Spearman’s values or poorer correlation between the observed and simulated values.

Normalized Root Mean Square Error. Lighter colors are closer to zero reflecting less difference between simulated and observed values, while darker colors represent higher NRMSE values with more difference between simulated and observed values.

Other parameters which the tool calculates but which have not been included in this report are Pearson's r, RMSE, Nash-Sutcliffe-Efficiency, Mean Error, absolute error, and Kendall's Tau.

Description of Categories

Description of Statistics

Goodness-of-fit By-Month {.tabset}

for (stat in metric_list){
  cat("### ", stat, "{.tabset .tabset-dropdown}  \n")
  # stat = annual_sd
  gof_stat <- gof %>%
    dplyr::filter(name == stat)


  gof_stat <- gof %>%
    dplyr::filter(name == stat) %>%
    dplyr::mutate(Stat = `Annual Stat`) %>%
    dplyr::mutate(Stat = ifelse(grepl('Apr', Stat), month.abb[4], Stat)) %>%
    dplyr::mutate(Stat = ifelse(grepl('Aug', Stat), month.abb[8], Stat)) %>%
    dplyr::mutate(Stat = ifelse(grepl('Dec', Stat), month.abb[12], Stat)) %>%
    dplyr::mutate(Stat = ifelse(grepl('Feb', Stat), month.abb[2], Stat)) %>%
    dplyr::mutate(Stat = ifelse(grepl('Jan', Stat), month.abb[1], Stat)) %>%
    dplyr::mutate(Stat = ifelse(grepl('Jul', Stat), month.abb[7], Stat)) %>%
    dplyr::mutate(Stat = ifelse(grepl('Jun', Stat), month.abb[6], Stat)) %>%
    dplyr::mutate(Stat = ifelse(grepl('Mar', Stat), month.abb[3], Stat)) %>%
    dplyr::mutate(Stat = ifelse(grepl('May', Stat), month.abb[5], Stat)) %>%
    dplyr::mutate(Stat = ifelse(grepl('Nov', Stat), month.abb[11], Stat)) %>%
    dplyr::mutate(Stat = ifelse(grepl('Oct', Stat), month.abb[10], Stat)) %>%
    dplyr::mutate(Stat = ifelse(grepl('Sep', Stat), month.abb[9], Stat)) %>%
    dplyr::mutate(Stat = ifelse(grepl('annual', Stat), 'Annual', Stat)) # %>%
    # dplyr::mutate(Stat = factor(Stat, levels = c('Annual', month.abb))) %>%
    # dplyr::mutate(site = factor(site, levels = sort(unique(site))))

  for (stat_cat in category_list_monthly){

    cat("#### ", stat_cat, "\n")

    gof_category <- gof_stat %>%
      dplyr::filter(category == stat_cat) #%>%

  color_plot <- ggplot2::ggplot(gof_category, aes(y = value, x = Stat, fill = site)) +
    ggplot2::geom_bar(stat = 'identity', position=position_dodge()) +
    ggplot2::theme_bw() +
    ggplot2::geom_hline(yintercept = 0) + 
    ggplot2::facet_grid(rows = ggplot2::vars(category),
                        cols = ggplot2::vars(Stat),
                        scales = 'free') +
    ggplot2::theme(panel.spacing.x = unit(0, "lines"), 
                   axis.text.x=element_blank(),
                   axis.ticks.x=element_blank(),
                   legend.position='bottom',
                   legend.title = element_blank()) +
    ggplot2::labs(title = stat, x = "", y = stat)
  print(color_plot)


  cat("\n\n")
  }

  cat("\n\n")
}

GOF Methods

Percent Bias: Average percent bias at each basin was computed as the sum of the difference between modeled and observed values for each year (the sum of residuals), divided by the sum of the observed values and converted to percent.

Volumetric Efficiency (VE): An overall goodness of fit metric, that is similar to the more commonly used Nash Sutcliffe model efficiency (NSE). VE was used here as it balances the effect of small and large values evenly whereas NSE puts more weight on larger values. A value of 1 is a perfect match between the observed and simulated values. Lighter colors are closer to one and represent a better fit between simulated and observed values.

Spearman’s rank correlation coefficient is a non-parametric measure of statistical dependence between the ranking of two variables. Lighter colors represent higher Spearman’s values or better correlation between the observed and simulated values. Darker colors represent lower Spearman’s values or poorer correlation between the observed and simulated values.

Normalized Root Mean Square Error. Lighter colors are closer to zero reflecting less difference between simulated and observed values, while darker colors represent higher NRMSE values with more difference between simulated and observed values.

Other parameters which the tool calculates but which have not been included in this report are Pearson's r, RMSE, Nash-Sutcliffe-Efficiency, Mean Error, absolute error, and Kendall's Tau.

Description of Categories

Description of Statistics


Streamflow comparisons {.tabset}

Daily {.tabset}

for (i in 1:length(daily_obs)){
  site <- sites[i]
  cat("#### ", site, "\n")
  #----- Daily data
  # Get daily Q
  dailyQ <- dplyr::full_join(daily_sim[[i]][, c('date', 'Q')],
                            daily_obs[[i]][, c('date', 'Q')],
                            by = 'date') %>%
    dplyr::rename(Simulated = Q.x) %>%
    dplyr::rename(Observed = Q.y)

  # Set zero values to 0.001
  dailyQ$Observed <- ifelse(dailyQ$Observed <= 0.001, 0.001, dailyQ$Observed)
  dailyQ$Simulated <- ifelse(dailyQ$Simulated <= 0.001, 0.001, dailyQ$Simulated)

  dailyQ$Observed <- log10(dailyQ$Observed)
  dailyQ$Simulated <- log10(dailyQ$Simulated)

  # Plot daily obs v sim
  comparison_plot <- ggplot2::ggplot(dailyQ, aes(x = Observed, y = Simulated)) +
    ggplot2::geom_point(alpha = 0.10) +
    ggplot2::theme_bw() +
    ggplot2::coord_cartesian(xlim = c(min(dailyQ[,-1], na.rm = T), max(dailyQ[,-1], na.rm = T)),
                             ylim = c(min(dailyQ[,-1], na.rm = T), max(dailyQ[,-1], na.rm = T))) +
    ggplot2::geom_abline(slope = 1, col = 'red') +
    ggplot2::labs(x = 'Log Observed [cfs]',
                  y = 'Log Simulated [cfs]',
                  title = 'Daily Streamflow',
                  subtitle = paste0('Site #', site),
                  caption = paste0('n = ', formatC(nrow(dailyQ), big.mark = ',')))
  print(comparison_plot)
  cat("\n\n")
}

Monthly{.tabset}

#----- Monthly data
# Average daily values to monthly
for (i in 1:length(daily_obs)){
  site <- sites[i]
  cat("#### ", site, "\n")
  #----- Daily data
  # Get daily Q
  dailyQ <- dplyr::full_join(daily_sim[[i]][, c('date', 'Q')],
                            daily_obs[[i]][, c('date', 'Q')],
                            by = 'date') %>%
    dplyr::rename(Simulated = Q.x) %>%
    dplyr::rename(Observed = Q.y)


  monthlyQ <- dailyQ %>%
    dplyr::mutate(yearmon = substr(date, 1, 7)) %>%
    dplyr::group_by(yearmon) %>%
    dplyr::summarise(Simulated = mean(Simulated, na.rm = T),
                     Observed = mean(Observed, na.rm = T)) %>%
    dplyr::ungroup()

  # Set zero values to 0.001
  monthlyQ$Observed <- ifelse(monthlyQ$Observed <= 0.001, 0.001, monthlyQ$Observed)
  monthlyQ$Simulated <- ifelse(monthlyQ$Simulated <= 0.001, 0.001, monthlyQ$Simulated)

  monthlyQ$Observed <- log10(monthlyQ$Observed)
  monthlyQ$Simulated <- log10(monthlyQ$Simulated)

  # Plot monthly obs v sim
  plot_out <- ggplot2::ggplot(monthlyQ, aes(x = Observed, y = Simulated)) +
    ggplot2::geom_point(alpha = 0.40) +
    ggplot2::theme_bw() +
    ggplot2::coord_cartesian(xlim = c(min(monthlyQ[,-1], na.rm = T), max(monthlyQ[,-1], na.rm = T)),
                             ylim = c(min(monthlyQ[,-1], na.rm = T), max(monthlyQ[,-1], na.rm = T))) +
    ggplot2::geom_abline(slope = 1, col = 'red') +
    ggplot2::labs(x = 'Log Observed [cfs]',
                  y = 'Log Simulated [cfs]',
                  title = 'Monthly Streamflow',
                  subtitle = paste0('Site #', site),
                  caption = paste0('n = ', formatC(nrow(monthlyQ), big.mark = ',')))
  print(plot_out)
  cat("\n\n")
}
# Run comparison plots for annual stats. 
for (stat in stat_list){
  cat("### ", stat, "{.tabset} \n")
  for (i in 1:length(annual_obs)){
    site <- sites[i]
    cat("#### ", site, "\n")
    #----- Daily data
    # Get daily Q
    df_Q <- dplyr::full_join(annual_sim[[i]][, c('WY', stat)],
                              annual_obs[[i]][, c('WY', stat)],
                              by = 'WY') %>%
      dplyr::rename(Simulated = paste0(stat,".x")) %>%
      dplyr::rename(Observed = paste0(stat,".y"))

    # dailyQ$Observed <- log10(dailyQ$Observed)
    # dailyQ$Simulated <- log10(dailyQ$Simulated)
    # 
    # Plot daily obs v sim
    comparison_plot <- ggplot2::ggplot(df_Q, aes(x = Observed, y = Simulated)) +
      ggplot2::geom_point(alpha = 0.50) +
      ggplot2::theme_bw() +
      ggplot2::coord_cartesian(xlim = c(min(df_Q[,-1], na.rm = T), max(df_Q[,-1], na.rm = T)),
                               ylim = c(min(df_Q[,-1], na.rm = T), max(df_Q[,-1], na.rm = T))) +
      ggplot2::geom_abline(slope = 1, col = 'red') +
      ggplot2::labs(x = 'Observed [cfs]',
                    y = 'Simulated [cfs]',
                    title = paste(stat, "for Site #", site),
                    caption = paste0('n = ', formatC(nrow(df_Q), big.mark = ',')))
    print(comparison_plot)
    cat("\n\n")
}
  cat("\n\n")
}

Methods

The plots show the observed vs simulated values for the given variable. The red line is the 1:1 line where the simulated and observed values match. For some of the plots the log of the values are shown to increase the visibility of low values. For daily comparisons, the daily values are plotted against each other for each day in the period. For monthly values, the average flow for each month is plotted. The other statistics plotted have one value per year of available data. During the log transform of monthly and daily data, any values that are less than 0.001 are set to 0.001 to prevent zero errors with logs.

Description of Statistics


Trends {.tabset}

Trend Summary

trend_comp <- purrr::map(sites, function(site){
  paste0(input_folder, site, '_com_trend.csv') %>%
    data.table::fread(input = ., data.table = F) %>%
    tibble::as_tibble()%>%
    # add site number
    dplyr::mutate(site = site) %>%
    dplyr::select(site, dplyr::everything()) %>%
    # add statistic categories
    dplyr::mutate(category = NA) %>%
    dplyr::mutate(category = ifelse(grepl('mean', `annual_stat`, ignore.case = T), 'Mean', category)) %>%
    dplyr::mutate(category = ifelse(grepl('max', annual_stat, ignore.case = T), 'Max', category)) %>%
    dplyr::mutate(category = ifelse(grepl('min', annual_stat, ignore.case = T), 'Min', category)) %>%
    dplyr::mutate(category = ifelse(grepl('percent_annual', annual_stat, ignore.case = T), 'Percent Annual',
                                    category)) %>%
    dplyr::mutate(category = ifelse(grepl('CVD', annual_stat, ignore.case = T), 'CVD', category)) %>%
    dplyr::mutate(category = ifelse(grepl('percentile', annual_stat, ignore.case = T), 'Percentile',
                                    category)) %>%
    dplyr::mutate(category = ifelse(grepl('sd', annual_stat, ignore.case = T), 'SD', category)) %>%
    dplyr::mutate(category = ifelse(grepl('q', annual_stat, ignore.case = T), 'Q', category)) %>%
    dplyr::mutate_if(is.numeric, round, digits = 2) %>%
    # transform to long format
    tidyr::pivot_longer(cols = sen_slope_diff:val_perc_change_diff) %>%
    dplyr::rename(annual_stat = annual_stat)
}) %>%
  dplyr::bind_rows()


# stat = annual_sd
trend_difference <- trend_comp %>%
  dplyr::filter(name == "val_perc_change_diff") #%>%
  # dplyr::filter(`Annual Stat` %in% c('annual_sd', 'high_q1'))

trend_difference$value[trend_difference$value > 100] <- 100
trend_difference$value[trend_difference$value < -100] <- -100


bar_plot <- ggplot2::ggplot(trend_difference, aes(x = value, y = annual_stat)) +
  ggplot2::geom_point(aes(color=site)) + 
  ggplot2::theme_bw() +
  ggplot2::facet_grid(rows = ggplot2::vars(category),
                      scales = 'free_y') +
  ggplot2::labs(title = paste('Trend Differences')) +
  xlim(-100, 100)
print(bar_plot)

Trend Plots {.tabset}

# Build Plots
plot_list <- c()

for (stat in stat_list){

  subplot_list <- c()

  for (i in 1:length(annual_obs)){ 
    # Slice trend data
    trend_data_obs <- filter(annual_trend_obs[[i]], annual_stat == stat)
    # slice discharge data
    discharge_data_obs <- annual_obs[[i]] %>% select('WY', stat)
    discharge_data_obs <- dplyr::rename(discharge_data_obs, "Obs" = stat)

    # Slice trend data
    trend_data_sim <- filter(annual_trend_sim[[i]], annual_stat == stat)
    # slice discharge data
    discharge_data_sim <- annual_sim[[i]] %>% select('WY', stat)
    discharge_data_sim <- dplyr::rename(discharge_data_sim, "Sim" = stat)

    trend_data_diff <- filter(annual_trend_comp[[i]], val_perc_change_diff == stat)

    df <- merge(discharge_data_obs, discharge_data_sim, by = "WY")
    df <- reshape::melt(df, id = "WY")

    p <- ggplot2::ggplot(df, aes(x=WY, y=value, colour = variable)) + 
      geom_point() +
      scale_color_manual(values = c("blue", "red")) +
      geom_abline(intercept = trend_data_obs$intercept, slope = trend_data_obs$sen_slope, color = "blue") +
      # geom_point(data = discharge_data_sim, color = 'red') +
      geom_abline(intercept = trend_data_sim$intercept, slope = trend_data_sim$sen_slope, color = 'red') +
      labs(title = paste0("Site ", sites[i]),
           # subtitle = paste0("Difference in Percent Change: ", trend_data_diff$percent_change_q_difference),
           y = stat) +
      theme(axis.title.y = element_blank(),
            legend.title = element_blank())

    subplot_list[[i]] <- p
  }
  nCol <- 2
  plot_list[[paste0(stat, '_plot')]] <- subplot_list
}
# Build Tables

observation_list <- c()
simulation_list <- c()
comparison_list <- c()

for (stat in stat_list){

  obs_list <- c()
  sim_list <- c()
  comp_list <- c()

  for (i in 1:length(annual_obs)){ 
    # Slice trend data
    obs_list[[i]] <- t(filter(annual_trend_obs[[i]], annual_stat == stat))

    # Slice trend data
    sim_list[[i]] <- t(filter(annual_trend_sim[[i]], annual_stat == stat))

    # Slice difference data
    comp_list[[i]] <- t(filter(annual_trend_comp[[i]], annual_stat == stat))

  }
  obs_df <- dplyr::bind_cols(obs_list)
  colnames(obs_df) <- sites
  obs_df$Statistics <- colnames(annual_trend_obs[[i]])
  obs_df <- tail(obs_df,9)
  obs_df <- obs_df %>% relocate(Statistics)
  obs_df <- obs_df[-c(1,9),]
  observation_list[[paste0(stat, '_table')]] <- obs_df

  sim_df <- dplyr::bind_cols(sim_list)
  colnames(sim_df) <- sites
  sim_df$Statistics <- colnames(annual_trend_sim[[i]])
  sim_df <- tail(sim_df,9)
  sim_df <- sim_df %>% relocate(Statistics)
  sim_df <- sim_df[-c(1,9),]
  simulation_list[[paste0(stat, '_table')]] <- sim_df

  comp_df <- dplyr::bind_cols(comp_list)
  colnames(comp_df) <- sites
  comp_df$Statistics <- colnames(annual_trend_comp[[i]])
  comp_df <- tail(comp_df,4)
  comp_df <- comp_df[-c(4),]
  comp_df <- comp_df %>% relocate(Statistics)
  comparison_list[[paste0(stat, '_table')]] <- comp_df

}
# # Filter by column
# trend_data <- filter(df_trend, annual_stat == "high_q1")
# # Filter by row
# discharge_data <- df %>% select('WY', 'high_q1')
# Set plots up for each tab. 
for (i in 1:length(plot_list)) {
  cat("#### ",stat_list[[i]],"\n")
  plot <- cowplot::plot_grid(plotlist = plot_list[[i]], ncol=nCol)
  print(plot)

  cat('\n\n Observed Trends \n')
  print(htmltools::tagList(DT::datatable(observation_list[[i]])))
  cat('\n Simulated Trends \n')
  print(htmltools::tagList(DT::datatable(simulation_list[[i]])))
  cat('\n Simulated minus Observed Trends \n')
  print(htmltools::tagList(DT::datatable(comparison_list[[i]])))

  cat('\n\n')
}

Trend Methods

Trend tests were completed using Sen’s slope [40] for magnitude, except in cases when the annual series had “zero-flow” status for any observed or modeled time series for a given basin; in those cases, the time series was converted to a binomial series, and a logistic regression [41] was done with year as the explanatory variable. The predicted probabilities from the model (dependent variable) were used to quantify the change in probability of a zero-flow year over time. Sen’s slope was computed using the R function “GeneralMannKendall.R”; the function is authored by Benjamin Renard of Irstea, UR Riverly, Lyon, France and is available for download from Dudley et al. [42]. Logistic regression was completed using the “glm” function in the R “stats” package with family = binomial (link = “logit”). Correlations between basin characteristics for study basins and project results were computed with Kendall’s tau. (Hodgkins et al 2020)

Table Definitions

Description of Categories

Description of Statistics

References




Try the HyMETT package in your browser

Any scripts or data that you put into this service are public.

HyMETT documentation built on Nov. 23, 2023, 1:08 a.m.