Report for Few Sites'
In HyMETT: Hydrologic Model Evaluation and Time-Series Tools

knitr::opts_chunk$set(echo = TRUE)
# mandatory libraries
library(tidyverse)
library(tidyr)
# speed-enhancing libraries
library(data.table)
# report-enhancing libraries
library(DT)
# libraries just for introduction/study site sections
library(leaflet)
library(dataRetrieval)
library(cowplot)
library(reshape)
# default values
input_folder <- paste0(system.file("extdata", package = "HyMETT"),"/")
sites <- c("01013500", "01021470","01022500","01030500","01031500","01047000")
# Decimal Lat and long. 
lat <- c(47.2375,  44.8008333, 44.608056, 45.50111, 45.175, 44.86916667)
long <- c(-68.58278,  -67.725, -67.935278, -68.3058333, -69.3147222, -69.955)
# .drop = c('V1')     # extra column of row numbers to drop during CSV import

# Setup some default values. 

# GOF Metrics you want to plot
metric_list <- c("percent_bias", "volumetric_efficiency", "spearman_rho", "NRMSE")

# Categories of annual stats to plot for general GOF and monthly GOF
category_list <- c("Mean", "Min", "Max", "Percent Annual", "Percentile", "Q")
category_list_monthly <- c('Mean', 'Min', 'Max', 'Percent Annual')

# Annual statistics to plot for comparisons and trends.
stat_list <- c('annual_mean', 'high_q3', 'low_q7', "annual_1_percentile", "Aug_mean")

# TODO: Add catch all category for stats that don't fit into another category. 
# TODO: Make stat vs metric names consistent
# TODO: add low flow stats to a category

#------- Import all necessary data
gof <- purrr::map(sites, function(site){
  # import as dataframe
  paste0(input_folder, site, '_GOF.csv') %>%
    data.table::fread(input = ., data.table = F) %>%
    tibble::as_tibble() %>%
    # add site number
    dplyr::select(-one_of('site_no'))%>%
    dplyr::mutate(site = site)  %>%
    dplyr::select(site, dplyr::everything()) %>%
    dplyr::rename(`Annual Stat` = annual_stat) %>%
    dplyr::rename(name = metric) %>%
    # add statistic categories
    dplyr::mutate(category = NA) %>%
    dplyr::mutate(category = ifelse(grepl('mean', `Annual Stat`, ignore.case = T), 'Mean', category)) %>%
    dplyr::mutate(category = ifelse(grepl('max', `Annual Stat`, ignore.case = T), 'Max', category)) %>%
    dplyr::mutate(category = ifelse(grepl('min', `Annual Stat`, ignore.case = T), 'Min', category)) %>%
    dplyr::mutate(category = ifelse(grepl('percent_annual', `Annual Stat`, ignore.case = T), 'Percent Annual',
                                    category)) %>%
    dplyr::mutate(category = ifelse(grepl('CVD', `Annual Stat`, ignore.case = T), 'CVD', category)) %>%
    dplyr::mutate(category = ifelse(grepl('percentile', `Annual Stat`, ignore.case = T), 'Percentile',
                                    category)) %>%
    dplyr::mutate(category = ifelse(grepl('sd', `Annual Stat`, ignore.case = T), 'SD', category)) %>%
    dplyr::mutate(category = ifelse(grepl('q', `Annual Stat`, ignore.case = T), 'Q', category)) %>%
    dplyr::mutate_if(is.numeric, round, digits = 2)
  }) %>%
  dplyr::bind_rows()

## Streamflow - Daily
daily_sim <- purrr::map(sites, function(site){
  daily_sim <- paste0(input_folder, site, '_mod_daily.csv') %>%
    data.table::fread(input = ., data.table = F) %>%
    dplyr::mutate(date = as.Date(Date)) %>%
    dplyr::mutate(site = site) %>%
    dplyr::select(site, dplyr::everything()) %>%
    dplyr::select(-one_of('site_no'))%>%
    dplyr::select(-one_of('Date'))%>%
    dplyr::rename(Q = value) %>%
    tibble::as_tibble()
})
daily_sim_long <- dplyr::bind_rows(daily_sim)

daily_obs <- purrr::map(sites, function(site){
  daily_sim <- paste0(input_folder, site, '_obs_daily.csv') %>%
    data.table::fread(input = ., data.table = F) %>%
    dplyr::mutate(date = as.Date(Date)) %>%
    dplyr::mutate(site = site) %>%
    dplyr::select(site, dplyr::everything()) %>%
    dplyr::select(-one_of('site_no'))%>%
    dplyr::select(-one_of('Date'))%>%
    dplyr::rename(Q = value) %>%
    tibble::as_tibble()
})
daily_obs_long <- dplyr::bind_rows(daily_obs)


## Streamflow - Annual
annual_sim <- purrr::map(sites, function(site){
  paste0(input_folder, site, '_mod_annual.csv') %>%
    data.table::fread(input = ., data.table = F) %>%
    dplyr::mutate(site = site) %>%
    dplyr::select(-one_of('site_no'))%>%
    tibble::as_tibble()
})

annual_obs <- purrr::map(sites, function(site){
  paste0(input_folder, site, '_obs_annual.csv') %>%
    data.table::fread(input = ., data.table = F) %>%
    dplyr::mutate(site = site) %>%
    dplyr::select(-one_of('site_no'))%>%
    tibble::as_tibble()
})


## Trends - Annual
annual_trend_comp <- purrr::map(sites, function(site){
  paste0(input_folder, site, '_com_trend.csv') %>%
    data.table::fread(input = ., data.table = F) %>%
    dplyr::mutate(site = site) %>%
    dplyr::select(-one_of('site_no'))%>%
    tibble::as_tibble()
})
annual_trend_sim <- purrr::map(sites, function(site){
  paste0(input_folder, site, '_mod_trend.csv') %>%
    data.table::fread(input = ., data.table = F) %>%
    dplyr::mutate(site = site) %>%
    dplyr::select(-one_of('site_no'))%>%
    tibble::as_tibble()
})
annual_trend_obs <- purrr::map(sites, function(site){
  paste0(input_folder, site, '_obs_trend.csv') %>%
    data.table::fread(input = ., data.table = F) %>%
    dplyr::mutate(site = site) %>%
    dplyr::select(-one_of('site_no'))%>%
    tibble::as_tibble()
})

# Import POR

POR_annual_max <- purrr::map(sites, function(site){
  paste0(input_folder, site, '_POR_annual_max.csv') %>%
    data.table::fread(input = ., data.table = F) %>%
    dplyr::mutate(site = site) %>%
    dplyr::select(-one_of('site_no'))%>%
    tibble::as_tibble()
})
POR_annual_max <- dplyr::bind_rows(POR_annual_max)

POR_annual_min <- purrr::map(sites, function(site){
  paste0(input_folder, site, '_POR_annual_min.csv') %>%
    data.table::fread(input = ., data.table = F) %>%
    dplyr::mutate(site = site) %>%
    dplyr::select(-one_of('site_no'))%>%
    tibble::as_tibble()
})
POR_annual_min <- dplyr::bind_rows(POR_annual_min)

POR_daily <- purrr::map(sites, function(site){
  paste0(input_folder, site, '_POR_daily_metrics.csv') %>%
    data.table::fread(input = ., data.table = F) %>%
    dplyr::mutate(site = site) %>%
    dplyr::select(-one_of('site_no'))%>%
    tibble::as_tibble()
})

Introduction

This is a an automated report for 1-5 sites generated from thy HyMETT tool in R. The sites included in this report are:

cat( sites)

This report provides goodness-of-fit measurements between modeled and observational streamflow at these gauged sites. Measures of model skill applied here are percent bias, volumetric efficiency (VE), Spearman's rho correlation, and normalized root mean square error (NRMSE). These metrics are applied to numerous streamflow statistics summarizing streamflow at daily, monthly, annual, and period-of-record timesteps. Streamflow statistics include mean, minima, maxima, percent of annual, standard deviation, percentiles, and multi-day high flows (Q).

Study sites

## Map site location via leaflet
# Get site coordinates
site_coord <- tryCatch({
  NULL # dataRetrieval::readNWISsite(siteNumbers = site)
}, error = function(e){ NULL })

# The firewall on my laptop doesn't let me use dataRetrieval package, so including
# example output here
site_coord <- data.frame(site_no = sites,
                         dec_lat_va = lat,
                         dec_long_va = long)
# Create map object if coordinates are available
if (!is.null(site_coord)){
  leaflet::leaflet(width = "100%") %>%
    leaflet::addTiles() %>%
    leaflet::addMarkers(data = site_coord, lng = ~dec_long_va, lat = ~dec_lat_va, popup = ~site_no)
}
# Create text warning if site coordinates were not available
coord_warn <- ifelse(is.null(site_coord),
  "Warning: Study site coordinates were not retrievable through the 'dataRetrieval' R package.", "")

# (add warning in text below)

r coord_warn

Timeseries Tables {.tabset}

General Information {.tabset}

Dates {.tabset .tabset-dropdown}

# Collect info on daily obs and sim timeseries
daily_ts_sim <- daily_sim_long %>%
  dplyr::select(site, date, Q) %>%
  dplyr::group_by(site) %>%
  dplyr::summarise(start = min(date, na.rm = T), end = max(date, na.rm = T)) %>%
  dplyr::rename('Simulated Start Date' = start, 'Simulated End Date' = end)
daily_ts_obs <- daily_obs_long %>%
  dplyr::select(site, date, Q) %>%
  dplyr::group_by(site) %>%
  dplyr::summarise(start = min(date, na.rm = T), end = max(date, na.rm = T)) %>%
  dplyr::rename('Observed Start Date' = start, 'Observed End Date' = end)
daily_info <- dplyr::full_join(daily_ts_obs, daily_ts_sim, by = 'site')
DT::datatable(daily_info)

Mean {.tabset .tabset-dropdown}

# Calculate mean daily Q for each site
daily_mean_sim <- daily_sim_long %>%
  dplyr::select(site, Q) %>%
  dplyr::group_by(site) %>%
  dplyr::summarise(meanQ = mean(Q, na.rm = T)) %>%
  dplyr::rename(`Mean Daily Q - Simulated [cfs]` = meanQ)
daily_mean_obs <- daily_obs_long %>%
  dplyr::select(site, Q) %>%
  dplyr::group_by(site) %>%
  dplyr::summarise(meanQ = mean(Q, na.rm = T)) %>%
  dplyr::rename(`Mean Daily Q - Observed [cfs]` = meanQ)
daily_mean <- dplyr::full_join(daily_mean_obs, daily_mean_sim, by = 'site') %>%
  dplyr::mutate_if(is.numeric, round, digits = 2)

# Generate table of mean values over POR
DT::datatable(daily_mean)

Min {.tabset .tabset-dropdown}

# Calculate min daily Q for each site
daily_min_sim <- daily_sim_long %>%
  dplyr::select(site, Q) %>%
  dplyr::group_by(site) %>%
  dplyr::summarise(minQ = min(Q, na.rm = T)) %>%
  dplyr::rename(`Minimum Daily Q - Simulated [cfs]` = minQ)
daily_min_obs <- daily_obs_long %>%
  dplyr::select(site, Q) %>%
  dplyr::group_by(site) %>%
  dplyr::summarise(minQ = min(Q, na.rm = T)) %>%
  dplyr::rename(`Minimum Daily Q - Observed [cfs]` = minQ)
daily_min <- dplyr::full_join(daily_min_obs, daily_min_sim, by = 'site') %>%
  dplyr::mutate_if(is.numeric, round, digits = 2)
DT::datatable(daily_min)

Max {.tabset .tabset-dropdown}

# Calculate max daily Q for each site
daily_max_sim <- daily_sim_long %>%
  dplyr::select(site, Q) %>%
  dplyr::group_by(site) %>%
  dplyr::summarise(maxQ = max(Q, na.rm = T)) %>%
  dplyr::rename(`Maximum Daily Q - Simulated [cfs]` = maxQ)
daily_max_obs <- daily_obs_long %>%
  dplyr::select(site, Q) %>%
  dplyr::group_by(site) %>%
  dplyr::summarise(maxQ = max(Q, na.rm = T)) %>%
  dplyr::rename(`Maximum Daily Q - Observed [cfs]` = maxQ)
daily_max <- dplyr::full_join(daily_max_obs, daily_max_sim, by = 'site') %>%
  dplyr::mutate_if(is.numeric, round, digits = 2)
DT::datatable(daily_max)

Period of Record {.tabset}

10% AEP High

Period of Record 10% Annual Exceedance Probability for High Flows (Q1)

POR_annual_max_10 <- POR_annual_max %>%
    dplyr::filter(metric == 'high_q1') %>%
    dplyr::filter(percentile == 0.9) %>%
    dplyr::select(data,value,site) %>%
    dplyr::mutate_if(is.numeric, round, digits = 1)
POR_annual_max_10 <- tidyr::pivot_wider(POR_annual_max_10, names_from = data, values_from = value)
DT::datatable(POR_annual_max_10)

50% AEP High

Period of Record 50% Annual Exceedance Probability for High Flows (Q1)

POR_annual_max_50 <- POR_annual_max %>%
    dplyr::filter(metric == 'high_q1') %>%
    dplyr::filter(percentile == 0.5) %>%
    dplyr::select(data,value,site) %>%
    dplyr::mutate_if(is.numeric, round, digits = 1)
POR_annual_max_50 <- tidyr::pivot_wider(POR_annual_max_50, names_from = data, values_from = value)
DT::datatable(POR_annual_max_50)

10% AEP Low

Period of Record 10% Annual Exceedance Probability for Low Flows (Q1)

POR_annual_min_10 <- POR_annual_min %>%
    dplyr::filter(metric == 'low_q1') %>%
    dplyr::filter(return_period == '10-yr') %>%
    dplyr::select(data,value,site) %>%
    dplyr::mutate_if(is.numeric, round, digits = 1)
POR_annual_min_10 <- tidyr::pivot_wider(POR_annual_min_10, names_from = data, values_from = value)
DT::datatable(POR_annual_min_10)
# TODO: Make column names uniform between min and max POR

50% AEP Low

Period of Record 50% Annual Exceedance Probability for Low Flows (Q1)

POR_annual_min_50 <- POR_annual_min %>%
    dplyr::filter(metric == 'low_q1') %>%
    dplyr::filter(return_period == '2-yr') %>%
    dplyr::select(data,value,site) %>%
    dplyr::mutate_if(is.numeric, round, digits = 1)
POR_annual_min_50 <- tidyr::pivot_wider(POR_annual_min_50, names_from = data, values_from = value)
DT::datatable(POR_annual_min_50)

POR Daily {.tabset .tabset-dropdown}

for (i in 1:length(POR_daily)){
  POR_daily[[i]] <- tidyr::pivot_wider(POR_daily[[i]], names_from = data, values_from = value)
}

# split iris by Species and generate a datatable for each species
htmltools::tagList(
  lapply(POR_daily, datatable)
)

# TODO: be consistent about column names.

Goodness-of-fit metrics {.tabset}

# Non updated version of this code is at bottom of page.
# TODO: add other categories to list

for (categ in category_list){
  cat("### ",categ, "{.tabset .tabset-dropdown} \n")

  # cat("#### All  \n")
  #   ## Barplot of metrics (x = site, y = value)
  # box_plot <- gof %>%
  #   dplyr::filter(category == categ) %>%
  #   ggplot2::ggplot(., aes(x = name, y = value)) +
  #     ggplot2::geom_boxplot()
  # 
  # print(box_plot)
  # cat("\n\n")

  cat("#### By Site  \n")
  ## Barplot of metrics (x = site, y = value)
  bar_plot <- gof %>%
    dplyr::filter(category == categ) %>%
    dplyr::filter(name %in% metric_list) %>%
    # dplyr::mutate(Month = gsub('_mean', '', `Annual Stat`)) %>%
    # dplyr::mutate(Month = paste0(toupper(substr(Month, 1, 1)),
    #                              substr(Month, 2, 3))) %>%
    ggplot2::ggplot(., aes(x = site, y = value)) +
      ggplot2::geom_boxplot() +
      ggplot2::facet_wrap(facets = vars(name),
                          nrow = 1,
                          scales = 'free') +
      ggplot2::theme_bw() +
      ggplot2::labs(x = 'Site',
                    y = 'Value',
                    title = 'Goodness-of-fit boxplots.') +
      ggplot2::theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
  print(bar_plot)
  cat("\n\n")

  # TODO: Make this so that it can plot other sites.
  if (categ == "Mean"){
    cat("#### By Site and Month  \n")
    ## Heatmap of metrics by month
    df_heat <- gof %>%
      dplyr::filter(category == "Mean") %>%
      dplyr::filter(name %in% metric_list) %>%
      # dplyr::filter(name == 'Bias') %>%
      dplyr::mutate(Month = gsub('_mean', '', `Annual Stat`)) %>%
      dplyr::mutate(Month = paste0(toupper(substr(Month, 1, 1)),
                                   substr(Month, 2, 3))) %>%
      dplyr::mutate(Month = factor(Month, levels = rev(unique(Month))))
    stats_heat <- unique(df_heat$name)
    # Loop plot creation (assign to objects)
    for (i in 1:length(stats_heat)){
      assign(x = paste0('p', i),
             value = df_heat %>%
               dplyr::filter(name == stats_heat[i]) %>%
               ggplot2::ggplot(., aes(x = site, y = Month, fill = value)) +
               ggplot2::geom_tile() +
               ggplot2::theme_bw() +
               ggplot2::labs(x = 'Site',
                             y = 'Value',
                             title = stats_heat[i]) +
               ggplot2::theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
               ylab(NULL))

    }

    print(cowplot::plot_grid(p1,p2,p3,p4, nrow = 2, ncol = 2))
    cat("\n\n")
  }
  cat("\n\n")
}

GOF Methods

Percent Bias: Average percent bias at each basin was computed as the sum of the difference between modeled and observed values for each year (the sum of residuals), divided by the sum of the observed values and converted to percent.

Volumetric Efficiency (VE): An overall goodness of fit metric, that is similar to the more commonly used Nash Sutcliffe model efficiency (NSE). VE was used here as it balances the effect of small and large values evenly whereas NSE puts more weight on larger values. A value of 1 is a perfect match between the observed and simulated values. Lighter colors are closer to one and represent a better fit between simulated and observed values.

Spearman’s rank correlation coefficient is a non-parametric measure of statistical dependence between the ranking of two variables. Lighter colors represent higher Spearman’s values or better correlation between the observed and simulated values. Darker colors represent lower Spearman’s values or poorer correlation between the observed and simulated values.

Normalized Root Mean Square Error. Lighter colors are closer to zero reflecting less difference between simulated and observed values, while darker colors represent higher NRMSE values with more difference between simulated and observed values.

Other parameters which the tool calculates but which have not been included in this report are Pearson's r, RMSE, Nash-Sutcliffe-Efficiency, Mean Error, absolute error, and Kendall's Tau.

Description of Categories

Mean: Mean monthly flow for each year or specified month of each year.
Min: Min monthly flow for each year or specified month of each year.
Max: Max monthly flow for each year or specified month of each year.
Percent Annual: The yearly percentage of annual flow occurring in the specified month.
Percentile: Yearly flow percentiles for daily flow.
Q: High flow statistics
SD: Standard deviation statistics.

Description of Statistics

CVD: Center volume date. Reported as a julian date
month_max: The maximum daily value of the specified month for each year.
month_mean: The mean daily value of the specified month for each year.
month_min: The minimum daily value of the specified month for each year.
month_percent_annual: The percentage of yearly flow that occurred in the specified month for each year.
annual_#_percentile: The flow percentile for a given percent for each year.
high_q#: The highest flow value for each year. The q# is a moving average spanning that number of days.
month_sd: The standard deviation for a specific month for each year.
annual_sd: The standard deviation for each year.

Goodness-of-fit By-Month {.tabset}

for (stat in metric_list){
  cat("### ", stat, "{.tabset .tabset-dropdown}  \n")
  # stat = annual_sd
  gof_stat <- gof %>%
    dplyr::filter(name == stat)


  gof_stat <- gof %>%
    dplyr::filter(name == stat) %>%
    dplyr::mutate(Stat = `Annual Stat`) %>%
    dplyr::mutate(Stat = ifelse(grepl('Apr', Stat), month.abb[4], Stat)) %>%
    dplyr::mutate(Stat = ifelse(grepl('Aug', Stat), month.abb[8], Stat)) %>%
    dplyr::mutate(Stat = ifelse(grepl('Dec', Stat), month.abb[12], Stat)) %>%
    dplyr::mutate(Stat = ifelse(grepl('Feb', Stat), month.abb[2], Stat)) %>%
    dplyr::mutate(Stat = ifelse(grepl('Jan', Stat), month.abb[1], Stat)) %>%
    dplyr::mutate(Stat = ifelse(grepl('Jul', Stat), month.abb[7], Stat)) %>%
    dplyr::mutate(Stat = ifelse(grepl('Jun', Stat), month.abb[6], Stat)) %>%
    dplyr::mutate(Stat = ifelse(grepl('Mar', Stat), month.abb[3], Stat)) %>%
    dplyr::mutate(Stat = ifelse(grepl('May', Stat), month.abb[5], Stat)) %>%
    dplyr::mutate(Stat = ifelse(grepl('Nov', Stat), month.abb[11], Stat)) %>%
    dplyr::mutate(Stat = ifelse(grepl('Oct', Stat), month.abb[10], Stat)) %>%
    dplyr::mutate(Stat = ifelse(grepl('Sep', Stat), month.abb[9], Stat)) %>%
    dplyr::mutate(Stat = ifelse(grepl('annual', Stat), 'Annual', Stat)) # %>%
    # dplyr::mutate(Stat = factor(Stat, levels = c('Annual', month.abb))) %>%
    # dplyr::mutate(site = factor(site, levels = sort(unique(site))))

  for (stat_cat in category_list_monthly){

    cat("#### ", stat_cat, "\n")

    gof_category <- gof_stat %>%
      dplyr::filter(category == stat_cat) #%>%

  color_plot <- ggplot2::ggplot(gof_category, aes(y = value, x = Stat, fill = site)) +
    ggplot2::geom_bar(stat = 'identity', position=position_dodge()) +
    ggplot2::theme_bw() +
    ggplot2::geom_hline(yintercept = 0) + 
    ggplot2::facet_grid(rows = ggplot2::vars(category),
                        cols = ggplot2::vars(Stat),
                        scales = 'free') +
    ggplot2::theme(panel.spacing.x = unit(0, "lines"), 
                   axis.text.x=element_blank(),
                   axis.ticks.x=element_blank(),
                   legend.position='bottom',
                   legend.title = element_blank()) +
    ggplot2::labs(title = stat, x = "", y = stat)
  print(color_plot)


  cat("\n\n")
  }

  cat("\n\n")
}

GOF Methods

Other parameters which the tool calculates but which have not been included in this report are Pearson's r, RMSE, Nash-Sutcliffe-Efficiency, Mean Error, absolute error, and Kendall's Tau.

Description of Categories

Mean: Mean monthly flow for each year or specified month of each year.
Min: Min monthly flow for each year or specified month of each year.
Max: Max monthly flow for each year or specified month of each year.
Percent Annual: The yearly percentage of annual flow occurring in the specified month.
Percentile: Yearly flow percentiles for daily flow.
Q: High flow statistics
SD: Standard deviation statistics.

Description of Statistics

CVD: Center volume date. Reported as a julian date
month_max: The maximum daily value of the specified month for each year.
month_mean: The mean daily value of the specified month for each year.
month_min: The minimum daily value of the specified month for each year.
month_percent_annual: The percentage of yearly flow that occurred in the specified month for each year.
annual_#_percentile: The flow percentile for a given percent for each year.
high_q#: The highest flow value for each year. The q# is a moving average spanning that number of days.
month_sd: The standard deviation for a specific month for each year.
annual_sd: The standard deviation for each year.

Streamflow comparisons {.tabset}

Daily {.tabset}

for (i in 1:length(daily_obs)){
  site <- sites[i]
  cat("#### ", site, "\n")
  #----- Daily data
  # Get daily Q
  dailyQ <- dplyr::full_join(daily_sim[[i]][, c('date', 'Q')],
                            daily_obs[[i]][, c('date', 'Q')],
                            by = 'date') %>%
    dplyr::rename(Simulated = Q.x) %>%
    dplyr::rename(Observed = Q.y)

  # Set zero values to 0.001
  dailyQ$Observed <- ifelse(dailyQ$Observed <= 0.001, 0.001, dailyQ$Observed)
  dailyQ$Simulated <- ifelse(dailyQ$Simulated <= 0.001, 0.001, dailyQ$Simulated)

  dailyQ$Observed <- log10(dailyQ$Observed)
  dailyQ$Simulated <- log10(dailyQ$Simulated)

  # Plot daily obs v sim
  comparison_plot <- ggplot2::ggplot(dailyQ, aes(x = Observed, y = Simulated)) +
    ggplot2::geom_point(alpha = 0.10) +
    ggplot2::theme_bw() +
    ggplot2::coord_cartesian(xlim = c(min(dailyQ[,-1], na.rm = T), max(dailyQ[,-1], na.rm = T)),
                             ylim = c(min(dailyQ[,-1], na.rm = T), max(dailyQ[,-1], na.rm = T))) +
    ggplot2::geom_abline(slope = 1, col = 'red') +
    ggplot2::labs(x = 'Log Observed [cfs]',
                  y = 'Log Simulated [cfs]',
                  title = 'Daily Streamflow',
                  subtitle = paste0('Site #', site),
                  caption = paste0('n = ', formatC(nrow(dailyQ), big.mark = ',')))
  print(comparison_plot)
  cat("\n\n")
}

Monthly{.tabset}

#----- Monthly data
# Average daily values to monthly
for (i in 1:length(daily_obs)){
  site <- sites[i]
  cat("#### ", site, "\n")
  #----- Daily data
  # Get daily Q
  dailyQ <- dplyr::full_join(daily_sim[[i]][, c('date', 'Q')],
                            daily_obs[[i]][, c('date', 'Q')],
                            by = 'date') %>%
    dplyr::rename(Simulated = Q.x) %>%
    dplyr::rename(Observed = Q.y)


  monthlyQ <- dailyQ %>%
    dplyr::mutate(yearmon = substr(date, 1, 7)) %>%
    dplyr::group_by(yearmon) %>%
    dplyr::summarise(Simulated = mean(Simulated, na.rm = T),
                     Observed = mean(Observed, na.rm = T)) %>%
    dplyr::ungroup()

  # Set zero values to 0.001
  monthlyQ$Observed <- ifelse(monthlyQ$Observed <= 0.001, 0.001, monthlyQ$Observed)
  monthlyQ$Simulated <- ifelse(monthlyQ$Simulated <= 0.001, 0.001, monthlyQ$Simulated)

  monthlyQ$Observed <- log10(monthlyQ$Observed)
  monthlyQ$Simulated <- log10(monthlyQ$Simulated)

  # Plot monthly obs v sim
  plot_out <- ggplot2::ggplot(monthlyQ, aes(x = Observed, y = Simulated)) +
    ggplot2::geom_point(alpha = 0.40) +
    ggplot2::theme_bw() +
    ggplot2::coord_cartesian(xlim = c(min(monthlyQ[,-1], na.rm = T), max(monthlyQ[,-1], na.rm = T)),
                             ylim = c(min(monthlyQ[,-1], na.rm = T), max(monthlyQ[,-1], na.rm = T))) +
    ggplot2::geom_abline(slope = 1, col = 'red') +
    ggplot2::labs(x = 'Log Observed [cfs]',
                  y = 'Log Simulated [cfs]',
                  title = 'Monthly Streamflow',
                  subtitle = paste0('Site #', site),
                  caption = paste0('n = ', formatC(nrow(monthlyQ), big.mark = ',')))
  print(plot_out)
  cat("\n\n")
}

# Run comparison plots for annual stats. 
for (stat in stat_list){
  cat("### ", stat, "{.tabset} \n")
  for (i in 1:length(annual_obs)){
    site <- sites[i]
    cat("#### ", site, "\n")
    #----- Daily data
    # Get daily Q
    df_Q <- dplyr::full_join(annual_sim[[i]][, c('WY', stat)],
                              annual_obs[[i]][, c('WY', stat)],
                              by = 'WY') %>%
      dplyr::rename(Simulated = paste0(stat,".x")) %>%
      dplyr::rename(Observed = paste0(stat,".y"))

    # dailyQ$Observed <- log10(dailyQ$Observed)
    # dailyQ$Simulated <- log10(dailyQ$Simulated)
    # 
    # Plot daily obs v sim
    comparison_plot <- ggplot2::ggplot(df_Q, aes(x = Observed, y = Simulated)) +
      ggplot2::geom_point(alpha = 0.50) +
      ggplot2::theme_bw() +
      ggplot2::coord_cartesian(xlim = c(min(df_Q[,-1], na.rm = T), max(df_Q[,-1], na.rm = T)),
                               ylim = c(min(df_Q[,-1], na.rm = T), max(df_Q[,-1], na.rm = T))) +
      ggplot2::geom_abline(slope = 1, col = 'red') +
      ggplot2::labs(x = 'Observed [cfs]',
                    y = 'Simulated [cfs]',
                    title = paste(stat, "for Site #", site),
                    caption = paste0('n = ', formatC(nrow(df_Q), big.mark = ',')))
    print(comparison_plot)
    cat("\n\n")
}
  cat("\n\n")
}

Methods

The plots show the observed vs simulated values for the given variable. The red line is the 1:1 line where the simulated and observed values match. For some of the plots the log of the values are shown to increase the visibility of low values. For daily comparisons, the daily values are plotted against each other for each day in the period. For monthly values, the average flow for each month is plotted. The other statistics plotted have one value per year of available data. During the log transform of monthly and daily data, any values that are less than 0.001 are set to 0.001 to prevent zero errors with logs.

Description of Statistics

CVD: Center volume date. Reported as a julian date
month_max: The maximum daily value of the specified month for each year.
month_mean: The mean daily value of the specified month for each year.
month_min: The minimum daily value of the specified month for each year.
month_percent_annual: The percentage of yearly flow that occurred in the specified month for each year.
annual_#_percentile: The flow percentile for a given percent for each year.
high_q#: The highest flow value for each year. The q# is a moving average spanning that number of days.
month_sd: The standard deviation for a specific month for each year.
annual_sd: The standard deviation for each year.

Trends {.tabset}

Trend Summary

trend_comp <- purrr::map(sites, function(site){
  paste0(input_folder, site, '_com_trend.csv') %>%
    data.table::fread(input = ., data.table = F) %>%
    tibble::as_tibble()%>%
    # add site number
    dplyr::mutate(site = site) %>%
    dplyr::select(site, dplyr::everything()) %>%
    # add statistic categories
    dplyr::mutate(category = NA) %>%
    dplyr::mutate(category = ifelse(grepl('mean', `annual_stat`, ignore.case = T), 'Mean', category)) %>%
    dplyr::mutate(category = ifelse(grepl('max', annual_stat, ignore.case = T), 'Max', category)) %>%
    dplyr::mutate(category = ifelse(grepl('min', annual_stat, ignore.case = T), 'Min', category)) %>%
    dplyr::mutate(category = ifelse(grepl('percent_annual', annual_stat, ignore.case = T), 'Percent Annual',
                                    category)) %>%
    dplyr::mutate(category = ifelse(grepl('CVD', annual_stat, ignore.case = T), 'CVD', category)) %>%
    dplyr::mutate(category = ifelse(grepl('percentile', annual_stat, ignore.case = T), 'Percentile',
                                    category)) %>%
    dplyr::mutate(category = ifelse(grepl('sd', annual_stat, ignore.case = T), 'SD', category)) %>%
    dplyr::mutate(category = ifelse(grepl('q', annual_stat, ignore.case = T), 'Q', category)) %>%
    dplyr::mutate_if(is.numeric, round, digits = 2) %>%
    # transform to long format
    tidyr::pivot_longer(cols = sen_slope_diff:val_perc_change_diff) %>%
    dplyr::rename(annual_stat = annual_stat)
}) %>%
  dplyr::bind_rows()


# stat = annual_sd
trend_difference <- trend_comp %>%
  dplyr::filter(name == "val_perc_change_diff") #%>%
  # dplyr::filter(`Annual Stat` %in% c('annual_sd', 'high_q1'))

trend_difference$value[trend_difference$value > 100] <- 100
trend_difference$value[trend_difference$value < -100] <- -100


bar_plot <- ggplot2::ggplot(trend_difference, aes(x = value, y = annual_stat)) +
  ggplot2::geom_point(aes(color=site)) + 
  ggplot2::theme_bw() +
  ggplot2::facet_grid(rows = ggplot2::vars(category),
                      scales = 'free_y') +
  ggplot2::labs(title = paste('Trend Differences')) +
  xlim(-100, 100)
print(bar_plot)

Trend Plots {.tabset}

# Build Plots
plot_list <- c()

for (stat in stat_list){

  subplot_list <- c()

  for (i in 1:length(annual_obs)){ 
    # Slice trend data
    trend_data_obs <- filter(annual_trend_obs[[i]], annual_stat == stat)
    # slice discharge data
    discharge_data_obs <- annual_obs[[i]] %>% select('WY', stat)
    discharge_data_obs <- dplyr::rename(discharge_data_obs, "Obs" = stat)

    # Slice trend data
    trend_data_sim <- filter(annual_trend_sim[[i]], annual_stat == stat)
    # slice discharge data
    discharge_data_sim <- annual_sim[[i]] %>% select('WY', stat)
    discharge_data_sim <- dplyr::rename(discharge_data_sim, "Sim" = stat)

    trend_data_diff <- filter(annual_trend_comp[[i]], val_perc_change_diff == stat)

    df <- merge(discharge_data_obs, discharge_data_sim, by = "WY")
    df <- reshape::melt(df, id = "WY")

    p <- ggplot2::ggplot(df, aes(x=WY, y=value, colour = variable)) + 
      geom_point() +
      scale_color_manual(values = c("blue", "red")) +
      geom_abline(intercept = trend_data_obs$intercept, slope = trend_data_obs$sen_slope, color = "blue") +
      # geom_point(data = discharge_data_sim, color = 'red') +
      geom_abline(intercept = trend_data_sim$intercept, slope = trend_data_sim$sen_slope, color = 'red') +
      labs(title = paste0("Site ", sites[i]),
           # subtitle = paste0("Difference in Percent Change: ", trend_data_diff$percent_change_q_difference),
           y = stat) +
      theme(axis.title.y = element_blank(),
            legend.title = element_blank())

    subplot_list[[i]] <- p
  }
  nCol <- 2
  plot_list[[paste0(stat, '_plot')]] <- subplot_list
}

# Build Tables

observation_list <- c()
simulation_list <- c()
comparison_list <- c()

for (stat in stat_list){

  obs_list <- c()
  sim_list <- c()
  comp_list <- c()

  for (i in 1:length(annual_obs)){ 
    # Slice trend data
    obs_list[[i]] <- t(filter(annual_trend_obs[[i]], annual_stat == stat))

    # Slice trend data
    sim_list[[i]] <- t(filter(annual_trend_sim[[i]], annual_stat == stat))

    # Slice difference data
    comp_list[[i]] <- t(filter(annual_trend_comp[[i]], annual_stat == stat))

  }
  obs_df <- dplyr::bind_cols(obs_list)
  colnames(obs_df) <- sites
  obs_df$Statistics <- colnames(annual_trend_obs[[i]])
  obs_df <- tail(obs_df,9)
  obs_df <- obs_df %>% relocate(Statistics)
  obs_df <- obs_df[-c(1,9),]
  observation_list[[paste0(stat, '_table')]] <- obs_df

  sim_df <- dplyr::bind_cols(sim_list)
  colnames(sim_df) <- sites
  sim_df$Statistics <- colnames(annual_trend_sim[[i]])
  sim_df <- tail(sim_df,9)
  sim_df <- sim_df %>% relocate(Statistics)
  sim_df <- sim_df[-c(1,9),]
  simulation_list[[paste0(stat, '_table')]] <- sim_df

  comp_df <- dplyr::bind_cols(comp_list)
  colnames(comp_df) <- sites
  comp_df$Statistics <- colnames(annual_trend_comp[[i]])
  comp_df <- tail(comp_df,4)
  comp_df <- comp_df[-c(4),]
  comp_df <- comp_df %>% relocate(Statistics)
  comparison_list[[paste0(stat, '_table')]] <- comp_df

}
# # Filter by column
# trend_data <- filter(df_trend, annual_stat == "high_q1")
# # Filter by row
# discharge_data <- df %>% select('WY', 'high_q1')

# Set plots up for each tab. 
for (i in 1:length(plot_list)) {
  cat("#### ",stat_list[[i]],"\n")
  plot <- cowplot::plot_grid(plotlist = plot_list[[i]], ncol=nCol)
  print(plot)

  cat('\n\n Observed Trends \n')
  print(htmltools::tagList(DT::datatable(observation_list[[i]])))
  cat('\n Simulated Trends \n')
  print(htmltools::tagList(DT::datatable(simulation_list[[i]])))
  cat('\n Simulated minus Observed Trends \n')
  print(htmltools::tagList(DT::datatable(comparison_list[[i]])))

  cat('\n\n')
}

Trend Methods

Trend tests were completed using Sen’s slope [40] for magnitude, except in cases when the annual series had “zero-flow” status for any observed or modeled time series for a given basin; in those cases, the time series was converted to a binomial series, and a logistic regression [41] was done with year as the explanatory variable. The predicted probabilities from the model (dependent variable) were used to quantify the change in probability of a zero-flow year over time. Sen’s slope was computed using the R function “GeneralMannKendall.R”; the function is authored by Benjamin Renard of Irstea, UR Riverly, Lyon, France and is available for download from Dudley et al. [42]. Logistic regression was completed using the “glm” function in the R “stats” package with family = binomial (link = “logit”). Correlations between basin characteristics for study basins and project results were computed with Kendall’s tau. (Hodgkins et al 2020)

Table Definitions

p_stat: The p-value or probability of getting results as extreme as the actual results.
sen_slope: The slope of the trend line.
intercept: The y intercept of the fit line.
trend_mag: The magnitude of the trend in terms of expected change over the period.
val_beg: The expected value at the beginning of the time period
val_end: The expected value at the end of the time period
val_perc_change: The percent change in the expected value over the time period.
sen_slope_diff: The difference in sen slope between the simulated and observed.
trend_mag_diff: The difference in trend magnitude between the simulated and observed.
val_perc_change_diff: The difference in the percent change between the simulated and observed.

Description of Categories

Mean: Mean monthly flow for each year or specified month of each year.
Min: Min monthly flow for each year or specified month of each year.
Max: Max monthly flow for each year or specified month of each year.
Percent Annual: The yearly percentage of annual flow occurring in the specified month.
Percentile: Yearly flow percentiles for daily flow.
Q: High flow statistics
SD: Standard deviation statistics.

Description of Statistics

CVD: Center volume date. Reported as a julian date
month_max: The maximum daily value of the specified month for each year.
month_mean: The mean daily value of the specified month for each year.
month_min: The minimum daily value of the specified month for each year.
month_percent_annual: The percentage of yearly flow that occurred in the specified month for each year.
annual_#_percentile: The flow percentile for a given percent for each year.
high_q#: The highest flow value for each year. The q# is a moving average spanning that number of days.
month_sd: The standard deviation for a specific month for each year.
annual_sd: The standard deviation for each year.

References

Hodgkins G.A., Dudley R.W., Russell A.M., LaFontaine J.H., 2020. Comparing Trends in Modeled and Observed Streamflows at Minimally Altered Basins in the United States. Water 12(6):1728. https://doi.org/10.3390/w12061728

Any scripts or data that you put into this service are public.

HyMETT documentation built on Nov. 23, 2023, 1:08 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

HyMETT Hydrologic Model Evaluation and Time-Series Tools

Report for Few Sites' In HyMETT: Hydrologic Model Evaluation and Time-Series Tools

Introduction

Study sites

Timeseries Tables {.tabset}

General Information {.tabset}

Dates {.tabset .tabset-dropdown}

Mean {.tabset .tabset-dropdown}

Min {.tabset .tabset-dropdown}

Max {.tabset .tabset-dropdown}

Period of Record {.tabset}

10% AEP High

50% AEP High

10% AEP Low

50% AEP Low

POR Daily {.tabset .tabset-dropdown}

Goodness-of-fit metrics {.tabset}

GOF Methods

Description of Categories

Description of Statistics

Goodness-of-fit By-Month {.tabset}

GOF Methods

Description of Categories

Description of Statistics

Streamflow comparisons {.tabset}

Daily {.tabset}

Monthly{.tabset}

Methods

Description of Statistics

Trends {.tabset}

Trend Summary

Trend Plots {.tabset}

Trend Methods

Table Definitions

Description of Categories

Description of Statistics

References

Try the HyMETT package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

HyMETT
Hydrologic Model Evaluation and Time-Series Tools

Report for Few Sites'
In HyMETT: Hydrologic Model Evaluation and Time-Series Tools