In mpdougherty/razviz: A package to visualize performance of HEC-RAS models

knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

Introduction

library(razviz)

Import Hydrograph

hydrograph_csv <- system.file("extdata/hydrographs", "LD10.csv",
                    package = "razviz")
hydrograph <- readr::read_csv(hydrograph_csv)

rm_event_stats <- function(hydrograph, rm_event, cal_stats) {
  # Get values from the cal_stats table for the current rm_event
  run_type  <- cal_stats[cal_stats$rm_event == rm_event, ]$Run_type
  run_num   <- cal_stats[cal_stats$rm_event == rm_event, ]$Run_num
  river_sta <- cal_stats[cal_stats$rm_event == rm_event, ]$River_Sta
  event     <- cal_stats[cal_stats$rm_event == rm_event, ]$Event

  # Subset the cal data frame for the rm_event
  c <- dplyr::filter(cal_filtered, Run_type == run_type,
                                 Run_num == run_num,
                                 River_Sta == river_sta,
                                 Event == event)

  # Convert from long to wide
  c_wide <- tidyr::pivot_wider(c,
                               names_from = Type,
                               values_from = value,
                               values_fn = mean)
  # Remove missing values
  c_wide_na <- na.omit(c_wide)

  # Calculate water surface goodness of fit statistics for water surface
  # NOTE: Not all the gages have observed water surface (Obs_WS) data 
  if (all(c("WS_Elev", "Obs_WS") %in% colnames(c_wide_na))) { #check if water surface elevation is available for this rm_event
    if(all(!is.na(c_wide_na$WS_Elev)) & all(!is.na(c_wide_na$Obs_WS))) { #check that there is data in the columns
      # Create linear model of modeled water surface elevation
      rm_event_WS_lm <- lm(c_wide_na$WS_Elev ~ c_wide_na$Obs_WS,
                           data = c_wide_na)

      # Set R Squared for modeled water surface elevation
      cal_stats[cal_stats$rm_event == rm_event, ]$WS_R2 <-
        summary(rm_event_WS_lm)$r.squared

      # Set RMSE for modeled water surface elevation
      cal_stats[cal_stats$rm_event == rm_event,]$WS_RMSE <-
        sqrt(mean((c_wide_na$WS_Elev - c_wide_na$Obs_WS)^2))

      # Set Mean Average Error (MAE) for modeled water surface elevation
      cal_stats[cal_stats$rm_event == rm_event,]$WS_MAE <-
        mean(abs(c_wide_na$WS_Elev - c_wide_na$Obs_WS))
    }
  }

  # Calculate discharge goodness of fit statistics
  if (all(c("Model_Q", "Obs_Q") %in% colnames(c_wide))) {
    if(all(!is.na(c_wide_na$Model_Q)) & all(!is.na(c_wide_na$Obs_Q))) {
      # Create linear model of modeled water surface elevation
      rm_event_Q_lm <- lm(c_wide_na$Model_Q ~ c_wide_na$Obs_Q,
                          data = c_wide_na)

      # Set R Squared for modeled Q
      cal_stats[cal_stats$rm_event == rm_event, ]$Q_R2 <-
        summary(rm_event_Q_lm)$r.squared

      # Set RMSE for modeled Q
      cal_stats[cal_stats$rm_event == rm_event,]$Q_RMSE <-
        sqrt(mean((c_wide_na$Model_Q - c_wide_na$Obs_Q)^2))

      # Set Mean Average Error (MAE) for modeled Q
      cal_stats[cal_stats$rm_event == rm_event,]$Q_MAE <-
        mean(abs(c_wide_na$Model_Q - c_wide_na$Obs_Q))
    }
  }

  return(cal_stats)
}

Calculate Goodness-of-fit Statistics

cal_filtered <- cal[!is.na(cal$Run_type),]

cal_filtered <- cal_filtered[!is.na(cal_filtered$value),]

# Create a table with a unique set of records for "Run_type", "Run_num", "River_Sta", "Event"
cal_stats <- unique(cal_filtered[,c("Run_type", "Run_num", "River_Sta", "Event")])
# Sort the table by "Run_type", "Run_num", "Event", "River_Sta"
cal_stats <- arrange(cal_stats, Run_type, Run_num, Event, desc(River_Sta))
# Create a column to represent river mile events
cal_stats <- add_column(cal_stats, rm_event = 1:length(cal_stats$Run_type), .before = 1)

# Add goodness-of-fit fields to cal_stats
cal_stats <- add_column(cal_stats, WS_R2   = as.numeric(rep(NA, times = length(cal_stats$rm_event))))
cal_stats <- add_column(cal_stats, WS_RMSE = as.numeric(rep(NA, times = length(cal_stats$rm_event))))
cal_stats <- add_column(cal_stats, WS_MAE  = as.numeric(rep(NA, times = length(cal_stats$rm_event))))
cal_stats <- add_column(cal_stats, Q_R2   = as.numeric(rep(NA, times = length(cal_stats$rm_event))))
cal_stats <- add_column(cal_stats, Q_RMSE = as.numeric(rep(NA, times = length(cal_stats$rm_event))))
cal_stats <- add_column(cal_stats, Q_MAE  = as.numeric(rep(NA, times = length(cal_stats$rm_event))))
cal_stats_empty <- cal_stats




# Iterate through cal_stats table and calculate stats - run through each event
for (j in cal_stats$rm_event) {
    # Set the current rm_event
    rm_event <- cal_stats[cal_stats$rm_event == j,]$rm_event
    # Call the goodness-of-fit stats function
    cal_stats <- rm_event_stats(cal_filtered, rm_event, cal_stats)
    assign(paste0("StatEvent",rm_event), na.omit(cal_stats))
}

Stat_List <- mget(ls(pattern="StatEvent"))
Stats_Total <- bind_rows(Stat_List)

#filter to each plan
cal_stats_2001 <- dplyr::filter(cal_stats, Event == 2001)

#merge with name of reach 
cal_stats_2001 <- merge(cal_stats_2001, Formatted_UnsteadyFlowFile_Unique[,c(3,5)], by.x="River_Sta", by.y = "RiverMile")
cal_stats_2001 <- cal_stats_2001[,c(1,12,6:11)]

#save as .csv