inst/extdata/Operational_single_site.R

#### Operational Script for HyMETT functions on a single site with observed and modeled data ####
#### Load libraries ####
library(HyMETT)
library(dplyr)
#### Load and pre-format data ####
##### User inputs #####
OBS_dir <- paste0(system.file("extdata", package = "HyMETT"),"/")
MOD_dir <- paste0(system.file("extdata", package = "HyMETT"),"/")
out_dir <- paste0(normalizePath(tempdir(), winslash = "/"), "/")
site_no <- "01013500"
#site_no <- "08202700"#zero flow
#### Load and pre-format data ####
model <- read.csv(paste0(MOD_dir, site_no, "_MOD.csv"), header = T, stringsAsFactors = FALSE)
model$Date <- as.Date(model$date, format = "%Y-%m-%d")

observations <- read.csv(paste0(OBS_dir, site_no, "_OBS.csv"), header = T, stringsAsFactors = FALSE)
observations$Date <- as.Date(observations$date, format = "%Y-%m-%d")

# trim daily to common POR
porStart <- max(min(model$Date), min(observations$Date))
porEnd <- min(max(model$Date), max(observations$Date))

model <- dplyr::filter(model, Date >= porStart, Date <= porEnd)
observations <- dplyr::filter(observations, Date >= porStart, Date <= porEnd)



#### Pre-process data ####
obs_WY <- HyMETT::preproc_main(Date = observations$Date, 
                               value = observations$streamflow_cfs,
                               date_format = "%Y-%m-%d",
                               calc_WSCVD = FALSE)

mod_WY <- HyMETT::preproc_main(Date = model$Date, 
                               value = model$streamflow_cfs,
                               date_format = "%Y-%m-%d",
                               calc_WSCVD = FALSE)

# count NA (ie. no value) days
na_obs <- obs_WY$daily$Date[is.na(obs_WY$daily$value)]
na_mod <- mod_WY$daily$Date[is.na(mod_WY$daily$value)]
na_all <- unique(c(na_obs, na_mod))

# trim to complete annual data
compYears <- merge(obs_WY$audit, mod_WY$audit, by = "WY", all = TRUE, suffixes = c(".obs",".mod"))
compYears <- dplyr::filter(compYears, complete.obs == TRUE, complete.mod == TRUE)

obs_WY$annual <- obs_WY$annual[obs_WY$annual$WY %in% compYears$WY,]
mod_WY$annual <- mod_WY$annual[mod_WY$annual$WY %in% compYears$WY,]

#### Trends in annual statistics ####
if(nrow(compYears) >= 8){
  obs_WY_trend <- lapply(obs_WY$annual, 
                         HyMETT::calc_annual_stat_trend, #FUN
                         year = obs_WY$annual$WY, data = NULL)       #args to FUN
  obs_WY_trend <- dplyr::bind_rows(obs_WY_trend, .id = "annual_stat")
  obs_WY_trend <- dplyr::filter(obs_WY_trend, annual_stat != "WY") #remove WY row
  
  mod_WY_trend <- lapply(mod_WY$annual, 
                         HyMETT::calc_annual_stat_trend, #FUN
                         year = mod_WY$annual$WY, data = NULL)       #args to FUN
  mod_WY_trend <- dplyr::bind_rows(mod_WY_trend, .id = "annual_stat")
  mod_WY_trend <- dplyr::filter(mod_WY_trend, annual_stat != "WY") #remove WY row
  
  # trend comparison
  comp_WY_trend <- tibble::tibble(annual_stat = mod_WY_trend$annual_stat,
                                  sen_slope_diff = mod_WY_trend$sen_slope - obs_WY_trend$sen_slope,
                                  trend_mag_diff = mod_WY_trend$trend_mag - obs_WY_trend$trend_mag,
                                  val_perc_change_diff = mod_WY_trend$val_perc_change - obs_WY_trend$val_perc_change)
} else{
  obs_WY_trend <- NA
  mod_WY_trend <- NA
  comp_WY_trend <- NA
}

#### Goodness-of-Fit statistics ####
gof_WY <- mapply(HyMETT::GOF_summary, 
                 mod_WY$annual, 
                 obs_WY$annual, 
                 MoreArgs = list(rmse_normalize = "range"),
                 SIMPLIFY = FALSE)
gof_WY <- dplyr::bind_rows(gof_WY, .id = "annual_stat")
gof_WY <- dplyr::filter(gof_WY, annual_stat != "WY") #remove WY row

# format for report-style table
gof_WY_report <- tidyr::pivot_wider(gof_WY, 
                                    id_cols = annual_stat, 
                                    names_from = metric, 
                                    values_from = c(value, p_value))
gof_WY_report <- dplyr::select(gof_WY_report, !c(p_value_mean_absolute_error,
                                                 p_value_mean_error,
                                                 p_value_nash_sutcliffe_efficiency,
                                                 p_value_percent_bias,p_value_RMSE,
                                                 p_value_NRMSE,
                                                 p_value_volumetric_efficiency))

#### Period-of-record statistics ####
## POR daily metrics
POR_daily_obs <- POR_distribution_metrics(obs_WY$daily$value)
POR_daily_mod <- POR_distribution_metrics(mod_WY$daily$value)
# deseasonalized AR-1
ar1_obs <- POR_calc_AR1(value = POR_deseasonalize(value = obs_WY$daily$value,
                                                  Date = obs_WY$daily$Date,
                                                  time_step = "daily"),
                        Date = obs_WY$daily$Date)
ar1_mod <- POR_calc_AR1(value = POR_deseasonalize(value = mod_WY$daily$value,
                                                  Date = mod_WY$daily$Date,
                                                  time_step = "daily"),
                        Date = mod_WY$daily$Date)
POR_daily_obs <- tibble::add_row(POR_daily_obs, metric = "lag1_autocorrelation", value = ar1_obs)
POR_daily_mod <- tibble::add_row(POR_daily_mod, metric = "lag1_autocorrelation", value = ar1_mod)
rm(ar1_obs, ar1_mod)
# compute seasonal amplitude and phase of daily flows
seas_obs <- POR_calc_amp_and_phase(value = obs_WY$daily$value,
                                   Date = obs_WY$daily$Date,
                                   time_step = "daily")
seas_mod <- POR_calc_amp_and_phase(value = mod_WY$daily$value,
                                    Date = mod_WY$daily$Date,
                                    time_step = "daily")
POR_daily_obs <- tibble::add_row(POR_daily_obs, metric = seas_obs$metric, value = seas_obs$value)
POR_daily_mod <- tibble::add_row(POR_daily_mod, metric = seas_mod$metric, value = seas_mod$value)
rm(seas_obs, seas_mod)
# daily - label and combine
POR_daily_obs <- tibble::add_column(POR_daily_obs, data = "observed", .before = 1)
POR_daily_mod <- tibble::add_column(POR_daily_mod, data = "modeled", .before = 1)
POR_daily_metrics <- rbind(POR_daily_obs, POR_daily_mod)
rm(POR_daily_obs, POR_daily_mod)

## POR annual metrics
POR_annual_low_obs <- POR_apply_annual_lowflow_stats(dplyr::select(obs_WY$annual, low_q1, low_q3, low_q7, low_q30))
POR_annual_low_obs <- tibble::add_column(POR_annual_low_obs, data = "observed", .before = 1)
POR_annual_low_mod <- POR_apply_annual_lowflow_stats(dplyr::select(mod_WY$annual, low_q1, low_q3, low_q7, low_q30))
POR_annual_low_mod <- tibble::add_column(POR_annual_low_mod, data = "modeled", .before = 1)
POR_annual_min <- rbind(POR_annual_low_obs, POR_annual_low_mod)
rm(POR_annual_low_obs, POR_annual_low_mod)

POR_annual_hi_obs <- POR_apply_annual_hiflow_stats(dplyr::select(obs_WY$annual, high_q1, high_q3, high_q7, high_q30))
POR_annual_hi_obs <- tibble::add_column(POR_annual_hi_obs, data = "observed", .before = 1)
POR_annual_hi_mod <- POR_apply_annual_hiflow_stats(dplyr::select(mod_WY$annual, high_q1, high_q3, high_q7, high_q30))
POR_annual_hi_mod <- tibble::add_column(POR_annual_hi_mod, data = "modeled", .before = 1)
POR_annual_max <- rbind(POR_annual_hi_obs, POR_annual_hi_mod)
rm(POR_annual_hi_obs, POR_annual_hi_mod)

## POR monthly metrics
# monthly mean time-series
obs_WY$monthly <- obs_WY$daily %>%
  group_by(year, month) %>% dplyr::summarise(value = mean(value))
mod_WY$monthly <- mod_WY$daily %>%
  group_by(year, month) %>% dplyr::summarise(value = mean(value))
# monthly metrics
POR_monthly_obs <- POR_distribution_metrics(obs_WY$monthly$value)
POR_monthly_mod <- POR_distribution_metrics(mod_WY$monthly$value)
# AR-1
obs_WY$monthly$date_mid <- as.Date(paste(obs_WY$monthly$year, obs_WY$monthly$month, "15", sep = "-"))
mod_WY$monthly$date_mid <- as.Date(paste(mod_WY$monthly$year, mod_WY$monthly$month, "15", sep = "-"))
ar1_obs <- POR_calc_AR1(value = POR_deseasonalize(value = obs_WY$monthly$value,
                                                  Date = obs_WY$monthly$date_mid,
                                                  time_step = "monthly"),
                        Date = obs_WY$monthly$date_mid,
                        time_step = "monthly")
ar1_mod <- POR_calc_AR1(value = POR_deseasonalize(value = mod_WY$monthly$value,
                                                  Date = mod_WY$monthly$date_mid,
                                                  time_step = "monthly"),
                        Date = mod_WY$monthly$date_mid,
                        time_step = "monthly")
POR_monthly_obs <- tibble::add_row(POR_monthly_obs, metric = "lag1_autocorrelation", value = ar1_obs)
POR_monthly_mod <- tibble::add_row(POR_monthly_mod, metric = "lag1_autocorrelation", value = ar1_mod)
rm(ar1_obs, ar1_mod)
# seasonality
seas_obs <- POR_calc_amp_and_phase(value = obs_WY$monthly$value,
                                    Date = obs_WY$monthly$date_mid,
                                    time_step="monthly")
seas_mod <- POR_calc_amp_and_phase(value = mod_WY$monthly$value,
                                    Date = mod_WY$monthly$date_mid,
                                    time_step="monthly")
POR_monthly_obs <- tibble::add_row(POR_monthly_obs, metric = seas_obs$metric, value = seas_obs$value)
POR_monthly_mod <- tibble::add_row(POR_monthly_mod, metric = seas_mod$metric, value = seas_mod$value)
rm(seas_obs, seas_mod)

# monthly - label and combine
POR_monthly_obs <- tibble::add_column(POR_monthly_obs, data = "observed", .before = 1)
POR_monthly_mod <- tibble::add_column(POR_monthly_mod, data = "modeled", .before = 1)
POR_monthly_metrics <- rbind(POR_monthly_obs, POR_monthly_mod)
rm(POR_monthly_obs, POR_monthly_mod)

#### Logistic regression for zero-flow annual statistics ####
if(nrow(compYears) >= 8){
  obs_zero <- attr(obs_WY$annual, "zero_flow_years")
  # select statistics over the zero threshold
  obs_zero <- dplyr::select(obs_WY$annual, "WY", 
                            obs_zero$annual_stat[obs_zero$over_threshold == TRUE & 
                                                   !is.na(obs_zero$over_threshold)])
  if(ncol(obs_zero) > 1){
    obs_WY_lr <- lapply(obs_zero, 
                        HyMETT::calc_logistic_regression, 
                        year = obs_zero$WY, data = NULL)
    obs_WY_lr <- dplyr::bind_rows(obs_WY_lr, .id = "annual_stat")
    obs_WY_lr <- dplyr::filter(obs_WY_lr, annual_stat != "WY") #remove WY row
  } else{obs_WY_lr <- NA}
  
  mod_zero <- attr(mod_WY$annual, "zero_flow_years")
  # select statistics over the zero threshold
  mod_zero <- dplyr::select(mod_WY$annual, "WY", 
                            mod_zero$annual_stat[mod_zero$over_threshold == TRUE & 
                                                   !is.na(mod_zero$over_threshold)])
  if(ncol(mod_zero) > 1){
    mod_WY_lr <- lapply(mod_zero, 
                        HyMETT::calc_logistic_regression, 
                        year = mod_zero$WY, data = NULL)
    mod_WY_lr <- dplyr::bind_rows(mod_WY_lr, .id = "annual_stat")
    mod_WY_lr <- dplyr::filter(mod_WY_lr, annual_stat != "WY") #remove WY row
  } else{mod_WY_lr <- NA}
  rm(obs_zero, mod_zero)
} else{
  obs_WY_lr <- NA
  mod_WY_lr <- NA
}
rm(compYears)


#### Add site number and output tables ####
# preproc data
obs_WY$daily <- tibble::add_column(obs_WY$daily, site_no = site_no, .before = 1)
obs_WY$annual <- tibble::add_column(obs_WY$annual, site_no = site_no, .before = 1)
obs_WY$monthly <- tibble::add_column(obs_WY$monthly, site_no = site_no, .before = 1)

mod_WY$daily <- tibble::add_column(mod_WY$daily, site_no = site_no, .before = 1)
mod_WY$annual <- tibble::add_column(mod_WY$annual, site_no = site_no, .before = 1)
mod_WY$monthly <- tibble::add_column(mod_WY$monthly, site_no = site_no, .before = 1)

# trend
if(is.data.frame(obs_WY_trend)){obs_WY_trend <- tibble::add_column(obs_WY_trend, site_no = site_no, .before = 1)}
if(is.data.frame(mod_WY_trend)){mod_WY_trend <- tibble::add_column(mod_WY_trend, site_no = site_no, .before = 1)}
if(is.data.frame(comp_WY_trend)){comp_WY_trend <- tibble::add_column(comp_WY_trend, site_no = site_no, .before = 1)}

# GOF
gof_WY <- tibble::add_column(gof_WY, site_no = site_no, .before = 1)
gof_WY_report <- tibble::add_column(gof_WY_report, site_no = site_no, .before = 1)

# POR
POR_annual_max <- tibble::add_column(POR_annual_max, site_no = site_no, .before = 1)
POR_annual_min <- tibble::add_column(POR_annual_min, site_no = site_no, .before = 1)
POR_daily_metrics <- tibble::add_column(POR_daily_metrics, site_no = site_no, .before = 1)
POR_monthly_metrics <- tibble::add_column(POR_monthly_metrics, site_no = site_no, .before = 1)

# Logistic Regression
if(!is.na(obs_WY_lr)){obs_WY_lr <- tibble::add_column(obs_WY_lr, site_no = site_no, .before = 1)}
if(!is.na(mod_WY_lr)){mod_WY_lr <- tibble::add_column(mod_WY_lr, site_no = site_no, .before = 1)}

# output
write.table(obs_WY$daily, file = paste0(out_dir, site_no,"_obs_daily.csv"), sep = ",", quote = TRUE, row.names = FALSE)
write.table(obs_WY$annual, file = paste0(out_dir, site_no,"_obs_annual.csv"), sep = ",", quote = TRUE, row.names = FALSE)
write.table(obs_WY$monthly, file = paste0(out_dir, site_no,"_obs_monthly.csv"), sep = ",", quote = TRUE, row.names = FALSE)
write.table(mod_WY$daily, file = paste0(out_dir, site_no,"_mod_daily.csv"), sep = ",", quote = TRUE, row.names = FALSE)
write.table(mod_WY$annual, file = paste0(out_dir, site_no,"_mod_annual.csv"), sep = ",", quote = TRUE, row.names = FALSE)
write.table(mod_WY$monthly, file = paste0(out_dir, site_no,"_mod_monthly.csv"), sep = ",", quote = TRUE, row.names = FALSE)
if(is.data.frame(obs_WY_trend)){write.table(obs_WY_trend, file = paste0(out_dir, site_no,"_obs_trend.csv"), sep = ",", quote = TRUE, row.names = FALSE)}
if(is.data.frame(mod_WY_trend)){write.table(mod_WY_trend, file = paste0(out_dir, site_no,"_mod_trend.csv"), sep = ",", quote = TRUE, row.names = FALSE)}
if(is.data.frame(comp_WY_trend)){write.table(comp_WY_trend, file = paste0(out_dir, site_no,"_com_trend.csv"), sep = ",", quote = TRUE, row.names = FALSE)}
write.table(gof_WY, file = paste0(out_dir, site_no,"_GOF.csv"), sep = ",", quote = TRUE, row.names = FALSE)
write.table(gof_WY_report, file = paste0(out_dir, site_no,"_GOF_report.csv"), sep = ",", quote = TRUE, row.names = FALSE)
write.table(POR_annual_max, file = paste0(out_dir, site_no,"_POR_annual_max.csv"), sep = ",", quote = TRUE, row.names = FALSE)
write.table(POR_annual_min, file = paste0(out_dir, site_no,"_POR_annual_min.csv"), sep = ",", quote = TRUE, row.names = FALSE)
write.table(POR_daily_metrics, file = paste0(out_dir, site_no,"_POR_daily_metrics.csv"), sep = ",", quote = TRUE, row.names = FALSE)
write.table(POR_monthly_metrics, file = paste0(out_dir, site_no,"_POR_monthly_metrics.csv"), sep = ",", quote = TRUE, row.names = FALSE)

Try the HyMETT package in your browser

Any scripts or data that you put into this service are public.

HyMETT documentation built on Nov. 23, 2023, 1:08 a.m.