Nothing
#' Simulation quality assessment
#'
#' Evaluates the simulated water budget with the average KGE.
#'
#' The columns of the water budget data set input are:
#' * **year**
#' * **month**
#' * **vi**
#' * **t_mean**
#' * **runoff**
#' * **pet**
#' * **aet**
#' * **gwr**
#' * **runoff_2**
#' * **delta_reservoir**
#' * **rcn_id**
#'
#' The columns of the observed flow data set input are:
#' * **year**
#' * **month**
#' * **day**
#' * **one column per station** (named by the station ID), the flow rates in mm/day
#'
#' The columns of the RCN gauging stations data set input are:
#' * **rcn_id**, the cell ID
#' * **station_id**, the station ID
#'
#' The columns of the Lyne and Hollick filter data set input are:
#' * **station_id**, the station ID
#' * **alpha**
#'
#' @param obj The HydroBudget object with calibration parameters and column names mappings.
#' @param water_budget The computed water budget. Input can be a data.frame/data.table or a path to a data file.
#' @param rcn_gauging The table with the list of RCN cells located in each gauging station watershed. Input can be a data.frame/data.table or a path to a data file.
#' @param observed_flow The flow rates in mm/day. Input can be a data.frame/data.table or a path to a data file.
#' @param alpha_lyne_hollick The Lyne and Hollick filter. Input can be a data.frame/data.table or a path to a data file.
#' @param period The start and end years. If not provided, the start/end years will be extracted from the water budget data.
#' @return The HydroBudget quality assessment.
#'
#' @importFrom data.table setkey
#' @rdname evaluate_simulation_quality
#' @method evaluate_simulation_quality hydrobudget
#' @export
#'
#' @examples
#' \dontrun{
#' # Use input example files provided by the package
#' base_url <- "https://github.com/gwrecharge/rechaRge-book/raw/main/examples/input/"
#' input_rcn_gauging <- paste0(base_url, "rcn_gauging.csv.gz")
#' input_observed_flow <- paste0(base_url, "observed_flow.csv.gz")
#' input_alpha_lyne_hollick <- paste0(base_url, "alpha_lyne_hollick.csv.gz")
#'
#' # Calibration parameters
#' HB <- rechaRge::new_hydrobudget(
#' T_m = 2.1, # melting temperature (°C)
#' C_m = 6.2, # melting coefficient (mm/°C/d)
#' TT_F = -17.6, # Threshold temperature for soil frost (°C)
#' F_T = 16.4, # Freezing time (d)
#' t_API = 3.9, # Antecedent precipitation index time (d)
#' f_runoff = 0.63, # Runoff factor (-)
#' sw_m = 431, # Maximum soil water content (mm)
#' f_inf = 0.07 # infiltration factor (-)
#' )
#'
#' # ... compute the water budget ...
#'
#' result <- evaluate_simulation_quality(
#' HB,
#' water_budget = water_budget,
#' rcn_gauging = input_rcn_gauging,
#' observed_flow = input_observed_flow,
#' alpha_lyne_hollick = input_alpha_lyne_hollick,
#' period = simul_period
#' )
#' }
evaluate_simulation_quality.hydrobudget <- function(obj, water_budget, rcn_gauging, observed_flow, alpha_lyne_hollick, period = NULL, ...) {
water_budget_data <- .as.data.table(water_budget)
rcn_gauging_data <- .as.data.table(rcn_gauging, obj$rcn_gauging_columns)
year_range <- period
# Simulation period
if (is.null(year_range)) {
year_range <- c(min(water_budget_data$year), max(water_budget_data$year))
} else if (length(year_range) == 1) {
year_range <- append(year_range, max(water_budget_data$year))
}
year_start <- as.numeric(year_range[1])
year_end <- as.numeric(year_range[2])
if (year_end < year_start) {
stop("Wrong simulation period, start year must be before end year")
}
# Load and filter observed flow
observed_flow_ <- .as.data.table(observed_flow, obj$observed_flow_columns)
list_year <- seq(year_start, year_end, 1)
observed_flow_ <- observed_flow_[year %in% list_year]
flow <- process_river_flow(obj, observed_flow_, alpha_lyne_hollick)
observed_flow_month <- flow$observed_flow_month
gauging <- flow$gauging
# Calibration parameters
# Snow model
calibration_ <- obj$calibration
T_snow <- calibration_$Tsnow
T_m <- calibration_$T_m
C_m <- calibration_$C_m
# Soil frost
TT_F <- calibration_$TT_F
F_T <- calibration_$F_T
# Runoff
t_API <- calibration_$t_API
f_runoff <- calibration_$f_runoff
# Soil parameters
sw_m <- calibration_$sw_m
f_inf <- calibration_$f_inf
output <- list(
gauging = c()
)
for (st in 1:length(gauging)) {
# Load the modeled data per gauging station
setkey(water_budget_data, cols = "rcn_id")
rcn_ids <- rcn_gauging_data$rcn_id[which(rcn_gauging_data$station_id == gauging[st])]
if (length(rcn_ids) > 0) {
budget_month <- water_budget_data[.(rcn_ids)]
budget_month <- na.omit(budget_month, invert = FALSE)
budget_month <- budget_month[, .(
vi = mean(get("vi")),
t_mean = mean(get("t_mean")),
runoff = mean(get("runoff")),
pet = mean(get("pet")),
aet = mean(get("aet")),
gwr = mean(get("gwr")),
runoff_2 = mean(get("runoff_2")),
delta_reservoir = mean(get("delta_reservoir"))
), .(year, month)]
budget_month$station_id <- gauging[st]
# Create the comparison data frame
cols <- which(colnames(observed_flow_month) %in% c(
"year", "month", as.character(unique(budget_month$station_id)),
paste(as.character(unique(budget_month$station_id)), "_bf", sep = "")
))
comparison_month <- merge(observed_flow_month[, cols, with = FALSE],
budget_month[, 1:(ncol(budget_month) - 1), with = FALSE],
by = c("year", "month"), all.x = TRUE
)
rm(cols)
colnames(comparison_month)[1:4] <- c("year", "month", "q", "qbase")
# Save the simulation results by gauging station
output$gauging <- append(output$gauging, list(list(gauging = gauging[st], comparison_month = comparison_month)))
# Objective functions
# combining the error indicators in a data table
error_ind <- matrix(ncol = 2, nrow = 2)
colnames(error_ind) <- c("month_cal", "month_val")
rownames(error_ind) <- c("KGE_qtot", "KGE_baseflow")
# defining the periods of data for the observed river flow and base flow per gauging station and separate it in
# first 2/3 = calibration period // last 1/3 = validation period
flow_beg <- min(na.contiguous(comparison_month[, 1:3])[, 1])
flow_end <- max(na.contiguous(comparison_month[, 1:3])[, 1])
ifelse(flow_beg < year_start, flow_beg <- year_start, flow_beg <- as.numeric(flow_beg))
ifelse(flow_end > year_end, flow_end <- year_end, flow_end <- as.numeric(flow_end))
calibration_start <- flow_beg
calibration_end <- flow_beg + round((flow_end - flow_beg) * 2 / 3)
validation_start <- calibration_end + 1
validation_end <- flow_end
# defining the modeled and observed on each period for flow and baseflow
m_q_month_cal <- (comparison_month$runoff[which(comparison_month$year %in% c(calibration_start:calibration_end))] +
comparison_month$runoff_2[which(comparison_month$year %in% c(calibration_start:calibration_end))] +
comparison_month$gwr[which(comparison_month$year %in% c(calibration_start:calibration_end))])
o_q_month_cal <- comparison_month[[3]][which(comparison_month$year %in% c(calibration_start:calibration_end))]
m_q_month_val <- (comparison_month$runoff[which(comparison_month$year %in% c(validation_start:validation_end))] +
comparison_month$runoff_2[which(comparison_month$year %in% c(validation_start:validation_end))] +
comparison_month$gwr[which(comparison_month$year %in% c(validation_start:validation_end))])
o_q_month_val <- comparison_month[[3]][which(comparison_month$year %in% c(validation_start:validation_end))]
m_baseflow_month_cal <- comparison_month$gwr[which(comparison_month$year %in% c(calibration_start:calibration_end))]
o_baseflow_month_cal <- comparison_month[[4]][which(comparison_month$year %in% c(calibration_start:calibration_end))]
m_baseflow_month_val <- comparison_month$gwr[which(comparison_month$year %in% c(validation_start:validation_end))]
o_baseflow_month_val <- comparison_month[[4]][which(comparison_month$year %in% c(validation_start:validation_end))]
# KGE
error_ind[1, 1] <- KGE(m_q_month_cal[which(!is.na(o_q_month_cal))],
o_q_month_cal[which(!is.na(o_q_month_cal))])
error_ind[1, 2] <- KGE(m_q_month_val[which(!is.na(o_q_month_val))],
o_q_month_val[which(!is.na(o_q_month_val))])
error_ind[2, 1] <- KGE(m_baseflow_month_cal[which(!is.na(o_baseflow_month_cal))],
o_baseflow_month_cal[which(!is.na(o_baseflow_month_cal))])
error_ind[2, 2] <- KGE(m_baseflow_month_val[which(!is.na(o_baseflow_month_val))],
o_baseflow_month_val[which(!is.na(o_baseflow_month_val))])
# Clean
rm(
m_q_month_cal, o_q_month_cal, m_q_month_val, o_q_month_val, m_baseflow_month_cal, o_baseflow_month_cal,
m_baseflow_month_val, o_baseflow_month_val
)
# Write the simulation metadata in a datatable and save it
if (st == 1) {
simulation_metadata <- data.table(
station_id = unique(budget_month$station_id),
cal_beg = calibration_start,
cal_end = calibration_end,
val_beg = validation_start,
val_end = validation_end,
T_snow = T_snow,
T_m = T_m,
C_m = C_m,
TT_F = TT_F,
F_T = F_T,
t_API = t_API,
f_runoff = f_runoff,
sw_m = sw_m,
f_inf = f_inf,
KGE_qtot_cal = error_ind[1, 1], KGE_qbase_cal = error_ind[2, 1],
KGE_qtot_val = error_ind[1, 2], KGE_qbase_val = error_ind[2, 2],
qtot_sim = mean(budget_month[
, .(qtot = sum(get("runoff") + get("runoff_2") + get("gwr"), na.rm = TRUE)),
.(year)
][[2]]),
aet_sim = mean(budget_month[
, .(aet = sum(get("aet"), na.rm = TRUE)),
.(year)
][[2]]),
gwr_sim = mean(budget_month[
, .(gwr = sum(get("gwr"), na.rm = TRUE)),
.(year)
][[2]]),
time = format(Sys.time(), "%Y_%m_%d-%H_%M"),
KGE_mean_cal = NA_real_,
KGE_mean_val = NA_real_
)
} else {
simulation_metadata <- rbind(
simulation_metadata,
data.table(
station_id = unique(budget_month$station_id),
cal_beg = calibration_start,
cal_end = calibration_end,
val_beg = validation_start,
val_end = validation_end,
T_snow = T_snow,
T_m = T_m,
C_m = C_m,
TT_F = TT_F,
F_T = F_T,
t_API = t_API,
f_runoff = f_runoff,
sw_m = sw_m,
f_inf = f_inf,
KGE_qtot_cal = error_ind[1, 1],
KGE_qbase_cal = error_ind[2, 1],
KGE_qtot_val = error_ind[1, 2],
KGE_qbase_val = error_ind[2, 2],
qtot_sim = mean(budget_month[
, .(qtot = sum(get("runoff") + get("runoff_2") + get("gwr"), na.rm = TRUE)),
.(year)
][[2]]),
aet_sim = mean(budget_month[
, .(aet = sum(get("aet"), na.rm = TRUE)),
.(year)
][[2]]),
gwr_sim = mean(budget_month[
, .(gwr = sum(get("gwr"), na.rm = TRUE)),
.(year)
][[2]]),
time = format(Sys.time(), "%Y_%m_%d-%H_%M"),
KGE_mean_cal = NA_real_,
KGE_mean_val = NA_real_
)
)
}
if (st == length(gauging)) {
simulation_metadata[nrow(simulation_metadata), c("KGE_mean_cal", "KGE_mean_val") := as.list(c(mean((simulation_metadata$KGE_qtot_cal + simulation_metadata$KGE_qbase_cal) / 2),
mean((simulation_metadata$KGE_qtot_val + simulation_metadata$KGE_qbase_val) / 2),
na.rm = TRUE
))[1:2]]
output$simulation_metadata <- simulation_metadata
}
rm(calibration_start, calibration_end, validation_start, validation_end, flow_beg, flow_end)
}
}
rm(st, comparison_month)
output
}
#' River flow processing
#'
#' Processing of the river flow to compute the baseflow with Lyne and Hollick and resample them into monthly observations by:
#' 1. select the available river flow observations for the simulation period and fill the gaps (up to 5 missing days),
#' 2. extract the list of the available gauging stations for the simulation period (list of the names),
#' 3. compute the Lyne and Hollick baseflow and resample river flow and baseflow with a monthly time step.
#'
#' @param obj The HydroBudget object with calibration parameters and column names mappings.
#' @param observed_flow The flow rates in mm/day. Input can be a data.frame/data.table or a path to a data file.
#' @param alpha_lyne_hollick The Lyne and Hollick filter. Input can be a data.frame/data.table or a path to a data file.
#'
#' @return A list of observed_flow_month and gauging
#' @keywords internal
#'
#' @importFrom hydrostats baseflows
process_river_flow <- function(obj, observed_flow, alpha_lyne_hollick) {
# Read or coerce input data
observed_flow_ <- .as.data.table(observed_flow, obj$observed_flow_columns)
alpha_lyne_hollick_ <- .as.data.table(alpha_lyne_hollick, obj$alpha_lyne_hollick_columns)
# Observations data processing
# Select the observed flow for the simulation period and interpolate the gaps
observed_flow_no_na <- observed_flow_[, which(unlist(lapply(observed_flow_, function(x) !all(is.na(x))))), with = F]
if (ncol(observed_flow_no_na) < 4) {
stop("error - no observed data on the simulation period")
}
# Station IDs are the unknown columns
get_station_ids <- function(flw) {
station_ids <- colnames(observed_flow_no_na)
station_ids <- station_ids[!(station_ids %in% c("year", "month", "day", "date"))]
# station IDs must have an alpha lyne hollick
station_ids_alpha <- unique(as.character(alpha_lyne_hollick_$station))
station_ids[station_ids %in% station_ids_alpha]
}
station_ids <- get_station_ids(observed_flow_no_na)
row_count <- nrow(observed_flow_no_na) - 1
for (station_id in station_ids) {
observed_flow_no_na[[station_id]][2:row_count] <- zoo::na.approx(observed_flow_no_na[[station_id]][2:row_count],
maxgap = 5, na.rm = FALSE
)
} # Fill up the gap in the observed flow up to 5 days
# Add date column and move it to 1st position
observed_flow_no_na$date <- as.POSIXct(paste(observed_flow_no_na$year, observed_flow_no_na$month, observed_flow_no_na$day, sep = "-"),
format = "%Y-%m-%d", tz = "UTC"
)
observed_flow_no_na <- observed_flow_no_na[, c(ncol(observed_flow_no_na), 1:(ncol(observed_flow_no_na) - 1)), with = FALSE]
observed_flow_month <- data.table::data.table(
year = c(rep(unique(observed_flow_no_na$year), each = 12)),
month = c(rep(c(1:12), length(unique(observed_flow_no_na$year))))
)
# List of the available gauging station for the simulation period
if (ncol(observed_flow_no_na) > 4) {
station_ids <- get_station_ids(observed_flow_no_na)
# FIXME why numeric ID?
gauging <- as.numeric(station_ids)
}
# Compute baseflow with Lyne and Hollick (alpha calibrated independently)
if (ncol(observed_flow_no_na) < 5) {
stop("error - no observed river flow on the simulation period - baseflow computation impossible")
}
for (station_id in station_ids) {
bf <- observed_flow_no_na[, c("date", "year", "month", station_id), with = FALSE]
bf <- na.contiguous(bf) # select the longest period without NA
colnames(bf) <- c("Date", "year", "month", "Q")
bf$bf_lh <- baseflows(bf[, c(1, 4), with = FALSE],
alpha_lyne_hollick_$alpha[which(alpha_lyne_hollick_$station == station_id)],
n.reflected = 30, ts = "daily"
)[, 3]
q_month <- bf[, .(qmonth = sum(get("Q"), na.rm = TRUE),
bf_lh_month = sum(get("bf_lh"), na.rm = TRUE)), .(year, month)]
colnames(q_month)[3:4] <- c(station_id, paste(station_id, "_bf", sep = ""))
observed_flow_month <- merge(observed_flow_month, q_month, by = c("year", "month"), all.x = TRUE)
}
# Returned value
list(
observed_flow_month = observed_flow_month,
gauging = gauging
)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.