#' Function to calculate the differences in statistics for two given datasets.
#'
#' This function accepts two data frames of daily flow data and returns a data frame of
#' calculated difference statistics from \link[EflowStats]{calc_allHIT},
#' \link[EflowStats]{calc_magnifSeven}, \link{calculate_other_flow_stats}, and \link{calculate_GoF_stats}.
#' HitStats, calc_magnifSeven, and other_flow_stats are differented as \code{(a-b)/a}
#'
#' Assumptions:
#' Dates must match between the two data sources.
#'
#' @param sites A two column dataframe containing site names for flow_data_a
#' and flow_data_b flow data.
#' @param flow_data_a A dataframe containing a NWCCompare flow dataset.
#' Should have been cleaned by \link[EflowStats]{validate_data} and constructed by
#' a build dataset function from this package.
#' @param flow_data_b A second NWCCompare flow dataset to be compared
#' to flow_data_a.
#' @param yearType A charcter of either "water" or "calendar" indicating
#' whether to use water years or calendar years, respectively.
#' @param digits A numeric. Number of digits to round indice values
#' @return statsout data frame of calculated statistics
#' @importFrom stats aggregate
#' @export
#' @examples
#' # https://cida.usgs.gov/nwc/#!waterbudget/achuc/031300011004
#' nwis <- "02335757"
#' huc <- "031300011004"
#' sites <- data.frame(a=nwis, b=huc, stringsAsFactors = FALSE)
#' start_date <- "2004-10-01"
#' end_date <- "2010-09-30"
#' flow_data_a <- build_nwis_dv_dataset(nwis, start_date, end_date)
#' flow_data_b <- build_nwc_flow_dataset(huc, start_date, end_date)
#' diff_statsout <- calculate_stats_diffs(sites = sites,
#' flow_data_a = flow_data_a,
#' flow_data_b = flow_data_b,
#' yearType = "water",
#' digits = 2)
#'
calculate_stats_diffs<-function(sites, flow_data_a, flow_data_b,
yearType = "water", digits = 3) {
if(!all(flow_data_a$daily_streamflow_cfs[[1]]$date ==
flow_data_b$daily_streamflow_cfs[[1]]$date)) {
stop("Dates in flow data do not match, can not proceed.")}
stats=c("calc_magAverage", "calc_magLow", "calc_magHigh",
"calc_frequencyLow", "calc_frequencyHigh",
"calc_durationLow", "calc_durationHigh",
"calc_timingAverage", "calc_timingLow", "calc_timingHigh",
"calc_rateChange",
"calc_magnifSeven", "otherStat")
min_date <- as.character(min(flow_data_a$daily_streamflow_cfs[[1]]$date))
max_date <- as.character(max(flow_data_a$daily_streamflow_cfs[[1]]$date))
calc_allHIT_result_a <- calculate_stats_by_group(stats = stats,
flow_data = flow_data_a,
yearType = yearType,
digits = digits)
calc_allHIT_result_b <- calculate_stats_by_group(stats = stats,
flow_data = flow_data_b,
yearType = yearType,
digits = digits)
GoFstats_results <- rep(list(list()), nrow(sites))
for (i in 1:nrow(sites)) {
site_a <- sites[i,1]
site_b <- sites[i,2]
GoFstats_results_site <- calculate_GoF_stats(flow_data_b$daily_streamflow_cfs[site_b][[1]],
flow_data_a$daily_streamflow_cfs[site_a][[1]])
if(i==1) {
statsout <- cbind(calc_allHIT_result_a,GoFstats_results_site)
init <- FALSE
}
statsout[i,4:(ncol(calc_allHIT_result_a)-1)] <- (calc_allHIT_result_a[i,4:(ncol(calc_allHIT_result_a)-1)] -
calc_allHIT_result_b[i,4:(ncol(calc_allHIT_result_a)-1)]) /
calc_allHIT_result_a[i,4:(ncol(calc_allHIT_result_a)-1)]
statsout[i,(ncol(calc_allHIT_result_a)+1):ncol(statsout)] <- GoFstats_results_site
}
return(statsout)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.