rwrfhydro: R tools for the WRF Hydro Model

Documented in CalcFdc CalcFdcSpline PlotFdc PlotFdcCompare

#' Calculate flow duration curve statistics
#' 
#' \code{CalcFdc} calculates flow exceedance probabilities and adds them as a 
#' new column to the dataframe.
#' 
#' \code{CalcFdc} reads a streamflow time series dataframe (modeled or observed)
#' and outputs a modified dataframe with calculated percent exceedances.
#' 
#' @param strDf The streamflow time series dataframe (e.g., output from 
#'   \code{\link{ReadFrxstPts}} or \code{\link{ReadUsgsGage}}). The data frame 
#'   must contain a column of streamflow values.
#' @param strCol The name of the column containing the streamflow values 
#'   (DEFAULT="q_cms").
#' @return A dataframe containing the original input data with a new column 
#'   added called "<strCol>.fdc" containing the flow exceedance probabilities.
#'   
#' @examples
#' ## Take the time series of observed 5-minute streamflow values for Fourmile 
#' ## Creek (stored in the dataframe obsStr5min.fc in the column "q_cms") and 
#' ## return the same dataframe with a new column called "q_cms.fdc"
#' ## containing the flow exceedance probabilities.
#' \dontrun{
#' obsStr5min.fc <- CalcFdc(obsStr5min.fc, "q_cms")
#' }
#' @keywords univar
#' @concept modelEval
#' @family flowDurationCurves
#' @export
CalcFdc <- function(strDf, strCol="q_cms") {
    if (data.table::is.data.table(strDf)) {
        tmp <- rank(-strDf[, strCol, with=FALSE], na.last="keep")
        strDf[, paste0(strCol,".fdc") := NA]
        strDf[, paste0(strCol,".fdc") := tmp/(sum(!is.na(strDf[,strCol,with=FALSE]))+1)]
    } else {
        tmp <- rank(-strDf[,strCol],na.last="keep")
        strDf[,paste0(strCol,".fdc")] <- NA
        strDf[,paste0(strCol,".fdc")] <- tmp/(sum(!is.na(strDf[,strCol]))+1)
    }
    strDf
}


#' Generate a spline-fit funtion for a flow duration curve
#' 
#' \code{CalcFdcSpline} generates a spline fit function for a flow duration
#' curve.
#' 
#' \code{CalcFdcSpline} reads the percent exceedances generated by
#' \code{\link{CalcFdc}} and outputs a fitted spline function useful for
#' plotting or estimating flow at specified exceedance thresholds (e.g., 20\%
#' exceedance probability).
#' 
#' @param strDf The streamflow time series dataframe (e.g., output from
#'   \code{\link{ReadFrxstPts}} or \code{\link{ReadUsgsGage}}) already processed
#'   through \code{\link{CalcFdc}}. The data frame must contain a column of
#'   streamflow values and a corresponding column (named "<strCol>.fdc") of flow
#'   exceedance probabilities.
#' @param strCol The name of the column containing the streamflow values
#'   (DEFAULT="q_cms").
#' @return A spline function fitted to flow (y) vs. flow exceedance
#'   probabilities (x).
#'   
#' @examples
#' ## Take a time series of observed 5-minute streamflow values for Fourmile
#' ## Creek (stored in the dataframe obsStr5min.fc in the column "q_cms") that
#' ## has already been processed through "CalcFdc" (so also contains a column 
#' ## named "q_cms.fdc") and return a spline fit function.
#' \dontrun{
#' fdc.obsStr5min.fc <- CalcFdcSpline(obsStr5min.fc, "q_cms")
#' 
#' ## Use the spline function to estimate the flow value that is exceeded 20% of the time.
#' 
#' fdc.obsStr5min.fc(0.2)
#' > 0.72
#' }
#' @keywords univar
#' @concept modelEval
#' @family flowDurationCurves
#' @export
CalcFdcSpline <- function(strDf, strCol="q_cms") {
    strflowSpline <- splinefun(strDf[,paste0(strCol,".fdc")], strDf[,strCol], 
                               method='natural')
    strflowSpline
}


#' Plot a flow duration curve for a single streamflow time series
#' 
#' \code{PlotFdc} plots a flow duration curve from a dataframe processed through
#' \code{\link{CalcFdc}} and optional spline function processed through
#' \code{\link{CalcFdcSpline}}.
#' 
#' \code{PlotFdc} reads the dataframe generated by \code{\link{CalcFdc}} and
#' outputs a plot of flow vs. percent exceedances and, optionally the fitted
#' spline curve.
#' 
#' @param strDf The streamflow time series dataframe (e.g., output from
#'   \code{\link{ReadFrxstPts}} or \code{\link{ReadUsgsGage}}) already processed
#'   through \code{\link{CalcFdc}}. The data frame must contain a column of
#'   streamflow values and a corresponding column (named "<strCol>.fdc") of flow
#'   exceedance probabilities.
#' @param strCol The name of the column containing the streamflow values
#'   (DEFAULT="q_cms").
#' @param spline (TRUE or FALSE) Option to add spline-fit curves to the plot
#'   (DEFAULT=TRUE).
#' @param fdcProb Optional exceedance threshold to calculate/plot (e.g., 0.2 for
#'   20\% exceedance).
#' @return A plot of flow (y) vs. flow exceedance probabilities (x).
#'   
#' @examples
#' ## Take a time series of observed 5-minute streamflow values for Fourmile 
#' ## Creek (stored in the dataframe obsStr5min.fc in the column "q_cms") that
#' ## has already been processed through "CalcFdc" (so also contains a column 
#' ## named "q_cms.fdc") and plot with the 20% exceedance threshold called out.
#' \dontrun{
#' PlotFdc(obsStr5min.fc, fdcProb=0.2)
#' }
#' @keywords hplot
#' @concept modelEval plot
#' @family flowDurationCurves
#' @export
PlotFdc <- function(strDf, strCol="q_cms", spline=TRUE, fdcProb=NULL) {
    plot(strDf[,paste0(strCol,".fdc")], strDf[,strCol], log='y', 
         xlab="Probability of Exceedance", ylab=c("Flow (log scale)"), 
         main=c(paste("Flow Duration Curve: ", deparse(substitute(strDf)), ":", 
                      deparse(substitute(strCol)), ", n=", sum(!is.na(strDf[,strCol])))))
    if (spline | !is.null(fdcProb)) {
        fdcSplineFun <- splinefun(strDf[,paste0(strCol,".fdc")], strDf[,strCol], 
                                  method='natural')
        if (spline) {
            curve(fdcSplineFun(x), col='red', lwd=2, lty=1, add=T)
            }
        if (!is.null(fdcProb)) {
            abline(v=fdcProb,h=fdcSplineFun(fdcProb),col='grey')
            text(fdcProb,fdcSplineFun(fdcProb),
                 labels=paste("P =",fdcProb*100,"%, Q=",round(fdcSplineFun(fdcProb),2)), 
                 adj=c(-0.05,-1), cex=0.9)
            }
        }
}


#' Plots a flow duration curve for up to three streamflow time series (e.g.,
#' observed & two model outputs)
#' 
#' \code{PlotFdcCompare} plots up to three flow duration curves for comparison.
#' 
#' \code{PlotFdcCompare} reads up to three dataframes (e.g., generated by
#' \code{\link{ReadFrxstPts}} or \code{\link{ReadUsgsGage}}) and outputs a plot
#' of flow vs. percent exceedances and, optionally, the fitted spline curves.
#' Intended to plot one observed flow duration curve and one or two modelled
#' flow duration curves for comparison. The tool will subset data to matching
#' time periods (e.g., if the observed data is at 5-min increments and modelled
#' data is at 1-hr increments, the tool will subset the observed data to select
#' only observations on the matching hour break). Therefore, the tool will 
#' RECALCULATE the flow exceedance probabilities on the fly.
#' 
#' @param strDf.obs The OBSERVED streamflow time series dataframe (e.g., output
#'   from \code{\link{ReadUsgsGage}}). The dataframe must contain a column of
#'   streamflow values and a POSIXct column.
#' @param strCol.obs The name of the column containing the streamflow values for
#'   the OBSERVED dataframe (DEFAULT="q_cms").
#' @param strDf.mod1 The FIRST MODEL streamflow time series dataframe (e.g.,
#'   output from \code{\link{ReadFrxstPts}}). The dataframe must contain a
#'   column of streamflow values and a POSIXct column.
#' @param strCol.mod1 The name of the column containing the FIRST MODEL
#'   streamflow values (DEFAULT="q_cms").
#' @param strDf.mod2 The SECOND MODEL streamflow time series dataframe (e.g.,
#'   output from \code{\link{ReadFrxstPts}}). The dataframe must contain a
#'   column of streamflow values and a POSIXct column.
#' @param strCol.mod2 The name of the column containing the SECOND MODEL
#'   streamflow values (DEFAULT="q_cms").
#' @param spline (TRUE or FALSE) Option to add spline-fit curves to the plot
#'   (DEFAULT=TRUE).
#' @param logy (TRUE or FALSE) Optional flag to set the y-axis to log-scale
#'   (DEFAULT=TRUE).
#' @param labelObs Optional label for the observed streamflow
#'   (DEFAULT="Observed")
#' @param labelMod1 Optional label for the FIRST MODEL (DEFAULT="Model 1")
#' @param labelMod2 Optional label for the SECOND MODEL (DEFAULT="Model 2")
#' @return A plot of flow (y) vs. flow exceedance probabilities (x).
#'   
#' @examples
#' ## Take a time series of observed 5-minute streamflow values for Fourmile
#' ## Creek (obsStr5min.fc) and two hourly model runs (modStrh.allrt.fc, 
#' ## modStrh.chrt.fc), all with streamflow columns named "q_cms", and plot 
#' ## the flow duration curves for all three.
#' 
#' 
#' \dontrun{
#' PlotFdcCompare(obsStr5min.fc, "q_cms", modStrh.allrt.fc, "q_cms",
#'                strDf.mod2=modStrh.chrt.fc, strCol.mod2="q_cms", 
#'                labelObs="Observed Fourmile Creek", 
#'                labelMod1="All Routing", labelMod2="Channel Routing Only")
#' }
#' @keywords hplot
#' @concept modelEval plot
#' @family flowDurationCurves
#' @export
PlotFdcCompare <- function(strDf.obs, strCol.obs="q_cms",
                            strDf.mod1, strCol.mod1="q_cms",
                            strDf.mod2=NULL, strCol.mod2="q_cms",
                            spline=TRUE, logy=TRUE,
                            labelObs="Observed", 
                           labelMod1="Model 1", labelMod2="Model 2") {
    # Prepare data
    strDf.obs$qcomp.obs <- strDf.obs[,strCol.obs]
    strDf.mod1$qcomp.mod1 <- strDf.mod1[,strCol.mod1]
    if (!is.null(strDf.mod2)) {
        strDf.mod2$qcomp.mod2 <- strDf.mod2[,strCol.mod2]
        }
    stroutDf <- merge(strDf.obs[c("POSIXct","qcomp.obs")], 
                      strDf.mod1[c("POSIXct","qcomp.mod1")], by<-c("POSIXct"))
    stroutDf <- subset(stroutDf, !is.na(stroutDf$qcomp.obs) & !is.na(stroutDf$qcomp.mod1))
    if (!is.null(strDf.mod2)) {
        stroutDf <- merge(stroutDf, strDf.mod2[c("POSIXct","qcomp.mod2")], 
                          by<-c("POSIXct"))
        stroutDf <- subset(stroutDf, !is.na(stroutDf$qcomp.mod2))
        }
    stroutDf <- CalcFdc(stroutDf, "qcomp.obs")
    stroutDf <- CalcFdc(stroutDf, "qcomp.mod1")
    if (!is.null(strDf.mod2)) {
        stroutDf <- CalcFdc(stroutDf, "qcomp.mod2")
        }
    # Calculate splines if needed
    if (spline) {
        fdcSplineFun.obs <- splinefun(stroutDf[,"qcomp.obs.fdc"], 
                                      stroutDf[,"qcomp.obs"], method='natural')
        fdcSplineFun.mod1 <- splinefun(stroutDf[,"qcomp.mod1.fdc"], 
                                       stroutDf[,"qcomp.mod1"], method='natural')
        if (!is.null(strDf.mod2)) {
            fdcSplineFun.mod2 <- splinefun(stroutDf[,"qcomp.mod2.fdc"], 
                                           stroutDf[,"qcomp.mod2"], method='natural')
            }
        }
    if (!is.null(strDf.mod2)) {
        combmin <- max(0.001, min(min(stroutDf[,"qcomp.obs"],na.rm=T), 
                                  min(stroutDf[,"qcomp.mod1"],na.rm=T), 
                                  min(stroutDf[,"qcomp.mod2"],na.rm=T)))
        print(paste("Combined min flow for y-axis (capped at 0.001):",combmin))
        combmax <- max(max(stroutDf[,"qcomp.obs"],na.rm=T), 
                       max(stroutDf[,"qcomp.mod1"],na.rm=T), 
                       max(stroutDf[,"qcomp.mod2"], na.rm=T))
        print(paste("Combined max flow for y-axis:",combmax))
        }
    else {
        combmin <- max(0.001,min(min(stroutDf[,"qcomp.obs"],na.rm=T), 
                                 min(stroutDf[,"qcomp.mod1"],na.rm=T)))
        print(paste("Combined min flow for y-axis (capped at 0.001):",combmin))
        combmax <- max(max(stroutDf[,"qcomp.obs"],na.rm=T), 
                       max(stroutDf[,"qcomp.mod1"],na.rm=T))
        print(paste("Combined max flow for y-axis:",combmax))
        }
    # Plot
    if (logy) {
        plot(stroutDf[,"qcomp.obs.fdc"], stroutDf[,"qcomp.obs"], log='y', 
             xlab="Probability of Exceedance", 
             ylab=c(paste("Flow", deparse(substitute(strCol.obs)), "(log scale)")), 
             main=c("Flow Duration Curves"), ylim=c(combmin,combmax))
        }
    else {
        plot(stroutDf[,"qcomp.obs.fdc"], stroutDf[,"qcomp.obs"], 
             xlab="Probability of Exceedance", 
             ylab=c(paste("Flow", deparse(substitute(strCol.obs)))), 
             main=c("Flow Duration Curves"), ylim=c(combmin,combmax))
        }
    if (spline) {
        curve(fdcSplineFun.obs(x), col='red', lwd=2, lty=1, add=TRUE)
        }
    points(stroutDf[,"qcomp.mod1.fdc"], stroutDf[,"qcomp.mod1"],col='blue')
    if (spline) {
        curve(fdcSplineFun.mod1(x), col='cyan', lwd=2, lty=1, add=TRUE)
        }
    if (!is.null(strDf.mod2)) {
        points(stroutDf[,"qcomp.mod2.fdc"], stroutDf[,"qcomp.mod2"],col='darkgreen')
        if (spline) {
            curve(fdcSplineFun.mod2(x), col='green', lwd=2, lty=1, add=TRUE)
            legend('topright', legend=c(labelObs, paste(labelObs,"- Curve Fit"), 
                                        labelMod1, paste(labelMod1,"- Curve Fit"), 
                                        labelMod2, paste(labelMod2,"- Curve Fit")), 
                   lty=c(NA,1,NA,1,NA,1), pch=c(1,NA,1,NA,1,NA), lwd=c(NA,1,NA,1,NA,1), 
                   col=c("black","red","blue","cyan","darkgreen","green"), bty='n')
            }
        else {
            legend('topright', legend=c(labelObs, labelMod1, labelMod2), 
                   pch=c(1,1,1), col=c("black","blue","darkgreen"), bty='n')
            }
        }
    else {
        if (spline) {
            legend('topright', legend=c(labelObs,paste(labelObs,"- Curve Fit"),
                                        labelMod1, paste(labelMod1,"- Curve Fit")), 
                   lty=c(NA,1,NA,1), pch=c(1,NA,1,NA), lwd=c(NA,1,NA,1), 
                   col=c("black","red","blue","cyan"), bty='n')
            }
        else {
            legend('topright', legend=c(labelObs,labelMod1), pch=c(1,1), 
                   col=c("black","blue"), bty='n')
            }
        }
        mtext(c(paste("Dates: ", min(format(stroutDf$POSIXct,"%Y-%m-%d")), " to ", 
                      max(format(stroutDf$POSIXct,"%Y-%m-%d")), ", n=", nrow(stroutDf))), 
              side=3, line=0.0, cex=0.9)
}