#' Estimate daily loads using multi-variate regression
#'
#' @param x data frame containing observed concentrations, flows, and loads
#' @param interp flag to interpolate residuals
#' @param max_interval maximum interval for using interpolation
#' @param extrapolate_flow flag to allow extrapolation of flow variables for prediction (Qderiv, Q)
#' @param start_month first month of water year
#' @param predict_wyear_range vector of length 2 specifying range of water years for final predications (or NULL to use span of data)
#' @return list
#' @export
flux_regression <- function(x, interp=FALSE, max_interval=90, extrapolate_flow=FALSE, start_month=10, predict_wyear_range=NULL) {
stopifnot(all(names(x) %in% c('DATE', 'Q', 'C')))
x_obs <- x
# sort by DATE
x <- x[order(x[['DATE']]), ]
# check for dates problems
stopifnot(sum(duplicated(x[['DATE']])) == 0)
stopifnot(sum(is.na(x[['DATE']])) == 0)
# check for missing flows
stopifnot(all(!is.na(x[['Q']])))
# compute loads
x[['Cobs']] <- x[['C']]
x[['Qobs']] <- x[['Q']]
x[['C']] <- NULL
x[['Lobs']] <- x[['Qobs']]*x[['Cobs']]
x[['SAMPLED']] <- !is.na(x[['Cobs']])
x[['WYEAR']] <- wyear(x[['DATE']], start_month=start_month)
x[['MONTH']] <- lubridate::month(x[['DATE']])
x[['JDAY']] <- lubridate::yday(x[['DATE']])
x[['JDAY']] <- ifelse(x[['JDAY']] < 60, 60, x[['JDAY']])
x[['JDAY']] <- ifelse(x[['JDAY']] > 300, 300, x[['JDAY']])
x[['LogCobs']] = log(x[['Cobs']])
x[['Qderiv']] = log(x[['Q']]/dplyr::lag(x[['Q']]))
x[['Qderiv']] = ifelse(is.na(x[['Qderiv']]), 0, x[['Qderiv']])
x[['Year']] = lubridate::year(x[['DATE']]) + x[['JDAY']]/365.25
x[['Year2']] = x[['Year']]*x[['Year']]
x[['Cos2t']] = cos(2*2*pi*x[['JDAY']]/365.25)
x[['Sin2t']] = sin(2*2*pi*x[['JDAY']]/365.25)
x[['Cost']] = cos(2*pi*x[['JDAY']]/365.25)
x[['Sint']] = sin(2*pi*x[['JDAY']]/365.25)
x[['LogQ3']] = log(x[['Q']])^3
x[['LogQ2']] = log(x[['Q']])^2
x[['LogQ']] = log(x[['Q']])
sampled <- which(x[['SAMPLED']])
if (!extrapolate_flow) {
Qderiv_range <- range(x[sampled, 'Qderiv'])
Q_range <- range(x[sampled, 'Q'])
# limit Qderiv and Q to prevent extrapolation
x[['Qderiv']]=ifelse(x[['Qderiv']] < Qderiv_range[1], Qderiv_range[1], x[['Qderiv']])
x[['Qderiv']]=ifelse(x[['Qderiv']] > Qderiv_range[2], Qderiv_range[2], x[['Qderiv']])
x[['Q']]=ifelse(x[['Qobs']] < Q_range[1], Q_range[1], x[['Qobs']])
x[['Q']]=ifelse(x[['Q']] > Q_range[2], Q_range[2], x[['Q']])
x[['LogQ3']]=log(x[['Q']])^3
x[['LogQ2']]=log(x[['Q']])^2
x[['LogQ']]=log(x[['Q']])
}
x_fit <- x[sampled, ]
lm_flux <- lm(LogCobs ~ Qderiv + Year + Year2 + Cos2t + Sin2t + Cost + Sint + LogQ3 + LogQ2 + LogQ,
data=x_fit)
# compute predictions
x_predict <- x
# correct for re-transformation bias
x_predict[['LogCpred_biased']] <- predict(lm_flux, x_predict)
bias_correction <- log(sum(x_predict[sampled, 'Cobs']/exp(x_predict[sampled, 'LogCpred_biased'])) / length(sampled))
x_predict[['LogCpred']] <- x_predict[['LogCpred_biased']] + bias_correction
x_predict[['Cpred']] <- exp(x_predict[['LogCpred']])
if (interp) {
x_predict[['Cres']] <- log(x_predict[['Cobs']] / x_predict[['Cpred']])
x_predict[['Cres_interp']] <- interp_range(x = x_predict[['DATE']],
y = x_predict[['Cres']],
max_interval = max_interval,
fill = 0)
x_predict[['Cest']] <- x_predict[['Cpred']] * exp(x_predict[['Cres_interp']])
} else {
x_predict[['Cest']] <- x_predict[['Cpred']]
x_predict[['Cres']] <- log(x_predict[['Cobs']] / x_predict[['Cest']])
}
x_predict[['Lest']] <- x_predict[['Cest']] * x_predict[['Qobs']]
x_predict[['Lpred']] <- x_predict[['Cpred']] * x_predict[['Qobs']]
x_predict[['Lres']] <- x_predict[['Lobs']] - x_predict[['Lpred']]
x_stats <- flux_stats(x_predict, lm_flux, n.coeff=9, method="Method 5")
# create simplified output data frame
x_day <- data.frame(DATE=x_predict[['DATE']],
WYEAR=x_predict[['WYEAR']],
MONTH=x_predict[['MONTH']],
SAMPLED=x_predict[['SAMPLED']],
Q=x_predict[['Qobs']],
C=x_predict[['Cest']],
L=x_predict[['Lest']])
x_wyr <- dplyr::group_by(x_predict, WYEAR)
x_wyr <- dplyr::summarise(x_wyr,
N.DAY=length(WYEAR),
N.SAMPLE=sum(SAMPLED),
Q=sum(Qobs),
L=sum(Lest),
C=L/Q)
x_wyr[['L_rse']] <- x_stats[['cv_Lres']] / sqrt(x_wyr[['N.SAMPLE']])
x_wyr[['L_se']] <- x_wyr[['L']] * x_wyr[['L_rse']]
x_wyr[['C_se']] <- x_wyr[['C']] * x_wyr[['L_rse']]
x_mon <- dplyr::mutate(x_predict, MONTHYEAR=lubridate::floor_date(DATE, unit="month"))
x_mon <- dplyr::group_by(x_mon, MONTHYEAR, MONTH, WYEAR)
x_mon <- dplyr::summarise(x_mon,
N.DAY=length(WYEAR),
N.SAMPLE=sum(SAMPLED),
Q=sum(Qobs),
L=sum(Lest, na.rm=TRUE),
C=L/Q)
if (!is.null(predict_wyear_range)) {
x_day <- dplyr::filter(x_day, WYEAR %in% seq(predict_wyear_range[[1]], predict_wyear_range[[2]], by=1))
x_mon <- dplyr::filter(x_mon, WYEAR %in% seq(predict_wyear_range[[1]], predict_wyear_range[[2]], by=1))
x_wyr <- dplyr::filter(x_wyr, WYEAR %in% seq(predict_wyear_range[[1]], predict_wyear_range[[2]], by=1))
}
sample_period <- range(x_fit[['DATE']])
predict_period <- range(x_day[['DATE']])
return(list(model=lm_flux,
obs=as.data.frame(x_obs),
fit=as.data.frame(x_fit),
out=list(day=as.data.frame(x_day),
mon=as.data.frame(x_mon),
wyr=as.data.frame(x_wyr)),
stats=x_stats,
predict=as.data.frame(x_predict),
sampled=sampled,
sample_period=sample_period,
predict_period=predict_period
)
)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.