################################################################################
#' Prediction of flood quantiles at ungauged sites using Region of Influence
#'
#' Return the flood quantiles, root mean square error, relative bias, lower and upper
#' bound of the 95% confidence interval estimated provided with a combination of
#' local regression and kriging technique.
#'
#' @param x Dataset containing the hydrometric data.
#'
#' @param target Descriptors of the target sites. A data.frame where the first
#' column is the name of the basin.
#'
#' @param target.coord Coordinates of the target sites.
#'
#' @param sites Descriptors of gauged sites. A data.frame where the first
#' column is the name of the basin.
#'
#' @param sites.coord Coordinates of the gauged sites.
#'
#' @param size Size of the region of influence.
#'
#' @param period Return period to predict. Must be a single value.
#'
#' @param distr Distribution of the gauged sites. Can a common distribution or
#' a vector of the individual distributions.
#'
#' @param corr Correlation matrix or coefficient that represent
#' the intersite correlation. If no specified the average coefficient
#' of correlation is used between for all pairs.
#'
#' @param nsim Number of bootstrap samples.
#'
#' @param out.model Logical. Should the model be returned.
#'
#' @param verbose Should progress and message be display during the procedure.
#'
#' @details
#'
#' The function uses a quantile regression technique (QRT) to predict flood
#' quantiles of a given return period at ungauged sites.
#' The function first perform at-site flood frequency analysis of the annual
#' maximum discharges extracted from the HYDAT database (or provided data).
#' At-site estimates of the flood quantile are evaluated by the L-moments method.
#' The resulting flood quantiles are fed to the QRT model to predict the
#' flood quantiles at ungauged sites according to its basin characteristics.
#' The QRT model uses a local regression method to evaluate flood quantiles at
#' specific target locations.
#' If coordinates are provided, simple kriging is additionally used to improve
#' the prediction by extracting further information
#' from the spatially correlated residuals.
#'
#' If the size of the region of influence (ROI) is not provided,
#' a value is automatically selected.
#' If the dataset include fewer than 30 sites, all sites will beincluded by default,
#' otherwise ROI sizes of 20 and more are tried by steps of 5. The one
#' associated with the lowest mean absolute prediction error
#' (based on leave-one-out cross-validation) is finally selected
#'
#' Please note that the distance between sites is the Euclidean distance and
#' that proper projection of the coordinates is required for input.
#'
#' A mix of parametric bootstraps for the at-site distributions and residual
#' bootstraps for the QRT model is used to evaluate model uncertainty.
#'
#' @references
#'
#' Durocher, M., Burn, D. H., & Mostofi Zadeh, S. (2018). A nationwide regional
#' flood frequency analysis at ungauged sites using ROI/GLS with copulas and
#' super regions. Journal of Hydrology, 567, 191–202.
#' https://doi.org/10.1016/j.jhydrol.2018.10.011
#'
#' Durocher, M., Burn, D. H., Zadeh, S. M., & Ashkar, F. (2019). Estimating
#' flood quantiles at ungauged sites using nonparametric regression methods
#' with spatial components. Hydrological Sciences Journal, 64(9), 1056–1070.
#' https://doi.org/10.1080/02626667.2019.1620952
#'
#' @import CSHShydRology stats utils
#' @export
#'
#' @examples
#'
#' \dontrun{
#' ## Path the HYDAT database
#' db <- DB_HYDAT
#'
#' ## Extract catchment descriptors
#' xd <- with(descriptors,
#' data.frame(
#' site = station,
#' area = log(area),
#' map = log(map_ws),
#' wb = log(.01 + wb),
#' stream = log(.01 + stream),
#' elev = elev_ws,
#' slope = log(.01 + slope)))
#'
#' ## Put the target site apart
#' target.id <- (xd$site == '01AF009')
#'
#' target <- xd[target.id,]
#' xd <- xd[-target.id,]
#'
#' ## Fit the model
#' FloodnetRoi(target = target, sites = xd, db = db,
#' period = 100, size = 30, nsim = 30)
#'
#' }
#'
FloodnetRoi <- function(
x,
sites,
target,
sites.coord = NULL,
target.coord = NULL,
size = 20,
period = 100,
distr = 'gev',
corr = 0,
nsim = 0,
out.model = FALSE,
verbose = TRUE){
#########################################
## Verification
##########################################
if(verbose)
bar <- txtProgressBar()
## Reorganize target an sites information
target <- .ToDF(target)
sites <- .ToDF(sites)
## Extract stations names
sname <- as.character(sites[,1])
tname <- as.character(target[,1])
sites <- sites[,-1]
target <- target[,-1]
if(any(colnames(sites) != colnames(target)))
stop('Sites and target should have the same columns names')
if(any(tname %in% sname))
stop('The target cannot not be in the gauged sites')
## Scale the descriptors
sites <- scale(sites)
target <- scale(target, center = attr(sites, 'scaled:center'),
scale = attr(sites, 'scaled:scale'))
## Verify that kriging was asked.
do.krig <- ifelse(all(!is.null(target.coord),
!is.null(sites.coord)), TRUE, FALSE)
## Verify that sites ID match
if(do.krig){
msg <- 'The sites ID in the input data and the coord data do not match.'
target.coord <- .ToDF(target.coord)
sites.coord <- .ToDF(sites.coord)
if(nrow(sites) != nrow(sites.coord))
stop(msg)
if(nrow(target) != nrow(target.coord))
stop(msg)
}
if(length(distr) == 1)
distr <- rep(distr, nrow(sites))
if(length(as.numeric(period)) != 1)
stop("Only one return period must be passed")
period.p <- 1-1/period
##################################################
## Verify the hydrometric data
##################################################
## Remove missing from hydrometric data
an <- na.omit(as.data.frame(x))
## Order and Filter
an <- an[order(an[,1], an[,2]), ]
an <- an[an[,1] %in% sname, ]
if(ncol(an) != 3)
stop('Wrong number of columns in the input')
## Reformat column names
colnames(an) <- c('site','date','value')
if(!methods::is(an$date,'Date'))
stop('The second column of x must be a Date')
an$year <- as.integer(format(an$date, '%Y'))
###########################################
## Estimate at-site flood quantiles
##########################################
## Evaluate the at-site flood quantile
an.lst <- split(an$value, an$site)
an.para <- mapply(fAmax, an.lst, distr, SIMPLIFY = FALSE)
an.qua <- mapply(qAmax, an.para, distr,
MoreArgs = list(p = period.p))
#####################################
## Organize the input to use formulas
#####################################
xsite <- data.frame(y = log(an.qua), phy = sites)
xtarget <- data.frame(y = 0, phy = target)
phy.name <- colnames(xsite)[seq(2,ncol(xsite))]
phy.form <- as.formula(paste0('y~', paste(phy.name, collapse = '+')))
sim.form <- as.formula(paste0('~', paste(phy.name, collapse = '+')))
if(do.krig){
xsite <- data.frame(xsite, krig = sites.coord)
xtarget <- data.frame(xtarget, krig = target.coord)
krig.name <- colnames(xsite)[seq(2+length(phy.name),ncol(xsite))]
krig.form <- as.formula(paste0('~', paste(krig.name, collapse = '+')))
}
#################################################################
## Automatically select the number of sites in the pooling groups
#################################################################
do.cv <- length(size) > 1
if(do.cv){
if(verbose)
cat('\n[Finding the ROI size]\n')
if(do.krig){
suppressWarnings(
cv <- CvRoi(x = xsite, phy = phy.form, similarity = sim.form,
kriging = krig.form, model = 'Exp',
nk = size, fold = 10, verbose = FALSE))
} else {
suppressWarnings(
cv <- CvRoi(x = xsite, phy = phy.form, similarity = sim.form,
nk = size, fold = 10, verbose = FALSE))
}
size <- cv$nk[which.min(cv$mad)]
}
####################
## Fitting the model
####################
if(do.krig){
fit <- suppressWarnings(FitRoi(x = xsite, xnew = xtarget, nk = size,
phy = phy.form, similarity = sim.form, kriging = krig.form))
} else{
fit <- suppressWarnings(FitRoi(x = xsite, xnew = xtarget, nk = size,
phy = phy.form, similarity = sim.form))
}
##############################################
# Bootstrapping
###############################################
if(nsim > 1){
if(verbose)
cat('\n[Bootstrapping]\n')
## Verify the input intersite correlation
if(any(corr > 1 | corr < 0))
stop('The correlation coefficient must be between 0 and 1.')
if(all(corr == 0)){
nocorr <- TRUE
} else {
if(length(corr) == 1){
corr <- matrix(corr, nrow(sites), nrow(sites))
diag(corr) <- 1
}
demi <- chol(corr)
nocorr <- FALSE
}
## At-site info for simulation
an.size <- sapply(an.lst,length)
an.nsite <- nrow(sites)
an.nmax <- max(an.size)
## allocate memory
xsite0 <- xsite
qboot <- matrix(0,nsim, nrow(target))
## Make sure that nsim is even
if(nsim %% 2 != 0)
nsim <- nsim + 1
res <- residuals(fit, xsite, fold = 10)
## Use a balance bootstrap to sample the residuals
## It also assumed a symmetric distribution.
res.boot <- split(sample(rep(c(res,-res),nsim/2)),
rep(1:nsim,length(res)))
for(ii in 1:nsim){
if(verbose)
setTxtProgressBar(bar, ii/nsim)
##------- At site simulation ----------##
if(nocorr){
## Simulate directly uniform variable
u <- lapply(an.size, runif)
} else {
## Simulate from multivariate normal
z <- matrix(rnorm(an.nsite * an.nmax), an.nsite, an.nmax)
z <- pnorm(crossprod(demi, z))
u <- lapply(1:an.nsite, function(jj) z[jj, 1:an.size[jj]])
}
oo <- mapply(qAmax, u, an.para, distr)
pp <- mapply(fAmax, oo, distr, SIMPLIFY = FALSE)
qq <- log(mapply(qAmax, pp, distr, MoreArgs = list(p = period.p)))
xsite0$y <- qq + res.boot[[ii]]
## Fit the model
if(do.krig){
ff <- suppressWarnings(FitRoi(x = xsite0, xnew = xtarget, nk = size,
phy = phy.form, similarity = sim.form, kriging = krig.form))
} else{
ff <- suppressWarnings(FitRoi(x = xsite0, xnew = xtarget, nk = size,
phy = phy.form, similarity = sim.form))
}
qboot[ii,] <- ff$pred
}
qboot <- exp(qboot)
qua <- data.frame(quantile = exp(fit$pred),
se = apply(qboot, 2, sd),
lower = apply(qboot, 2, quantile, 0.025),
upper = apply(qboot, 2, quantile, 0.975))
} else {
qua <- data.frame(quantile = exp(fit$pred))
}
rownames(qua) <- tname
xdata <- data.frame(atsite = an.qua, pred = exp(predict(fit, xsite, 10)))
rownames(xdata) <- sname
ans <- list(site = tname, method = 'qrt', period = period,
quantile = qua, size = size, data = xdata)
class(ans) <- 'floodnetRoi'
if(do.cv){
cv <- cv[ ,c(1,4,7)]
colnames(cv) <- c('size','nsh','skill')
ans$cv <- cv
}
if(out.model)
ans$fit <- fit
return(ans)
}
## Function that transform different format into a data.frame
.ToDF <- function(x){
if(is.data.frame(x))
ans <- x
else if(is.vector(x))
ans <- as.data.frame(as.list(x))
else
ans <- as.data.frame(x)
return(ans)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.