###############################################################################
#' Fitting a index-flood model on a pooling groups
#'
#' Returns a pooling group model with regional parameters and common
#' homogeneity statistics diagnostics.
#' The function can be used for analysing annual maximum or peaks over threshold.
#'
#' @param lmom Matrix of the at-site L-moments of all sites.
#'
#' @param intersite An intersite object. Alternative to pass L-moments directly.
#'
#' @param nrec Record Length of each site. If a single value is passed, it is
#' applied to all sites.
#'
#' @param nk Size of the desired pooling groups.
#'
#' @param distr At-site distribution of each site. If a single value is passed, it is
#' applied to all sites.
#'
#' @param distance Vector of distance with the target site.
#' The target is included with distance zero.
#'
#' @param method Type of data used. Either annual maximums (`amax`) or
#' peaks over threshold (`pot`).
#'
#' @param diagnostic Shoud the diagnostics homogeneity and Z-score be evaluated.
#'
#' @param diagnostic.nsim Number of simulations used to evaluate the diagnostics.
#'
#' @param pvalue Logical. Should the p-value of the homogeneity test be computed?
#'
#' @param pvalue.nrep,pvalue.nsim Number of repetitions and simulations used for
#' evaluating the p-value.
#'
#' @param inter Type of intersite correlation used for simulation. For `'mat'`the
#' empirical correlation matrix is used directly.
#' For `'avg'` the average coefficient of correlation is used.
#'
#'
#' @param inter.corr Intersite correlation matrix of all sites.
#'
#' @return
#'
#' \item{id}{List of the selected sites}
#' \item{nrec}{Record lengths.}
#' \item{distance}{Distance matrix.}
#' \item{lmom}{Matrix of L-moments.}
#' \item{rlmom}{Regional L-moments.}
#' \item{ntot}{Number of station-year.}
#' \item{distr}{Regional distribution}
#' \item{para}{Parameters of the regional distribution}
#' \item{method}{Type of analysis. Annual maximum (`amax`) or peaks over threshold(`pot`)}
#' \item{stat}{Homogenous criteria and Z-score for goodness-of-fit.}
#' \item{pvalue}{P-values of `stat`}
#' \item{discord}{Discordance measures.}
#' \item{inter}{Type of intersite correlation model used.}
#' \item{inter.corr}{Intersite correlation matrix}
#' \item{inter.avg}{Pairwise average of the intersite correlation.}
#'
#' @section References:
#'
#' Hosking, J. R. M., & Wallis, J. R. (1997). Regional frequency analysis:
#' an approach based on L-moments. Cambridge Univ Pr.
#'
#' Mostofi Zadeh, S., Durocher, M., Burn, D.H., Ashkar, F., 2019.
#' Pooled flood frequency analysis: a comparison based on peaks-over-threshold
#' and annual maximum series. Hydrological Sciences Journal 0, null.
#' https://doi.org/10.1080/02626667.2019.1577556
#'
#' @export
#'
#' @examples
#'
#' isite <- Intersite(ams ~ id + year, flowAtlantic$ams,
#' distance = flowAtlantic$distance,
#' smooth = TRUE)
#'
#' fit <- PoolGroup(isite, distance = 2, nk = 15)
#' print(fit)
#'
#' ## Create a L-moment ratio diagram.
#' plot(fit)
#'
############################################################################
PoolGroup <- function(x, ...) UseMethod('PoolGroup',x)
#' @export
PoolGroup.intersite <- function(
obj,
distance,
nk,
inter = 'mat',
inter.corr = NULL,
...)
{
if (is.null(inter.corr))
inter.corr <- obj$corr
if(length(distance) == 1){
## verify that distance was passed
if(is.null(obj$distance))
stop('Must specify a distance.')
h <- obj$distance[distance,]
} else {
h <- distance
}
PoolGroup(obj$lmom, nrec = obj$nrec, distance = h, nk = nk,
inter = inter, inter.corr = inter.corr, ...)
}
#' @export
PoolGroup.matrix <-function(
lmom,
nrec,
distance,
nk,
distr = NULL,
method = 'amax',
diagnostic = FALSE,
diagnostic.nsim = 1000,
pvalue = FALSE,
pvalue.nrep = 500,
pvalue.nsim = 500,
inter = 'ind',
inter.corr = 0,
lscale = FALSE)
{
## Number of site and corrected maximum number of site
nk <- min(nk, nrow(lmom))
## Transform the second L-moment if not the LCV
if (lscale)
lmom[,2] <- lmom[,2]/lmom[,1]
if(ncol(lmom) < 4)
stop('Must have at least 4 L-moments')
##---------------------------##
## Identify the neighborhood ##
##---------------------------##
## Find the nearest sites and re-order the data
nid <- FindNearest(distance,nk)
lmom <- lmom[nid,]
distance <- distance[nid]
nrec <- nrec[nid]
## Evaluate the number of station-year
ntot <- sum(nrec)
## Reformat the if a constant value
if (length(inter.corr)>1){
inter.corr <- as.matrix(Matrix::nearPD(inter.corr[nid,nid])$mat)
inter.avg <- inter.corr[lower.tri(inter.corr)]
inter.avg <- max(mean(inter.avg),0)
} else {
inter.avg <- inter.corr
}
##---------------------------------------##
## Estimation and homogenity test
##---------------------------------------##
## compute the regional lmoments
dd <- lmomRFA::as.regdata(cbind(nid,nrec,lmom), FALSE)
rlmom <- lmomRFA::regavlmom(dd)
names(rlmom) <- c('L1','LCV','LSK','LKUR','T5','T6')[seq_along(rlmom)]
## POT method always use Pareto distribution
if (method == 'pot')
distr <- 'gpa'
## If no distribution is passed, the diagnostic routine
## must be performed to select the growth curve
if (is.null(distr) & method == 'amax')
diagnostic <- TRUE
## If necessary evaluate the statistics H and Z
if (diagnostic){
tst <- lmomRFA::regtst(dd, diagnostic.nsim)
stat <- c(tst$H, tst$Z)
names(stat) <- c('H1','H2','H3','Zglo','Zgev','Zgno','Zpe3','Zgpa')
discord <- tst$D
if (is.null(distr))
distr <- c('glo','gev','gno','pe3')[which.min(abs(stat[4:7]))]
} else {
## No diagnostic performed
stat <- NULL
discord <- NULL
}
##-----------------------------------------##
## Evaluating P-value of the homogeneity test
##-----------------------------------------##
## Don't perform p-value simulation if statistics not computed
if (is.null(stat))
pvalue <- FALSE
if (pvalue){
## Choose the correlation with respect to "inter"
if(inter == 'mat'){
corr0 <- inter.corr
} else {
corr0 <- inter.avg
}
## Compute the null distribution of H and Z
h0 <- lmomRFA::regsimh(lmom::quakap, rlmom,
cor = corr0,
nrec = nrec,
nrep = pvalue.nrep, nsim = pvalue.nsim)$results
pval <- rep(0,length(stat))
## Compute p-value of homogeneity test (unilateral)
for(kk in 1:3)
pval[kk] <- mean(stat[kk] < h0[kk,])
## Compute p-value of Z-score (bilateral)
for(kk in 4:8)
pval[kk] <- mean(abs(stat[kk]) < abs(h0[kk,]))
names(pval) <- names(stat)
} else pval <- NULL
## Estimate model parameters
if (method == 'amax'){
v <- lmomco::vec2lmom(rlmom, lscale = FALSE)
para <- lmomco::lmom2par(v, distr)$para
} else if (method == 'pot'){
k <- 1/rlmom[2]-2
para <- c(0, 1 + k, k)
names(para) <- c('xi','alpha','kappa')
}
ans <- list(id = nid,
nrec = nrec,
lmom = lmom,
rlmom = rlmom,
ntot = ntot,
distr = distr,
para = para,
method = method,
stat = stat,
pvalue = pval,
discord = discord,
distance = distance,
inter = inter,
inter.corr = inter.corr,
inter.avg = inter.avg)
class(ans) <- 'poolgrp'
ans
}
#' @export
print.poolgrp <- function(obj)
{
cat('\nRegional frequency analysis with pooling groups\n')
cat('\nMethod:', obj$method)
cat('\nNb. station:', nrow(obj$lmom))
cat('\nStation-year:', obj$ntot)
if (obj$inter == 'mat'){
cat('\nIntersite:', dim(obj$inter.corr) ,'(matrix)\n')
} else {
cat('\nIntersite: ', round(obj$inter.avg,2),' (', obj$inter,')', sep= '')
}
if (any(!is.null(obj$stat))){
cat('\nHomogeneity:', round(obj$stat[1:3],2))
if (obj$method == 'amax'){
cat('\n\nZ-scores (absolute):\n')
print(round(abs(obj$stat[4:8]),2))
} else cat('\n')
}
cat('\nRegional L-moments:\n')
print(obj$rlmom, digits = 4)
cat('\nDistribution:', obj$distr)
cat('\nParameter:\n')
print(obj$para, digits = 4)
if (!is.null(obj$pvalue)){
cat('\np-values:\n')
print(obj$pvalue)
}
}
#' @export
plot.poolgrp <- function(obj){
lmm <- obj$lmom[,3:4]
colnames(lmm) <- c('t_3','t_4')
lmom::lmrd(lmm)
points(obj$rlmom[3],obj$rlmom[4], pch = 16, col = 'red', cex = 1.5)
}
############################################################################
#' Remove heterogenous sites from pooling groups
#'
#' Return a pooling groups object where one site is removed.
#' The selected site is the one that best improve the
#' homogeneity score.
#' The function `Poolremove.auto` proceed step-by-step and remove sites until
#' the heterogeous criteria or a stopping rule is met.
#'
#' @param obj A pooling group object.
#'
#' @param method Which homogeneity statistics used in the procedure. Choices :
#' `H1`, `H2`, `H3`.
#'
#' @param nsim Number of simulation used to evaluate the homogenous criteria
#'
#' @param distr.fix Logical, should the selection of the distribution be
#' reevaluated after removing the site.
#'
#' @export
#'
#' @examples
#'
#' isite <- Intersite(ams ~ id + year, flowAtlantic$ams,
#' distance = flowAtlantic$distance,
#' smooth = TRUE)
#'
#' fit <- PoolGroup(isite, distance = 2, nk = 30)
#'
#' ## remove the site no. 1, see fit$id for a list of the available site
#' PoolRemove(fit, id = 1)$stat[1:3]
#'
#' PoolRemove.auto(fit, tol = 1.5)$stat[1:3]
#'
############################################################################
PoolRemove <- function(
obj,
id = NULL,
method = 'H1',
nsim = 1000,
distr.fix = TRUE)
{
## Number of site
nk <- nrow(obj$lmom)
## Which homogeneity statistic to use
r <- which(method == c('H1','H2','H3'))
## Create a regdata object
dd <- lmomRFA::as.regdata(cbind(obj$id, obj$nrec, obj$lmom), FALSE)
## Compute the homogeneity if not already done in obj
if (is.null(obj$stat)){
hmg <- lmomRFA::regtst(dd)$H
} else {
hmg <- obj$stat[1:3]
}
## Function for computing the homogeneity H without a specific station
FunHmg <- function(ii) lmomRFA::regtst(dd[-ii,],nsim)
if(!is.null(id)){
mid <- which(id == obj$id)
## verify that the site to remove is in the pooling group
if(is.null(mid))
stop("Must selected a site in the pooling group")
tst0 <- FunHmg(mid)
} else {
## find the stations that improve best H
## Note that the target is always the first station and must be kept
tst.new <- lapply(1:nk, FunHmg)
hgm.all <- sapply(tst.new, getElement, 'H')
mid <- which.min(abs(hgm.all[r,-1])) + 1
tst0 <- tst.new[[mid]]
}
## update the pooling group object
obj$id <- obj$id[-mid]
obj$lmom <- obj$lmom[-mid,]
obj$nrec <- obj$nrec[-mid]
obj$ntot <- sum(obj$nrec)
obj$distance <- obj$distance[-mid]
obj$stat[1:8] <- c(tst0$H, tst0$Z)
obj$discord <- tst0$D
## update the correlation matrix
if (length(obj$inter.corr)>1){
inter.corr <- obj$inter.corr[-mid,-mid]
obj$inter.corr <- as.matrix(Matrix::nearPD(inter.corr)$mat)
inter.avg <- inter.corr[lower.tri(inter.corr)]
obj$inter.avg <- max(mean(inter.avg),0)
} else {
inter.avg <- inter.corr
}
## For amax, update the selected distribution
if (!distr.fix & obj$method == 'amax')
obj$distr <- c('glo','gev','gno','pe3')[which.min(abs(obj$stat[4:7]))]
## Estimate the new regional L-moments
obj$rlmom <- lmomRFA::regavlmom(dd[-mid,])
## Refit the growth curve with maybe new distribution
UpdateDistr(obj, obj$distr)
}
#' @export
PoolRemove.auto <-
function(obj, method = 'H1', tol = 2, nmin = 15, ntot.min = 0, nsim = 1000,
verbose = TRUE, distr.fix = FALSE){
## Select the criteria for evaluating the homogeneity
r <- which(c('H1','H2','H3') == method)
if (obj$stat[r] <= tol){
warning('The pooling group is already sufficiently homogenous')
return(obj)
}
repeat{
## monitoring (if require)
if (verbose){
print(paste('n :', nrow(obj$lmom),
', h:', round(obj$stat[r],2),
', t:', obj$ntot, sep = '') )
}
## remove one
suppressWarnings(obj.new <- PoolRemove(obj, method = method, nsim = nsim))
## Verify that the new pooling group has enough station-years
if ((obj.new$ntot < ntot.min) | (length(obj.new$id) < nmin)){
warning('The minimal number of stations or station-years have been reached')
ans <- obj
break
}
## If a sufficient heterogeous criterion has been reached
if (obj.new$stat[r] <= tol){
ans <- obj.new
break
}
## iterate
obj <- obj.new
}
## Find the new best distribution if necessary
if (!distr.fix & ans$method == 'amax')
distr <- c('glo','gev','gno','pe3')[which.min(abs(ans$stat[4:7]))]
else
distr <- ans$distr
## Update parameters
return(UpdateDistr(ans, distr))
}
UpdateDistr <- function(obj, distr)
{
obj$distr <- distr
## Estimate parameters
if (obj$method == 'amax'){
v <- lmomco::vec2lmom(obj$rlmom, lscale = FALSE)
obj$para <- lmomco::lmom2par(v, obj$distr)$para
} else if (obj$method == 'pot'){
k <- 1/obj$rlmom[2]-2
obj$para <- c(1 + k, k)
}
return(obj)
}
############################################################################
#' Flood quantiles estimated at the target of a pooling group
#'
#' Predict the flood quantile of a given return period for its target sites.
#'
#' @param obj Pooling group object.
#'
#' @param q Probability associated to the flood quantiles.
#'
#' @param indx Index flood factor. By default the sample average of the target.
#' @param ci Logical. Should the confident intervals and the standard deviation
#' be evaluated?
#'
#' @param nsim Number of simulation used for approximating the confident intervals.
#' @param alpha Significance level.
#'
#' @section References:
#'
#' Hosking, J. R. M., & Wallis, J. R. (1997). Regional frequency analysis:
#' an approach based on L-moments. Cambridge Univ Pr.
#'
#' @import lmomRFA
#' @import lmom
#'
#' @export
#'
#' @examples
#' isite <- Intersite(ams ~ id + year, flowAtlantic$ams,
#' distance = flowAtlantic$distance,
#' smooth = TRUE)
#'
#' fit <- PoolGroup(isite, distance = 2, nk = 30)
#'
#' predict(fit, c(.3,.7))
#' predict(fit, ci = TRUE)
#'
predict.poolgrp <-function(
obj,
q = c(.5, .8, .9, .95, .98 , .99),
indx = NULL,
ci = FALSE,
nsim = 5000,
alpha = 0.05)
{
## Probability in term of return period.
rt <- 1/(1-q)
## Default index-flood is the mean of the site
if (is.null(indx))
indx <- obj$lmom[,1]
## Extract quantile function
qfunc <- getFromNamespace(paste0('qua',obj$distr), 'lmom')
if (ci){
## extract correlation matrix or value
if (obj$inter == 'mat'){
corr0 <- obj$inter.corr
} else {
corr0 <- obj$inter.avg
}
## compute the parameter at each site
ffunc <- getFromNamespace(paste0('pel',obj$distr), 'lmom')
lmom <- obj$lmom
lmom[,2] <- lmom[,2]*lmom[,1]
para <- apply(lmom,1,ffunc)
## Alternative code: more robust version
#ffunc <- function(v){
# z <- lmomco::vec2lmom(v, lscale = FALSE)
# lmomco::lmom2par(z,obj$distr)$para
#}
#para <- apply(obj$lmom,1,ffunc)
## Create a regfit object
dd <- as.regdata(cbind(obj$id,obj$nrec,obj$lmom),FALSE)
rfit <- regfit(dd,obj$distr)
## simulate flood quantile
simq <- regsimq(qfunc,
para = t(para),
cor = corr0,
index = indx,
nrec = obj$nrec,
nrep = nsim,
fit = obj$distr,
f = q,
boundprob = c(alpha/2, 1-alpha/2))
## extract confident interval and standard deviation
ans <- sitequantbounds(simq, rfit, site=1)
ans <- as.matrix(ans)
colnames(ans) <- c('rt', 'pred', 'se', 'lower','upper')
ans[,1] <- rt
} else {
## Compute the flood quantiles
ans <- indx[1] * qfunc(q, obj$para)
names(ans) <- paste0('rt',round(rt,1))
}
return(ans)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.