######################################################################
#' Peak over threshold (POT)
#'
#' Fit the parameters of a model using peak over threshold (POT).
#'
#' @param x Sample.
#'
#' @param dt Date or time of observation.
#'
#' @param u Threshold.
#'
#' @param method Estimation method. Either \code{'lmom'}, \code{'mle'} or
#' \code{'mle2'}.
#'
#' @param declust If necessary, declustering method.
#' Either 'run' or 'wrc'.
#'
#' @param r Lag parameter for declustering. Either the running length betwee clusters
#' or the minimum separating time between two flood Peaks.
#' The scale must coincide with the observation date \code{dt}.
#'
#' @param rlow For WRC, recession level between two flood peaks in percentage.
#'
#' @param se Should the standard error or the confidence interval be returned.
#'
#' @param ci For `coef` should confidence interval be returned. For `predict`,
#' method used to compute confidence intervals. Either `profile`,`delta` or `boot`.
#'
#' @param nsim Number of bootstrap samples.
#'
#'
#' @details
#'
#' The access functions \code{coef} and \code{vcov} return respectively the
#' parameters and the variance-covariance matrix of the POT model. For the L-moment
#' method the covariance matrix is using bootstraps.
#' The access function \code{predict} evaluates flood quantiles.
#' If \code{dt} is a Date the return period is computed in year using the range
#' of observation.
#'
#'
#' @seealso \link{which.floodPeaks}, \link{which.clusters}, \link{PlotMrl}.
#'
#' @section References:
#'
#' Coles S. (2001) An introduction to statistical modeling of extreme values.
#' Springer Verlag.
#'
#' Davison AC, Smith RL. (1990) Models for Exceedances over High Thresholds.
#' Journal of the Royal Statistical Society Series B (Methodological).
#' 52(3):393–442.
#'
#' @export
#'
#' @examples
#'
#' xd <- rgpa(100, 1, -.2)
#' fit <- FitPot(xd, u = 0)
#'
#' print(fit)
#' vcov(fit)
#' predict(fit)
#' coef(fit, ci = TRUE)
#'
#' fit <- FitPot(flow~date, flowStJohn, u = 1000,
#' declust = 'wrc', r = 14)
#'
#' print(fit)
#' plot(flow~date,flowStJohn, type = 'l')
#' points(fit$time,fit$excess+fit$u, col = 2, pch = 16)
#' abline(h=1000, col = 3, lwd = 2)
#'
#' predict(fit, se = TRUE, ci = 'delta')
#'
FitPot <- function(x, ...) UseMethod('FitPot',x)
#' @export
#' @rdname FitPot
FitPot.data.frame <- function(obj, ...)
FitPot.numeric(x = as.numeric(obj[,2]), dt = as.numeric(obj[,1]), ...)
#' @export
#' @rdname FitPot
FitPot.matrix <- function(obj, ...)
FitPot.numeric(x = as.numeric(obj[,2]), dt = as.numeric(obj[,1]), ...)
#' @export
#' @rdname FitPot
FitPot.formula <- function(form, x, ...){
xd <- model.frame(form,as.list(x))
FitPot.numeric(x = xd[,1], dt = xd[,2], ...)
}
#' @export
#' @rdname FitPot
FitPot.numeric <- function(x, dt = NULL, u = 0, method = 'mle',
declust = 'none', r = 1, rlow = .75, nsim = 1000,
varcov = TRUE){
## Supply a date variable if not presented
if(is.null(dt))
dt <- seq_along(x)
## Compute the number of years
if(class(dt) == 'Date'){
nyear <- sum(is.finite(dt)) / 365.25
} else {
nyear <- NA
}
## If necessary, extract peaks using a declustering method
if(declust == 'run')
cid <- which.clusters(x, dt, u, r)
else if(declust == 'wrc')
cid <- which.floodPeaks(x, dt, u, r = r, rlow = rlow, ini = 'wrc')
else
cid <- x > u
ans <- list(excess = x[cid] - u, time = dt[cid])
## Estimatate the parameters
if(method == 'lmom'){
## Estimatate the parameters
ans$estimate <- fgpaLmom(ans$excess)
## Covariance matrix estimated by boostraps
if(varcov){
ans$varcov <- matrix(NA,2,2)
xboot <- replicate(nsim,
sample(ans$excess, length(ans$excess), replace = TRUE))
pboot <- t(apply(xboot,2,fgpaLmom))
## Correct
vc <- Matrix::nearPD(cov(pboot))$mat
ans$varcov <- as.matrix(vc)
}
} else if(method == 'mle'){
sol <- fgpa1d(ans$excess, sol = TRUE)
ans$estimate <- sol$par
if(varcov){
ans$varcov <- sol$varcov
colnames(ans$varcov) <- rownames(ans$varcov) <- names(ans$estimate)
}
} else if(method == 'mle2'){
sol <- fgpa2d(ans$excess, sol = TRUE)
ans$estimate <- sol$par
if(varcov){
ans$varcov <- sol$varcov
colnames(ans$varcov) <- rownames(ans$varcov) <- names(ans$estimate)
}
if( (ans$estimate[2] < -.5 + 1e-4) | (ans$estimate[2] > 1-1e-4) )
warning(paste('Shape parameter have reach boundary. Normal',
' approximation of the likelihood may be unreliable'))
}
## Complete object
ans$u <- u
ans$method <- method
ans$mrl <- ans$estimate[1]/ (1+ans$estimate[2])
ans$nyear <- nyear
ans$ntot <- length(x)
ans$nexcess <- length(ans$excess)
ans$nabove <- sum(x>u)
class(ans) <- 'fpot'
return(ans)
}
#' @export
#' @rdname FitPot
coef.fpot <- function(obj, rate = FALSE, ci = FALSE, alpha = 0.05){
if(rate)
ans <- c(obj$nexcess/obj$ntot,obj$estimate)
else
ans <- obj$estimate
if(ci){
## Full likelihood
llikFull <- logLik(obj)
## Initiate searching intervals for parameters
ase <- sqrt(obj$varcov[1,1])
abnd0 <- c(pmax(1e-8, obj$estimate[1] - 10 * ase),
obj$estimate[1] + 10* ase)
kbnd0 <- c(-1,2)
## Bound for the deviance
khi <- qchisq(1-alpha,1)
## Deviance for the profile likelihood of the scale parameter
Dscl <- function(z){
nllik <- function(p) -sum(dgpa(obj$excess, z, p, log = TRUE))
## Compute the profile likelihood
llik0 <- -goptimize(nllik, kbnd0)$objective
return(2*(llikFull-llik0))
}
## Deviance for the profile likelihood of the shape parameter
Dshp <- function(z){
nllik <- function(p) -sum(dgpa(obj$excess, p, z, log = TRUE))
llik0 <- -optimize(nllik, abnd0)$objective
return(2*(llikFull-llik0))
}
## Find the boundary
suppressWarnings(lbScl <-
goptimize(function(z) abs(Dscl(z)-khi),
c(abnd0[1],obj$estimate[1]))$minimum)
suppressWarnings(ubScl <-
goptimize(function(z) abs(Dscl(z)-khi),
c(obj$estimate[1],abnd0[2]))$minimum)
suppressWarnings(lbShp <-
goptimize(function(z) abs(Dshp(z)-khi),
c(kbnd0[1],obj$estimate[2]))$minimum)
suppressWarnings(ubShp <-
goptimize(function(z) abs(Dshp(z)-khi),
c(obj$estimate[2],kbnd0[2]))$minimum)
bnd <- data.frame(lower = c(NA,lbScl,lbShp),
upper = c(NA,ubScl,ubShp))
rownames(bnd) <- c('rate','alpha','kappa')
if(!rate) bnd <- bnd[-1,]
ans <- data.frame(estimate = ans, bnd)
}
return(ans)
}
#' @export
#' @rdname FitPot
vcov.fpot <- function(obj, rate = FALSE){
if(rate){
ans <- matrix(0,3,3)
kn <- obj$nexcess/obj$ntot
ans[1,1] <- kn*(1-kn)/obj$ntot
ans[2:3,2:3] <- obj$varcov
} else
ans <- obj$varcov
return(ans)
}
#' @export
#' @rdname FitPot
print.fpot <- function(obj){
cat('\nAnalysis of peaks over threshold')
cat('\n\nThreshold : ', obj$u)
cat('\nNumber of excess : ', obj$nexcess)
if(!is.na(obj$nyear)){
cat('\nNumber of years : ', format(round(obj$nyear,2)))
cat('\nExcess rate (yearly): ', format(round(obj$nexcess/obj$nyear,4)))
} else cat('\nExcess rate: ', format(round(obj$nexcess/obj$ntot,4)))
cat('\nExtremal index: ', format(round(obj$nexcess/obj$nabove,4)))
cat('\nDispersion Index : ', format(DispIndex(obj), digits = 4))
cat('\nMean residual Life : ', format(obj$mrl, digits = 6))
cat('\n\nParameters :\n')
print(obj$estimate, digit = 4)
if(!is.null(obj$varcov)){
cat('\nstd.err :\n')
print(sqrt(diag(obj$varcov)), digit = 4)
}
}
#' @export
#' @rdname FitPot
predict.fpot <- function(obj, q = c(.5, .8, .9, .95, .98, .99), se = FALSE,
ci = "none", alpha = 0.05, nsim = 1000, ...){
## Transform the probability in return period
rt <- 1/(1-q)
## If the excess rate is not provided
if(!is.na(obj$nyear))
mrate <- rt * 365.25
else
mrate <- rt
lambda <- mrate * obj$nexcess / obj$ntot
## funtion that compute the return period
fpred <- function(para){
if(abs(para[2]) > 1e-8)
ans <- obj$u + para[1]/para[2] * (1-lambda^(-para[2]) )
else
ans <- obj$u + para[1] * log(lambda)
return(ans)
}
hatRt <- fpred(obj$estimate)
## Standard error by delta method
if(se | ci == 'delta'){
negk <- -obj$estimate[2]
lk <- lambda^negk
ak <- obj$estimate[1]/negk
gx1 <- obj$estimate[1] * (obj$ntot/obj$nexcess) * lk
gx2 <- (lk-1)/negk
gx3 <- ak * ( lk * log(lambda) - gx2)
gx <- rbind(gx1, gx2, gx3)
hatSe <- sqrt(diag(t(gx) %*% vcov(obj, rate = T) %*% gx))
}
## Confident interval by delta method
if(ci == 'delta'){
lb <- hatRt - qnorm(1-alpha/2) * hatSe
ub <- hatRt + qnorm(1-alpha/2) * hatSe
}
## Confident interval by nonparametric bootstrap
if(ci == 'boot'){
## Resample
xboot <- replicate(nsim, sample(obj$excess, obj$nexcess, replace = TRUE))
## Refit and predict
if(obj$method == 'lmom')
fn <- function(z) fpred(fgpaLmom(z))
else if(obj$method == 'mle2')
fn <- function(z) fpred(fgpa2d(z))
else if (obj$method == 'mle')
fn <- function(z) fpred(fgpa1d(z))
pboot <- apply(xboot,2,fn)
## Compute interval
bootci <- t(apply(pboot, 1, quantile, c(alpha/2, 1-alpha/2)))
lb <- bootci[,1]
ub <- bootci[,2]
}
## Confident interval by profile likelihood
if(ci == 'profile'){
bnd0 <- c(-.5,1)
## Bound of the deviance
khi <- qchisq(1-alpha,1)
lb <- lb0 <- pmax(1e-8, hatRt - 10 * hatSe)
ub <- ub0 <- hatRt + 10 * hatSe
for(ii in seq_along(rt)){
## Deviance function in respect of the return period
Dfun <- function(z){
## Full likelihood
llikFull <- sum(dgpa(obj$excess, obj$estimate[1],
obj$estimate[2], log = TRUE))
## Negative log-likelihood
nllik <- function(p){
num <- z - obj$u
if(abs(p) < 1e-8)
denum <- log(lambda[ii])
else
denum <- -(lambda[ii]^(-p) -1)/p
return(-sum(dgpa(obj$excess, pmax(1e-8,num/denum), p, log = TRUE)))
}
## Compute the profile likelihood
llik0 <- -goptimize(nllik, bnd0)$objective
return(2*(llikFull-llik0))
}
## Lower bound
suppressWarnings(lb[ii] <-
goptimize(function(z) abs(Dfun(z) - khi),
c(lb0[ii],hatRt[ii]))$minimum)
## Upper bound
suppressWarnings(ub[ii] <-
goptimize(function(z) abs(Dfun(z) - khi),
c(hatRt[ii], ub0[ii]))$minimum)
} # end for
} # end if
## Does ci was computed
ci <- !(ci=='none')
## construct the output
if(se & !ci)
ans <- data.frame(rt = hatRt, se = hatSe)
else if(!se & ci)
ans <- data.frame(rt = hatRt, lb = lb, ub = ub)
else if(se & ci)
ans <- data.frame(rt = hatRt, se = hatSe, lb = lb, ub = ub)
else
ans <- data.frame(rt = hatRt)
rt <- round(rt,4)
if(ncol(ans) == 1){
ans <- ans[,1]
names(ans) <- paste('Q', rt, sep='')
} else
rownames(ans) <- paste('Q', rt, sep='')
return(ans)
}
#' @export
logLik.fpot <- function(obj)
sum(dgpa(obj$excess, obj$estimate[1],obj$estimate[2], log = TRUE))
#' @export
AIC.fpot <- function(obj, k = 2)
-2 * logLik(obj) + 2*k
#############################################################
#' Diagnostic tools for peaks over threshold analysis
#'
#' Produce mean residual Life, quantite-quantile,
#' probability-probability,
#' histogram of the excesses,
#' Stability plot for the parameter of the GPA.
#'
#' @param x,dt,form Sample and time of observation. A formula can also
#' be passed, in which case \code{x} is a data.frame.
#'
#' @param u Series of candidate thresholds.
#'
#' @param method Estimation method.
#'
#' @param alpha Confidence interal with level \code{1-alpha/2}.
#'
#' @param param Choice of the parameter to display. Either
#' \code{'alpha'} or \code{'kappa'}.
#'
#' @param ... Others arguments are passed to function \code{plot} and
#' \code{\link{which.floodPeaks}}
#'
#' @details
#'
#' The function \code{PlotPotShape} will loop across calls of function
#' \code{\link{FitPot}} and will produce a graphics of the
#' shape parameter in respect of the threshold \code{u}. See the respective
#' functions for a description of the function parameters.
#'
#' @seealso \link{FitPot}, \link{which.floodPeaks}.
#'
#'
#' @examples
#'
#' ## Find list of candidate thresholds
#' lstu <- seq(500,2500, len = 50)
#'
#' PlotMrl(flow~date,flowStJohn, u = lstu, declust = 'wrc', r = 14)
#'
#' PlotPotShape(flow~date,flowStJohn, u = lstu, declust = 'wrc', r = 14)
#' PlotPotShape(flow~date,flowStJohn, u = lstu,
#' declust = 'wrc', r = 14, param = 'alpha', method = 'mle2')
#'
#'
#' fit <- FitPot(flow~date,flowStJohn, u = 997, declust = 'wrc', r = 14)
#' ppplot(fit)
#' qqplot(fit, ci = TRUE)
#' hist(fit)
#'
#'
#' @export
PlotMrl <- function(form,x, u,
declust = NULL, r = 1, rlow = 0.75,
alpha = 0.05,
ylab = 'Mean Residual Life', xlab = 'Threshold',
col = 'black', lty = 1, lwd = 1,
col.ci = 'black', lty.ci = 3, lwd.ci = 1,
ylim = NULL, display = TRUE, ...){
## reorganize the data
x <- model.frame(form,x)
dt <- x[,2]
x <- x[,1]
u <- as.numeric(u)
sig <- mrl <- nn <- rep(NA,length(u))
for(ii in seq_along(u)){
## Declustering
if(is.null(declust))
cid <- x > u[ii]
else if(declust == 'run')
cid <- which.clusters(x, u[ii], r)
else if( declust == 'wrc')
cid <- which.floodPeaks(x, dt, u[ii], r = r, rlow = rlow)
excess <- x[cid]-u[ii]
mrl[ii] <- mean(excess)
nn[ii] <- length(excess)
sig[ii] <- sd(excess)/sqrt(nn[ii])
}
# Confident intervals
qn <- qnorm(1-alpha/2)
lb <- mrl - qn * sig
ub <- mrl + qn * sig
## Adjusted limits
if(is.null(ylim)) ylim <- range(c(lb,ub)) * c(.95,1.05)
if(display){
plot(u, mrl, xlab = xlab, ylab = ylab, ylim = ylim,
col = col, lty = lty, lwd = lwd, ...)
lines(u, lb, lty = lty.ci, col = col.ci, lwd = lwd.ci)
lines(u, ub, lty = lty.ci, col = col.ci, lwd = lwd.ci)
}
## Return
invisible(data.frame(u = u, n = nn,mrl = mrl, lb = lb, ub = ub))
}
#################################################################
#' Dispersion index
#'
#' Returns the dispersion index (or variance to mean ratio) to verify
#' the hypothesis of a Homogenous Poisson process.
#'
#' @param obj An output of \code{FitPot}.
#'
#' @param k Number of classes. Default value chosen to ensure at least 5 classes
#' and aims at approximately an average 5 events per classe.
#'
#' @param ci Should confident interval and p-value be returns.
#'
#' @param alpha Significance level.
#'
#' @param method Method for computing the confident interval and the p-value.
#' Either 'asymptotic' for chi-squared approximation or 'boot' for bootstrap.
#'
#' @param nsim Number of bootstrap samples.
#'
#' @details
#'
#' The function \code{PlotDispIndex} will loop across calls of function
#' \code{\link{FitPot}} and \code{DispIndex} and will produce a graphics of the
#' Dispersion index in respect of the threshold \code{u}. See respective
#' functions for a description of the function parameters. The points in the
#' plot are returns as a data.frame and parameter \code{display} can be set
#' to \code{FALSE} to prevent the display of the graphic.
#'
#' @section Reference:
#'
#' Lang M, Ouarda TBMJ, Bobée B. (1999) Towards operational guidelines for
#' over-threshold modeling. Journal of Hydrology. Dec 6;225(3):103–17.
#'
#' @seealso \link{PlotMrl}
#'
#' @export
#'
#' @examples
#'
#' fit <- FitPot(flow~date,flowStJohn, u = 1000,
#' declust = 'wrc', r = 14)
#'
#' DispIndex(fit, ci = TRUE)
#'
#' PlotDispIndex(flow~date,flowStJohn,
#' u = seq(500,2000, len = 50),
#' declust = 'wrc', r = 14)
#'
DispIndex <- function(obj, k = NULL, ci = FALSE, alpha = 0.05,
method = 'asymptotic', nsim = 1e4){
## form equal groups and count peaks
ntot <- length(obj$time)
if(is.null(k))
k <- pmax(5,floor(ntot/5))
nn <- table(cut(obj$time, k, labels = FALSE))
## Compute the dispersion index and p-value
vmr <- var(nn)/mean(nn)
if(ci){
a2 <- alpha/2
if(method == 'asymptotic'){
## Verification of minimal conditions for Chi-squared approximation
if(mean(nn) < 1 ) warning('Low average mean per classes')
if(k < 5 ) warning('Too few classes for testing')
## Confident intervale
d <- k-1
myci <- c(qchisq(a2, df = d), qchisq(1-a2, df = d))/d
## Compute p-value assuming asymetrical probabilities
if(vmr < 1) pval <- 2 * pchisq(vmr*d, df = d)
else if (abs(vmr-1) < 1e-8) pval <- 1
else pval <- 2 * (1-pchisq(vmr*d,df = d))
} else if(method == 'boot') {
## sample from poisson distribution
bfun <- function(){
z <- rpois(k,sum(nn)/k)
var(z)/mean(z)
}
bsample <- replicate(nsim, bfun())
## Boostrap intervals and p-value
myci <- quantile(bsample, c(a2,1-a2))
if(vmr < 1) pval <- 2 * mean( bsample <= vmr)
else if (abs(vmr-1) < 1e-8) pval <- 1
else pval <- 2*(1-mean( bsample >= vmr))
}
ans <- c(vmr, myci , pval)
names(ans) <- c('vmr','lb','ub', 'pval')
} else ans <- vmr
return(ans)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.