###############################################################
#' Estimation of the GPA distribution by maximum likelihood.
#'
#' Low level functions for the estimation of the generalized pareto
#' distribution(GPA) with two parameters base
#' either on maximum likelihoood or the method of L-moments.
#' The algorithm of \code{fpot2d} is using
#' \code{optim} to directly optimize the log-likelihood (bivariate), while
#' the algorithm of \code{fpot1d} is using a transformation to use
#' a univariate optimization routine. Moreover, \code{fpot2d} constraint
#' the shape parameter between -.5 and 1.
#'
#' @param x Sample.
#'
#' @param sol Does solution from \code{optim} be returned.
#' In case of \code{fpot1d}, it returns the variance covariance matrix.
#'
#' @param par0 Initial estimation.
#'
#'
#' @param ... aditional param to pass to \code{\link{optim}}
#'
#' @section Reference:
#'
#' Davison AC, Smith RL. (1990) Models for Exceedances over High Thresholds.
#' Journal of the Royal Statistical Society Series B (Methodological).
#' 52(3):393–442.
#'
#' Hosking JRM (1990). L-Moments: Analysis and Estimation of Distributions Using
#' Linear Combinations of Order Statistics. Journal of the Royal Statistical
#' Society Series B (Methodological). 52(1):105–24.
#'
#' @export
#'
#' @examples
#'
#' x <- rgpa(10000, 1, -.2)
#' fpot1d(x)
#' fpot2d(x)
#' fpotLmom(x)
#'
fpot1d <- function(x, sol = FALSE){
fk <- function(sigma) -mean(log(1-x/sigma))
fpara <- function(sigma){
length(x) * (-log(fk(sigma) * sigma) + fk(sigma) - 1)
}
mx <- max(x)
sigma <- NA
suppressWarnings(try(
sigma <- optimize(fpara, interval = c(-10000,10000) * mx,
maximum = TRUE)$maximum))
para <- rep(NA,2)
names(para) <- c('alpha','kappa')
try(para[2] <- fk(sigma))
try(para[1] <- para[2]*sigma)
if(sol){
ans <- list(par = para, varcov = matrix(NA,2,2))
ans$varcov[2,2] <- 1-para[2]
ans$varcov[1,1] <- 2 * para[1]^2
ans$varcov[1,2] <- ans$varcov[2,1] <- para[1]
ans$varcov <- ans$varcov * ans$varcov[2,2] / length(x)
} else {
ans <- para
}
return(ans)
}
#' @export
#' @rdname fpot1d
fpot2d <- function(x, sol = FALSE, par0 = NULL, ...){
logit0 <- function(z) logit((z+.5)/1.5)
expit0 <- function(z) expit(z)*1.5 -.5
## negative loglikelihood
nllik <- function(para) -sum(dgpa(x,exp(para[1]),
expit0(para[2]), log = TRUE))
if(!is.null(par0)){
par0[1] <- log(par0[1])
par0[2] <- logit0(par0[2])
} else
par0 <- c(log(var(x)),-.6931472)
## estimate the parameter
para <- optim( par0, nllik, ...)$par
para[1] <- exp(para[1])
para[2] <- expit0(para[2])
names(para) <- c('alpha','kappa')
if(sol){
ans <- list(par = para, varcov = matrix(NA,2,2))
ans$varcov[2,2] <- 1-para[2]
ans$varcov[1,1] <- 2 * para[1]^2
ans$varcov[1,2] <- ans$varcov[2,1] <- para[1]
ans$varcov <- ans$varcov * ans$varcov[2,2] / length(x)
} else {
ans <- para
}
return(ans)
}
#' @export
#' @rdname fpot1d
fpotLmom <- function(x){
lmom0 <- lmomco::lmoms(x, nmom = 3)
return(lmomco::lmom2par(lmom0,'gpa', xi = 0)$para[-1])
}
######################################################################
#' Peak over threshold (POT)
#'
#' Fit the parameters of a peak over threshold (POT) model.
#'
#' @param x Sample.
#'
#' @param dt Date or time of observation.
#'
#' @param u Threshold.
#'
#' @param method Estimation method. Either \code{'lmom'}, \code{'mle'} or
#' \code{'mle2'}.
#'
#' @param declust Method for declustering. If necessary,
#' Either 'run' or 'flood'.
#'
#' @param r Lag parameter for declustering. Either the run parameter
#' or the minimum time between two flood Peaks.
#' The scale must coincide with the observation date \code{dt}.
#'
#' @param rlow Declustering parameter. Percentage that define a minimal
#' recession time between two flood peaks.
#'
#' @param se,ci Should the Standard error or the confident interval be return.
#' The standard error are obtained by the delta method and the confident
#' interval by nonparametric boostrap.
#'
#' @param nboot Number of bootstrap sample. Used to extimate the covariance
#' matrix with the method of L-moments and predict return period.
#' For prediction, if \code{nboot = 0} confident intervals are obtained by
#' profile likekihood.
#'
#' @details
#'
#' The access functions \code{coef} and \code{vcov} return respectively the
#' parameters and the variance-covariance matrix of the POT model. The
#' variance covariance matrix is available only with the method \code{'mle'}
#' The access function \code{predict} returns the return periods.
#' If \code{dt} is a Date the return period is computed in year using the range
#' of observation.
#' The \code{rate} (i.e. number of event per year) can be manually adjusted
#' in case. By default \code{rate = 1}.
#'
#' The declustering can be a simple 'run' declustering, i.e. a cluster
#' start when passing a threshold and stop when \code{r} steps are below
#' the threshold.
#'
#' @seealso \link{which.floodPeaks}, \link{mrlPlot}.
#'
#' @section References:
#'
#' Choulakian V, Stephens MA. (2001) Goodness-of-Fit Tests for the Generalized
#' Pareto Distribution. Technometrics. 43(4):478–84.
#'
#' Coles S. (2001) An introduction to statistical modeling of extreme values.
#' Springer Verlag.
#'
#' Davison AC, Smith RL. (1990) Models for Exceedances over High Thresholds.
#' Journal of the Royal Statistical Society Series B (Methodological).
#' 52(3):393–442.
#'
#' @export
#'
#' @examples
#'
#' xd <- rgpa(100, 1, -.2)
#' fit <- fitPot(xd, u = 0)
#'
#' print(fit)
#' vcov(fit)
#' predict(fit)
#'
#' fit <- fitPot(flow~date, canadaFlood$daily, u = 1000,
#' declust = 'flood', r = 14)
#'
#' print(fit)
#' plot(flow~date,canadaFlood$daily, type = 'l', col = 4)
#' points(fit$time,fit$excess+fit$u, col = 2)
#'
#' predict(fit, se = TRUE,ci = TRUE)
#'
#' fit <- fitPot(flow~date, canadaFlood$daily, u = 1000,
#' declust = 'flood', r = 14, method = 'lmom')
fitPot <- function(x, ...) UseMethod('fitPot',x)
#' @export
#' @rdname fitPot
fitPot.data.frame <- function(obj, ...)
fitPot.numeric(x = as.numeric(obj[,2]), dt = as.numeric(obj[,1]), ...)
#' @export
#' @rdname fitPot
fitPot.matrix <- function(obj, ...)
fitPot.numeric(x = as.numeric(obj[,2]), dt = as.numeric(obj[,1]), ...)
#' @export
#' @rdname fitPot
fitPot.formula <- function(form, x, ...){
xd <- model.frame(form,as.list(x))
fitPot.numeric(x = xd[,1], dt = xd[,2], ...)
}
#' @export
#' @rdname fitPot
fitPot.numeric <- function(x, dt = NULL, u = 0, method = 'mle',
declust = NULL, r = 1, rlow = .75, nboot = 1000,
declust.scale = FALSE){
## Supply a date variable if not presented
if(is.null(dt))
dt <- seq_along(x)
## Compute the number of years
if(class(dt) == 'Date'){
#nyear <- as.numeric(diff(range(dt))) / 365.242
nyear <- sum(is.finite(dt)) / 365.242
} else {
nyear <- NA
}
## Extract peaks using declustering method if necessary
if(is.null(declust))
cid <- x > u
else if(declust == 'run')
cid <- which.clusters(x, u, r)
else if(declust == 'flood.clust')
cid <- which.floodPeaks(x, dt, u, r = r, rlow = rlow, ini = 'clust')
else if(declust == 'flood.lmax')
cid <- which.floodPeaks(x, dt, u, r = r, rlow = rlow, ini = 'lmax')
else if(declust %in% c('flood','flood.lmax2'))
cid <- which.floodPeaks(x, dt, u, r = r, rlow = rlow, ini = 'lmax2')
ans <- list(excess = x[cid] - u, time = dt[cid])
## Estimatate the parameters
if(method == 'lmom'){
ans$estimate <- fpotLmom(ans$excess)
ans$varcov <- matrix(NA,2,2)
if(!is.null(nboot)){
xboot <- replicate(nboot,
sample(ans$excess, length(ans$excess), replace = TRUE))
pboot <- t(apply(xboot,2,fpotLmom))
vc <- Matrix::nearPD(cov(pboot))$mat
ans$varcov <- as.matrix(vc)
}
} else if(method == 'mle'){
sol <- fpot1d(ans$excess, sol = TRUE)
ans$estimate <- sol$par
ans$varcov <- sol$varcov
colnames(ans$varcov) <- rownames(ans$varcov) <- names(ans$estimate)
} else if(method == 'mle2'){
sol <- fpot2d(ans$excess, sol = TRUE)
ans$estimate <- sol$par
ans$varcov <- sol$varcov
colnames(ans$varcov) <- rownames(ans$varcov) <- names(ans$estimate)
if( (ans$estimate[2] < -.5 + 1e-4) | (ans$estimate[2] > 1-1e-4) )
warning(paste('Shape parameter have reach boundary. Normal',
' approximation of the likelihood may be unreliable'))
}
## Complete object
ans$u <- u
ans$method <- method
ans$mrl <- ans$estimate[1]/ (1+ans$estimate[2])
ans$nyear <- nyear
ans$ntot <- length(x)
ans$nexcess <- length(ans$excess)
ans$nabove <- sum(x>u)
class(ans) <- 'fpot'
return(ans)
}
#' @export
#' @rdname fitPot
coef.fpot <- function(obj, rate = FALSE, ci = FALSE, alpha = 0.05){
if(rate)
ans <- c(obj$nexcess/obj$ntot,obj$estimate)
else
ans <- obj$estimate
if(ci){
## Full likelihood
llikFull <- logLik(obj)
## Initiate searching intervals for parameters
ase <- sqrt(obj$varcov[1,1])
abnd0 <- c(pmax(1e-8, obj$estimate[1] - 10 * ase),
obj$estimate[1] + 10* ase)
kbnd0 <- c(-1,2)
## Bound for the deviance
khi <- qchisq(1-alpha,1)
## Deviance for the profile likelihood of the scale parameter
Dscl <- function(z){
nllik <- function(p) -sum(dgpa(obj$excess, z, p, log = TRUE))
## Compute the profile likelihood
llik0 <- -goptimize(nllik, kbnd0)$objective
return(2*(llikFull-llik0))
}
## Deviance for the profile likelihood of the shape parameter
Dshp <- function(z){
nllik <- function(p) -sum(dgpa(obj$excess, p, z, log = TRUE))
llik0 <- -optimize(nllik, abnd0)$objective
return(2*(llikFull-llik0))
}
## Find the boundary
suppressWarnings(lbScl <-
goptimize(function(z) abs(Dscl(z)-khi),
c(abnd0[1],obj$estimate[1]))$minimum)
suppressWarnings(ubScl <-
goptimize(function(z) abs(Dscl(z)-khi),
c(obj$estimate[1],abnd0[2]))$minimum)
suppressWarnings(lbShp <-
goptimize(function(z) abs(Dshp(z)-khi),
c(kbnd0[1],obj$estimate[2]))$minimum)
suppressWarnings(ubShp <-
goptimize(function(z) abs(Dshp(z)-khi),
c(obj$estimate[2],kbnd0[2]))$minimum)
bnd <- data.frame(lb = c(NA,lbScl,lbShp),
ub = c(NA,ubScl,ubShp))
rownames(bnd) <- c('rate','alpha','kappa')
if(!rate) bnd <- bnd[-1,]
ans <- data.frame(estimate = ans, bnd)
}
return(ans)
}
#' @export
#' @rdname fitPot
vcov.fpot <- function(obj, rate = FALSE){
if(rate){
ans <- matrix(0,3,3)
kn <- obj$nexcess/obj$ntot
ans[1,1] <- kn*(1-kn)/obj$ntot
ans[2:3,2:3] <- obj$varcov
} else
ans <- obj$varcov
return(ans)
}
#' @export
#' @rdname fitPot
print.fpot <- function(obj){
cat('\nAnalysis of peaks over threshold')
cat('\n\nThreshold : ', obj$u)
cat('\nNumber of excess : ', obj$nexcess)
if(!is.na(obj$nyear)){
cat('\nNumber of years : ', format(round(obj$nyear,2)))
cat('\nExcess rate (yearly): ', format(round(obj$nexcess/obj$nyear,4)))
} else cat('\nExcess rate: ', format(round(obj$nexcess/obj$ntot,4)))
cat('\nExtremal index: ', format(round(obj$nexcess/obj$nabove,4)))
cat('\nDispersion Index : ', format(dispIndex(obj), digits = 4))
cat('\nMean residual Life : ', format(obj$mrl, digits = 6))
cat('\n\nParameters :\n')
print(obj$estimate, digit = 4)
cat('\nstd.err :\n')
print(sqrt(diag(obj$varcov)), digit = 4)
}
#' @export
#' @rdname fitPot
predict.fpot <- function(obj, rt = c(2, 5, 10, 20, 50, 100), se = FALSE,
ci = FALSE, alpha = 0.05, nboot = 0, ...){
## If the excess rate is not provided
if(!is.na(obj$nyear))
mrate <- rt * 365.242
else
mrate <- rt
lambda <- mrate * obj$nexcess / obj$ntot
## funtion that compute the return period
fpred <- function(para){
if(abs(para[2]) > 1e-8)
ans <- obj$u + para[1]/para[2] * (1-lambda^(-para[2]) )
else
ans <- obj$u + para[1] * log(lambda)
return(ans)
}
hatRt <- fpred(obj$estimate)
## Standard error by delta method
if(se | (ci & nboot == 0)){
negk <- -obj$estimate[2]
lk <- lambda^negk
ak <- obj$estimate[1]/negk
gx1 <- obj$estimate[1] * (obj$ntot/obj$nexcess) * lk
gx2 <- (lk-1)/negk
gx3 <- ak * ( lk * log(lambda) - gx2)
gx <- rbind(gx1, gx2, gx3)
hatSe <- sqrt(diag(t(gx) %*% vcov(obj, rate = T) %*% gx))
}
## verify that ci with lmoments are bootstrap
if(ci & obj$method == 'lmom' & nboot <= 0)
stop('Confident interval with L-moments must use bootstrap')
## Confident interval by non parametric bootstrap
if(ci & nboot > 0){
## Resample
xboot <- replicate(nboot, sample(obj$excess, obj$nexcess, replace = TRUE))
## Refit and predict
if(obj$method == 'lmom')
fn <- function(z) fpred(fpotLmom(z))
else if(obj$method == 'mle2')
fn <- function(z) fpred(fpot2d(z))
else if (obj$method == 'mle')
fn <- function(z) fpred(fpot1d(z))
pboot <- apply(xboot,2,fn)
## Compute interval
bootci <- t(apply(pboot, 1, quantile, c(alpha/2, 1-alpha/2)))
lb <- bootci[,1]
ub <- bootci[,2]
}
## Confident interval by profile likelihood
if(ci & nboot == 0){
bnd0 <- c(-.5,1)
## Bound of the deviance
khi <- qchisq(1-alpha,1)
lb <- lb0 <- pmax(1e-8, hatRt - 10 * hatSe)
ub <- ub0 <- hatRt + 10 * hatSe
for(ii in seq_along(rt)){
## Deviance function in respect of the return period
Dfun <- function(z){
## Full likelihood
llikFull <- sum(dgpa(obj$excess, obj$estimate[1],
obj$estimate[2], log = TRUE))
## Negative log-likelihood
nllik <- function(p){
num <- z - obj$u
if(abs(p) < 1e-8)
denum <- log(lambda[ii])
else
denum <- -(lambda[ii]^(-p) -1)/p
return(-sum(dgpa(obj$excess, pmax(1e-8,num/denum), p, log = TRUE)))
}
## Compute the profile likelihood
llik0 <- -goptimize(nllik, bnd0)$objective
return(2*(llikFull-llik0))
}
## Lower bound
suppressWarnings(lb[ii] <-
goptimize(function(z) abs(Dfun(z) - khi),
c(lb0[ii],hatRt[ii]))$minimum)
## Upper bound
suppressWarnings(ub[ii] <-
goptimize(function(z) abs(Dfun(z) - khi),
c(hatRt[ii], ub0[ii]))$minimum)
} # end for
} # end if
## construct the output
if(se & !ci)
ans <- data.frame(rt = hatRt, se = hatSe)
else if(!se & ci)
ans <- data.frame(rt = hatRt, lb = lb, ub = ub)
else if(se & ci)
ans <- data.frame(rt = hatRt, se = hatSe, lb = lb, ub = ub)
else{
ans <- data.frame(rt = hatRt)
}
if(ncol(ans) == 1){
ans <- ans[,1]
names(ans) <- paste('rt',rt)
} else
rownames(ans) <- paste('rt',rt)
return(ans)
}
#' @export
logLik.fpot <- function(obj)
sum(dgpa(obj$excess, obj$estimate[1],obj$estimate[2], log = TRUE))
#' @export
AIC.fpot <- function(obj, k = 2)
-2 * logLik(obj) + 2*k
#############################################################
#' Diagnostic tools for peaks over threshold analysis
#'
#' Produce mean residual Life, quantite-quantile,
#' probability-probability,
#' histogram of the excesses,
#' Stability plot for the parameter of the GPA.
#'
#' @param x,dt,form Sample and time of observation. A formula can also
#' be passed, in which case \code{x} is a data.frame.
#'
#' @param u Series of candidate thresholds.
#'
#' @param method Estimation method.
#'
#' @param alpha Confidence interal with level \code{1-alpha/2}.
#'
#' @param param Choice of the parameter to display. Either
#' \code{'alpha'} or \code{'kappa'}.
#'
#' @param ... Others arguments are passed to function \code{plot} and
#' \code{\link{which.floodPeaks}}
#'
#' @details
#'
#' The function \code{gpaStabPlot} will loop across calls of function
#' \code{\link{fitPot}} and will produce a graphics of the
#' shape parameter in respect of the threshold \code{u}. See the respective
#' functions for a description of the function parameters.
#'
#' @seealso \link{adTestPlot}, \link{dispIndexPlot}, \link{fitPot},
#' \link{which.floodPeaks}.
#'
#'
#' @examples
#'
#' ## Find list of candidate thresholds
#' lstu <- seq(500,2500, len = 100)
#'
#' mrlPlot(flow~date,canadaFlood$daily, u = lstu, declust = 'flood', r = 14)
#'
#' gpaStabPlot(flow~date,canadaFlood$daily, u = lstu, declust = 'flood', r = 14)
#' gpaStabPlot(flow~date,canadaFlood$daily, u = lstu,
#' declust = 'flood', r = 14, param = 'alpha', method = 'mle2')
#'
#'
#' fit <- fitPot(flow~date,canadaFlood$daily, u = 997, declust = 'flood', r = 14)
#' ppplot(fit)
#' qqplot(fit, ci = TRUE)
#' hist(fit)
#'
#'
#' @export
mrlPlot <- function(x, ...) UseMethod('mrlPlot',x)
#' @export
#' @rdname mrlPlot
mrlPlot.data.frame <- function(obj, ...)
mrlPlot.numeric(x = as.numeric(obj[,2]), dt = as.numeric(obj[,1]), ...)
#' @export
#' @rdname mrlPlot
mrlPlot.matrix <- function(obj, ...)
mrlPlot.numeric(x = as.numeric(obj[,2]), dt = as.numeric(obj[,1]), ...)
#' @export
#' @rdname mrlPlot
mrlPlot.formula <- function(form, x, ...){
xd <- model.frame(form,as.list(x))
mrlPlot.numeric(x = xd[,1], dt = xd[,2], ...)
}
#' @export
#' @rdname mrlPlot
mrlPlot.numeric <- function(x, dt = NULL, u,
declust = 'run', r = 1, rlow = 0.75,
alpha = 0.05,
ylab = 'Mean Residual Life', xlab = 'Threshold',
col = 'black', lty = 1, lwd = 1,
col.ci = 'black', lty.ci = 3, lwd.ci = 1,
ylim = NULL, display = TRUE, ...){
## Supply a date variable if not presented
if(is.null(dt))
dt <- seq_along(x)
u <- as.numeric(u)
sig <- mrl <- nn <- rep(NA,length(u))
for(ii in seq_along(u)){
## Declustering
if(is.null(declust))
cid <- x > u[ii]
else if(declust == 'run')
cid <- which.clusters(x, u[ii], r)
else if( declust == 'flood')
cid <- which.floodPeaks(x, dt, u[ii], r = r, rlow = rlow)
excess <- x[cid]-u[ii]
mrl[ii] <- mean(excess)
nn[ii] <- length(excess)
sig[ii] <- sd(excess)/sqrt(nn[ii])
}
# Confident intervals
qn <- qnorm(1-alpha/2)
lb <- mrl - qn * sig
ub <- mrl + qn * sig
## Adjusted limits
if(is.null(ylim)) ylim <- range(c(lb,ub)) * c(.95,1.05)
if(display){
plot(u, mrl, xlab = xlab, ylab = ylab, ylim = ylim,
col = col, lty = lty, lwd = lwd, ...)
lines(u, lb, lty = lty.ci, col = col.ci, lwd = lwd.ci)
lines(u, ub, lty = lty.ci, col = col.ci, lwd = lwd.ci)
}
## Return
invisible(data.frame(u = u, n = nn,mrl = mrl, lb = lb, ub = ub))
}
#' @export
#' @rdname mrlPlot
hist.fpot <- function(obj, main = '', xlab = 'Threshold (u)', ...){
hist(obj$excess, main = main, xlab = xlab, ...)
}
#' @export
#' @rdname mrlPlot
qqplot.fpot <- function(obj, ci = FALSE, nsim = 200, alpha = 0.05,
xlab = 'Theoritical quantiles',
ylab = 'Empirical quantiles',
log.scale = TRUE, ...){
ppos <- seq(obj$nexcess)/(obj$nexcess+1)
theo <- qgpa(ppos, obj$estimate[1], obj$estimate[2])
emp <- sort(obj$excess)
if(ci){
pt <- seq(1,obj$nexcess, len = 200)/(obj$nexcess+1)
qt <- qgpa(pt, obj$estimate[1], obj$estimate[2])
## Simulate bootstrap sample
xboot <- replicate(nsim, rgpa(obj$nexcess,obj$estimate[1], obj$estimate[2]))
## Compute bootstrap quantile
qboot <- apply(xboot, 2,
function(z){
para <- fpot1d(z)
qgpa(pt, para[1], para[2])
})
## compute confident interval
bnd <- apply(qboot, 1, quantile, c(alpha/2, 1-alpha/2))
lb <- smooth.spline(qt,bnd[1,])$y
ub <- smooth.spline(qt,bnd[2,])$y
}
## Transform to logarithm scale
if(log.scale){
theo <- log(theo)
emp <- log(emp)
xlab <- paste(xlab,'(log)')
ylab <- paste(ylab,'(log)')
if(ci){
qt <- log(qt)
lb <- log(lb)
ub <- log(ub)
}
}
## render QQ plot
plot(theo, emp, xlab = xlab, ylab = ylab, ...)
abline(0,1, col = 2)
if(ci){
lines(qt, lb, lty = 2)
lines(qt, ub, lty = 2)
}
}
#' @export
#' @rdname mrlPlot
ppplot.fpot <- function(obj, ...){
ppos <- seq(obj$nexcess)/(obj$nexcess+1)
theo <- pgpa(obj$excess, obj$estimate[1], obj$estimate[2])
plot(sort(theo), ppos,
xlab = 'Theoritical probabilities',
ylab = 'Empirical probabilities',
...)
abline(0,1, col = 2)
}
#' @export
gpaStabPlot <- function(x, ...) UseMethod('gpaStabPlot',x)
#' @export
#' @rdname mrlPlot
gpaStabPlot.data.frame <- function(obj, ...)
gpaStabPlot.numeric(x = as.numeric(obj[,2]), dt = as.numeric(obj[,1]), ...)
#' @export
#' @rdname mrlPlot
gpaStabPlot.matrix <- function(obj, ...)
gpaStabPlot.numeric(x = as.numeric(obj[,2]), dt = as.numeric(obj[,1]), ...)
#' @export
#' @rdname mrlPlot
gpaStabPlot.formula <- function(form, x, ...){
xd <- model.frame(form,as.list(x))
gpaStabPlot.numeric(x = xd[,1], dt = xd[,2], ...)
}
#' @export
#' @rdname mrlPlot
gpaStabPlot.numeric <- function(x, dt = NULL, u,
declust = NULL, r = 1, rlow = 0.75,
alpha = 0.05, param = 'scale',
method = 'mle', nboot = 1000,
xlab = 'Threshold (u)',
ylab = NULL,
col = 'black', lty = 1, lwd = 1,
col.ci = 'black', lty.ci = 3, lwd.ci = 1,
ylim = NULL, display = TRUE, ...){
## Supply a date variable if not presented
if(is.null(dt))
dt <- seq_along(x)
u <- as.numeric(u)
## Loop across threshold and extract parameters
para <- matrix(NA,length(u),6)
for(ii in seq_along(u)){
## Fit the POT model
fit <- fitPot(x, dt, u[ii],
declust = declust, r = r, rlow = rlow,
method = method, nboot = nboot)
para[ii,] <- c(u[ii], fit$estimate, as.vector(vcov(fit))[-2])
}
modAlpha <- para[,2] + para[,3]*para[,1]
modAlphaVar <- para[,4] + 2 * para[,1] * para[,5] + para[,1]^2 * para[,6]
## Choose which parameter to display
if(param %in% c('scale','alpha','scl')){
y <- modAlpha
ysd <- sqrt(modAlphaVar)
if(is.null(ylab)) ylab <- 'Modified scale (alpha)'
} else if(param %in% c('shape','kappa','shp')){
y <- para[,3]
ysd <- sqrt(para[,6])
if(is.null(ylab)) ylab <- 'Shape (kappa)'
}
# Compute the Confident intervals
qn <- qnorm(1-alpha/2)
lb <- y - qn * ysd
ub <- y + qn * ysd
## Do the graph
if(display){
if(is.null(ylim)) ylim <- c(.95,1.05) * range(c(y,lb,ub))
plot(u,y, ylim = ylim, xlab = xlab, ylab = ylab,
col = col, lty = lty, lwd = lwd, ...)
lines(u, lb, lty = lty.ci, col = col.ci, lwd = lwd.ci)
lines(u, ub, lty = lty.ci, col = col.ci, lwd = lwd.ci)
}
## return the data
modA <- data.frame( modAlpha = modAlpha, modAlphaVar = modAlphaVar)
colnames(para) <- c('u','alpha','kappa','alphaVar','cov','kappaVar')
invisible(cbind(para,modA))
}
######################################################
#' Parameter object for Peak Over Threshold (POT)
#'
#' Return the parameters of the generalized Pareto distribution
#' in the format use in \link{lmomco-package} to extract the
#' theoritical Lmoments
#'
#' @param obj Output of the function \code{\link{fitPot}}
#'
#' @seealso \link{vec2par}
#'
#' @export
#'
#' @examples
#'
#' library(lmomco)
#'
#' xd <- rgpa(200)
#' fit <- fitPot(xd, u = 0.1)
#' para <- pot2para(fit)
#' par2lmom(para)
#'
#' plot(fit$time,as.uniform(fit))
#'
pot2para <- function(obj){
lmomco::vec2par(c(0,obj$estimate),'gpa')
}
#' @export
#' @rdname pot2para
rankit.fpot <- function(obj) rankit(obj$excess)
#' @export
#' @rdname pot2para
as.uniform.fpot <- function(obj) {
pgpa(obj$excess, obj$estimate[1],obj$estimate[2])
}
#' @export
points.fpot <- function(x, excess = FALSE,
xlab = 'Time',
ylab = NULL, ...){
if(excess == TRUE){
if(is.null(ylab)) ylab <- 'Events'
points(x$time, x$excess, xlab = xlab, ylab = ylab...)
} else {
if(is.null(ylab)) ylab <- 'Exceedences'
points(x$time, x$excess + x$u, xlab = xlab, ylab = ylab,...)
}
}
#' @export
plot.fpot <- function(x, excess = FALSE,
xlab = 'Time',
ylab = NULL, ...){
if(excess == TRUE){
if(is.null(ylab)) ylab <- 'Events'
plot(x$time, x$excess, xlab = xlab, ylab = ylab...)
} else {
if(is.null(ylab)) ylab <- 'Exceedences'
plot(x$time, x$excess + x$u, xlab = xlab, ylab = ylab,...)
}
}
#' @export
lines.fpot <- function(x, excess = FALSE,
xlab = 'Time',
ylab = NULL, ...){
if(excess == TRUE){
if(is.null(ylab)) ylab <- 'Events'
lines(x$time, x$excess, xlab = xlab, ylab = ylab...)
} else {
if(is.null(ylab)) ylab <- 'Exceedences'
lines(x$time, x$excess + x$u, xlab = xlab, ylab = ylab,...)
}
}
#########################################################################
## See adTest.atSite in atSite.R file for more details
#
#' @export
#' @rdname adTest
adTest.fpot <- function(obj, method = 'tab',
nboot = 1000, bsample = FALSE, cores = NULL){
ans <- list()
para <- pot2para(obj)
## Case using a table
if(method %in% c('tab','table')){
ans$stat <- adStat(obj$excess, para = pot2para(obj))
ans$pvalue <- c(adGpaTable(obj$estimate[2], A2 = ans$stat[1]),NA,NA)
## Case using boostrap
} else if(method == 'boot'){
ans <- adTest(obj$excess, type = 'gpa', para = NULL, method = 'pot1d',
nboot = nboot, bsample = bsample, cores = cores)
}
ans$estimate <- obj$estimate
ans$type = 'gpa'
ans$method = 'mle'
class(ans) <- 'adTest'
return(ans)
}
############################################################
#' Threshold diagnostic using the Anderson Darling test
#'
#' Produces the graphics of the p-values of the Anderson-Darling (AD) test
#' in respect of candidate tresholds.
#'
#' @param x,dt,form Sample and time of observation. If a formula is provided,
#' then \code{x} must be a data.frame with the corresponding variable.
#'
#' @param u Vector of candidate threshold
#'
#' @param stat.test Which version of the AD test. One of 'AD','AL' or 'AU',
#' which are respectively the classical AD test and the lower or upper
#' modified AD test.
#'
#' @param logit.scale If true, the p-values are transformed to the negative
#' logit scale. Otherwise the 1 - p-values are plotted.
#'
#' @param alpha Vector indicative references for the p-values.
#' @param display Should the graphics be display. Remark that the p-values
#' are returned by the function.
#'
#' @param method.estim,nboot.estim Parameters for estimation.
#' See \link{fitPot}.
#'
#' @param method.test,stat.test,nboot.test Parameters for AD tests.
#' See \link{adTest}.
#'
#' @param ... Other graphical parameters
#'
#' @details
#'
#' The function \code{adTestPlot} will loop across calls of function
#' \code{\link{fitPot}} and \code{adTest} and will produce a graphics of the
#' P-values in respect of the threshold \code{u}. See respective
#' functions for a description of the other function parameters.
#'
#' @seealso \link{adTest}, \link{mrlPlot}, \link{dispIndexPlot},
#' \link{which.floodPeaks}
#'
#' @export
#'
#' @examples
#'
#' ## Find list of candidate thresholds
#' lstu <- seq(500,2500,len = 50)
#'
#' adTestPlot(flow~date,canadaFlood$daily, u = lstu,declust = 'flood', r = 14)
#'
adTestPlot <- function(x, ...) UseMethod('adTestPlot',x)
#' @export
#' @rdname adTestPlot
adTestPlot.data.frame <- function(obj, ...)
adTestPlot.numeric(x = as.numeric(obj[,2]), dt = as.numeric(obj[,1]), ...)
#' @export
#' @rdname adTestPlot
adTestPlot.matrix <- function(obj, ...)
adTestPlot.numeric(x = as.numeric(obj[,2]), dt = as.numeric(obj[,1]), ...)
#' @export
#' @rdname adTestPlot
adTestPlot.formula <- function(form, x, ...){
xd <- model.frame(form,as.list(x))
adTestPlot.numeric(x = xd[,1], dt = xd[,2], ...)
}
#' @export
#' @rdname adTestPlot
adTestPlot.numeric <- function(x, dt = NULL, u,
declust = 'run', r = 1, rlow = 0.75,
method.estim = 'mle', nboot.estim = 1000,
method.test = 'tab',
stat.test = 'AD', nboot.test = 1000,
logit.scale= FALSE,
xlab = 'Threshold (u)',
ylab = 'p-value',
alpha = c(0.05, 0.25),
col = 'black', lty = 1, lwd = 1,
col.alpha = 'black', lty.alpha = 3, lwd.alpha = 1,
cores = NULL, display = TRUE, ...){
if(method.test == 'tab' & !( method.estim %in% c('mle','mle2')))
stop('The table of the AD test is for maximum likelihood only')
## Supply a date variable if not presented
if(is.null(dt))
dt <- seq_along(x)
u <- as.numeric(u)
## Loop across threshold
ad <- matrix(NA, length(u),3)
for(ii in seq_along(u)){
## Fit the POT model
fit <- fitPot(x, dt, u[ii], method = method.estim,
declust = declust, r = r, rlow = rlow,
nboot = nboot.estim)
## Compute the Dispersion index
ad[ii,] <- adTest(fit, method = method.test, nboot = nboot.test,
bsample = FALSE)$pval
}
if(stat.test %in% c('ad','AD'))
y <- ad[,1]
else if (stat.test %in% c('al','AL'))
y <- ad[,2]
else if (stat.test %in% c('au','AU'))
y <- ad[,3]
if(logit.scale){
y <- prange(-logit(y),c(-7,7))
alphaLim <- prange(-logit(alpha),c(-7,7))
if(ylab == 'p-value') ylab <- paste(ylab, '(-logit)')
}else{
y <- 1-y
alphaLim <- 1-alpha
if(ylab == 'p-value') ylab <- paste(ylab, '(1-p)')
}
if(display){
plot(u,y, xlab = xlab, ylab = ylab, ...)
abline(h = alphaLim, col = col.alpha, lty = lty.alpha, lwd = lwd.alpha)
}
colnames(ad) <- c('AD','AL','AU')
invisible(data.frame(u=u,ad))
}
#########################################################################
#' Anderson-Darling (AD) test for the Generalized Pareto distribution (GPA)
#'
#' Return the critical value or the p-value of the AD test for the GPA
#' distribution. The results are obtained by linear interpolation of the
#' table provided by Choulakian and Stephens (2001) when scale and shape
#' parameter are estimated by maximum likelihood.
#'
#' @param kap Shape parameter of the GPA.
#'
#' @param pval P-value of the test.
#'
#' @param A2 Statistics of the AD test.
#' If \code{NULL} the function return
#' the critical value associated with \code{'pval'}.
#' Otherwise the function return the p-value associated with the provided
#' statistics.
#'
#' @param ... Additional parameter for the function \code{\link{approx}}.
#'
#' @details
#'
#' Choulakian V, Stephens MA. Goodness-of-Fit Tests for the Generalized
#' Pareto Distribution. Technometrics. 2001;43(4):478–84.
#'
#' @seealso \link{interp2d}
#'
#' @export
#'
#' @examples
#'
#' adGpaTable(kap = -.11, A2 = .93)
#' adGpaTable(kap = -.11, pval = 0.05)
adGpaTable <- function(kap, pval = 0.05, A2 = NULL, ...){
## Table of for the GPA
ad <- c(0.339, 0.471, 0.641, 0.771, 0.905, 1.086, 1.226, 1.559,
0.356, 0.499, 0.685, 0.830, 0.978, 1.180, 1.336, 1.707,
0.376, 0.534, 0.741, 0.903, 1.069, 1.296, 1.471, 1.893,
0.386, 0.550, 0.766, 0.935, 1.110, 1.348, 1.532, 1.966,
0.397, 0.569, 0.796, 0.974, 1.158, 1.409, 1.603, 2.064,
0.410, 0.591, 0.831, 1.020, 1.215, 1.481, 1.687, 2.176,
0.426, 0.617, 0.873, 1.074, 1.283, 1.567, 1.788, 2.314,
0.445, 0.649, 0.924, 1.140, 1.365, 1.672, 1.909, 2.475,
0.468, 0.688, 0.985, 1.221, 1.465, 1.799, 2.058, 2.674,
0.496, 0.735, 1.061, 1.321, 1.590, 1.958, 2.243, 2.922)
ad <- t(matrix(ad, 8 ,10))
kap0 <- c(-0.9, -0.5, -0.2, -0.1, 0, 0.1, 0.2, 0.3, 0.4, 0.5)
pval0 <- c(0.5, 0.25, 0.1, 0.05, 0.025, 0.01, 0.005, 0.001)
## Verify table range
if(pval < 0.001){
warning('P-value outside range. Value 0.001 will be used')
pval <- 0.001
}
if(pval > 0.5){
warning('P-value outside range. Value 0.5 will be used')
pval <- 0.5
}
if(kap < (-0.9)){
warning('Kappa outside range. Value -0.9 will be used')
kap <- (-0.9)
}
if(kap > 0.5){
warning('Kappa outside range. Value 0.5 will be used')
kap <- 0.5
}
if(is.null(A2))
ans <- interp2d(kap0, pval0, ad, kap, pval, reverse = FALSE,...)
else
ans <- interp2d(kap0, pval0, ad, kap, A2, reverse = TRUE, ...)
return(ans)
}
########################################################################
#' Interpolation of a table
#'
#' Return linear interpolation of a table. Can returns either the
#' interpolation of the values of the table or the interpolations
#' the columns associated with a value in the table(\code{reverse}).
#'
#' @param x0 References in row of \code{z}.
#'
#' @param y0 References in column of \code{z}
#'
#' @param z0 Matrix representing a table to interpolate.
#'
#' @param x Point where to interpolate in row.
#'
#' @param y Point where to interpolate in column.
#'
#' @param reverse Logical. Should the Refrence value en \code{y} be
#' interpolated instead of \code{z}.
#'
#' @export
#'
#' @seealso \link{approx}
#'
#' @examples
#'
#' x <- 1:10
#' y <- 1:10
#' z <- outer(x,y)
#'
#' interp2d(x,y,z,5,5) # 5*5 = 25
#' interp2d(x,y,z,5.1,5.1) # 26.01
#'
#' interp2d(x,y,z,5.1,26, reverse = TRUE)
#'
interp2d <- function(x0, y0, z0, x, y, reverse = FALSE, ...){
## Verify consistency between x0, y0 and z0
if(length(x0) != nrow(z0) | length(y0) != ncol(z0))
stop('Wrong dimension passed in z0')
## Case interpolating z0
if(!reverse){
## y0 is exact
if(sum(y0 == y)>0){
z <- z0[, which(y0 == y)]
## case y0 is not exact
} else{
z <- sapply(seq(nrow(z0)),
function(k) approx(y0, z0[k,], xout = y, ...)$y)
}
## Interpolation z
if(x < min(x0)) ans <- max(z)
else if(x> max(x0)) ans <- min(z)
else ans <- approx(x0, z, xout = x, ...)$y
## Case Interpolation y instead of z
} else {
##case x0 is exact
if(sum(x0 == x) > 0){
z <- z0[which(x0 == x),]
##case x0 is not exact
} else {
z <- sapply(seq(ncol(z0)),
function(k) approx(x0, z0[,k], xout = x, ...)$y)
}
## interpolating y
if(y < min(z)) ans <- max(y0)
else if(y > max(z)) ans <- min(y0)
else ans <- approx(z, y0, xout = y, ...)$y
}
return(ans)
}
####################################################################
# Documentation in atSite.R
#' @export
#' @rdname shapiroTest
shapiroTest.fpot <- function(obj){
tmp <- shapiroTest(qnorm(as.uniform(obj)))
ans <- list(statistic = tmp$statistic, pvalue = tmp$p.value,
type = 'gpa', method = 'mle')
class(ans) <- 'shapiroTest'
return(ans)
}
###########################################################################
#' Generalized Pareto distribution (GPA)
#'
#' Distribution, density, quantile and random function for the Generalized
#' pareto distriution.
#'
#' @param p,q Probabilities or quantiles for the GPA distribution.
#'
#' @param n Number of simulations.
#'
#' @param alpha Scale parameter of the GPA
#'
#' @param kap Shape parameter of the GPA
#'
#' @param lower.tail Should the propability of the lower tail be returned
#'
#' @param log Should the log-density be returned
#'
#' @details
#'
#' These function are reorganisation of the function in R-package \link{evd}
#' using the parametrisation (\eqn{\alpha}, \eqn{\kappa}) that is coherent
#' with the notation frequently used in hydrology.
#'
#' @export
#'
#' @examples
#'
#' kap <- -.2
#' a <- 1
#' xd1 <- rgpa(1e4, a, kap)
#' xd2 <- qgpa(runif(1e4), a, kap)
#'
#' qqplot(xd1, xd2)
#'
#'
#' tt <- seq(0,6, len = 100)
#'
#' hist(xd[xd<6], main = 'GPA distribution',
#' freq = FALSE, ylim =c(0,1), xlim = c(0,6))
#'
#' lines(tt, dgpa(tt,a,kap))
#' lines(tt,pgpa(sort(tt), a, kap), col = 2, lty = 2)
#'
#'
pgpa <- function (q, alpha = 1, kap = 0, lower.tail = TRUE)
{
if (min(alpha) <= 0)
stop("invalid alpha")
if (length(kap) != 1)
stop("invalid kappa")
q <- pmax(q, 0)/alpha
if (kap == 0)
p <- 1 - exp(-q)
else {
p <- pmax(1 - kap * q, 0)
p <- 1 - p^(1/kap)
}
if (!lower.tail)
p <- 1 - p
return(p)
}
#' @export
#' @rdname pgpa
rgpa <- function (n, alpha = 1, kap = 0)
{
if (min(alpha) < 0)
stop("invalid alpha")
if (length(kap) != 1)
stop("invalid kappa")
if (kap == 0)
return(alpha * rexp(n))
else return(- alpha * (runif(n)^(kap) - 1)/kap)
}
#' @export
#' @rdname pgpa
dgpa <- function (x, alpha = 1, kap = 0, log = FALSE)
{
if (min(alpha) <= 0)
stop("invalid alpha")
if (length(kap) != 1)
stop("invalid kappa")
d <- x/alpha
nn <- length(d)
alpha <- rep(alpha, length.out = nn)
index <- (d > 0 & ((1 - kap * d) > 0)) | is.na(d)
if (kap == 0) {
d[index] <- log(1/alpha[index]) - d[index]
d[!index] <- -Inf
}
else {
d[index] <- log(1/alpha[index]) -
(1 - 1/kap) * log(1 - kap * d[index])
d[!index] <- -Inf
}
if (!log)
d <- exp(d)
return(d)
}
#' @export
#' @rdname pgpa
qgpa <- function (p, alpha = 1, kap = 0, lower.tail = TRUE)
{
if (min(p, na.rm = TRUE) <= 0 || max(p, na.rm = TRUE) >= 1)
stop("`p' must contain probabilities in (0,1)")
if (min(alpha) < 0)
stop("invalid alpha")
if (length(kap) != 1)
stop("invalid kappa")
if (lower.tail)
p <- 1 - p
if (kap == 0)
return(- alpha * log(p))
else
return(-alpha * (p^(kap) - 1)/kap)
}
###########################################################################
#' Extracting flood peaks
#'
#' Returns the indices of the peaks above a threshold according to the
#' declustering method put in place by the Water Resources Council.
#' See Lang et al. (1999) for more details.
#'
#' @param x Sample. Can be a vector, matrix or data.frame.
#' If \code{form} is not specified the first two rows must be respectively
#' the observations and the time of observation.
#'
#' @param form Formula that describes the variables: \code{obs ~ date}.
#'
#' @param dt Date or time of observations.
#'
#' @param u Threshold.
#'
#' @param r,rlow,ini Declustering parameters. See details.
#'
#' @details
#'
#' The declustering method performs two steps,
#' First an initial set of peaks are obtained.
#' By default, if \code{ini = 'lmax'} the initial peaks are local maximums.
#' Alternatively, if \code{ini = 'clust'} a simple run declustering
#' is realized to identify the maximums of
#' continuous clusters above the threshold \code{u}.
#' Afterward two additional conditions are required for peaks to not be
#' rejected. First, two peaks \eqn{Q1} and \eqn{Q2} must be separated by a
#' period of at least \code{r} days.
#' One recommendation is \deqn{5 days + log(A)}
#' where \eqn{A} is the drainage area in miles.
#' The second conditions is
#' \deqn{Xmin > rlow * min(Q1,Q2).}
#' By defautlt, \code{rlow = 0.75}.
#' When one of the two conditions is not statisfied the lowest of the two
#' peaks is discarded.
#' Modified version of the previous algorithm is also implemented.
#' Using \code{ini = 'lmax2'}. Instead of verify jointly both condition
#' a first series of peaks is extrated using only the second condition and
#' next the first condition is verify in the newly extracted peaks.
#' The two version are very similar and differ only on few cases where the
#' modified version is more conservative and reject peaks that are kept
#' in the initial version.
#'
#' The function \code{which.clusters} is returning the indices of the peaks
#' identified by the run declustering method. See \code{\link{clusters}}.
#'
#' @section References:
#'
#' Lang M, Ouarda TBMJ, Bobée B. (1999) Towards operational guidelines for
#' over-threshold modeling. Journal of Hydrology. Dec 6;225(3):103–17.
#'
#' @export
#'
#' @examples
#'
#' area <- 14700
#' b <- ceiling(5 + log(area/(1.609^2))) ## b = 14
#' xd <- canadaFlood$daily
#'
#' # Declustering using recommendation.
#' cid <- which.floodPeaks(flow~date, xd,
#' u = 1000, r = b, rlow = .75, ini = 'lmax')
#'
#' plot(xd, type = 'l')
#' points(canadaFlood$daily[cid,], col = 'red', pch = 16)
#' abline(h = 1000, col = 3, lwd = 2)
#'
#' ## Using a nonstationary threshold.
#' fit <- lm(log(flow)~date, canadaFlood$daily)
#' ut <- exp(fitted(fit))+900
#' lines(xd$date, ut, col = 4, lwd = 2)
#'
#' cid <- which.floodPeaks(flow~date, xd,
#' u = ut, r = b, rlow = .75, ini = 'lmax')
#' points(canadaFlood$daily[cid,], col = 'yellow', pch = 16)
#'
which.floodPeaks <- function(x, ...) UseMethod('which.floodPeaks', x)
#' @export
#' @rdname which.floodPeaks
which.floodPeaks.matrix <- function(x, ...)
which.floodPeaks.numeric(x[,1], x[,2], ...)
#' @export
#' @rdname which.floodPeaks
which.floodPeaks.data.frame <- function(x, ...)
which.floodPeaks.numeric(x[,1], x[,2], ...)
#' @export
#' @rdname which.floodPeaks
which.floodPeaks.formula <- function(form, x, ...){
xd <- model.frame(form,as.list(x))
which.floodPeaks.numeric(x = xd[,1], dt = xd[,2], ...)
}
#' @export
#' @rdname which.floodPeaks
which.floodPeaks.numeric <- function(x, dt = NULL, u, r = 1, rlow = 0.75,
ini = 'lmax2'){
## If time is not provided
if(is.null(dt))
dt <- seq_along(x)
## Verify that the time vector are matching
if(length(x) != length(dt))
stop('Length of the Time vector do not match!')
## Extract all peaks above threshold
if(ini == 'clust'){
peakId <- which.clusters(x, u)
} else if(ini == 'lmax'){
isPeak <- rep(FALSE, length(x))
isPeak[which.lmax(x)] <- TRUE
isPeak[x <= u] <- FALSE
peakId <- which(isPeak)
} else if(ini == 'lmax2'){
peakId <- which.floodPeaks(x, dt, u, r = 0, rlow, ini = 'lmax')
} else if(ini == 'all'){
peakId <- seq_along(x)
}
## Case there is just one peak above threshold
if(length(peakId) < 2) return(peakId)
## Loop accross and keep only
myId <- peakId[1]
ans <- list()
for(nextId in peakId[-1]){
## Verify if lag r is sufficient
if(abs(dt[nextId]- dt[myId]) < r){
## If not, Keep the highest peak
if(x[nextId] > x[myId])
myId <- nextId
next
}
## Verify that the a minimal intermediate point is reached
if(min(x[seq(myId, nextId)]) > rlow * min(x[myId], x[nextId])){
## If not, Keep the highest peak
if(x[nextId] > x[myId])
myId <- nextId
next
}
## Otherwise save the identified peak id
ans <- append(ans,myId)
## And move to the next one
myId <- nextId
}#endfor
## Add the last peaks
ans <- append(ans,myId)
return(unlist(ans))
}
#' @export
#' @rdname which.floodPeaks
which.clusters <- function(x, u, ...)
as.numeric(names(evd::clusters(x, u, cmax = TRUE, ...)))
#################################################################
#' Dispersion index
#'
#' Returns the dispersion index (or variance to mean ratio) to verify
#' the hypothesis of a Homogenous Poisson process.
#'
#' @param obj An output of \code{fitPot}.
#'
#' @param k Number of classes. Default value chosen to ensure at least 5 classes
#' and aims at approximately an average 5 events per classe.
#'
#' @param ci Should confident interval and p-value be returns.
#'
#' @param alpha Significance level.
#'
#' @param method Method for computing the confident interval and the p-value.
#' Either 'asymptotic' for chi-squared approximation or 'boot' for bootstrap.
#'
#' @param nboot Number of bootstrap samples.
#'
#' @details
#'
#' The function \code{dispIndexPlot} will loop across calls of function
#' \code{\link{fitPot}} and \code{dispIndex} and will produce a graphics of the
#' Dispersion index in respect of the threshold \code{u}. See respective
#' functions for a description of the function parameters. The points in the
#' plot are returns as a data.frame and parameter \code{display} can be set
#' to \code{FALSE} to prevent the display of the graphic.
#'
#' @section Reference:
#'
#' Lang M, Ouarda TBMJ, Bobée B. (1999) Towards operational guidelines for
#' over-threshold modeling. Journal of Hydrology. Dec 6;225(3):103–17.
#'
#' @seealso \link{mrlPlot}
#'
#' @export
#'
#' @examples
#'
#' fit <- fitPot(flow~date,canadaFlood$daily, u = 1000,
#' declust = 'flood', r = 14)
#'
#' dispIndex(fit, ci = TRUE)
#'
#' dispIndexPlot(flow~date,canadaFlood$daily,
#' u = seq(500,2000, len = 50),
#' declust = 'flood', r = 14)
#'
dispIndex <- function(obj, k = NULL, ci = FALSE, alpha = 0.05,
method = 'asymptotic', nboot = 1e4){
## form equal groups and count peaks
ntot <- length(obj$time)
if(is.null(k))
k <- pmax(5,floor(ntot/5))
nn <- table(cut(obj$time, k, labels = FALSE))
## Compute the dispersion index and p-value
vmr <- var(nn)/mean(nn)
if(ci){
a2 <- alpha/2
if(method == 'asymptotic'){
## Verification of minimal conditions for Chi-squared approximation
if(mean(nn) < 1 ) warning('Low average mean per classes')
if(k < 5 ) warning('Too few classes for testing')
## Confident intervale
d <- k-1
myci <- c(qchisq(a2, df = d), qchisq(1-a2, df = d))/d
## Compute p-value assuming asymetrical probabilities
if(vmr < 1) pval <- 2 * pchisq(vmr*d, df = d)
else if (abs(vmr-1) < 1e-8) pval <- 1
else pval <- 2 * (1-pchisq(vmr*d,df = d))
} else if(method == 'boot') {
## sample from poisson distribution
bfun <- function(){
z <- rpois(k,sum(nn)/k)
var(z)/mean(z)
}
bsample <- replicate(nboot, bfun())
## Boostrap intervals and p-value
myci <- quantile(bsample, c(a2,1-a2))
if(vmr < 1) pval <- 2 * mean( bsample <= vmr)
else if (abs(vmr-1) < 1e-8) pval <- 1
else pval <- 2*(1-mean( bsample >= vmr))
}
ans <- c(vmr, myci , pval)
names(ans) <- c('vmr','lb','ub', 'pval')
} else ans <- vmr
return(ans)
}
#' @export
#' @rdname dispIndex
dispIndexPlot <- function(x, ...) UseMethod('dispIndexPlot',x)
#' @export
#' @rdname dispIndex
dispIndexPlot.data.frame <- function(obj, ...)
dispIndexPlot.numeric(x = as.numeric(obj[,2]), dt = as.numeric(obj[,1]), ...)
#' @export
#' @rdname dispIndex
dispIndexPlot.matrix <- function(obj, ...)
dispIndexPlot.numeric(x = as.numeric(obj[,2]), dt = as.numeric(obj[,1]), ...)
#' @export
#' @rdname dispIndex
dispIndexPlot.formula <- function(form, x, ...){
xd <- model.frame(form,as.data.frame(x))
dispIndexPlot.numeric(x = xd[,1], dt = xd[,2], ...)
}
#' @export
#' @rdname dispIndex
dispIndexPlot.numeric <- function(x, dt = NULL, u = 0,
method.estim = 'mle', declust = NULL, r = 1, rlow = .75,
k = NULL, alpha = 0.05,
method.ci = 'asymptotic', nboot = 1000,
xlab = 'Threshold (u)', ylab = 'Dispersion Index',
col = 'black', lty = 1, lwd = 1,
col.ci = 'black', lty.ci = 3, lwd.ci = 1,
ylim = NULL, display = TRUE, ...){
## Supply a date variable if not presented
if(is.null(dt))
dt <- seq_along(x)
u <- as.numeric(u)
## Loop across threshold
ans <- matrix(NA, length(u),4)
for(ii in seq_along(u)){
## Fit the POT model
fit <- fitPot(x, dt, u[ii], method = method.estim,
declust = declust, r = r, rlow = rlow)
## Compute the Dispersion index
di <- dispIndex(fit, k = k, ci = TRUE, alpha = alpha,
method = method.ci, nboot = nboot)
ans[ii,] <- di
}
## Plot the Dispersion Index in respect of the threshold
if(display){
if(is.null(ylim))
ylim <- range(as.numeric(ans[,1:3])) * c(.95,1.05)
plot(u, ans[,1], xlab = xlab, ylab = ylab, ylim = ylim,
col = col, lty = lty, lwd = lwd, ...)
lines(u, ans[,2], col = col.ci, lty = lty.ci, lwd = lwd.ci)
lines(u, ans[,3], col = col.ci, lty = lty.ci, lwd = lwd.ci)
}
## Return the data if needed
ans <- as.data.frame(ans[,-4])
colnames(ans) <- c('vmr','lb','ub')
invisible(ans)
}
################################################################################
#' Performing logistic regression to test for trend in number of peaks per year
#'
#' Return the statistics and the p-value of a null trend. By defaut a t-test
#' verify that the slope is null otherwise a F-test is used to verify that the
#' model is better than random. A variable dispersion parmeter is used to
#' account for potential autocorrelation.
#'
#' @param obj Output from \link{fitPot}.
#' @param span List
#' @param method Type of test to use. Eiter \code{'T'} or \code{'F'}
#' @param trend Type of trend. Must be with method \code{'F'}.
#' Either \code{poly} for polynomial or \code{'spline'} for b-spline polynomial with
#' equally space knots
#' @param degree Degree of the polynomial or dimension of spline basis .
#'
#' @seealso \link{mannKendall}, \link{glm}.
#'
#' @section References:
#'
#' Frei C, Schär C. (2001), Detection Probability of Trends in Rare Events:
#' Theory and Application to Heavy Precipitation in the Alpine Region.
#' J Climate. Apr 1;14(7):1568–84.
#'
#' @export
#' @examples
#'
#' fit <- fitPot(flow~date, canadaFlood$daily, u = 1000,
#' declust = 'flood', r = 14)
#'
#' mannKendallPeaks(fit, lag.k = 0)
#'
#' trendLogisPPY(fit, span = canadaFlood$daily$date) ## linear trend
#' trendLogisPPY(fit, method = 'F', trend = 'spline')
#'
trendLogisPPY <- function(obj, span = NULL, method = 'T', trend = 'poly',
degree = 2){
## Transform timing to integer
tm <- as.integer(obj$time)
if(is.null(span)){
span <- range(tm)
span <- seq(span[1],span[2],1)
} else span <- as.integer(span)
pp <- as.factor(span %in% tm)
## Fit a glm model and test if significant
if(method %in% c('t','T')){
## For linear trend
trend <- 'linear'
fit <- glm(pp~span, family = quasibinomial())
tmp <- as.numeric(summary(fit)$coefficients[2,c(1,4)])
} else if(method %in% c('f','F') & trend == 'poly'){
## For polynomial trend
fit <- glm(pp~poly(span, degree = degree), family = quasibinomial())
tmp <- as.numeric(anova(fit, test = 'F')[2,c(5,6)])
} else if(method %in% c('f','F') & trend == 'spline'){
## For polynomial trend
degree <- pmax(3,degree)
fit <- glm(pp~splines::bs(span, df = degree), family = quasibinomial())
tmp <- as.numeric(anova(fit, test = 'F')[2,c(5,6)])
} else stop('Wrong combination of test and trend')
ans <- list()
ans$stat <- tmp[1]
ans$pvalue <- tmp[2]
ans$method <- method
ans$trend <- trend
ans$degree <- degree
class(ans) <- 'trendLogisPPY'
return(ans)
}
#' @export
print.trendLogisPPY <- function(obj){
cat('\nTesting trend in number of peaks per year\n')
if(obj$method %in% c('t','T')){
cat('\nSlope :', format(obj$stat, digits = 4))
} else {
cat('\nTrend :', obj$trend)
cat(paste('\nF (df = ', obj$degree,') : ',
format(obj$stat, digits = 4), sep = ''))
}
cat('\nP-value : ', round(obj$pvalue,3))
}
#' @export
#' @rdname mannKendall
mannKendallPeaks <- function(obj, ...){
mannKendall(atSite(obj$excess, distr = 'gpd'), ...)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.