R/pot.R

###############################################################
#' Estimation of the GPA distribution by maximum likelihood.
#'
#' Low-level functions for the estimation of the two-parameter generalized
#' Pareto distribution (GPA), based either on maximum likelihood or on the
#' method of L-moments.
#' The algorithm of \code{fpot2d} uses
#' \code{optim} to directly optimize the (bivariate) log-likelihood, while
#' the algorithm of \code{fpot1d} uses a transformation to rely on
#' a univariate optimization routine. Moreover, \code{fpot2d} constrains
#' the shape parameter between -0.5 and 1.
#'
#' @param x Sample.
#'
#' @param sol Should the full solution be returned.
#'   In that case, the variance-covariance matrix is also returned.
#'
#' @param par0 Initial parameter estimates.
#'
#'
#' @param ... Additional parameters passed to \code{\link{optim}}.
#'
#' @section Reference:
#'
#' Davison AC, Smith RL. (1990) Models for Exceedances over High Thresholds.
#'   Journal of the Royal Statistical Society Series B (Methodological).
#'   52(3):393–442.
#'
#' Hosking JRM (1990). L-Moments: Analysis and Estimation of Distributions Using
#'   Linear Combinations of Order Statistics. Journal of the Royal Statistical
#'   Society Series B (Methodological). 52(1):105–24.
#'
#'
#' @export
#'
#' @examples
#'
#' x <- rgpa(10000, 1, -.2)
#' fpot1d(x)
#' fpot2d(x)
#' fpotLmom(x)
#'
fpot1d <- function(x, sol = FALSE){

  ## Closed-form MLE of the shape parameter for a fixed sigma = alpha/kappa
  fk <- function(sigma) -mean(log(1-x/sigma))

  ## Profile log-likelihood in sigma
  fpara <- function(sigma){
    length(x) * (-log(fk(sigma) * sigma) + fk(sigma) - 1)
  }

  mx <- max(x)

  sigma <- NA
  suppressWarnings(try(
    sigma <- optimize(fpara, interval = c(-10000,10000) * mx,
                      maximum = TRUE)$maximum))

  para <- rep(NA,2)
  names(para) <- c('alpha','kappa')
  try(para[2] <- fk(sigma))
  try(para[1] <- para[2]*sigma)

  if(sol){
    ans <- list(par = para, varcov = matrix(NA,2,2))
    ans$varcov[2,2] <- 1-para[2]
    ans$varcov[1,1] <- 2 * para[1]^2
    ans$varcov[1,2] <- ans$varcov[2,1] <- para[1]
    ans$varcov <- ans$varcov * ans$varcov[2,2] / length(x)

  } else {
    ans <- para
  }

  return(ans)

}
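## Derivation note for `fpot1d` (a sketch): writing sigma = alpha/kappa, the
## MLE of the shape for a fixed sigma has the closed form
##   k(sigma) = -mean(log(1 - x/sigma)),
## and substituting it back into the GPA log-likelihood yields the profile
## log-likelihood maximized above,
##   l_p(sigma) = n * ( -log(k(sigma) * sigma) + k(sigma) - 1 ),
## which reduces the bivariate problem to a univariate search
## (Davison and Smith, 1990).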

#' @export
#' @rdname fpot1d
fpot2d <- function(x, sol = FALSE, par0 = NULL, ...){

  logit0 <- function(z) logit((z+.5)/1.5)
  expit0 <- function(z) expit(z)*1.5 -.5

  ## negative loglikelihood
  nllik <- function(para) -sum(dgpa(x,exp(para[1]),
                                    expit0(para[2]), log = TRUE))

  if(!is.null(par0)){
    par0[1] <- log(par0[1])
    par0[2] <- logit0(par0[2])
  } else
    par0 <- c(log(var(x)), -.6931472) ## logit0(0), i.e. initial kappa = 0

  ## estimate the parameter
  para <- optim( par0, nllik, ...)$par
  para[1] <- exp(para[1])
  para[2] <- expit0(para[2])

  names(para) <- c('alpha','kappa')

  if(sol){
    ans <- list(par = para, varcov = matrix(NA,2,2))
    ans$varcov[2,2] <- 1-para[2]
    ans$varcov[1,1] <- 2 * para[1]^2
    ans$varcov[1,2] <- ans$varcov[2,1] <- para[1]
    ans$varcov <- ans$varcov * ans$varcov[2,2] / length(x)

  } else {
    ans <- para
  }


  return(ans)
}
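## Note on `fpot2d` (a sketch): the pair logit0/expit0 is a bijection between
## the interval (-0.5, 1) and the real line, which is what constrains the
## shape parameter during the unconstrained optimization. Assuming the
## internal logit/expit match stats::qlogis/stats::plogis, the round trip can
## be checked with:
## k <- seq(-0.49, 0.99, by = 0.01)
## stopifnot(all(abs(k - (1.5 * plogis(qlogis((k + 0.5)/1.5)) - 0.5)) < 1e-12))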

#' @export
#' @rdname fpot1d
fpotLmom <- function(x){
  lmom0 <- lmomco::lmoms(x, nmom = 3)
  return(lmomco::lmom2par(lmom0,'gpa', xi = 0)$para[-1])
}
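## For reference (a sketch): with the lower bound fixed at zero, the L-moment
## estimators of the GPA have closed forms (Hosking, 1990):
##   kappa = lambda_1 / lambda_2 - 2,   alpha = (1 + kappa) * lambda_1,
## which is what the call to lmomco::lmom2par above is expected to return.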

######################################################################
#' Peak over threshold (POT)
#'
#' Fit the parameters of a peak over threshold (POT) model.
#'
#' @param x Sample.
#'
#' @param dt Date or time of observation.
#'
#' @param u Threshold.
#'
#' @param method Estimation method. Either \code{'lmom'}, \code{'mle'} or
#' \code{'mle2'}.
#'
#' @param declust Declustering method, if needed.
#'   Either \code{'run'} or \code{'flood'}.
#'
#' @param r Lag parameter for declustering. Either the run length
#'   or the minimum time between two flood peaks.
#'   Its scale must coincide with that of the observation dates \code{dt}.
#'
#' @param rlow Declustering parameter. Proportion that defines a minimal
#'   recession level between two flood peaks.
#'
#' @param se,ci Should the standard errors or the confidence intervals be
#'    returned. The standard errors are obtained by the delta method and the
#'    confidence intervals by nonparametric bootstrap.
#'
#' @param nboot Number of bootstrap samples. Used to estimate the covariance
#'   matrix with the method of L-moments and to predict return periods.
#'   For prediction, if \code{nboot = 0} confidence intervals are obtained by
#'   profile likelihood.
#'
#' @details
#'
#' The access functions \code{coef} and \code{vcov} return respectively the
#' parameters and the variance-covariance matrix of the POT model. The
#' variance-covariance matrix is available only with the method \code{'mle'}.
#' The access function \code{predict} returns the return levels associated
#' with the given return periods.
#' If \code{dt} is a Date, the return period is computed in years using the
#' range of observation.
#' The \code{rate} (i.e. number of events per year) can be manually adjusted
#' if needed. By default \code{rate = 1}.
#'
#' The declustering can be a simple 'run' declustering, i.e. a cluster
#' starts when the series exceeds the threshold and stops when \code{r}
#' consecutive steps fall below the threshold.
#'
#' @seealso \link{which.floodPeaks}, \link{mrlPlot}.
#'
#' @section References:
#'
#' Choulakian V, Stephens MA. (2001) Goodness-of-Fit Tests for the Generalized
#'   Pareto Distribution. Technometrics. 43(4):478–84.
#'
#' Coles S. (2001) An introduction to statistical modeling of extreme values.
#'   Springer Verlag.
#'
#' Davison AC, Smith RL. (1990) Models for Exceedances over High Thresholds.
#'   Journal of the Royal Statistical Society Series B (Methodological).
#'   52(3):393–442.
#'
#' @export
#'
#' @examples
#'
#' xd <- rgpa(100, 1, -.2)
#' fit <- fitPot(xd, u = 0)
#'
#' print(fit)
#' vcov(fit)
#' predict(fit)
#'
#' fit <- fitPot(flow~date, canadaFlood$daily, u = 1000,
#'                declust = 'flood', r = 14)
#'
#' print(fit)
#' plot(flow~date,canadaFlood$daily, type = 'l', col = 4)
#' points(fit$time,fit$excess+fit$u, col = 2)
#'
#' predict(fit, se = TRUE,ci = TRUE)
#'
#' fit <- fitPot(flow~date, canadaFlood$daily, u = 1000,
#'                declust = 'flood', r = 14, method = 'lmom')

fitPot <- function(x, ...) UseMethod('fitPot',x)

#' @export
#' @rdname fitPot
fitPot.data.frame <- function(obj,  ...)
  fitPot.numeric(x = as.numeric(obj[,2]), dt = as.numeric(obj[,1]), ...)

#' @export
#' @rdname fitPot
fitPot.matrix <- function(obj,  ...)
  fitPot.numeric(x = as.numeric(obj[,2]), dt = as.numeric(obj[,1]), ...)

#' @export
#' @rdname fitPot
fitPot.formula <- function(form, x, ...){
  xd <- model.frame(form,as.list(x))
  fitPot.numeric(x = xd[,1], dt = xd[,2], ...)
}

#' @export
#' @rdname fitPot
fitPot.numeric <- function(x, dt = NULL, u = 0, method = 'mle',
                    declust = NULL, r = 1, rlow = .75, nboot = 1000,
                    declust.scale = FALSE){

  ## Supply a date variable if not provided
  if(is.null(dt))
    dt <- seq_along(x)

  ## Compute the number of years
  if(inherits(dt, 'Date')){

    #nyear <- as.numeric(diff(range(dt))) / 365.242
    nyear <- sum(is.finite(dt)) / 365.242

  } else {
    nyear <- NA
  }

  ## Extract peaks using declustering method if necessary
  if(is.null(declust))
    cid <- x > u
  else if(declust == 'run')
    cid <- which.clusters(x, u, r)
  else if(declust  == 'flood.clust')
    cid <- which.floodPeaks(x, dt, u, r = r, rlow = rlow, ini = 'clust')
  else if(declust == 'flood.lmax')
    cid <- which.floodPeaks(x, dt, u, r = r, rlow = rlow, ini = 'lmax')
  else if(declust %in% c('flood','flood.lmax2'))
    cid <- which.floodPeaks(x, dt, u, r = r, rlow = rlow, ini = 'lmax2')

  ans <- list(excess = x[cid] - u, time = dt[cid])

  ## Estimate the parameters
  if(method == 'lmom'){
    ans$estimate <- fpotLmom(ans$excess)
    ans$varcov <- matrix(NA,2,2)

    if(!is.null(nboot)){
      xboot <- replicate(nboot,
                         sample(ans$excess, length(ans$excess), replace = TRUE))
      pboot <- t(apply(xboot,2,fpotLmom))

      vc <- Matrix::nearPD(cov(pboot))$mat
      ans$varcov <- as.matrix(vc)
    }

  } else if(method == 'mle'){
    sol <- fpot1d(ans$excess, sol = TRUE)
    ans$estimate <- sol$par
    ans$varcov <- sol$varcov
    colnames(ans$varcov) <- rownames(ans$varcov) <- names(ans$estimate)

  } else if(method == 'mle2'){
    sol <- fpot2d(ans$excess, sol = TRUE)

    ans$estimate <- sol$par
    ans$varcov <- sol$varcov
    colnames(ans$varcov) <- rownames(ans$varcov) <- names(ans$estimate)

    if( (ans$estimate[2] < -.5 + 1e-4) | (ans$estimate[2] > 1-1e-4) )
      warning(paste('Shape parameter has reached a boundary. Normal',
                    'approximation of the likelihood may be unreliable'))
  }

  ## Complete object
  ans$u <- u
  ans$method <- method
  ans$mrl <-  ans$estimate[1]/ (1+ans$estimate[2])
  ans$nyear <- nyear
  ans$ntot <- length(x)
  ans$nexcess <- length(ans$excess)
  ans$nabove <- sum(x>u)

  class(ans) <- 'fpot'
  return(ans)
}


#' @export
#' @rdname fitPot
coef.fpot <- function(obj, rate = FALSE, ci = FALSE, alpha = 0.05){

  if(rate)
    ans <- c(obj$nexcess/obj$ntot,obj$estimate)
  else
    ans <- obj$estimate

  if(ci){

    ## Full likelihood
    llikFull <- logLik(obj)

    ## Initiate searching intervals for parameters
    ase <- sqrt(obj$varcov[1,1])

    abnd0 <- c(pmax(1e-8, obj$estimate[1] - 10 * ase),
               obj$estimate[1] + 10* ase)

    kbnd0 <- c(-1,2)


    ## Bound for the deviance
    khi <- qchisq(1-alpha,1)

    ## Deviance for the profile likelihood of the scale parameter
    Dscl <- function(z){

      nllik <- function(p) -sum(dgpa(obj$excess, z, p, log = TRUE))

      ## Compute the profile likelihood
      llik0 <- -goptimize(nllik, kbnd0)$objective

      return(2*(llikFull-llik0))

    }

    ## Deviance for the profile likelihood of the shape parameter
    Dshp <- function(z){

      nllik <- function(p) -sum(dgpa(obj$excess, p, z, log = TRUE))

      llik0 <- -optimize(nllik, abnd0)$objective

      return(2*(llikFull-llik0))
    }

    ## Find the boundary

    suppressWarnings(lbScl <-
        goptimize(function(z) abs(Dscl(z)-khi),
                 c(abnd0[1],obj$estimate[1]))$minimum)

    suppressWarnings(ubScl <-
                       goptimize(function(z) abs(Dscl(z)-khi),
                                c(obj$estimate[1],abnd0[2]))$minimum)

    suppressWarnings(lbShp <-
                       goptimize(function(z) abs(Dshp(z)-khi),
                                c(kbnd0[1],obj$estimate[2]))$minimum)

    suppressWarnings(ubShp <-
                       goptimize(function(z) abs(Dshp(z)-khi),
                                c(obj$estimate[2],kbnd0[2]))$minimum)

    bnd <- data.frame(lb = c(NA,lbScl,lbShp),
                      ub = c(NA,ubScl,ubShp))

    rownames(bnd) <- c('rate','alpha','kappa')

    if(!rate) bnd <- bnd[-1,]

    ans <- data.frame(estimate = ans, bnd)

  }

  return(ans)
}
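## Rationale (a sketch): the profile-likelihood bounds computed above solve
##   D(theta) = 2 * ( l(theta_hat) - l_p(theta) ) = qchisq(1 - alpha, 1),
## using the asymptotic chi-squared(1) distribution of the deviance to invert
## the likelihood-ratio test into a confidence interval.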

#' @export
#' @rdname fitPot
vcov.fpot <- function(obj, rate = FALSE){

  if(rate){
    ans <- matrix(0,3,3)

    kn <- obj$nexcess/obj$ntot

    ans[1,1] <- kn*(1-kn)/obj$ntot
    ans[2:3,2:3] <- obj$varcov

  } else
    ans <- obj$varcov

  return(ans)
}



#' @export
#' @rdname fitPot
print.fpot <- function(obj){

  cat('\nAnalysis of peaks over threshold')

  cat('\n\nThreshold : ', obj$u)
  cat('\nNumber of excess : ', obj$nexcess)

  if(!is.na(obj$nyear)){
    cat('\nNumber of years : ', format(round(obj$nyear,2)))
    cat('\nExcess rate (yearly): ', format(round(obj$nexcess/obj$nyear,4)))

  } else cat('\nExcess rate: ', format(round(obj$nexcess/obj$ntot,4)))

  cat('\nExtremal index: ', format(round(obj$nexcess/obj$nabove,4)))

  cat('\nDispersion Index : ', format(dispIndex(obj), digits = 4))

  cat('\nMean residual Life : ', format(obj$mrl, digits = 6))

  cat('\n\nParameters :\n')
  print(obj$estimate, digits = 4)

  cat('\nstd.err :\n')
  print(sqrt(diag(obj$varcov)), digits = 4)

}

#' @export
#' @rdname fitPot
predict.fpot <- function(obj, rt = c(2, 5, 10, 20, 50, 100), se = FALSE,
                          ci = FALSE, alpha = 0.05, nboot = 0, ...){


  ## Convert the return periods (in years when dates are available) to the
  ## time scale of the observations
  if(!is.na(obj$nyear))
    mrate <- rt * 365.242
  else
    mrate <- rt

  ## Expected number of exceedances per return period
  lambda <- mrate * obj$nexcess / obj$ntot

  ## Function that computes the return levels
  fpred <- function(para){

    if(abs(para[2]) > 1e-8)
      ans <- obj$u + para[1]/para[2] * (1-lambda^(-para[2]) )
    else
      ans <- obj$u + para[1] * log(lambda)

    return(ans)
  }

  hatRt <- fpred(obj$estimate)

  ## Standard errors by the delta method
  if(se | (ci & nboot == 0)){
    negk <- -obj$estimate[2]
    lk <- lambda^negk
    ak <- obj$estimate[1]/negk

    ## Components of the delta-method gradient of the return level
    gx1 <-  obj$estimate[1] * (obj$ntot/obj$nexcess)  * lk
    gx2 <- (lk-1)/negk
    gx3 <-  ak * ( lk * log(lambda) - gx2)

    gx <- rbind(gx1, gx2, gx3)
    hatSe <-  sqrt(diag(t(gx) %*% vcov(obj, rate = TRUE) %*% gx))
  }

  ## Verify that CI with L-moments use the bootstrap
  if(ci & obj$method ==  'lmom' & nboot <= 0)
    stop('Confidence intervals with L-moments must use bootstrap')

  ## Confidence interval by nonparametric bootstrap
  if(ci & nboot > 0){

    ## Resample
    xboot <- replicate(nboot, sample(obj$excess, obj$nexcess, replace = TRUE))

    ## Refit and predict
    if(obj$method == 'lmom')
      fn <- function(z) fpred(fpotLmom(z))
    else if(obj$method == 'mle2')
      fn <- function(z) fpred(fpot2d(z))
    else if (obj$method == 'mle')
      fn <- function(z) fpred(fpot1d(z))

    pboot <- apply(xboot,2,fn)

    ## Compute interval
    bootci <- t(apply(pboot, 1, quantile, c(alpha/2, 1-alpha/2)))
    lb <- bootci[,1]
    ub <- bootci[,2]

  }

  ## Confidence interval by profile likelihood
  if(ci & nboot == 0){

    bnd0 <- c(-.5,1)

    ## Bound of the deviance
    khi <- qchisq(1-alpha,1)
    lb <- lb0 <- pmax(1e-8, hatRt - 10 * hatSe)
    ub <- ub0 <- hatRt + 10 * hatSe

    for(ii in seq_along(rt)){

      ## Deviance function in respect of the return period
      Dfun <- function(z){

        ## Full likelihood
        llikFull <- sum(dgpa(obj$excess, obj$estimate[1],
                                         obj$estimate[2], log = TRUE))

        ## Negative log-likelihood
        nllik <- function(p){

          num <- z - obj$u

          if(abs(p) < 1e-8)
            denum <- log(lambda[ii])
          else
            denum <- -(lambda[ii]^(-p) -1)/p

          return(-sum(dgpa(obj$excess, pmax(1e-8,num/denum), p, log = TRUE)))

        }

        ## Compute the profile likelihood
        llik0 <- -goptimize(nllik, bnd0)$objective
        return(2*(llikFull-llik0))

      }

      ## Lower bound
      suppressWarnings(lb[ii] <-
                goptimize(function(z) abs(Dfun(z) - khi),
                         c(lb0[ii],hatRt[ii]))$minimum)

      ## Upper bound
      suppressWarnings(ub[ii] <-
                         goptimize(function(z) abs(Dfun(z) - khi),
                                  c(hatRt[ii], ub0[ii]))$minimum)

    } # end for

  } # end if

  ## construct the output
  if(se & !ci)
    ans <- data.frame(rt = hatRt, se = hatSe)
  else if(!se & ci)
    ans <- data.frame(rt = hatRt, lb = lb, ub = ub)
  else if(se & ci)
    ans <- data.frame(rt = hatRt, se = hatSe, lb = lb, ub = ub)
  else{
    ans <- data.frame(rt = hatRt)
  }

  if(ncol(ans) == 1){
    ans <- ans[,1]
    names(ans) <- paste('rt',rt)
  } else
    rownames(ans) <- paste('rt',rt)

  return(ans)
}
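## Cross-check (a sketch): for lambda > 1, the return level computed by
## fpred() is the GPA quantile of non-exceedance probability 1 - 1/lambda
## shifted by the threshold, so it should agree with
##   obj$u + qgpa(1 - 1/lambda, obj$estimate[1], obj$estimate[2]).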


#' @export
logLik.fpot <- function(obj)
  sum(dgpa(obj$excess, obj$estimate[1],obj$estimate[2], log = TRUE))

#' @export
AIC.fpot <- function(obj, k = 2)
  -2 * logLik(obj) + 2*k

#############################################################
#' Diagnostic tools for peaks over threshold analysis
#'
#' Produce the mean residual life, quantile-quantile and
#' probability-probability plots,
#' the histogram of the excesses,
#' and the stability plot for the parameters of the GPA.
#'
#' @param x,dt,form Sample and time of observation. A formula can also
#'   be passed, in which case \code{x} is a data.frame.
#'
#' @param u Series of candidate thresholds.
#'
#' @param method Estimation method.
#'
#' @param alpha Confidence interval of level \code{1-alpha}.
#'
#' @param param Choice of the parameter to display. Either
#'  \code{'alpha'} or \code{'kappa'}.
#'
#' @param ... Other arguments passed to the functions \code{plot} and
#' \code{\link{which.floodPeaks}}.
#'
#' @details
#'
#' The function \code{gpaStabPlot} loops across calls of the function
#' \code{\link{fitPot}} and produces a graphic of the
#' shape (or modified scale) parameter with respect to the threshold
#' \code{u}. See the respective
#' functions for a description of the function parameters.
#'
#' @seealso \link{adTestPlot}, \link{dispIndexPlot}, \link{fitPot},
#' \link{which.floodPeaks}.
#'
#'
#' @examples
#'
#' ## Find list of candidate thresholds
#' lstu <- seq(500,2500, len = 100)
#'
#' mrlPlot(flow~date,canadaFlood$daily, u = lstu, declust = 'flood', r = 14)
#'
#' gpaStabPlot(flow~date,canadaFlood$daily, u = lstu, declust = 'flood', r = 14)
#' gpaStabPlot(flow~date,canadaFlood$daily, u = lstu,
#'             declust = 'flood', r = 14, param = 'alpha', method = 'mle2')
#'
#'
#' fit <- fitPot(flow~date,canadaFlood$daily, u = 997, declust = 'flood', r = 14)
#' ppplot(fit)
#' qqplot(fit, ci = TRUE)
#' hist(fit)
#'
#'
#' @export
mrlPlot <- function(x, ...) UseMethod('mrlPlot',x)

#' @export
#' @rdname mrlPlot
mrlPlot.data.frame <- function(obj,  ...)
  mrlPlot.numeric(x = as.numeric(obj[,2]), dt = as.numeric(obj[,1]), ...)

#' @export
#' @rdname mrlPlot
mrlPlot.matrix <- function(obj,  ...)
  mrlPlot.numeric(x = as.numeric(obj[,2]), dt = as.numeric(obj[,1]), ...)

#' @export
#' @rdname mrlPlot
mrlPlot.formula <- function(form, x, ...){
  xd <- model.frame(form,as.list(x))
  mrlPlot.numeric(x = xd[,1], dt = xd[,2], ...)
}

#' @export
#' @rdname mrlPlot
mrlPlot.numeric <- function(x, dt = NULL, u,
                    declust = 'run', r = 1, rlow = 0.75,
                    alpha = 0.05,
                    ylab = 'Mean Residual Life', xlab = 'Threshold',
                    col = 'black', lty = 1, lwd = 1,
                    col.ci = 'black', lty.ci = 3, lwd.ci = 1,
                    ylim = NULL, display = TRUE, ...){

  ## Supply a date variable if not provided
  if(is.null(dt))
    dt <- seq_along(x)

  u <- as.numeric(u)

  sig <- mrl <- nn <- rep(NA,length(u))
  for(ii in seq_along(u)){

    ## Declustering
    if(is.null(declust))
      cid <- x > u[ii]
    else if(declust == 'run')
      cid <- which.clusters(x, u[ii], r)
    else if( declust == 'flood')
      cid <- which.floodPeaks(x, dt, u[ii], r = r, rlow = rlow)

    excess <- x[cid]-u[ii]
    mrl[ii] <- mean(excess)
    nn[ii] <- length(excess)
    sig[ii] <- sd(excess)/sqrt(nn[ii])

  }

  ## Confidence intervals
  qn <- qnorm(1-alpha/2)
  lb <- mrl - qn * sig
  ub <- mrl + qn * sig

  ## Adjusted limits
  if(is.null(ylim)) ylim <- range(c(lb,ub)) * c(.95,1.05)

  if(display){
    plot(u, mrl, xlab = xlab, ylab = ylab, ylim = ylim,
         col = col, lty = lty, lwd = lwd, ...)
    lines(u, lb, lty = lty.ci, col = col.ci, lwd = lwd.ci)
    lines(u, ub, lty = lty.ci, col = col.ci, lwd = lwd.ci)
  }

  ## Return
  invisible(data.frame(u = u, n = nn,mrl = mrl, lb = lb, ub = ub))

}
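## Interpretation note (a sketch): if the excesses above u follow a GPA with
## parameters (alpha, kappa), then for any higher threshold v > u,
##   MRL(v) = (alpha - kappa * (v - u)) / (1 + kappa),
## which is linear in v. The plot should therefore be approximately linear
## above any threshold for which the GPA assumption holds.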

#' @export
#' @rdname mrlPlot
hist.fpot <- function(obj, main = '', xlab = 'Threshold (u)', ...){
  hist(obj$excess, main = main, xlab = xlab, ...)

}

#' @export
#' @rdname mrlPlot
qqplot.fpot <- function(obj, ci = FALSE, nsim = 200, alpha = 0.05,
                        xlab = 'Theoretical quantiles',
                        ylab = 'Empirical quantiles',
                        log.scale = TRUE, ...){


  ppos <- seq(obj$nexcess)/(obj$nexcess+1)
  theo <- qgpa(ppos, obj$estimate[1], obj$estimate[2])
  emp <- sort(obj$excess)

  if(ci){
   pt <- seq(1,obj$nexcess, len = 200)/(obj$nexcess+1)
   qt <- qgpa(pt, obj$estimate[1], obj$estimate[2])

   ## Simulate bootstrap sample
   xboot <- replicate(nsim, rgpa(obj$nexcess,obj$estimate[1], obj$estimate[2]))

   ## Compute bootstrap quantile
   qboot <- apply(xboot, 2,
                  function(z){
                    para <- fpot1d(z)
                    qgpa(pt, para[1], para[2])
                  })

   ## Compute confidence interval
   bnd <- apply(qboot, 1, quantile, c(alpha/2, 1-alpha/2))

   lb <- smooth.spline(qt,bnd[1,])$y
   ub <- smooth.spline(qt,bnd[2,])$y

  }

  ## Transform to logarithm scale
  if(log.scale){
    theo <- log(theo)
    emp <- log(emp)

    xlab <- paste(xlab,'(log)')
    ylab <- paste(ylab,'(log)')

    if(ci){
      qt <- log(qt)
      lb <- log(lb)
      ub <- log(ub)
    }
  }

  ## render QQ plot
  plot(theo, emp, xlab = xlab, ylab = ylab, ...)
  abline(0,1, col = 2)

  if(ci){
    lines(qt, lb, lty = 2)
    lines(qt, ub, lty = 2)
  }


}

#' @export
#' @rdname mrlPlot
ppplot.fpot <- function(obj, ...){

  ppos <- seq(obj$nexcess)/(obj$nexcess+1)
  theo <- pgpa(obj$excess, obj$estimate[1], obj$estimate[2])

  plot(sort(theo), ppos,
       xlab = 'Theoretical probabilities',
       ylab = 'Empirical probabilities',
       ...)
  abline(0,1, col = 2)

}

#' @export
gpaStabPlot <- function(x, ...) UseMethod('gpaStabPlot',x)

#' @export
#' @rdname mrlPlot
gpaStabPlot.data.frame <- function(obj,  ...)
  gpaStabPlot.numeric(x = as.numeric(obj[,2]), dt = as.numeric(obj[,1]), ...)

#' @export
#' @rdname mrlPlot
gpaStabPlot.matrix <- function(obj,  ...)
  gpaStabPlot.numeric(x = as.numeric(obj[,2]), dt = as.numeric(obj[,1]), ...)

#' @export
#' @rdname mrlPlot
gpaStabPlot.formula <- function(form, x, ...){
  xd <- model.frame(form,as.list(x))
  gpaStabPlot.numeric(x = xd[,1], dt = xd[,2], ...)
}

#' @export
#' @rdname mrlPlot
gpaStabPlot.numeric <- function(x, dt = NULL, u,
                            declust = NULL, r = 1, rlow = 0.75,
                            alpha = 0.05, param = 'scale',
                            method = 'mle', nboot = 1000,
                            xlab = 'Threshold (u)',
                            ylab = NULL,
                            col = 'black', lty = 1, lwd = 1,
                            col.ci = 'black', lty.ci = 3, lwd.ci = 1,
                            ylim = NULL, display = TRUE, ...){

  ## Supply a date variable if not provided
  if(is.null(dt))
    dt <- seq_along(x)

  u <- as.numeric(u)

  ## Loop across threshold and extract parameters
  para <- matrix(NA,length(u),6)
  for(ii in seq_along(u)){

    ## Fit the POT model
    fit <- fitPot(x, dt, u[ii],
                  declust = declust, r = r, rlow = rlow,
                  method = method, nboot = nboot)

    para[ii,] <- c(u[ii], fit$estimate, as.vector(vcov(fit))[-2])

  }

  modAlpha <- para[,2] + para[,3]*para[,1]
  modAlphaVar <- para[,4] + 2 * para[,1] * para[,5] + para[,1]^2 * para[,6]

  ## Choose which parameter to display
  if(param %in% c('scale','alpha','scl')){

    y <- modAlpha
    ysd <- sqrt(modAlphaVar)

    if(is.null(ylab)) ylab <- 'Modified scale (alpha)'

  } else if(param %in% c('shape','kappa','shp')){
    y <- para[,3]
    ysd <- sqrt(para[,6])

    if(is.null(ylab)) ylab <- 'Shape (kappa)'

  }

  ## Compute the confidence intervals
  qn <- qnorm(1-alpha/2)
  lb <- y - qn * ysd
  ub <- y + qn * ysd


  ## Do the graph
  if(display){

    if(is.null(ylim)) ylim <- c(.95,1.05) * range(c(y,lb,ub))

    plot(u,y, ylim = ylim, xlab = xlab, ylab = ylab,
         col = col, lty = lty, lwd = lwd, ...)
    lines(u, lb, lty = lty.ci, col = col.ci, lwd = lwd.ci)
    lines(u, ub, lty = lty.ci, col = col.ci, lwd = lwd.ci)

  }

  ## return the data
  modA <- data.frame( modAlpha = modAlpha, modAlphaVar = modAlphaVar)


  colnames(para) <- c('u','alpha','kappa','alphaVar','cov','kappaVar')
  invisible(cbind(para,modA))
}
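## Note (a sketch): the modified scale plotted above is
##   alpha* = alpha + kappa * u,
## which is threshold-invariant when the GPA assumption holds. Its variance
## follows from the delta method with gradient (1, u):
##   Var(alpha*) = Var(alpha) + 2u * Cov(alpha, kappa) + u^2 * Var(kappa).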
######################################################
#' Parameter object for Peak Over Threshold (POT)
#'
#' Returns the parameters of the generalized Pareto distribution
#' in the format used by the \link{lmomco-package} to extract the
#' theoretical L-moments.
#'
#' @param obj Output of the function \code{\link{fitPot}}
#'
#' @seealso \link{vec2par}
#'
#' @export
#'
#' @examples
#'
#' library(lmomco)
#'
#' xd <- rgpa(200)
#' fit <- fitPot(xd, u = 0.1)
#' para <- pot2para(fit)
#' par2lmom(para)
#'
#' plot(fit$time,as.uniform(fit))
#'
pot2para <- function(obj){
  lmomco::vec2par(c(0,obj$estimate),'gpa')
}

#' @export
#' @rdname pot2para
rankit.fpot <- function(obj) rankit(obj$excess)

#' @export
#' @rdname pot2para
as.uniform.fpot <- function(obj) {
  pgpa(obj$excess, obj$estimate[1],obj$estimate[2])
}

#' @export
points.fpot <- function(x, excess = FALSE,
                        xlab = 'Time',
                        ylab = NULL, ...){
  if(excess){
    if(is.null(ylab)) ylab <- 'Events'
    points(x$time, x$excess, xlab = xlab, ylab = ylab, ...)
  } else {
    if(is.null(ylab)) ylab <- 'Exceedances'
    points(x$time, x$excess + x$u, xlab = xlab, ylab = ylab, ...)
  }

}

#' @export
plot.fpot <- function(x, excess = FALSE,
                      xlab = 'Time',
                      ylab = NULL, ...){
  if(excess){
    if(is.null(ylab)) ylab <- 'Events'
    plot(x$time, x$excess, xlab = xlab, ylab = ylab, ...)
  } else {
    if(is.null(ylab)) ylab <- 'Exceedances'
    plot(x$time, x$excess + x$u, xlab = xlab, ylab = ylab, ...)
  }
}

#' @export
lines.fpot <- function(x, excess = FALSE,
                       xlab = 'Time',
                       ylab = NULL, ...){
  if(excess){
    if(is.null(ylab)) ylab <- 'Events'
    lines(x$time, x$excess, xlab = xlab, ylab = ylab, ...)
  } else {
    if(is.null(ylab)) ylab <- 'Exceedances'
    lines(x$time, x$excess + x$u, xlab = xlab, ylab = ylab, ...)
  }

}
#########################################################################
## See adTest.atSite in atSite.R file for more details
#
#' @export
#' @rdname adTest
adTest.fpot <- function(obj, method = 'tab',
                          nboot = 1000, bsample = FALSE, cores = NULL){

  ans <- list()
  para <- pot2para(obj)

  ## Case using a table
  if(method %in% c('tab','table')){
    ans$stat <- adStat(obj$excess, para = pot2para(obj))
    ans$pvalue <- c(adGpaTable(obj$estimate[2], A2 = ans$stat[1]),NA,NA)

  ## Case using bootstrap
  } else if(method == 'boot'){
    ans <- adTest(obj$excess, type = 'gpa', para = NULL, method = 'pot1d',
                nboot = nboot, bsample = bsample, cores = cores)
  }

  ans$estimate <- obj$estimate
  ans$type = 'gpa'
  ans$method = 'mle'

  class(ans) <- 'adTest'

  return(ans)
}

############################################################
#' Threshold diagnostic using the Anderson-Darling test
#'
#' Produces a graphic of the p-values of the Anderson-Darling (AD) test
#' with respect to candidate thresholds.
#'
#' @param x,dt,form Sample and time of observation. If a formula is provided,
#'   then \code{x} must be a data.frame with the corresponding variable.
#'
#' @param u Vector of candidate thresholds.
#'
#' @param stat.test Which version of the AD test. One of 'AD','AL' or 'AU',
#'   which are respectively the classical AD test and the lower or upper
#'   modified AD test.
#'
#' @param logit.scale If \code{TRUE}, the p-values are transformed to the
#'   negative logit scale. Otherwise the values 1 - p are plotted.
#'
#' @param alpha Vector of indicative reference levels for the p-values.
#' @param display Should the graphic be displayed. Note that the p-values
#'   are returned by the function.
#'
#' @param method.estim,nboot.estim Parameters for estimation.
#'   See \link{fitPot}.
#'
#' @param method.test,stat.test,nboot.test Parameters for AD tests.
#'   See \link{adTest}.
#'
#' @param ... Other graphical parameters
#'
#' @details
#'
#' The function \code{adTestPlot} loops across calls of the functions
#' \code{\link{fitPot}} and \code{adTest} and produces a graphic of the
#' p-values with respect to the threshold \code{u}. See the respective
#' functions for a description of the other function parameters.
#'
#' @seealso \link{adTest}, \link{mrlPlot}, \link{dispIndexPlot},
#'  \link{which.floodPeaks}
#'
#' @export
#'
#' @examples
#'
#' ## Find list of candidate thresholds
#' lstu <- seq(500,2500,len = 50)
#'
#' adTestPlot(flow~date,canadaFlood$daily, u = lstu,declust = 'flood', r = 14)
#'
adTestPlot <- function(x, ...) UseMethod('adTestPlot',x)

#' @export
#' @rdname adTestPlot
adTestPlot.data.frame <- function(obj,  ...)
  adTestPlot.numeric(x = as.numeric(obj[,2]), dt = as.numeric(obj[,1]), ...)

#' @export
#' @rdname adTestPlot
adTestPlot.matrix <- function(obj,  ...)
  adTestPlot.numeric(x = as.numeric(obj[,2]), dt = as.numeric(obj[,1]), ...)

#' @export
#' @rdname adTestPlot
adTestPlot.formula <- function(form, x, ...){
  xd <- model.frame(form,as.list(x))
  adTestPlot.numeric(x = xd[,1], dt = xd[,2], ...)
}


#' @export
#' @rdname adTestPlot
adTestPlot.numeric <- function(x, dt = NULL, u,
                            declust = 'run', r = 1, rlow = 0.75,
                            method.estim = 'mle', nboot.estim = 1000,
                            method.test = 'tab',
                            stat.test = 'AD', nboot.test = 1000,
                            logit.scale= FALSE,
                            xlab = 'Threshold (u)',
                            ylab = 'p-value',
                            alpha = c(0.05, 0.25),
                            col = 'black', lty = 1, lwd = 1,
                            col.alpha = 'black', lty.alpha = 3, lwd.alpha = 1,
                            cores = NULL, display = TRUE, ...){


  if(method.test == 'tab' & !( method.estim %in% c('mle','mle2')))
    stop('The table of the AD test is for maximum likelihood only')

  ## Supply a date variable if not provided
  if(is.null(dt))
    dt <- seq_along(x)

  u <- as.numeric(u)

  ## Loop across threshold
  ad <- matrix(NA, length(u),3)
  for(ii in seq_along(u)){

    ## Fit the POT model
    fit <- fitPot(x, dt, u[ii], method = method.estim,
                  declust = declust, r = r, rlow = rlow,
                  nboot = nboot.estim)

    ## Compute the Dispersion index
    ad[ii,] <- adTest(fit, method = method.test, nboot = nboot.test,
                      bsample = FALSE)$pval
  }

  if(stat.test %in% c('ad','AD'))
    y <- ad[,1]
  else if (stat.test %in% c('al','AL'))
    y <- ad[,2]
  else if (stat.test %in% c('au','AU'))
    y <- ad[,3]

  if(logit.scale){
     y <- prange(-logit(y),c(-7,7))
     alphaLim <- prange(-logit(alpha),c(-7,7))

     if(ylab == 'p-value') ylab <- paste(ylab, '(-logit)')

  }else{
    y <- 1-y
    alphaLim <- 1-alpha

    if(ylab == 'p-value') ylab <- paste(ylab, '(1-p)')
  }

  if(display){
    plot(u,y, xlab = xlab, ylab = ylab, ...)
    abline(h = alphaLim, col = col.alpha, lty = lty.alpha, lwd = lwd.alpha)


  }

  colnames(ad) <- c('AD','AL','AU')
  invisible(data.frame(u=u,ad))
}
#########################################################################
#' Anderson-Darling (AD) test for the Generalized Pareto distribution (GPA)
#'
#' Returns the critical value or the p-value of the AD test for the GPA
#' distribution. The results are obtained by linear interpolation of the
#' table provided by Choulakian and Stephens (2001) when the scale and shape
#' parameters are estimated by maximum likelihood.
#'
#' @param kap Shape parameter of the GPA.
#'
#' @param pval P-value of the test.
#'
#' @param A2 Statistic of the AD test.
#'   If \code{NULL}, the function returns
#'   the critical value associated with \code{pval}.
#'   Otherwise the function returns the p-value associated with the provided
#'   statistic.
#'
#' @param ... Additional parameters for the function \code{\link{approx}}.
#'
#' @section Reference:
#'
#' Choulakian V, Stephens MA. (2001) Goodness-of-Fit Tests for the Generalized
#'   Pareto Distribution. Technometrics. 43(4):478–84.
#'
#' @seealso \link{interp2d}
#'
#' @export
#'
#' @examples
#'
#' adGpaTable(kap = -.11, A2 = .93)
#' adGpaTable(kap = -.11, pval = 0.05)

adGpaTable <- function(kap, pval = 0.05, A2 = NULL, ...){

  ## Table of critical values for the GPA
  ad <- c(0.339,	0.471,	0.641,	0.771,	0.905,	1.086,	1.226,	1.559,
          0.356,	0.499,	0.685,	0.830,	0.978,	1.180,	1.336,	1.707,
          0.376,	0.534,	0.741,	0.903,	1.069,	1.296,	1.471,	1.893,
          0.386,	0.550,	0.766,	0.935,	1.110,	1.348,	1.532,	1.966,
          0.397,	0.569,	0.796,	0.974,	1.158,	1.409,	1.603,	2.064,
          0.410,	0.591,	0.831,	1.020,	1.215,	1.481,	1.687,	2.176,
          0.426,	0.617,	0.873,	1.074,	1.283,	1.567,	1.788,	2.314,
          0.445,	0.649,	0.924,	1.140,	1.365,	1.672,	1.909,	2.475,
          0.468,	0.688,	0.985,	1.221,	1.465,	1.799,	2.058,	2.674,
          0.496,	0.735,	1.061,	1.321,	1.590,	1.958,	2.243,	2.922)

  ad <- t(matrix(ad, 8 ,10))

  kap0 <- c(-0.9, -0.5, -0.2, -0.1, 0, 0.1, 0.2, 0.3, 0.4, 0.5)
  pval0 <- c(0.5,	0.25,	0.1,	0.05,	0.025,	0.01,	0.005,	0.001)


  ## Verify table range
  if(pval < 0.001){
    warning('P-value outside range. Value 0.001 will be used')
    pval <- 0.001
  }

  if(pval > 0.5){
    warning('P-value outside range. Value 0.5 will be used')
    pval <- 0.5
  }

  if(kap < (-0.9)){
    warning('Kappa outside range. Value -0.9 will be used')
    kap <- (-0.9)
  }

  if(kap > 0.5){
    warning('Kappa outside range. Value 0.5 will be used')
    kap <- 0.5
  }

  if(is.null(A2))
    ans <- interp2d(kap0, pval0, ad, kap, pval, reverse = FALSE,...)
  else
    ans <- interp2d(kap0, pval0, ad, kap, A2, reverse = TRUE, ...)

  return(ans)
}

########################################################################
#' Interpolation of a table
#'
#' Returns a linear interpolation of a table. Can return either the
#' interpolated values of the table or the interpolated column reference
#' associated with a value in the table (\code{reverse}).
#'
#' @param x0 References in row of \code{z}.
#'
#' @param y0 References in column of \code{z}
#'
#' @param z0 Matrix representing a table to interpolate.
#'
#' @param x  Point where to interpolate in row.
#'
#' @param y  Point where to interpolate in column.
#'
#' @param reverse Logical. Should the reference values in \code{y0} be
#'  interpolated instead of \code{z0}.
#'
#' @export
#'
#' @seealso \link{approx}
#'
#' @examples
#'
#' x <- 1:10
#' y <- 1:10
#' z <- outer(x,y)
#'
#' interp2d(x,y,z,5,5) # 5*5  = 25
#' interp2d(x,y,z,5.1,5.1) # 26.01
#'
#' interp2d(x,y,z,5.1,26, reverse = TRUE)
#'
interp2d <- function(x0, y0, z0, x, y, reverse = FALSE, ...){

  ## Verify consistency between x0, y0 and z0
  if(length(x0) != nrow(z0) | length(y0) != ncol(z0))
    stop('Wrong dimension passed in z0')

  ## Case interpolating z0
  if(!reverse){

    ## y0 is exact
    if(sum(y0 == y)>0){
      z <- z0[, which(y0 == y)]

      ## case y0 is not exact
    } else{
      z <- sapply(seq(nrow(z0)),
                  function(k) approx(y0, z0[k,], xout = y, ...)$y)
    }

    ## Interpolation z
    if(x < min(x0)) ans <- max(z)
    else if(x> max(x0)) ans <- min(z)
    else ans <- approx(x0, z, xout = x, ...)$y

  ## Case Interpolation y instead of z
  } else {

    ##case x0 is exact
    if(sum(x0 == x) > 0){
      z <- z0[which(x0 == x),]

      ##case x0 is not exact
    } else {
      z <- sapply(seq(ncol(z0)),
                  function(k) approx(x0, z0[,k], xout = x, ...)$y)
    }

    ## interpolating y
    if(y < min(z)) ans <- max(y0)
    else if(y > max(z)) ans <- min(y0)
    else ans <- approx(z, y0, xout = y, ...)$y
  }

  return(ans)
}

####################################################################
# Documentation in atSite.R
#' @export
#' @rdname shapiroTest
shapiroTest.fpot <- function(obj){
  tmp <- shapiroTest(qnorm(as.uniform(obj)))
  ans <- list(statistic = tmp$statistic, pvalue = tmp$p.value,
              type = 'gpa', method = 'mle')

  class(ans) <- 'shapiroTest'

  return(ans)
}


###########################################################################
#' Generalized Pareto distribution (GPA)
#'
#' Distribution, density, quantile and random functions for the generalized
#' Pareto distribution.
#'
#' @param p,q Probabilities or quantiles for the GPA distribution.
#'
#' @param n Number of simulations.
#'
#' @param alpha Scale parameter of the GPA.
#'
#' @param kap Shape parameter of the GPA.
#'
#' @param lower.tail Should the probability of the lower tail be returned.
#'
#' @param log Should the log-density be returned.
#'
#' @details
#'
#' These functions are a reorganization of the functions in the R package
#' \link{evd}, using the parametrization (\eqn{\alpha}, \eqn{\kappa}) that is
#' consistent with the notation frequently used in hydrology.
#'
#' @export
#'
#' @examples
#'
#' kap <- -.2
#' a <- 1
#' xd1 <- rgpa(1e4, a, kap)
#' xd2 <- qgpa(runif(1e4), a, kap)
#'
#' qqplot(xd1, xd2)
#'
#'
#' tt <- seq(0,6, len = 100)
#'
#' hist(xd1[xd1 < 6], main = 'GPA distribution',
#'   freq = FALSE, ylim = c(0,1), xlim = c(0,6))
#'
#' lines(tt, dgpa(tt,a,kap))
#' lines(tt,pgpa(sort(tt), a, kap), col = 2, lty = 2)
#'
#'
pgpa <- function (q,  alpha = 1, kap = 0, lower.tail = TRUE)
{
  if (min(alpha) <= 0)
    stop("invalid alpha")

  if (length(kap) != 1)
    stop("invalid kappa")

  q <- pmax(q, 0)/alpha

  if (kap == 0)
    p <- 1 - exp(-q)
  else {
    p <- pmax(1 - kap * q, 0)
    p <- 1 - p^(1/kap)
  }

  if (!lower.tail)
    p <- 1 - p

  return(p)
}

#' @export
#' @rdname pgpa
rgpa <- function (n, alpha = 1, kap = 0)
{
  if (min(alpha) < 0)
    stop("invalid alpha")
  if (length(kap) != 1)
    stop("invalid kappa")
  if (kap == 0)
    return(alpha * rexp(n))
  else return(- alpha * (runif(n)^(kap) - 1)/kap)
}

#' @export
#' @rdname pgpa
dgpa <- function (x, alpha = 1, kap = 0, log = FALSE)
{
  if (min(alpha) <= 0)
    stop("invalid alpha")

  if (length(kap) != 1)
    stop("invalid kappa")

  d <- x/alpha
  nn <- length(d)
  alpha <- rep(alpha, length.out = nn)
  index <- (d > 0 & ((1 - kap * d) > 0)) | is.na(d)

  if (kap == 0) {
    d[index] <- log(1/alpha[index]) - d[index]
    d[!index] <- -Inf
  }
  else {
    d[index] <- log(1/alpha[index]) -
                            (1 - 1/kap) * log(1 - kap * d[index])
    d[!index] <- -Inf
  }

  if (!log)
    d <- exp(d)

  return(d)
}

#' @export
#' @rdname pgpa
qgpa <- function (p, alpha = 1, kap = 0, lower.tail = TRUE)
{
  if (min(p, na.rm = TRUE) <= 0 || max(p, na.rm = TRUE) >= 1)
    stop("`p' must contain probabilities in (0,1)")

  if (min(alpha) < 0)
    stop("invalid alpha")

  if (length(kap) != 1)
    stop("invalid kappa")

  if (lower.tail)
    p <- 1 - p

  if (kap == 0)
    return(- alpha * log(p))
  else
    return(-alpha * (p^(kap) - 1)/kap)
}
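## Correspondence with the 'evd' parametrization (a sketch; evd is already
## used elsewhere in this package): here kappa plays the role of minus the
## shape parameter of evd, so for instance
##   pgpa(q, alpha, kap)  should match  evd::pgpd(q, 0, alpha, -kap)
##   qgpa(p, alpha, kap)  should match  evd::qgpd(p, 0, alpha, -kap)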


###########################################################################
#' Extracting flood peaks
#'
#' Returns the indices of the peaks above a threshold according to the
#' declustering method put in place by the Water Resources Council.
#' See Lang et al. (1999) for more details.
#'
#' @param x Sample. Can be a vector, matrix or data.frame.
#'   If \code{form} is not specified, the first two columns must be
#'   respectively the observations and the time of observation.
#'
#' @param form Formula that describes the variables: \code{obs ~ date}.
#'
#' @param dt Date or time of observations.
#'
#' @param u Threshold.
#'
#' @param r,rlow,ini Declustering parameters. See details.
#'
#' @details
#'
#' The declustering method performs two steps.
#' First, an initial set of peaks is obtained.
#' By default, if \code{ini = 'lmax'} the initial peaks are local maximums.
#' Alternatively, if \code{ini = 'clust'} a simple run declustering
#' is performed to identify the maximums of
#' continuous clusters above the threshold \code{u}.
#' Afterward, two additional conditions are required for peaks to be
#' retained. First, two peaks \eqn{Q1} and \eqn{Q2} must be separated by a
#' period of at least \code{r} days.
#' One recommendation is \deqn{5 days + log(A)}
#' where \eqn{A} is the drainage area in square miles.
#' The second condition is
#' \deqn{Xmin < rlow * min(Q1,Q2),}
#' where \eqn{Xmin} is the minimal flow between the two peaks.
#' By default, \code{rlow = 0.75}.
#' When one of the two conditions is not satisfied, the lowest of the two
#' peaks is discarded.
#' A modified version of the previous algorithm is also implemented
#' via \code{ini = 'lmax2'}. Instead of verifying both conditions jointly,
#' a first series of peaks is extracted using only the second condition and
#' the first condition is then verified on the newly extracted peaks.
#' The two versions are very similar and differ only in a few cases where the
#' modified version is more conservative and rejects peaks that are kept
#' by the initial version.
#'
#' The function \code{which.clusters} returns the indices of the peaks
#' identified by the run declustering method. See \code{\link{clusters}}.
#'
#' @section References:
#'
#' Lang M, Ouarda TBMJ, Bobée B. (1999) Towards operational guidelines for
#'   over-threshold modeling. Journal of Hydrology. Dec 6;225(3):103–17.
#'
#' @export
#'
#' @examples
#'
#' area <- 14700
#' b <- ceiling(5 + log(area/(1.609^2))) ## b = 14
#' xd <- canadaFlood$daily
#'
#' # Declustering using recommendation.
#' cid <- which.floodPeaks(flow~date, xd,
#'                         u = 1000, r = b, rlow = .75, ini = 'lmax')
#'
#' plot(xd, type = 'l')
#' points(canadaFlood$daily[cid,], col = 'red', pch = 16)
#' abline(h = 1000, col = 3, lwd = 2)
#'
#' ## Using a nonstationary threshold.
#' fit <- lm(log(flow)~date, canadaFlood$daily)
#' ut <- exp(fitted(fit))+900
#' lines(xd$date, ut, col = 4, lwd = 2)
#'
#' cid <- which.floodPeaks(flow~date, xd,
#'                         u = ut, r = b, rlow = .75, ini = 'lmax')
#' points(canadaFlood$daily[cid,], col = 'yellow', pch = 16)
#'

which.floodPeaks <- function(x, ...) UseMethod('which.floodPeaks', x)

#' @export
#' @rdname which.floodPeaks
which.floodPeaks.matrix <- function(x, ...)
  which.floodPeaks.numeric(x[,1], x[,2], ...)

#' @export
#' @rdname which.floodPeaks
which.floodPeaks.data.frame <- function(x, ...)
  which.floodPeaks.numeric(x[,1], x[,2], ...)

#' @export
#' @rdname which.floodPeaks
which.floodPeaks.formula <- function(form, x, ...){
  xd <- model.frame(form,as.list(x))
  which.floodPeaks.numeric(x = xd[,1], dt = xd[,2], ...)
}

#' @export
#' @rdname which.floodPeaks
which.floodPeaks.numeric <- function(x, dt = NULL, u, r = 1, rlow = 0.75,
                                     ini = 'lmax2'){

  ## If time is not provided
  if(is.null(dt))
    dt <- seq_along(x)

  ## Verify that the sample and time vectors match
  if(length(x) != length(dt))
    stop('Lengths of the sample and time vectors do not match')

  ## Extract all peaks above threshold
  if(ini == 'clust'){
    peakId <- which.clusters(x, u)

  } else if(ini == 'lmax'){
    isPeak <- rep(FALSE, length(x))
    isPeak[which.lmax(x)] <- TRUE
    isPeak[x <= u] <- FALSE
    peakId <- which(isPeak)

  } else if(ini == 'lmax2'){
    peakId <- which.floodPeaks(x, dt, u, r = 0, rlow, ini = 'lmax')

  } else if(ini == 'all'){
    peakId <- seq_along(x)
  }

  ## Case there is just one peak above threshold
  if(length(peakId) < 2) return(peakId)

  ## Loop across candidate peaks and keep only valid ones
  myId <- peakId[1]
  ans <- list()

  for(nextId in peakId[-1]){

    ## Verify if lag r is sufficient
    if(abs(dt[nextId]- dt[myId]) < r){

      ## If not, Keep the highest peak
      if(x[nextId] > x[myId])
        myId <- nextId

      next
    }

    ## Verify that a sufficiently low intermediate point is reached
    if(min(x[seq(myId, nextId)]) > rlow * min(x[myId], x[nextId])){

      ## If not, Keep the highest peak
      if(x[nextId] > x[myId])
        myId <- nextId

      next
    }

    ## Otherwise save the identified peak id
    ans <- append(ans,myId)

    ## And move to the next one
    myId <- nextId

  }#endfor

  ## Add the last peaks
  ans <- append(ans,myId)

  return(unlist(ans))
}
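## Worked illustration (a sketch, hypothetical series): with
##   x <- c(0, 10, 2, 9, 0),  dt <- 1:5,  u <- 1,  r <- 1,  rlow <- 0.75,
## the local maxima at t = 2 and t = 4 are separated by at least r, and the
## intermediate minimum 2 is below rlow * min(10, 9) = 6.75, so both peaks
## are retained:
##   which.floodPeaks(x, dt, u = 1, r = 1, rlow = 0.75, ini = 'lmax')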

#' @export
#' @rdname which.floodPeaks
which.clusters <- function(x, u, ...)
  as.numeric(names(evd::clusters(x, u, cmax = TRUE, ...)))

#################################################################
#' Dispersion index
#'
#' Returns the dispersion index (or variance-to-mean ratio) used to verify
#' the hypothesis of a homogeneous Poisson process.
#'
#' @param obj An output of \code{fitPot}.
#'
#' @param k Number of classes. The default value is chosen to ensure at least
#'   5 classes and aims at an average of approximately 5 events per class.
#'
#' @param ci Should the confidence interval and p-value be returned.
#'
#' @param alpha Significance level.
#'
#' @param method Method for computing the confidence interval and the p-value.
#'   Either 'asymptotic' for the chi-squared approximation or 'boot' for
#'   bootstrap.
#'
#' @param nboot Number of bootstrap samples.
#'
#' @details
#'
#' The function \code{dispIndexPlot} loops across calls of the functions
#' \code{\link{fitPot}} and \code{dispIndex} and produces a graphic of the
#' dispersion index with respect to the threshold \code{u}. See the respective
#' functions for a description of the function parameters. The points in the
#' plot are returned as a data.frame and the parameter \code{display} can be
#' set to \code{FALSE} to prevent the display of the graphic.
#'
#' @section Reference:
#'
#' Lang M, Ouarda TBMJ, Bobée B. (1999) Towards operational guidelines for
#'   over-threshold modeling. Journal of Hydrology. Dec 6;225(3):103–17.
#'
#' @seealso \link{mrlPlot}
#'
#' @export
#'
#' @examples
#'
#' fit <- fitPot(flow~date,canadaFlood$daily, u = 1000,
#'               declust = 'flood', r = 14)
#'
#' dispIndex(fit, ci = TRUE)
#'
#' dispIndexPlot(flow~date,canadaFlood$daily,
#'               u = seq(500,2000, len = 50),
#'               declust = 'flood', r = 14)
#'
dispIndex <- function(obj, k = NULL, ci = FALSE, alpha = 0.05,
                      method = 'asymptotic', nboot = 1e4){

  ## form equal groups and count peaks
  ntot <- length(obj$time)

  if(is.null(k))
    k <- pmax(5,floor(ntot/5))

  nn <- table(cut(obj$time, k, labels = FALSE))

  ## Compute the dispersion index and p-value
  vmr <- var(nn)/mean(nn)

  if(ci){
    a2 <- alpha/2

    if(method == 'asymptotic'){

      ## Verify minimal conditions for the chi-squared approximation
      if(mean(nn) < 1 ) warning('Low average count per class')
      if(k < 5 ) warning('Too few classes for testing')


      ## Confidence interval
      d <- k-1
      myci <- c(qchisq(a2, df = d), qchisq(1-a2, df = d))/d

      ## Compute p-value assuming asymmetrical probabilities
      if(vmr < 1) pval <- 2 * pchisq(vmr*d, df = d)
      else if (abs(vmr-1) < 1e-8) pval <- 1
      else pval <- 2 * (1-pchisq(vmr*d,df = d))



    } else if(method == 'boot') {

      ## sample from poisson distribution
      bfun <- function(){
        z <- rpois(k,sum(nn)/k)
        var(z)/mean(z)
      }

      bsample <- replicate(nboot, bfun())

      ## Bootstrap intervals and p-value
      myci <- quantile(bsample, c(a2,1-a2))

      if(vmr < 1) pval <- 2 * mean( bsample <= vmr)
      else if (abs(vmr-1) < 1e-8) pval <- 1
      else pval <- 2*(1-mean( bsample >= vmr))
    }

    ans <- c(vmr, myci , pval)
    names(ans) <- c('vmr','lb','ub', 'pval')

  } else ans <- vmr

  return(ans)
}
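## Rationale (a sketch): under a homogeneous Poisson process the counts per
## class are iid Poisson, so (k-1) * VMR is approximately chi-squared with
## k-1 degrees of freedom. The asymptotic interval above is therefore
##   c(qchisq(alpha/2, k-1), qchisq(1-alpha/2, k-1)) / (k-1).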


#' @export
#' @rdname dispIndex
dispIndexPlot <- function(x, ...) UseMethod('dispIndexPlot',x)

#' @export
#' @rdname dispIndex
dispIndexPlot.data.frame <- function(obj,  ...)
  dispIndexPlot.numeric(x = as.numeric(obj[,2]), dt = as.numeric(obj[,1]), ...)

#' @export
#' @rdname dispIndex
dispIndexPlot.matrix <- function(obj,  ...)
  dispIndexPlot.numeric(x = as.numeric(obj[,2]), dt = as.numeric(obj[,1]), ...)

#' @export
#' @rdname dispIndex
dispIndexPlot.formula <- function(form, x, ...){
  xd <- model.frame(form,as.data.frame(x))
  dispIndexPlot.numeric(x = xd[,1], dt = xd[,2], ...)
}

#' @export
#' @rdname dispIndex
dispIndexPlot.numeric <- function(x, dt = NULL, u = 0,
              method.estim = 'mle', declust = NULL, r = 1, rlow = .75,
              k = NULL, alpha = 0.05,
              method.ci = 'asymptotic', nboot = 1000,
              xlab = 'Threshold (u)', ylab = 'Dispersion Index',
              col = 'black', lty = 1, lwd = 1,
              col.ci = 'black', lty.ci = 3, lwd.ci = 1,
              ylim = NULL, display = TRUE, ...){

  ## Supply a date variable if not provided
  if(is.null(dt))
    dt <- seq_along(x)

  u <- as.numeric(u)

  ## Loop across threshold
  ans <- matrix(NA, length(u),4)
  for(ii in seq_along(u)){

    ## Fit the POT model
    fit <- fitPot(x, dt, u[ii], method = method.estim,
                  declust = declust, r = r, rlow = rlow)

    ## Compute the Dispersion index
    di <- dispIndex(fit, k = k, ci = TRUE, alpha = alpha,
                 method = method.ci, nboot = nboot)
    ans[ii,] <- di

  }

  ## Plot the dispersion index with respect to the threshold
  if(display){

    if(is.null(ylim))
      ylim <- range(as.numeric(ans[,1:3])) * c(.95,1.05)

    plot(u, ans[,1], xlab = xlab, ylab = ylab, ylim = ylim,
         col = col, lty = lty, lwd = lwd, ...)
    lines(u, ans[,2], col = col.ci, lty = lty.ci, lwd = lwd.ci)
    lines(u, ans[,3], col = col.ci, lty = lty.ci, lwd = lwd.ci)
  }

  ## Return the data if needed
  ans <- as.data.frame(ans[,-4])
  colnames(ans) <- c('vmr','lb','ub')
  invisible(ans)
}


################################################################################
#' Performing logistic regression to test for a trend in the number of peaks
#' per year
#'
#' Returns the statistic and the p-value of a test of null trend. By default,
#' a t-test verifies that the slope is null; otherwise an F-test is used to
#' verify that the model is better than random. A variable dispersion
#' parameter is used to account for potential autocorrelation.
#'
#' @param obj Output from \link{fitPot}.
#' @param span Vector of all time steps in the study period. If \code{NULL},
#'   the full range of \code{obj$time} is used.
#' @param method Type of test to use. Either \code{'T'} or \code{'F'}.
#' @param trend Type of trend. Used with method \code{'F'}.
#'   Either \code{'poly'} for polynomial or \code{'spline'} for a B-spline
#'   polynomial with equally spaced knots.
#' @param degree Degree of the polynomial or dimension of the spline basis.
#'
#' @seealso \link{mannKendall}, \link{glm}.
#'
#' @section References:
#'
#' Frei C, Schär C. (2001), Detection Probability of Trends in Rare Events:
#'   Theory and Application to Heavy Precipitation in the Alpine Region.
#'   J Climate. Apr 1;14(7):1568–84.
#'
#' @export
#' @examples
#'
#' fit <- fitPot(flow~date, canadaFlood$daily, u = 1000,
#'                declust = 'flood', r = 14)
#'
#' mannKendallPeaks(fit, lag.k = 0)
#'
#' trendLogisPPY(fit, span = canadaFlood$daily$date)  ## linear trend
#' trendLogisPPY(fit, method = 'F', trend = 'spline')
#'
trendLogisPPY <- function(obj, span = NULL,  method = 'T', trend = 'poly',
                              degree = 2){

  ## Transform timing to integer
  tm <- as.integer(obj$time)

  if(is.null(span)){
    span <- range(tm)
    span <- seq(span[1],span[2],1)

  } else span <- as.integer(span)


  pp <- as.factor(span %in% tm)


  ## Fit a glm model and test if significant
  if(method %in% c('t','T')){
    ## For linear trend
    trend <- 'linear'
    fit <- glm(pp~span, family = quasibinomial())
    tmp <- as.numeric(summary(fit)$coefficients[2,c(1,4)])

  } else if(method %in% c('f','F') & trend == 'poly'){
    ## For polynomial trend
    fit <- glm(pp~poly(span, degree = degree), family = quasibinomial())
    tmp <- as.numeric(anova(fit, test = 'F')[2,c(5,6)])

  } else if(method %in% c('f','F') & trend == 'spline'){
    ## For spline trend
    degree <- pmax(3,degree)
    fit <- glm(pp~splines::bs(span, df = degree), family = quasibinomial())
    tmp <- as.numeric(anova(fit, test = 'F')[2,c(5,6)])

  } else stop('Wrong combination of test and trend')


  ans <- list()
  ans$stat <- tmp[1]
  ans$pvalue <- tmp[2]
  ans$method <- method
  ans$trend <- trend
  ans$degree <- degree
  class(ans) <- 'trendLogisPPY'
  return(ans)
}

#' @export
print.trendLogisPPY <- function(obj){
  cat('\nTesting trend in number of peaks per year\n')

  if(obj$method %in% c('t','T')){
    cat('\nSlope :', format(obj$stat, digits = 4))
  } else {
    cat('\nTrend :', obj$trend)
    cat(paste('\nF (df = ', obj$degree,') : ',
              format(obj$stat, digits = 4), sep = ''))
  }

  cat('\nP-value : ', round(obj$pvalue,3))

}

#' @export
#' @rdname mannKendall
mannKendallPeaks <- function(obj, ...){
  mannKendall(atSite(obj$excess, distr = 'gpd'), ...)
}