floodStat: FloodNet Regional Frequency Analysis

##################################################
#' At-site frequency analysis using annual maxima
#'
#' Return a fitting of a distribution, normally representing annual maximums.
#' Both maximum likelihood and L-moments method are available.
#' If the maximum likelihood method fails, the L-moments is returned and a warning
#'  message is issued.
#' The function \code{FitAmax.auto} select automatically the best distribution
#' according to the AIC criteria and return the fitted object.
#'
#' @param x Data.
#'
#' @param distr Distribution to fit. For \code{FitAmax.auto} it is a list of
#'  candidate distributions.
#'  See \code{\link{vec2par}} for the list of available distribution.
#'
#' @param method Estimation method.
#'    Either maximum likelihood ('mle') or L-moments ('lmom').
#'
#' @param varcov Should the variance-covariance matrix of the parameters
#'    be computed. For \code{mle} the covariance matrix is derived from the
#'    hessian matrix. For L-moments, non-parametric bootstrap is used.
#'
#' @param lmm L-moments of the data.
#'   Can be use to speed up multiple calls of FitAmax
#'    See \code{\link[lmomco]{lmoms}}.
#'
#' @param nsim Number of simulations used to evaluate the covariance matrix
#'   when using L-moment based estimator.
#'
#' @return
#'
#' \item{data}{Data Values.}
#' \item{lmom}{L-moments.}
#' \item{para}{Parameter estimates.}
#' \item{varcov}{Covariance matrix of the parameter}
#' \item{llik}{Value of the log-likelihood}
#'
#'
#' @section References:
#'
#' Coles, S. (2001). An introduction to statistical
#'   modeling of extreme values. Springer Verlag.
#'
#' Hosking, J. R. M., & Wallis, J. R. (1997). Regional frequency analysis:
#'   an approach based on L-moments. Cambridge Univ Pr.
#'
#' @seealso \code{\link{predict.amax}}, \code{\link{gofTest}},
#'   \code{\link{plot.amax}} .
#'
#' @export
#'
#' @examples
#'
#' ## Extract a time series of annual maxima
#' x <- ExtractAmax(flow~date, flowStJohn)$flow
#'
#' ## Fitting of GEV distribution using L-moments
#'
#' fit <- FitAmax(x,'gev')
#' print(fit)
#' coef(fit)
#' AIC(fit)
#' fit$lmom
#'
#' ## The evaluation of the variance-covariance matrix can be turn down
#' fit <- FitAmax(x,'gev', varcov = FALSE)
#'
#' ## Using Maximum likelihood
#' fit <- FitAmax(x,'gev', method ='mle')
#' print(fit)
#' vcov(fit)
#'
#' ## Standard deviation of the parameter
#' sqrt(diag(vcov(fit)))
#'
#' ## Chose the best distribution according to AIC
#' FitAmax.auto(x, distr = c('gev','glo','gno','pe3'), method = 'mle')
#'
#'

FitAmax <-
  function(x, distr = 'gev', method = 'lmom',
           varcov = TRUE, lmm = NULL, nsim = 1000){

  #################################################
  ## Verify that all values are finite values
  if(!all(is.finite(x)))
    stop('There is one (or more) non finite values')

  if(varcov & method == 'lmom' & nsim < 2)
    stop('There is not enough simulations (nsim) to perform boostrap')

  ## Keep only the firs distribution passed
  distr <- distr[1]
  #################################################

  ## Compute the L-moments and associated parameters
  if(is.null(lmm))
    lmm <- lmomco::lmoms(x)

  f <- lmomco::lmom2par(lmm,distr)

  if(method == 'mle'){

    ## Fit the distribution by maximum likelihood
    suppressWarnings(
      para <- try(lmomco::mle2par(x, distr, para.init = f, silent = TRUE,
                                  hessian = varcov)))

    ## If mle return an error, use the L-moment estimate
    if(class(para) == 'try-error'){
      para <- f
      varcov <- NA
      method = 'lmom'

      ## compute the log-likelihood
      llik <- sum(log(lmomco::dlmomco(x,para)))

      warning('Maximum likelihood has fail')

    } else {

      ## Verify and warn the user if optim does not converge
      if(para$optim$convergence != 0)
        warning('Maximum likelihood may have not coverge')

      ## (If necessary)
      ## Compute the covariance matrix by inversing the hessian matrix
      if(varcov)
        varcov <- chol2inv(chol(para$optim$hessian))
      else
        varcov <- NA

      ## compute the log-likelihood value
      llik <- -para$optim$value

      ## Remove the optim element from the list for a cleaner output
      para <- para[1:3]
    }

  } else if(method == 'lmom'){
    para <- f

    ## Compute the covariance matrix by boostrap if required
    if(varcov){

      xboot <- replicate(nsim, lmomco::rlmomco(length(x), para))

      pboot <- apply(xboot, 2, lmomco::lmoms)
      pboot <- lapply(pboot,lmomco::lmom2par, distr)
      pboot <- sapply(pboot, getElement, 'para')

      varcov <- as.matrix(Matrix::nearPD(cov(t(pboot)))$mat)

    } else
      varcov <- NA

    ## compute the log-likelihood
    llik <- sum(log(lmomco::dlmomco(x,para)))
  }



  ## Return an amax object
  ans <- list(lmom = lmm$lambdas,
              method = method,
              para = para$para,
              distr = distr,
              varcov = varcov,
              llik = llik,
              data = x)

  class(ans) <- 'amax'

  ans
}


#' @export
print.amax <- function(obj){

  cat('\nAt-site frequency analysis\n')

  cat('\nDistribution:', obj$distr,
      '\nAIC:', format(AIC(obj), digits = 4),
      '\nMethod:', obj$method)

  cat('\nEstimate:\n')
  print(obj$para, digit = 4)

  if(all(!is.na(obj$varcov))){

    se <- sqrt(diag(vcov(obj)))
    names(se) <- names(obj$para)

    cat('\nStd.err:\n')
    print(se, digits = 4)
  }

  cat('\nLmoments:\n')
  print(data.frame(l1 = obj$lmom[1],
                   lcv = obj$lmom[2]/obj$lmom[1],
                   lsk = obj$lmom[3]/obj$lmom[2],
                   lkt = obj$lmom[4]/obj$lmom[2]), digits = 4)
}

#' @export
coef.amax <- function(obj) obj$para

#' @export
AIC.amax <- function(obj, k = 2)
  as.numeric(k*length(obj$para) - 2*obj$llik)

#' @export
as.list.amax <- function(obj){
  class(obj) <- 'list'
  obj
}

#' @export
vcov.amax <- function(obj){

  ## if exist return the covariance matrix
  if(!is.null(obj$varcov))
    ans <- obj$varcov
  else
    ans <- NA

  ## return
  ans
}

#' @export
#' @rdname FitAmax
FitAmax.auto <-
  function(x, distr = c('gev','gno', 'pe3', 'glo'), ...,
          tol.gev = 0){

  ## compute lmoments once
  lmm <- lmomco::lmoms(x)

  ## Fit data and compute the AIC
  fits <- lapply(distr, function(z) try(FitAmax(x, distr = z, lmm = lmm, ...)))
  crit <- lapply(fits, function(z) try(AIC(z)))

  ## If the fitting fails put AIC to Infinity
  crit.err <- sapply(crit, function(z) class(z) != 'numeric')

  if(any(crit.err))
    crit[[crit.err]] <- Inf

  crit <- unlist(crit)

  ## It is assumed for small difference in AIC, the fitting is similar
  ## Therefore GEV could selected as default due to its theoritical
  ## justification. The tolerate difference is tol.gev
  gid <- which(distr == 'gev')
  crit[gid] <- crit[gid] - tol.gev

  ## return
  fits[[which.min(crit)]]
}


######################################################################
#' Predict return levels
#'
#' Return the flood quantile of annual maximum distribution and
#' Confident intervals are provided by bootstrap.
#'
#' @param obj Output from  \code{\link{FitAmax}}
#'
#' @param q Probabilities associated to the return level. For example,
#'   a 100 years return period is equivalent to \code{q = 0.99}.
#'
#' @param se Return the standard deviation of the return level using
#'   the delta method. The fitted model must
#'
#' @param ci Method to compute the confident interval. One of \code{'delta'}
#' for the delta method, \code{'boot'} for parametric boostrap and \code{'norm'}
#' for Monte-Carlo approximation assuming normality of the parameters.
#'
#' @param alpha Probability outside the confident interval.
#'
#' @param nsim Number of simulation use for resampling.
#'
#' @param out.matrix Logical. Should the resampling be returned. If true,
#'   a list is returned containing the prediction table (\code{pred}),
#'   the parameters (\code{para}) and the return levels (\code{qua}).
#'
#' @export
#'
#' @examples
#'
#' #' ## Extract an time series of annual maxima
#' x <- ExtractAmax(flow~date, flowStJohn)$flow
#'
#' ## Fitting of GEV distribution using L-moments
#' fit <- FitAmax(x,'gev', method = 'mle')
#'
#' ## Get the estimated quantile of 10 and 100 years return period
#' rp <- 1-1/c(10,100)
#' predict(fit, rp)
#' predict(fit, se = TRUE, ci = 'delta')
#'
#' ## The bootstrap sample used for CI are returned
#' fit <- FitAmax(x,'gev', varcov = FALSE)
#' boot <- predict(fit, rp, se = FALSE, ci = 'boot',
#'                 nsim = 500, out.matrix = TRUE)
#'
predict.amax <- function(obj, q = c(.5, .8, .9, .95, .98, .99),
                           se = FALSE, ci = 'none',
                           alpha = .05, nsim = 1000,
                           out.matrix = FALSE){

  n <- length(obj$data)

  ## Function that compute the return level for a given vector of parameter
  FunQ <- function(z, q0){
    lmomco::qlmomco(q0, lmomco::vec2par(z, obj$distr))
  }

  ## Compute Return level
  ans <- FunQ(obj$para, q)

  ## Compute confident intervals by resampling techniques
  if(ci == 'boot'){

    ## using Parametric bootstrap
    bootp <- matrix(NA,nsim,length(obj$para))
    p0 <- lmomco::vec2par(obj$para, obj$distr)

    for(ii in 1:nsim){

      repeat{
        ## simulate
        b <- lmomco::rlmomco(n,p0)

        ## estimate
        if(obj$method == 'gml'){
          suppressWarnings(p <- try(
            FitGev(b, varcov = FALSE, mu = obj$prior[1], sig2 = obj$prior[2]),
            silent = TRUE))

        } else {
          suppressWarnings(p <- try(
            FitAmax(b, distr = obj$distr, method = obj$method, varcov = FALSE),
            silent = TRUE))
        }

        ## Sample only feasible set
        if(any(class(p) != 'try-error'))
          if(p$method == obj$method)
            break

      }# end repeat


      bootp[ii,] <- p$para

    }# end for

  } else if(ci == 'norm'){

    ## Verify requirements for using the delta method
    if(any(is.na(obj$varcov)))
      stop('Covariance matrix was not estimated')

    bootp <- mnormt::rmnorm(nsim, obj$para, obj$varcov)

  } else
    bootp <- NA



  if(!any(is.na(bootp))){
    ## Compute the return level
    bootq <- t(apply(bootp,1, FunQ, q))

    ## if only one return period was passed in argument
    ## pivot the matrix
    if(all(dim(bootq) == c(1,nsim)))
      bootq <- t(bootq)

    ## Compute the confident interval
    bnd <- t(apply(bootq, 2, quantile, c(alpha/2,1 - alpha/2)))

    colnames(bnd) <- c('lower','upper')
  }


  ## Compute the standard deviation via the delta method
  if(se | ci == 'delta'){

    ## Verify requirements for using the delta method
    if(any(is.na(obj$varcov)))
      stop('Covariance matrix was not estimated')

    ## could eventually be replaced by analogic formulas
    g <- sapply(q, function(z) numDeriv::grad(FunQ, obj$para, q0 = z))
    sig <- sqrt(diag(crossprod(g, obj$varcov %*% g)))

    if(ci == 'delta'){
      nq <- abs(qnorm(alpha/2))
      bnd <- cbind(lower = ans - sig * nq, upper = ans + sig * nq)
    }

  }

  ## Built the final output
  ans <- data.frame(pred = ans)

  if(se)
    ans <- cbind(ans, se = sig)

  if(ci %in% c('boot','delta','norm'))
    ans <- cbind(ans, bnd)

  if(ncol(ans) == 1)
    ans <- ans[,1]

  if(out.matrix)
    ans <- list(pred = ans, para = bootp, qua = bootq)

  return(ans)
}

#############################################
#' Return level plot
#'
#'
#' @param obj Output from \code{\link{FitAmax}}.
#'
#' @param ci Logical. Does confident interval should be display. if so the
#'   the Monte-Carlo approximation method is used.
#'   See \code{\link{predict.amax}}.
#'
#' @param col.ci,lty.ci,lwd.ci Graphical parameters definining the display
#'    of the confident interval.
#'
#' @param ... Other graphical parameters. See \code{\link{par}}.
#'
#' @export
#'
#' @examples
#'
#' x <- ExtractAmax(flow~date, flowStJohn)$flow
#'
#' fit <- FitAmax(x, distr = 'gev', method = 'mle')
#'
#' plot(fit, ci = TRUE)

plot.amax <- function(obj, main = 'Return level plot',
                        xlab = 'Return period',
                        ylab = 'Return levels',
                        ci = FALSE, col.ci = 'red', lty.ci = 2,
                        lwd.ci = 1, ...){

      n <- length(obj$data)
      x <- sort(obj$data)
      p <- obj$para
      nrt <- 200

      ## Plotting axis
      xat <- c(1, 2, 5, 10, 20, 50, 100, 200, 500, 1000, 5000, 10000)

      prt <- (seq(n) - .5) / n
      ip <- seq(min(prt), max(prt), l = nrt)
      llip <- -log(-log(ip))

      pat <- 1-1/xat
      irt <- 1-1/ip

      ## Plot empirical points
      plot(-log(-log(prt)), x, axes = FALSE, main = main,
           ylab = ylab, xlab = xlab, ...)

      axis(2)

      axis(1, at = -log(-log(pat)), lab = xat)

      ## if Confident interval are to be plotted
      if(ci){

        bnd <- predict(obj, q = ip, se = FALSE, ci = 'delta')

        lines(llip, bnd[,1], col = col.ci, lwd = lwd.ci)

        lines(llip,smooth.spline(irt,bnd[,2])$y,
              lty = lty.ci, col = col.ci, lwd = lwd.ci)

        lines(llip,smooth.spline(irt,bnd[,3])$y,
              lty = lty.ci, col = col.ci, lwd = lwd.ci)

      } else{
        bnd <- predict(obj, q = ip, se = FALSE, ci = 'none')
        lines(llip, bnd, col = col.ci, lwd = lwd.ci)
      }
}

#############################
#' L-moment ratio diagram
#'
#' Plot the L-skew and L-kurtosis on the l-moment ratio diagram.
#'
#' @param x Matrix (n x 2) containing the L-skew and L-kurtosis
#'  respectively.
#'
#' @param pos Position of the legend. See \link{legend}.

#' @param ... Other ploting parameters. See \link{par}.
#'
#' @export
#'
#' @examples
#' library(lmomco)
#'
#' ## Replicate 20 times the simulation of a GEV distribution
#' Fsim <- function() rAmax(100,c(100,20,-.1),'gev')
#' x <- replicate(20,Fsim())
#'
#' ## Compute the L-ratios and display the diagram
#' FLcoef <- function(z) lmoms(z)$ratio[3:4]
#' ll <- apply(x,2, FLcoef )
#'
#' Ldiag(t(ll), pch = 16, cex = .7)
#' points(t(rowMeans(ll)), col = 2, pch = 17, cex = 2)
#'
Ldiag <- function(x, pos = 'bottomright',
                  xlim = NULL, ylim = NULL, ...){

  if(is.null(xlim)) xlim <- c(.9 * min(x[,1]), 1.1 * max(x[,1]))
  if(is.null(ylim)) ylim <- c(.9 * min(x[,2]), 1.1 * max(x[,2]))

  lmomco::plotlmrdia(lmomco::lmrdia(),
                     nopoints = TRUE, nolimits = TRUE,noaep = TRUE,
                     nogpa = TRUE, nogov = TRUE, xlim = xlim,
                     ylim = ylim)


  points(x, ...)

  if(!is.null(pos)) legend(pos, col = c(2,3,4,6), lty = c(2,1,2,1),
                           legend = c('GEV','GLO','LN3','PE3'))
}


######################################################################
#' Fit Generalized extreme value (GEV) distribution
#'
#' Fit a GEV distribution on annual maxima using the generalized
#' maximum likelihood method with Beta prior.
#' The output is of the class \code{amax}. See \link{FitAmax}.
#' Asymptotic result are computed like the maximum likelihood approach.
#'
#' @param x Data.
#' @param varcov Logical. Should the covariance matrix be returned.
#' @param mu,sig2 Mean and variance of the Beta prior.
#' @param method.optim Optimisation method used by \code{\link{optim}}
#' @param ... Other parameter pass to \code{\link{optim}}
#'
#' @section References:
#'
#' Martins, E.S., Stedinger, J.R., 2000. Generalized maximum-likelihood
#' generalized extreme-value quantile estimators for hydrologic data.
#' Water Resour. Res. 36, 737–744. https://doi.org/10.1029/1999WR900330
#'
#' @export
#'
#' @examples
#'
#' x <- ExtractAmax(flow~date, flowStJohn)$flow
#'
#' ## Using the default physiographic prior.
#' fit <- FitGev(x)
#' print(fit)
#' coef(fit)
#' vcov(fit)
#' predict(fit, ci = 'delta')
#'
#' ## A uniform prior on interlval -0.5 to 0.5 can be used to approximate
#' ## the maximum likelihood estimate.
#'
#' AIC(FitAmax(x, distr = 'gev', method ='mle'))
#' AIC(FitGev(x, mu = 0, sig2 = 1/12))
#'
#' ## A regional study can be performed by using an empirical prior
#' ## Here 20 sites with the same GEV distribution are simulated
#' ## without priors.
#' ## Then a regional estimate is obtained using an empirical prior
#'
#' xmat <- replicate(20, rAmax(20, c(100,3, -.1), 'gev') )
#' flist <- apply(xmat,2, FitAmax, distr = 'gev', varcov = FALSE)
#' pmat <- sapply(flist, getElement,'para')
#'
#' kap0 <- pmin(.5,pmax(-.5,pmat[3,]))
#'
#' FitGev(xmat[,1], mu = mean(kap0), sig2 = var(kap0))
#' FitAmax(xmat[,1], distr = 'gev', method = 'mle')
#'
FitGev <- function(x, varcov = TRUE, mu = -.1, sig2 = 0.015,
                   method.optim = 'BFGS',...){


   ## Verify that all values are finite values
  if(!all(is.finite(x)))
    stop('There is one (or more) non finite values')

  ## Compute the L-moments and associated parameters as starting value
  lmm <- lmomco::lmoms(x)
  startp <- lmomco::lmom2par(lmm,'gev')$para

  ## apply transformation to parameter to ensure positivness of scale parameter
  ## and modify the sign of the shape parameter to agree with the parametrization
  startp[2] <- log(startp[2])

  ## Find the parameter of the prior Beta distribution based on
  ## mean and variance passed in arguments
  mu <- mu + .5
  a <- mu * ( (mu * (1-mu) / sig2) - 1)
	b <- a * (1 - mu) / mu

	if(a <= 0 | b <=0)
	  stop('The parameters of the Beta distribution are not correct')

	## Function returning Negative log-likelihood of the model
  Nllik <- function(p, w = 1){
    -sum(dgev(x,p[1],exp(p[2]), p[3] ,log=TRUE)) -
      w * dbeta(p[3]+.5, a, b, log = TRUE)
  }

  ## optimize the Nllik to obtain the estimate
  out <- optim(startp, Nllik, hessian = varcov, method = method.optim, ...)

  ## Compute the covariance matrix if necessary by iversion of the hessian
  ## matrix
  if(varcov)
    varcov <- chol2inv(chol(out$hessian))
  else
    varcov <- NA

  ## Create the output object
  pout <- out$par
  pout[2] <- exp(pout[2])
  names(pout) <- c('xi','alpha','kappa')

  ans <- list(para = pout,
              distr = 'gev',
              llik  = -Nllik(out$par, w = 0),
              varcov = varcov,
              method = 'gml',
              lmom = lmm$lambdas,
              data = x,
              prior = c(mu - 0.5, sig2))

  class(ans) <- 'amax'

  return(ans)
}

## Copy from R-package evd with modification of the parametrization
dgev <- function (x, xi = 0, alpha = 1, kappa = 0, log = FALSE){

  ## Verify if the parameter are valid
  if (min(alpha) <= 0)
    stop("invalid alpha")

  if (length(kappa) != 1)
    stop("invalid -kappa")

  ## center and alpha
  x <- (x - xi)/alpha


  if (kappa == 0){

    ## Case of Gumbel distribution
    d <- log(1/alpha) - x - exp(-x)

  } else {

    nn <- length(x)
    xx <- 1 - kappa * x
    xxpos <- xx[xx > 0 | is.na(xx)]
    alpha <- rep(alpha, length.out = nn)[xx > 0 | is.na(xx)]
    d <- numeric(nn)
    d[xx > 0 | is.na(xx)] <- log(1/alpha) - xxpos^(1/kappa) -
      (1-1/kappa) * log(xxpos)
    d[xx <= 0 & !is.na(xx)] <- -Inf
  }

  if (!log)
    d <- exp(d)

  return(d)
}

############################################################################
#' Basic probability functions for annual maxima
#'
#' Density, distribution function, quantile function and random generation
#' for various distribution used for modeling annual maxima.
#'
#' @name Amax
#' @param x,q Vector of quantiles.
#' @param p Vector of probabilities.
#' @param n	Number of observations.
#' @param para Vector of parameters for the distribution.
#' @param distr Characters indicating the distribution family.
#' @param log Logical. If TRUE, probabilities \code{p} are given as
#'   \code{log(p)}.
#'
#' @export
#'
#' @examples
#'
#' u <- runif(5)
#' u
#' x <- qAmax(u, c(100,3,.001), 'gno')
#' pAmax(x, c(100,3,.001), 'gno')
#'
#' x <- rAmax(100, c(100,30,0), 'gev')
#' sum(dAmax(x, c(100,30, 0), 'gev', log = TRUE))
#'
#'

#' @export
#' @rdname dAmax
dAmax <- function(x, para, distr, log = FALSE){
  ans <- lmomco::dlmomco(x, lmomco::vec2par(para,distr))

  if(log)
    ans <- log(ans)

  return(ans)
}

#' @export
#' @rdname dAmax
pAmax <- function(q, para, distr)
  lmomco::plmomco(q, lmomco::vec2par(para,distr))

#' @export
#' @rdname dAmax
qAmax <- function(p, para, distr)
  lmomco::qlmomco(p, lmomco::vec2par(para,distr))

#' @export
#' @rdname dAmax
rAmax <- function(n, para, distr)
  lmomco::rlmomco(n, lmomco::vec2par(para,distr))