R/atSite.R

##################################################
#' At-site analysis
#'
#' Return the fitting of the best distribution according to AIC/BIC
#' among an initial set. This function
#' is a wrapping of the functions found in the package
#' \code{\link{lmomco-package}}.
#' The function atSite2par extract the parameters in the format of
#' function \code{\link{vec2par}}.
#' The function \code{update} refit the distribution using the
#' new parameters passed in arguments.
#'
#' @param x Vector of a sample.
#'
#' @param method String that determines the estimation method.
#'    Either maximum likelihood ('mle') or L-moments ('lmom').
#'
#' @param distr List of strings that represents candidates distributions
#'  to fit. By default: \code{'gev', 'glo', 'pe3', 'gno'}.
#'  See \code{\link{vec2par}} for the list of available distribution.
#'
#' @param k Penalty factor for criterion \code{ k*p - 2*L}, where \code{p}
#'    is the number of parameter and \code{L} is the log-likelihood.
#'    If \code{k = 2} (default) the criterion is the AIC and if \code{k = log(n)}
#'    the criterion is the BIC.
#'
#' @param tol.gev Numerical threshold representing the accepted
#'  difference between criteria to consider similar.
#'  In that case the GEV distribution is preferred over
#'  the best distribution identified.
#'
#'  @param rm.zero Logical. Should the zero value be removed.
#'
#' @section References:
#'
#' Coles, S. (2001). An introduction to statistical
#'   modeling of extreme values. Springer Verlag.
#'
#' Hosking, J. R. M., & Wallis, J. R. (1997). Regional frequency analysis:
#'   an approach based on L-moments. Cambridge Univ Pr.
#'
#' @seealso \code{\link{mle2par}}, \code{\link{lmom2par}}.
#'
#' @export
#'
#' @examples
#'
#' library(lmomco)
#' x <- rlmomco(49, vec2par(c(100,20,.1),'gev'))
#'
#' ax <- atSite(x)
#'
#' print(ax)
#'
#' ax2 <- update(ax, method = 'mle', tol.gev = 2)
#'

atSite <- function(x, distr = c('gev','glo','pe3','ln3'),
                   method = 'lmom', k = 2,
                   warn = FALSE, tol.gev = 0,
                   rm.zero = FALSE){

  isFinite<- is.finite(x)

  ## warning message
  if(warn &  sum(!isFinite) > 0)
    warning('There is one (or more) non finite values')

  x <- x[isFinite]

  ## Remove zero values
  if(rm.zero) x <- x[x>0]

  nl <- length(distr)
  n <- length(x)

  l <- lmomco::lmoms(x)

  ## assure that GEV is a candidate if tol.gev > 0
  if(tol.gev > 0) distr <- c(distr,'gev')

  distr <- unique(distr)

  ## Loop across distributions
  for(ii in 1:nl){
    type <- distr[ii]

    ans <- list(type = type)

    ## Fit the distribution
    f <- NULL
    if(method == 'lmom'){
      suppressWarnings(try(
        f <- lmomco::lmom2par(l,type),silent = TRUE))

    } else if(method == 'mle'){
      suppressWarnings(try(
        f <- lmomco::mle2par(x,type, para.init = l),silent = TRUE))
    }

    ## if mle fails use lmom
    if(is.null(f)) {
      suppressWarnings(try(f <- lmomco::lmom2par(l,type),silent = TRUE))
    }

    ## Save estimate and compute AIC if necessary
    ans$aic <- Inf

    if(is.null(f)){
      ans$estimate <- NA

    }else{
      ans$estimate <- f$para

      if(!is.null(f$AIC) & k == 2) ans$aic <- f$AIC  ## AIC already computed
      else suppressWarnings(try(ans$aic <- par2aic(x, f, type, k = k)))

    }

    ## Keep the gev fit
    if(type == 'gev') bestGEV <- ans

    ## Keep the best fit
    if(ii == 1) best <- ans
    else if(best$aic > ans$aic) best <- ans

  }

  ## If GEV should be prefered, replace the best fit
  if(tol.gev > 0 & !(best$type %in% c('gev','gumb'))){
    if(abs(best$aic - bestGEV$aic) < tol.gev) best <- bestGEV
  }

  ## Return final object
  best$lmom <- l$lambdas
  best$data <- x
  best$k <- k
  best$method <- method
  best$tol.gev <- tol.gev

  class(best) <- c('atSite')

  return(best)
}

#' @export
#' @rdname atSite
atSite2par <- function(obj) lmomco::vec2par(obj$estimate,obj$type)

#' @export
#' @rdname atSite
update.atSite <- function(obj, data = NULL,
                          method = NULL,
                          type = NULL,
                          k = NULL,
                          tol.gev = NULL){

  if(!is.null(method)) obj$method <- method
  if(!is.null(type)) obj$type <- type
  if(!is.null(k)) obj$k <- k
  if(!is.null(data)) obj$data <- data
  if(!is.null(tol.gev)) obj$tol.gev <- tol.gev

  return(atSite(obj$data, method = obj$method,
         distr = obj$type, k = obj$k, tol.gev = obj$tol.gev))
}

#' @export
print.atSite <- function(obj){
  cat('\nAt-site analysis\n')

  cat('\nDistribution:', obj$type, '\nObs:', length(obj$data),
      '\nAIC:', format(obj$aic, digits = 6),
      '\nMethod:', obj$method)

  cat('\nParameters:\n')
  print(obj$estimate)

  cat('\nLmoments:\n')
  print(data.frame(l1 = obj$lmom[1],
                  lcv = obj$lmom[2]/obj$lmom[1],
                  lsk = obj$lmom[3]/obj$lmom[2],
                  lkt = obj$lmom[4]/obj$lmom[2]), digits = 4)
}

###################################################
#' Mann-Kendall test for trend
#'
#' Return the results of the Mann-Kendall test for trends. In case
#' of autocorrelation, the p-value of the test is obtained by
#' block bootstrap. This function is a wrapper for the function
#' \code{\link{MannKendall}}.
#'
#' The automatic selection of the block bootstrap is chosen according to the
#' best order of an autoregressive model using the AIC criteria and
#' Yule-Walker estimates.
#' See \code{\link{ar}} for more details.
#' Previously, the ranks of the original data are transformed into
#' normal variable using either the estimated distribution or the ranks.
#'
#' The function \code{mannKendallPeaks} is performing the Mann-Kendall on
#' the magnitude of the peaks for a POT analysis.
#'
#' @param obj Output of function \code{\link{atSite}} or \code{\link{fitPot}}.
#'
#' @param nboot Number of bootstrap sample.
#'
#' @param lag.k,lag.max Lag size for the bootstrap blocks.
#'   If \code{lag.k} is \code{NULL} it is automatically selected and
#'   force to be inferior to \code{lag.max}.
#'
#' @param use.rank Does ranks should be used to transform data into
#'    uniform [0,1] variable or the model parameter should be used. Necessary
#'    for the selection of \code{lag.k}.
#'
#' @param ... Other parameter pass to \code{\link{tsboot}}.
#'
#' @seealso \code{\link{atSite}}, \code{\link{MannKendall}}, \code{\link{ar}}.
#'
#' @export
#'
#' @examples
#'
#' library(lmomco)
#'
#' # TS with trend
#' xset <- rlmomco(100, vec2par(c(100,20,0), 'gev')) + (1:100)/5
#'
#' ax <- atSite(xset)
#' mannKendall(ax)
#'
#' mannKendall(ax, lag.k = 0)
#' mannKendall(ax, lag.k = 3)
#'
#'
mannKendall <- function(obj, ...) UseMethod('mannKendall',obj)

#' @export
mannKendall.default <-function(obj,...) Kendall::MannKendall(obj,...)

#' @export
#' @rdname mannKendall
mannKendall.atSite <- function(obj, lag.k = NULL,
                               lag.max = 3, nboot = 1000,
                               use.rank = TRUE, ...){

  ans <- list()

  ## Test under serial independence
  ans$stat <- Kendall::MannKendall(obj$data)

  ## Automatic selection of the bootstrap block size
  if(is.null(lag.k) & nboot > 0){
    ## Transform into normal variable
    if(use.rank) z <- qnorm(rankit(obj))
    else z <- qnorm(as.uniform(obj))

    ## Select AIC best order
    lag.k <- min(lag.max, ar(as.ts(z))$order + 1)
  }

  if(lag.k > 0){
    ## Perform block bootstrap if necessary
    ans$boot <- boot::tsboot(obj$data, R=nboot, l=lag.k, sim="fixed",
                             statistic = function(z)
                               Kendall::MannKendall(z)$tau, ...)$t

    ans$pvalue <- mean(abs(ans$boot)>abs(ans$stat$tau))

  }else{
    ans$pvalue <- ans$stat$sl
  }

  ans$lag.k <- lag.k
  ans$nboot <- nboot

  class(ans) <- c('mannKendall.atSite')
  return(ans)
}

#' @export
print.mannKendall.atSite <- function(obj){

  if(obj$lag.k>0){
    cat('\n\nMann-Kendall Test for trend using block bootstrap\n')
    cat('\nN:', round(obj$nboot),'lag:', obj$lag.k,'\n\n')
  }else{
    cat('\n\nMann-Kendall Test for trend\n')
  }

  ans <- data.frame(tau = obj$stat$tau, pvalue = obj$pvalue)
  print(ans, digits = 4, row.names = FALSE)

}


#############################################
#' Various plot of the at-site analysis of a time series
#'
#' Produce various plot for the visualisation of the fitting of
#' a \code{\link{atSite}} object.
#' Including Return level plot (\code{plot}), QQ-plot (\code{qqplot}),
#' histogram (\code{hist}) and density plot (\code{density})
#'
#' @param obj Output from \code{\link{atSite}}.
#'
#' @param ci Logical. Does confident interval should be display.
#'
#' @param col.ci,lty.ci,lwd.ci Graphical parameters definining the display
#'    of the confident interval.
#'
#' @param col.dens,lty.dens,lwd.dens Graphical parameters definining the display
#'    of the nonparametric density lines.
#'
#' @param nsim Number of simulation for the computation of
#'   the confident interval.
#'
#' @param upper.tail Does the return periods are associated
#'   to the upper.tail (TRUE) or the lower tail (FALSE) of the
#'   distribution.
#'
#'
#' @param ... Other graphical parameters. See \code{\link{par}}.
#'
#' @export
#'
#' @examples
#'
#' library(lmomco)
#'
#' ax <- atSite(rlmomco(50,vec2par(c(100,20,0),'gev')))
#'
#' plot(ax, ci = TRUE)
#' qqplot(ax)
#' hist(ax)
#' density(ax)

plot.atSite <- function(obj, main = 'Return level plot',
                        xlab = 'Return period',
                        ylab = 'Return levels',
                        ci = FALSE, col.ci = 'red', lty.ci = 2,
                        lwd.ci = 1, nsim = 100,
                        upper.tail = TRUE, ...){

      n <- length(obj$data)
      x <- sort(obj$data)
      p <- atSite2par(obj)
      nrt <- 200

      ## Plotting axis
      xat = c(1, 2, 5, 10, 20, 50, 100, 200, 500, 1000, 5000, 10000)

      prt <- (1:n)/(n+1)
      ip <- seq(min(prt), max(prt), l = nrt)
      llip <- -log(-log(ip))

      if(upper.tail){
        pat <- 1-1/xat
        irt <- 1/(1-ip)

      }else{
        pat <- 1/xat
        irt <- 1/ip
      }

      ## Plot empirical points
      plot(-log(-log(prt)), x, axes = FALSE, main = main,
           ylab = ylab, xlab = xlab, ...)

      axis(2)

      axis(1, at = -log(-log(pat)), lab = xat)

      ## if Confident interval are to be plotted
      if(ci){

        bnd <- predict(obj, rt = irt,
                       ci = ci, nsim = nsim,
                       upper.tail = upper.tail)

        lines(llip, bnd[,1], col = col.ci, lwd = lwd.ci)
        lines(llip,smooth.spline(irt,bnd[,2])$y,
              lty = lty.ci, col = col.ci, lwd = lwd.ci)

        lines(llip,smooth.spline(irt,bnd[,3])$y,
              lty = lty.ci, col = col.ci, lwd = lwd.ci)

      } else{
        bnd <- predict(obj, rt = irt,
                       upper.tail = upper.tail)
        lines(llip, bnd, col = col.ci, lwd = lwd.ci)
      }
}

#' @export
#' @rdname plot.atSite
qqplot.atSite <- function(obj, main = 'QQ-plot',
                          ylab = 'Empirical quantiles',
                          xlab = 'Theoretical quantiles',
                          ci = FALSE, nsim = 100,
                          col.ci = 'red', lty.ci = 2, lwd.ci = 1, ...){
  n <- length(obj$data)
  nrt <- 200
  prt <- (1:n)/(n+1)
  qt <- lmomco::qlmomco(prt, atSite2par(obj))

  plot(qt, sort(obj$data), xlab = xlab, ylab = ylab, main = main, ...)
  abline(0,1, col = col.ci, lwd = lwd.ci)

  ## if Confident interval are to be plotted
  if(ci){

    irt <- (1:nrt)/(nrt+1)
    qt <- lmomco::qlmomco(irt, atSite2par(obj))
    bnd <- predict(obj, rt = 1/(1-irt),
                   ci = ci, nsim = nsim)

    lines(qt,smooth.spline(qt,bnd[,2])$y, lty = 3,
          col = col.ci, lwd = lwd.ci)
    lines(qt,smooth.spline(qt,bnd[,3])$y, lty = 3,
          col = col.ci, lwd = lwd.ci)

  }
}

#' @export
qqplot <- function(x,...) UseMethod('qqplot',x)

#' @export
qqplot.numeric <- function(x, y, ...) stats::qqplot(x, y, ...)

#' @export
ppplot <- function(obj,...) UseMethod('ppplot',obj)

#' @export
ppplot.atSite <- function(obj, main = 'PP-plot',
                               ylab = 'Empirical probabilities',
                               xlab = 'Theoretical probabilities', ...){
  n <- length(obj$data)
  prt <- (1:n)/(n+1)
  u <- sort(lmomco::plmomco(obj$data, atSite2par(obj)))

  plot(u, prt, xlab = xlab, ylab = ylab, main = main, ...)
  abline(0,1)
}

#' @export
#' @rdname plot.atSite
hist.atSite <- function(obj, main = 'Histogram',
                        xlab = 'Sample', ...){

  hist(obj$data, main = main, xlab = xlab,...)
}

#' @export
#' @rdname plot.atSite
density.atSite <- function(obj, main = 'Density plot',
                           xlab = 'Sample', ylab = 'Density',
                           ylim = NULL, col.dens = 'red',
                           lty.dens = 1, lwd.dens = 1, ...){

  npts <- 200
  x <- seq(min(obj$data), max(obj$data), len = npts)

  dpts <- lmomco::dlmomco(x,atSite2par(obj))
  dfun <- stats::density(obj$data)

  if(is.null(ylim)) ylim <- c(0, 1.15 * max(c(dfun$y,dpts)))

  plot(x,dpts, type = 'l', ylab = ylab, xlab = xlab,
       ylim = ylim, main = main, ...)

  lines(dfun, col = col.dens, lty = lty.dens, lwd = lwd.dens)
}


#############################
#' L-moment ratio diagram
#'
#' Plot the L-skew and L-kurtosis on the l-moment ratio diagram.
#'
#' @param x Matrix (n x 2) containing the L-skew and L-kurtosis
#'  respectively.
#'
#' @param pos Position of the legend. See \link{legend}.

#' @param ... Other ploting parameters. See \link{par}.
#'
#' @seealso \code{\link{atSite}}.
#'
#' @export
#'
#' @examples
#' library(lmomco)
#'
#' xx=replicate(20,rlmomco(100,vec2par(c(100,20,-.1),'gev')))
#'
#' ll=t(apply(xx,2, function(z) lmoms(z)$ratio[3:4]))
#'
#' ldiag(ll)
#'
ldiag <- function(x, pos = 'bottomright',
                  xlim = NULL, ylim = NULL, ...){

  if(is.null(xlim)) xlim <- c(.9 * min(x[,1]), 1.1 * max(x[,1]))
  if(is.null(ylim)) ylim <- c(.9 * min(x[,2]), 1.1 * max(x[,2]))

  lmomco::plotlmrdia(lmomco::lmrdia(), nopoints = TRUE, nolimits = TRUE,noaep = TRUE,
              nogpa = TRUE, nogov = TRUE, xlim = xlim,
              ylim = ylim)


  points(x, ...)

  if(!is.null(pos)) legend(pos, col = c(2,3,4,6), lty = c(2,1,2,1),
                           legend = c('GEV','GLO','LN3','PE3'))
}

################################################
#' Aikaike Information Criterion
#'
#' Returns the AIC in respect of a distribution and its parameter. More
#' precisely, the criteria is \code{k*p - 2 * L} where \code{p} is
#' the number of parameter, \code{L} is the log-likelihood and \code{k}
#' a penalty factor.
#'
#' @param x Vector of sample data.
#' @param p Vector of parameter.
#' @param type String that determines the distribution.
#'   See \code{\link{lmomco}}.
#' @param k Penalty factor. For AIC, k = 2 and for BIC, k = log(n)
#'
#' @export
par2aic <- function(x, p, type, k = 2){
  d <- length(p)
  llik <- lmomco::dlmomco(x, p)
  return(k*d - 2*sum(log(llik)))
}

#################################################
#' Predict at-site return levels
#'
#' Return the return levels, or quantiles associated
#' with output of function
#' \code{atSite} for given return periods.
#'
#' @param obj Output from \link{atSite}
#'
#' @param rt A vector of return periods.
#'
#' @param upper.tails Does the return periods are associated
#'   to the upper tails or the lower tail (FALSE) of
#'   the distribution.
#'
#' @param ci Does the confident interval should be display.
#'    The interval is estimated by MC simulations. Can take time.
#'
#' @param alpha Confident interval are symetric and computed
#'    with confident level 1-alpha.
#'
#' @param nsim Number of simulation used to estimate the
#'    confident intervals.
#'
#' @export
#'
predict.atSite <- function(obj, rt = c(2,5,10,20,50,100),
                           upper.tail = TRUE,
                           ci = FALSE,
                           alpha = .05,
                           nsim = 1000){

  ## Does the return period is for the upper or the lower
  ## tail
  if(upper.tail) q <- 1-1/rt
  else q <- 1/rt

  p0 <- atSite2par(obj)
  n <- length(obj$data)

  ## Compute predited value
  ans <- lmomco::qlmomco(q, p0)
  names(ans) <- paste('rt',rt,sep='')

  ## Compute confident intervals by MC simulations
  if(ci){

    mat <- matrix(NA,nsim,length(q))
    for(ii in 1:nsim){

      p <- NULL

      ## Sample only feasible set
      while(is.null(p)){
        ## simulate
        b <- lmomco::qlmomco(runif(n),p0)
        ## estimate
        try(p <- atSite2par(update(obj, data = b)))
      }
      ## predict
      mat[ii,] <- lmomco::qlmomco(q,p)
    }

    bnd <- t(apply(mat, 2, quantile, c(alpha/2,1 - alpha/2)))
    ans <- cbind(ans,bnd)
  }

  return(ans)

}

##################################
#' Uniform sample
#'
#' Transforms the data to the uniform space [0,1].
#' To this end, the function
#' \code{rankit} uses ranks, while the function \code{as.uniform} use
#' the estimated distribution.
#'
#' @param obj Output from \link{atSite}
#'
#' @seealso \code{\link{atSite}}.

#' @export
as.uniform <- function(obj) UseMethod('as.uniform', obj)

#' @export
as.uniform.default <- function(obj) rankit(obj)

#' @export
rankit.atSite <- function(obj) rankit(obj$data)

#' @export
as.uniform.atSite <- function(obj){
  para <- atSite2par(obj)
  return(lmomco::plmomco(obj$data,para))
}


##################################################################################
#' Anderson-Daring goodness-of-fit test
#'
#' Perform the Anderson-darling goodness-of-fit test as well of the
#' modified versions of the test, which puts more weight to the lower or
#' upper tails of the distribution. The null hypothesis is
#' that the data are generated by a given distribution (composite).
#' P-values are computed by parametric bootstrap.
#'
#' @param x,obj Vector of data or output of function \link{atSite}.
#'
#' @param type Distribution of the null hypothesis. See \link{vec2par}.
#'
#' @param method Estimation method.
#'
#' @param nboot Number of bootstrap sample for calculating the p-value.
#'
#' @param para An object containing an estimation of the distribution.
#'   See \link{vec2par} (optional).
#'
#' @param bsample Logical. Should the bootstrap sample be returned.
#'
#' @param cores Number of cores to use for parallel computing.
#'
#' @section References:
#'
#' Heo, J.-H., Shin, H., Nam, W., Om, J., & Jeong, C. (2013).
#' Approximation of modified Anderson–Darling test statistics for
#' extreme value distributions with unknown shape parameter.
#' Journal of Hydrology, 499, 41–49.
#' https://doi.org/10.1016/j.jhydrol.2013.06.008
#'
#' @export
#'
#' @examples
#'
#' library(lmomco)
#'
#' x0 <- rlmomco(50,vec2par(c(100,5),'nor'))
#'
#' adTest(x0, 'gum', nboot = 100)
#'
#' print(ax <- atSite(x0))
#' adTest(ax)

adTest <- function(obj,...) UseMethod('adTest', obj)

#' @export
#' @rdname adTest
adTest.default <- function(x, type, method = 'lmom', nboot = 1000,
                           para = NULL, bsample = FALSE,  ... ){

  x <- as.numeric(x)
  n <- length(x)

  ## Compute AD stats
  ans <- list()
  if(is.null(para)){

    if(method == 'lmom') para <- lmomco::lmom2par(lmomco::lmoms(x), type, ...)
    else if(method == 'pot1d') para <- lmomco::vec2par(c(0,fpot1d(x)),'gpa')
    else if(method == 'mle') para <- lmomco::mle2par(x,type, ...)

    ans$stat <- adStat(x, para = para, ...)

  } else {
    ans$stat <- adStat(x, para = para, ...)
    type <- para$type
  }


  ## Simulate boostrap sample
  xboot <- replicate(nboot, lmomco::rlmomco(n,para))

  ## Compute ad stats
  boot <- apply(xboot, 2, adStat, type = type, method = method, ...)

  ## Compute p-value
  ans$pvalue <- c(mean(boot[1,] > ans$stat[1]),
                  mean(boot[2,] > ans$stat[2]),
                  mean(boot[3,] > ans$stat[3]))

  ## Return final object
  ans$method <- method
  ans$estimate <- para$para
  ans$type <- type

  if(bsample) ans$boot <- boot

  class(ans) <- 'adTest'

  return(ans)
}

#' @export
#' @rdname adTest
#'
adTest.atSite <- function(obj, nboot = 1000, bsample = FALSE, cores = NULL){

    para <- atSite2par(obj)
    ans <- adTest(obj$data, para = para, nboot = nboot,
                  bsample = bsample, cores = cores,
                  method = obj$method)

    return(ans)
}



#' @export
adStat <- function(x, type = 'nor', method = 'lmom', para = NULL, ...){

  ## Estimate the parameters if necessary
  if(is.null(para)){
    if(method == 'lmom') para <- lmomco::lmom2par(lmomco::lmoms(x), type, ...)
    else if(method == 'pot1d') para <- lmomco::vec2par(c(0,fpot1d(x)),'gpa')
    else if(method == 'mle') para <- lmomco::mle2par(x,type, ...)
  }

  n <- length(x)
  u <- sort(lmomco::plmomco(x,para))
  w <- (2*(1:n)-1)/n
  su <- 2*sum(u)
  al <- -3*n/2 + su - sum(w * log(u))
  au <- n/2 - su - sum((2-w)*log(1-u))
  ad <- al + au

  return(c(ad,al,au))
}

#' @export
print.adTest <- function(obj){
  cat('\nAnderson-Darling Tests for goodness-of-fit',
      '\nType =', obj$type,
      '\nMethod =', obj$method,'\n\n')

  tab <- data.frame(Stat = c('AD','AL','AU'),
                    Value = obj$stat,
                    pvalue = obj$pvalue)

  print(tab, digits = 4, row.names = FALSE)
}


########################################################################
#' Modified Shapiro-Wilk test
#'
#' Return the statistics and p-value of the Shapiro-Wilk test on normalized
#' observation.
#'
#' @param x,obj Sample or output from fitted distribution.
#'
#' @seealso \link{shapiro.test}
#' @export
shapiroTest <- function(obj) UseMethod('shapiroTest',obj)

#' @export
shapiroTest.default <- function(obj) shapiro.test(obj)

#' @export
#' @rdname shapiroTest
shapiroTest.atSite <- function(obj, ...){
  tmp <- shapiroTest(qnorm(as.uniform(obj)))
  ans <- list(statistic = tmp$statistic, pvalue = tmp$p.value,
              type = obj$type, method = obj$method)

  class(ans) <- 'shapiroTest'

  return(ans)
}

#' @export
print.shapiroTest <- function(obj){
  cat('\n\nModified Shapiro-Wilk - goodness of fit test\n')
  cat('\nDistr: ', obj$type)
  cat('\nMethod: ', obj$method)
  cat('\nW: ', obj$statistic)
  cat('\np-value: ', obj$pvalue,'\n')
}





######################################################################
#' Fit Generalized extreme value (GEV) distribution
#'
#' Fit a GEV distribution to data using either the maximum likelihood or
#' the generalized maximum likelihood method of Martins and Stedingers (2000)
#'
#' @param x Vector of sample.
#' @param varcov Logical. Should the covariance matrix be computed.
#' @param method Method of estimation. Either 'ml' or 'gml'.
#' @param method.optim Optimisation method used by \code{optim}
#' @param ... Parameter pass to \code{\link{optim}}
#'
#' @section References:
#'
#' Martins, E.S., Stedinger, J.R., 2000. Generalized maximum-likelihood
#' generalized extreme-value quantile estimators for hydrologic data.
#' Water Resour. Res. 36, 737–744. https://doi.org/10.1029/1999WR900330
#'
#' @export
#'
#' @examples
#'
#' library(lmomco)
#' xset <- rlmomco(50, vec2par(c(100,30,.1), 'gev'))
#'
#' fit <- FitGev(xset)
#' print(fit)
#' predict(fit)
#' plot(fit, xset)
#'
FitGev <- function(x, varcov = TRUE, method = 'gml',
                   method.optim = 'BFGS',...){

  startp <- c(mean(x),log(sd(x)),0)

  nllik <- function(p, w){
    -sum(dgev(x,p[1],exp(p[2]),p[3] ,log=TRUE)) -
      w * dbeta(p[3]+.5,9,6, log = T)
  }

  if(method == 'ml'){
    w0 <- 0
  } else if(method == 'gml'){
    w0 <- 1
  }

  ## optimize the likelihood
  out <- optim(startp, nllik, w = w0, hessian = varcov,
               method = method.optim, ...)

  ans <- list(estimate = out$par,
              llik  = -nllik(out$par, w = 0),
              method = method)

  class(ans) <- 'FitGev'

  ans$estimate[2] <- exp(out$par[2])
  names(ans$estimate) <- c('location','scale','shape')

  if(varcov) ans$varcov <- chol2inv(chol(out$hessian))

  return(ans)
}

## Copy from R-package evd
dgev <- function (x, loc = 0, scale = 1, shape = 0, log = FALSE)
{
  if (min(scale) <= 0)
    stop("invalid scale")
  if (length(shape) != 1)
    stop("invalid shape")
  x <- (x - loc)/scale
  if (shape == 0)
    d <- log(1/scale) - x - exp(-x)
  else {
    nn <- length(x)
    xx <- 1 + shape * x
    xxpos <- xx[xx > 0 | is.na(xx)]
    scale <- rep(scale, length.out = nn)[xx > 0 | is.na(xx)]
    d <- numeric(nn)
    d[xx > 0 | is.na(xx)] <- log(1/scale) - xxpos^(-1/shape) -
      (1/shape + 1) * log(xxpos)
    d[xx <= 0 & !is.na(xx)] <- -Inf
  }
  if (!log)
    d <- exp(d)
  d
}

#' @export
aic <- function(x,...) UseMethod('aic',x)
#' @export
aic.default <- function(x,...) AIC(x,...)
#' @export
aic.FitGev <- function(obj, k = 2) 3*k - 2*obj$llik


#' @export
print.FitGev <- function(obj){
  cat('\nAt-site analysis\n')

  cat('\nDistribution: GEV',
      '\nAIC:', round(aic(obj), 4),
      '\nMethod:', obj$method)

  cat('\nParameters:\n')
  print(round(obj$estimate,4))

  if(!is.null(obj$varcov)){
    cat('\nStandard error:\n')
    print(round(sqrt(diag(obj$varcov)),4 ))
  }
}


############################################################
#' Predict return levels
#'
#' Return the return levels of GEV distribution. Standard error is obtained
#' by the
#'
#' @param obj Model output from \code{\link{FitGev}}.
#' @param p Vector of return periods.
#' @param ci Logical. Should confidence interval be returned.
#' @param ci.nsim Number of simulation used to approximate
#' the confidence interval
#' @param alpha Outside probability of the confidence intervals.
#'
#' @seealso \link{FitGev}
#'
#' @export
predict.FitGev <- function(obj, p = c(2,5,10,20,50,100),
                           ci = FALSE, ci.nsim = 1000, alpha = .05){

  ## predict
  ishp <- 1/obj$estimate[3]
  yp <- -log(1-1/p)
  ypi <- yp^(-obj$estimate[3])
  nypi <- (1-ypi)*ishp

  PredFun <- function(p) p[1] - p[2]/p[3] * (1-yp^(-p[3]))

  ans <- list(pred = PredFun(obj$estimate))

  ## delta method
  if(!is.null(obj$varcov)){
    d <- cbind(1, -nypi,
               obj$estimate[2] * ishp * (nypi - ypi*log(yp)))

    ans$se <- sqrt(apply(d, 1, function(d) crossprod(d, obj$varcov %*% d)))
  }

  if(ci){
    ## MCMC approximation for confident intervals
    ps <- mnormt::rmnorm(ci.nsim, obj$estimate, varcov = obj$varcov)
    pci <- apply(ps, 1, PredFun)
    ans$se0 <- apply(pci, 1, sd)
    ans$lb <- apply(pci, 1, quantile, alpha/2)
    ans$ub <- apply(pci, 1, quantile, 1-alpha/2)
  }

  out <- matrix(unlist(ans), nrow = length(p))
  colnames(out) <- names(ans)
  rownames(out) <- as.character(p)

  return(out)
}

#' @export
plot.FitGev <- function(obj, x, ci = TRUE,
                       ylab = 'Return levels',
                       xlab = 'Return period [-log(1-p)]',...){

  p <- seq_along(x) / (length(x)+1)
  p0 <- seq(min(p),max(p), l = 100)
  z <- predict(obj,1/(1-p0), ci = ci)
  yrange <- c(0.95 * min(z[,'lb']), 1.05 * max(z[,'ub']))

  yp0 <- -log(1-p0)
  plot(yp0, z[,'pred'], type = 'l', ylim = yrange,
       ylab = ylab, xlab = xlab, ...)
  points(-log(1-p), sort(x), pch = '+')

  if(ci){
    lines(yp0, z[,'lb'], type = 'l', lty = 2)
    lines(yp0, z[,'ub'], type = 'l', lty = 2)
  }


}
martindurocher/floodRFA documentation built on June 5, 2019, 8:44 p.m.