floodStat: FloodNet Regional Frequency Analysis

#############################################################
#' Goodness-of-fit test
#'
#' Return the statistic and p-value of a goodness of fit test on
#' AMAX and POT mdoels. The null hypotesis (composite)
#' is that the data were generated by the fitted distribution.
#'
#' @param obj Output from \code{\link{FitAmax}} or \code{\link{FitPot}}.
#'
#' @param method Test to be performed. Either Anderson-Darling \code{ad},
#'   or modified Shapiro-Wilk ('shapiro').
#'   For a POT model, the method \code{adtab}
#'   perform the Anderson-Darling test and interpolates the
#'   p-value from a table.
#'
#' @export
#'
#' @section References:
#'
#'  Choulakian, V., Stephens, M.A., 2001. Goodness-of-Fit Tests for the
#'  Generalized Pareto Distribution. Technometrics 43, 478–484.
#'  https://doi.org/10.2307/1270819
#'
#'  Heo, J.-H., Shin, H., Nam, W., Om, J., Jeong, C., 2013. Approximation of
#'  modified Anderson–Darling test statistics for extreme value distributions
#'  with unknown shape parameter. Journal of Hydrology 499, 41–49.
#'  https://doi.org/10.1016/j.jhydrol.2013.06.008
#'
#'  Ba, I., Ashkar, F., 2017. Discrimination between a group of
#'  three-parameter distributions for hydro-meteorological frequency modeling.
#'  Can. J. Civ. Eng. 45, 351–365. https://doi.org/10.1139/cjce-2017-0416
#'
#' @examples
#'
#' ## The same simulated distribution should be accepted p-value >.05
#' x <- rAmax(100, c(100,30,-.1), 'gev')
#'
#' fit <- FitAmax(x, 'gev', varcov = FALSE, nsim = 100)
#' GofTest(fit)
#'
#' ## The same apply to POT model
#' fit <- FitPot(flow~date, flowStJohn, u = 1000,
#'                declust = 'flood', r = 14)
#'
#' ## By default using a table of value with maximum likelihood
#'GofTest(fit)
#'
GofTest <- function(x, ...) UseMethod('GofTest', x)

#' @export
#' @rdname GofTest
GofTest.amax <- function(obj, method = 'ad', nsim = 1000, ...){

  ## Verification that GML was not used when fitting
  if(obj$method == 'gml')
    stop('The test is not available for Generalized maximum likelihood')

  ## Simulate boostrap sample
  n <- length(obj$data)
  opara <- lmomco::vec2par(obj$para, obj$distr)
  xboot <- replicate(nsim, lmomco::rlmomco(n,opara))

  ## Compute the test statistics for the data and the bootstrap samples
  if(method == 'ad'){
    stat <- AdStat(obj$data, para = opara)
    boot <- apply(xboot, 2, AdStat, type = obj$distr,
                                    method = obj$method, ...)
    pv <- mean(boot > stat, na.rm = TRUE)

  } else if(method == 'shapiro') {
    stat <- ShapiroStat(obj$data, para = opara)
    boot <- apply(xboot, 2, ShapiroStat, type = obj$distr,
                  method = obj$method, ...)
    pv <- mean(boot < stat, na.rm = TRUE)
  }

  ans <- list(stat = stat,
              pvalue = pv,
              distr = obj$distr,
              method = method)

  class(ans) <- 'amaxGof'

  ## return
  ans
}

#' @export
print.amaxGof <- function(obj){

  if(obj$method %in% c('ad','adtab'))
    methodName <- 'Anderson-Darling'
  else if(obj$method == 'shapiro')
    methodName <- 'Modified Shapiro-Wilk'

  cat('\nGoodness-of-fit test\n',
        '\nTest =', methodName,
        '\nDistribution =', obj$distr)

  cat('\nstatistic :', round(obj$stat,4),
        '\np-value :', round(obj$pvalue,4), '\n\n')
}

## function to compute the statistics of the Anderson-Darling test
AdStat <- function(x, type = NULL, method = NULL, para = NULL, ...){

  ## Estimate the parameters if necessary
  if(is.null(para)){
    if(method == 'lmom'){
      para <- lmomco::lmom2par(lmomco::lmoms(x), type, ...)
    } else if(method == 'mle'){
      para <- suppressWarnings(lmomco::mle2par(x,type, ...))
     } else if(method == 'fgpa1d'){
      para <- fgpa1d(x)
      para <- lmomco::vec2par(c(0,para), 'gpa')
    } else if(method == 'fgpa2d'){
      para <- fgpa2d(x)
      para <- lmomco::vec2par(c(0,para), 'gpa')
    } else if(method == 'fgpaLmom'){
      para <- fgpaLmom(x)
      para <- lmomco::vec2par(c(0,para), 'gpa')
    }
  }

  n <- length(x)
  u <- sort(lmomco::plmomco(x,para))
  w <- (2*(1:n)-1)/n
  ans <- n + sum(w * log(u)) + sum((2-w)*log(1-u))

  return(-ans)
}

## Function to compute the statistics of the modified Shapiro-Wilk test
ShapiroStat <- function(x, type = NULL, method = NULL, para = NULL, ...){
  ## Estimate the parameters if necessary
  if(is.null(para)){
    if(method == 'lmom')
      para <- lmomco::lmom2par(lmomco::lmoms(x), type, ...)
    else if(method == 'mle')
      para <- lmomco::mle2par(x,type, ...)
    else if(method == 'fgpa1d'){
      para <- fgpa1d(x)
      para <- lmomco::vec2par(c(0,para), 'gpa')
    } else if(method == 'fgpa2d'){
      para <- fgpa2d(x)
      para <- lmomco::vec2par(c(0,para), 'gpa')
    } else if(method == 'fgpaLmom'){
      para <- fgpaLmom(x)
      para <- lmomco::vec2par(c(0,para), 'gpa')
    }
  }

  z <- qnorm(lmomco::plmomco(x,para))
  shapiro.test(z)$statistic
}


###########################################################################
#' @export
#' @rdname GofTest
GofTest.fpot <- function(obj, method = 'adtab', nsim = 1000){

  opara <- lmomco::vec2par(c(0,obj$estimate), 'gpa')
  n <- length(obj$excess)

  ## Case Anderson-Darling using a table
  if(method == 'adtab'){
    stat <- AdStat(obj$excess, para = opara)
    pv <- AdGpaTable(obj$estimate[2], A2 = stat)

  ## Case anderson darling using boostrap
  } else if(method == 'ad'){

    if(obj$method == 'lmom')
      fitMethod <- 'fgpaLmom'
    else if(obj$method == 'mle')
      fitMethod <- 'fgpa1d'
    else if(obj$method == 'mle2')
      fitMethod <- 'fgpa2d'

    stat <- AdStat(obj$excess, para = opara)

    xboot <- replicate(nsim, lmomco::rlmomco(n,opara))
    boot <- apply(xboot, 2, AdStat, type = 'gpa', method = fitMethod)
    pv <- mean(boot > stat, na.rm = TRUE)

  ## modified shapiro-wilk using boostrap
  } else if(method == 'shapiro'){

    if(obj$method == 'lmom')
      fitMethod <- 'fgpaLmom'
    else if(obj$method == 'mle')
      fitMethod <- 'fgpa1d'
    else if(obj$method == 'mle2')
      fitMethod <- 'fgpa2d'

    stat <- ShapiroStat(obj$excess, para = opara)

    xboot <- replicate(nsim, lmomco::rlmomco(n,opara))
    boot <- apply(xboot, 2, ShapiroStat, type = 'gpa', method = fitMethod)
    pv <- mean(boot < stat, na.rm = TRUE)
  }

  ans <- list(stat = stat,
              pvalue = pv,
              distr = 'gpa',
              method = method)

  class(ans) <- 'amaxGof'

  return(ans)
}




#########################################################################
#' Anderson-Darling (AD) test for the Generalized Pareto distribution (GPA)
#'
#' Return the critical value or the p-value of the AD test for the GPA
#' distribution. The results are obtained by linear interpolation of the
#' table provided by Choulakian and Stephens (2001) when scale and shape
#' parameter are estimated by maximum likelihood.
#'
#' @param kap Shape parameter of the GPA.
#'
#' @param pval P-value of the test.
#'
#' @param A2 Statistics of the AD test.
#'   If \code{NULL} the function return
#'   the critical value associated with \code{'pval'}.
#'   Otherwise the function return the p-value associated with the provided
#'   statistics.
#'
#' @param ... Additional parameter for the function \code{\link{approx}}.
#'
#' @details
#'
#' Choulakian V, Stephens MA. Goodness-of-Fit Tests for the Generalized
#'   Pareto Distribution. Technometrics. 2001;43(4):478–84.
#'
#' @seealso \link{interp2d}
#'
#'
#' @examples
#'
#' AdGpaTable(kap = -.11, A2 = .93)
#' AdGpaTable(kap = -.11, pval = 0.05)

AdGpaTable <- function(kap, pval = 0.05, A2 = NULL, ...){

  ## Table of for the GPA
  ad <- c(0.339,	0.471,	0.641,	0.771,	0.905,	1.086,	1.226,	1.559,
          0.356,	0.499,	0.685,	0.830,	0.978,	1.180,	1.336,	1.707,
          0.376,	0.534,	0.741,	0.903,	1.069,	1.296,	1.471,	1.893,
          0.386,	0.550,	0.766,	0.935,	1.110,	1.348,	1.532,	1.966,
          0.397,	0.569,	0.796,	0.974,	1.158,	1.409,	1.603,	2.064,
          0.410,	0.591,	0.831,	1.020,	1.215,	1.481,	1.687,	2.176,
          0.426,	0.617,	0.873,	1.074,	1.283,	1.567,	1.788,	2.314,
          0.445,	0.649,	0.924,	1.140,	1.365,	1.672,	1.909,	2.475,
          0.468,	0.688,	0.985,	1.221,	1.465,	1.799,	2.058,	2.674,
          0.496,	0.735,	1.061,	1.321,	1.590,	1.958,	2.243,	2.922)

  ad <- t(matrix(ad, 8 ,10))

  kap0 <- c(-0.9, -0.5, -0.2, -0.1, 0, 0.1, 0.2, 0.3, 0.4, 0.5)
  pval0 <- c(0.5,	0.25,	0.1,	0.05,	0.025,	0.01,	0.005,	0.001)


  ## Verify table range
  if(pval < 0.001){
    warning('P-value outside range. Value 0.001 will be used')
    pval <- 0.001
  }

  if(pval > 0.5){
    warning('P-value outside range. Value 0.5 will be used')
    pval <- 0.5
  }

  if(kap < (-0.9)){
    warning('Kappa outside range. Value -0.9 will be used')
    kap <- (-0.9)
  }

  if(kap > 0.5){
    warning('Kappa outside range. Value 0.5 will be used')
    kap <- 0.5
  }

  if(is.null(A2))
    ans <- interp2d(kap0, pval0, ad, kap, pval, reverse = FALSE,...)
  else
    ans <- interp2d(kap0, pval0, ad, kap, A2, reverse = TRUE, ...)

  return(ans)
}

########################################################################
#' Interpolation of a table
#'
#' Return linear interpolation of a table. Can returns either the
#' interpolation of the values of the table or the interpolations
#' the columns associated with a value in the table(\code{reverse}).
#'
#' @param x0 References in row of \code{z}.
#'
#' @param y0 References in column of \code{z}
#'
#' @param z0 Matrix representing a table to interpolate.
#'
#' @param x  Point where to interpolate in row.
#'
#' @param y  Point where to interpolate in column.
#'
#' @param reverse Logical. Should the Refrence value en \code{y} be
#'  interpolated instead of \code{z}.
#'
#'
#' @seealso \link{approx}
#'
#' @examples
#'
#' x <- 1:10
#' y <- 1:10
#' z <- outer(x,y)
#'
#' interp2d(x,y,z,5,5) # 5*5  = 25
#' interp2d(x,y,z,5.1,5.1) # 26.01
#'
#' interp2d(x,y,z,5.1,26, reverse = TRUE)
#'
interp2d <- function(x0, y0, z0, x, y, reverse = FALSE, ...){

  ## Verify consistency between x0, y0 and z0
  if(length(x0) != nrow(z0) | length(y0) != ncol(z0))
    stop('Wrong dimension passed in z0')

  ## Case interpolating z0
  if(!reverse){

    ## y0 is exact
    if(sum(y0 == y)>0){
      z <- z0[, which(y0 == y)]

      ## case y0 is not exact
    } else{
      z <- sapply(seq(nrow(z0)),
                  function(k) approx(y0, z0[k,], xout = y, ...)$y)
    }

    ## Interpolation z
    if(x < min(x0)) ans <- max(z)
    else if(x> max(x0)) ans <- min(z)
    else ans <- approx(x0, z, xout = x, ...)$y

  ## Case Interpolation y instead of z
  } else {

    ##case x0 is exact
    if(sum(x0 == x) > 0){
      z <- z0[which(x0 == x),]

      ##case x0 is not exact
    } else {
      z <- sapply(seq(ncol(z0)),
                  function(k) approx(x0, z0[,k], xout = x, ...)$y)
    }

    ## interpolating y
    if(y < min(z)) ans <- max(y0)
    else if(y > max(z)) ans <- min(y0)
    else ans <- approx(z, y0, xout = y, ...)$y
  }

  return(ans)
}