#############################################################
#' Goodness-of-fit test
#'
#' Return the statistic and p-value of a goodness of fit test on
#' AMAX and POT mdoels. The null hypotesis (composite)
#' is that the data were generated by the fitted distribution.
#'
#' @param obj Output from \code{\link{FitAmax}} or \code{\link{FitPot}}.
#'
#' @param method Test to be performed. Either Anderson-Darling \code{ad},
#' or modified Shapiro-Wilk ('shapiro').
#' For a POT model, the method \code{adtab}
#' perform the Anderson-Darling test and interpolates the
#' p-value from a table.
#'
#' @export
#'
#' @section References:
#'
#' Choulakian, V., Stephens, M.A., 2001. Goodness-of-Fit Tests for the
#' Generalized Pareto Distribution. Technometrics 43, 478–484.
#' https://doi.org/10.2307/1270819
#'
#' Heo, J.-H., Shin, H., Nam, W., Om, J., Jeong, C., 2013. Approximation of
#' modified Anderson–Darling test statistics for extreme value distributions
#' with unknown shape parameter. Journal of Hydrology 499, 41–49.
#' https://doi.org/10.1016/j.jhydrol.2013.06.008
#'
#' Ba, I., Ashkar, F., 2017. Discrimination between a group of
#' three-parameter distributions for hydro-meteorological frequency modeling.
#' Can. J. Civ. Eng. 45, 351–365. https://doi.org/10.1139/cjce-2017-0416
#'
#' @examples
#'
#' ## The same simulated distribution should be accepted p-value >.05
#' x <- rAmax(100, c(100,30,-.1), 'gev')
#'
#' fit <- FitAmax(x, 'gev', varcov = FALSE, nsim = 100)
#' GofTest(fit)
#'
#' ## The same apply to POT model
#' fit <- FitPot(flow~date, flowStJohn, u = 1000,
#' declust = 'flood', r = 14)
#'
#' ## By default using a table of value with maximum likelihood
#'GofTest(fit)
#'
GofTest <- function(x, ...) UseMethod('GofTest', x)
#' @export
#' @rdname GofTest
GofTest.amax <- function(obj, method = 'ad', nsim = 1000, ...){
## Verification that GML was not used when fitting
if(obj$method == 'gml')
stop('The test is not available for Generalized maximum likelihood')
## Simulate boostrap sample
n <- length(obj$data)
opara <- lmomco::vec2par(obj$para, obj$distr)
xboot <- replicate(nsim, lmomco::rlmomco(n,opara))
## Compute the test statistics for the data and the bootstrap samples
if(method == 'ad'){
stat <- AdStat(obj$data, para = opara)
boot <- apply(xboot, 2, AdStat, type = obj$distr,
method = obj$method, ...)
pv <- mean(boot > stat, na.rm = TRUE)
} else if(method == 'shapiro') {
stat <- ShapiroStat(obj$data, para = opara)
boot <- apply(xboot, 2, ShapiroStat, type = obj$distr,
method = obj$method, ...)
pv <- mean(boot < stat, na.rm = TRUE)
}
ans <- list(stat = stat,
pvalue = pv,
distr = obj$distr,
method = method)
class(ans) <- 'amaxGof'
## return
ans
}
#' @export
print.amaxGof <- function(obj){
if(obj$method %in% c('ad','adtab'))
methodName <- 'Anderson-Darling'
else if(obj$method == 'shapiro')
methodName <- 'Modified Shapiro-Wilk'
cat('\nGoodness-of-fit test\n',
'\nTest =', methodName,
'\nDistribution =', obj$distr)
cat('\nstatistic :', round(obj$stat,4),
'\np-value :', round(obj$pvalue,4), '\n\n')
}
## function to compute the statistics of the Anderson-Darling test
AdStat <- function(x, type = NULL, method = NULL, para = NULL, ...){
## Estimate the parameters if necessary
if(is.null(para)){
if(method == 'lmom'){
para <- lmomco::lmom2par(lmomco::lmoms(x), type, ...)
} else if(method == 'mle'){
para <- suppressWarnings(lmomco::mle2par(x,type, ...))
} else if(method == 'fgpa1d'){
para <- fgpa1d(x)
para <- lmomco::vec2par(c(0,para), 'gpa')
} else if(method == 'fgpa2d'){
para <- fgpa2d(x)
para <- lmomco::vec2par(c(0,para), 'gpa')
} else if(method == 'fgpaLmom'){
para <- fgpaLmom(x)
para <- lmomco::vec2par(c(0,para), 'gpa')
}
}
n <- length(x)
u <- sort(lmomco::plmomco(x,para))
w <- (2*(1:n)-1)/n
ans <- n + sum(w * log(u)) + sum((2-w)*log(1-u))
return(-ans)
}
## Function to compute the statistics of the modified Shapiro-Wilk test
ShapiroStat <- function(x, type = NULL, method = NULL, para = NULL, ...){
## Estimate the parameters if necessary
if(is.null(para)){
if(method == 'lmom')
para <- lmomco::lmom2par(lmomco::lmoms(x), type, ...)
else if(method == 'mle')
para <- lmomco::mle2par(x,type, ...)
else if(method == 'fgpa1d'){
para <- fgpa1d(x)
para <- lmomco::vec2par(c(0,para), 'gpa')
} else if(method == 'fgpa2d'){
para <- fgpa2d(x)
para <- lmomco::vec2par(c(0,para), 'gpa')
} else if(method == 'fgpaLmom'){
para <- fgpaLmom(x)
para <- lmomco::vec2par(c(0,para), 'gpa')
}
}
z <- qnorm(lmomco::plmomco(x,para))
shapiro.test(z)$statistic
}
###########################################################################
#' @export
#' @rdname GofTest
GofTest.fpot <- function(obj, method = 'adtab', nsim = 1000){
opara <- lmomco::vec2par(c(0,obj$estimate), 'gpa')
n <- length(obj$excess)
## Case Anderson-Darling using a table
if(method == 'adtab'){
stat <- AdStat(obj$excess, para = opara)
pv <- AdGpaTable(obj$estimate[2], A2 = stat)
## Case anderson darling using boostrap
} else if(method == 'ad'){
if(obj$method == 'lmom')
fitMethod <- 'fgpaLmom'
else if(obj$method == 'mle')
fitMethod <- 'fgpa1d'
else if(obj$method == 'mle2')
fitMethod <- 'fgpa2d'
stat <- AdStat(obj$excess, para = opara)
xboot <- replicate(nsim, lmomco::rlmomco(n,opara))
boot <- apply(xboot, 2, AdStat, type = 'gpa', method = fitMethod)
pv <- mean(boot > stat, na.rm = TRUE)
## modified shapiro-wilk using boostrap
} else if(method == 'shapiro'){
if(obj$method == 'lmom')
fitMethod <- 'fgpaLmom'
else if(obj$method == 'mle')
fitMethod <- 'fgpa1d'
else if(obj$method == 'mle2')
fitMethod <- 'fgpa2d'
stat <- ShapiroStat(obj$excess, para = opara)
xboot <- replicate(nsim, lmomco::rlmomco(n,opara))
boot <- apply(xboot, 2, ShapiroStat, type = 'gpa', method = fitMethod)
pv <- mean(boot < stat, na.rm = TRUE)
}
ans <- list(stat = stat,
pvalue = pv,
distr = 'gpa',
method = method)
class(ans) <- 'amaxGof'
return(ans)
}
#########################################################################
#' Anderson-Darling (AD) test for the Generalized Pareto distribution (GPA)
#'
#' Return the critical value or the p-value of the AD test for the GPA
#' distribution. The results are obtained by linear interpolation of the
#' table provided by Choulakian and Stephens (2001) when scale and shape
#' parameter are estimated by maximum likelihood.
#'
#' @param kap Shape parameter of the GPA.
#'
#' @param pval P-value of the test.
#'
#' @param A2 Statistics of the AD test.
#' If \code{NULL} the function return
#' the critical value associated with \code{'pval'}.
#' Otherwise the function return the p-value associated with the provided
#' statistics.
#'
#' @param ... Additional parameter for the function \code{\link{approx}}.
#'
#' @details
#'
#' Choulakian V, Stephens MA. Goodness-of-Fit Tests for the Generalized
#' Pareto Distribution. Technometrics. 2001;43(4):478–84.
#'
#' @seealso \link{interp2d}
#'
#'
#' @examples
#'
#' AdGpaTable(kap = -.11, A2 = .93)
#' AdGpaTable(kap = -.11, pval = 0.05)
AdGpaTable <- function(kap, pval = 0.05, A2 = NULL, ...){
## Table of for the GPA
ad <- c(0.339, 0.471, 0.641, 0.771, 0.905, 1.086, 1.226, 1.559,
0.356, 0.499, 0.685, 0.830, 0.978, 1.180, 1.336, 1.707,
0.376, 0.534, 0.741, 0.903, 1.069, 1.296, 1.471, 1.893,
0.386, 0.550, 0.766, 0.935, 1.110, 1.348, 1.532, 1.966,
0.397, 0.569, 0.796, 0.974, 1.158, 1.409, 1.603, 2.064,
0.410, 0.591, 0.831, 1.020, 1.215, 1.481, 1.687, 2.176,
0.426, 0.617, 0.873, 1.074, 1.283, 1.567, 1.788, 2.314,
0.445, 0.649, 0.924, 1.140, 1.365, 1.672, 1.909, 2.475,
0.468, 0.688, 0.985, 1.221, 1.465, 1.799, 2.058, 2.674,
0.496, 0.735, 1.061, 1.321, 1.590, 1.958, 2.243, 2.922)
ad <- t(matrix(ad, 8 ,10))
kap0 <- c(-0.9, -0.5, -0.2, -0.1, 0, 0.1, 0.2, 0.3, 0.4, 0.5)
pval0 <- c(0.5, 0.25, 0.1, 0.05, 0.025, 0.01, 0.005, 0.001)
## Verify table range
if(pval < 0.001){
warning('P-value outside range. Value 0.001 will be used')
pval <- 0.001
}
if(pval > 0.5){
warning('P-value outside range. Value 0.5 will be used')
pval <- 0.5
}
if(kap < (-0.9)){
warning('Kappa outside range. Value -0.9 will be used')
kap <- (-0.9)
}
if(kap > 0.5){
warning('Kappa outside range. Value 0.5 will be used')
kap <- 0.5
}
if(is.null(A2))
ans <- interp2d(kap0, pval0, ad, kap, pval, reverse = FALSE,...)
else
ans <- interp2d(kap0, pval0, ad, kap, A2, reverse = TRUE, ...)
return(ans)
}
########################################################################
#' Interpolation of a table
#'
#' Return linear interpolation of a table. Can returns either the
#' interpolation of the values of the table or the interpolations
#' the columns associated with a value in the table(\code{reverse}).
#'
#' @param x0 References in row of \code{z}.
#'
#' @param y0 References in column of \code{z}
#'
#' @param z0 Matrix representing a table to interpolate.
#'
#' @param x Point where to interpolate in row.
#'
#' @param y Point where to interpolate in column.
#'
#' @param reverse Logical. Should the Refrence value en \code{y} be
#' interpolated instead of \code{z}.
#'
#'
#' @seealso \link{approx}
#'
#' @examples
#'
#' x <- 1:10
#' y <- 1:10
#' z <- outer(x,y)
#'
#' interp2d(x,y,z,5,5) # 5*5 = 25
#' interp2d(x,y,z,5.1,5.1) # 26.01
#'
#' interp2d(x,y,z,5.1,26, reverse = TRUE)
#'
interp2d <- function(x0, y0, z0, x, y, reverse = FALSE, ...){
## Verify consistency between x0, y0 and z0
if(length(x0) != nrow(z0) | length(y0) != ncol(z0))
stop('Wrong dimension passed in z0')
## Case interpolating z0
if(!reverse){
## y0 is exact
if(sum(y0 == y)>0){
z <- z0[, which(y0 == y)]
## case y0 is not exact
} else{
z <- sapply(seq(nrow(z0)),
function(k) approx(y0, z0[k,], xout = y, ...)$y)
}
## Interpolation z
if(x < min(x0)) ans <- max(z)
else if(x> max(x0)) ans <- min(z)
else ans <- approx(x0, z, xout = x, ...)$y
## Case Interpolation y instead of z
} else {
##case x0 is exact
if(sum(x0 == x) > 0){
z <- z0[which(x0 == x),]
##case x0 is not exact
} else {
z <- sapply(seq(ncol(z0)),
function(k) approx(x0, z0[,k], xout = x, ...)$y)
}
## interpolating y
if(y < min(z)) ans <- max(y0)
else if(y > max(z)) ans <- min(y0)
else ans <- approx(z, y0, xout = y, ...)$y
}
return(ans)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.