# R/ev.test.R In goft: Tests of Fit for some Probability Distributions

#### Documented in ev_test

```# -------------------------------------------------------------------------------------------------------
# Correlation and variance ratio tests for the extreme value distributions (Gumbel, Frechet and Weibull)
# N is the number of Monte Carlo samples used to approximate critical values of the variance ratio test
# -------------------------------------------------------------------------------------------------------
ev_test <- function(x, dist = "gumbel", method = "cor", N = 1000){
DNAME <- deparse(substitute(x))
if (!is.numeric(x)) stop(paste(DNAME, "must be a numeric vector"))
if (sum(is.na(x)) > 0) warning("NA values have been deleted")
x <- x[!is.na(x)]
x <- as.vector(x)
n <- length(x)   # adjusted sample size without NA values
if (n <= 1) stop("sample size must be larger than 1")
samplerange <- max(x) - min(x)
if (samplerange == 0) stop("all observations are identical")

if (any(c("gumbel", "frechet", "weibull") == dist) == FALSE) stop("Invalid distribution. \nValid distributions are 'gumbel', 'frechet' and 'weibull'. ")

if (dist == "frechet" && min(x) < 0) stop("The dataset contains negative observations. \nAll data must be non-negative real numbers when testing the Frechet distribution hypothesis.")
if (dist == "weibull" && max(x) > 0) stop("The dataset contains positive observations. \nAll data must be negative real numbers when testing the Weibull extreme value distribution hypothesis.")

if (any(c("cor", "ratio") == method) == FALSE) stop("Invalid method. \nValid methods are 'cor'  and 'ratio'. ")

if( dist == "frechet")  x <- log(x)
if( dist == "weibull")  x <- -log(-x)

if(method == "cor"){
x <- -x
if (n < 20 || n > 250) stop("The correlation test requires a sample size between 20 and 250.")

cor.stat <- function(x){
z   <- combn(x,2,max)
nz  <- length(z)
Fn  <- rep(NA,nz)
Fn  <- ecdf(z)
FnZ <- Fn(z)
y   <- rep(NA,nz)
y   <- log(qweibull(FnZ, 1, lower.tail = TRUE))
r   <- NA
logic  <- (z!= max(z))
r      <- cor(z[logic], y[logic], method = "pearson")
rl     <- log(1-r)
return(rl)
}
rl <- cor.stat(x)
median_n <- -3.02921 - .03846 * n + .00023427 * n**2 - .000000471091 * n**3
if (n <= 60 ){
s_n <- 0.7588 - 0.01697 * n + 0.000399 * n**2 - 0.000003 * n**3
}
if (n > 60) s_n <- .53

p.value <- pnorm(rl, mean = median_n, sd = s_n, lower.tail = FALSE)
r <- 1 - exp(rl)
results <- list(statistic = c(R = r), p.value = p.value,
method = paste("Correlation test of fit for the ", dist, " distribution"),
data.name = DNAME)
}

if(method == "ratio"){
ratio.stat <- function(x){
m    <- mean(x)
frac <- 1 / (1:n)
summ <- rep(NA, n)
for(i in 1:n){
summ[i] <- sum(frac[i:n])
}
y <- sort(x)
s.kim <- m - mean(y * summ)             # Kimball's estimator for the scale parameter
t <- (pi * s.kim)^2 / (6 * var(x))      # variance ratio test statistic
return(t)
}
ratio.stat_c <- ratio.stat(x)    # observed value of the test statistic
null.dist <- replicate(N, ratio.stat(-log(rexp(n))))
p1 <- sum(ratio.stat_c < null.dist) / N
p2 <- sum(ratio.stat_c > null.dist) / N
pvalue <- 2 * min(p1, p2)    # approximated p-value by Monte Carlo simulation
results <- list(statistic = c(T = ratio.stat_c), p.value = pvalue,
method = paste("Variance ratio test of fit for the ", dist, " distribution"),
data.name = DNAME)
}
class(results) = "htest"
return(results)
}
```

## Try the goft package in your browser

Any scripts or data that you put into this service are public.

goft documentation built on May 2, 2019, 6:32 a.m.