tests/t-mgedist.R

require("fitdistrplus")

nsample <- 10

# (1) Fit of a Weibull distribution to serving size data by maximum 
# goodness-of-fit estimation using all the distances available
# 

data(groundbeef)
serving <- groundbeef$serving
mgedist(serving, "weibull", gof="CvM")$estimate
mgedist(serving, "weibull", gof="CvM", silent = FALSE)$estimate
mgedist(serving, "weibull", gof="KS")$estimate
mgedist(serving, "weibull", gof="AD")$estimate
mgedist(serving, "weibull", gof="ADR")$estimate
mgedist(serving, "weibull", gof="ADL")$estimate
mgedist(serving, "weibull", gof="AD2R")$estimate
mgedist(serving, "weibull", gof="AD2L")$estimate
mgedist(serving, "weibull", gof="AD2")$estimate

#fitted values are around (2.123932 82.145177)

# (2) Fit of a uniform distribution using Cramer-von Mises or
# Kolmogorov-Smirnov distance
# 

set.seed(1234)
u <- runif(nsample, min=5, max=10)
mgedist(u, "unif", gof="CvM")$estimate
mgedist(u, "unif", gof="KS")$estimate

#fitted values are around (5.495686 9.630573)

# (3) Fit of a triangular distribution using Cramer-von Mises or
# Kolmogorov-Smirnov distance
# 
require("mc2d")
set.seed(1234)
v <- rtriang(nsample, min=5, mode=6, max=10)
mgedist(v, "triang", start = list(min=4, mode=6,max=9), gof="CvM")$estimate
mgedist(v, "triang", start = list(min=4, mode=6,max=9), gof="KS")$estimate

#fitted values are around (4.896674 7.390675 8.494165 )

# (4) scaling problem
# the simulated dataset (below) has particularly small values, hence without scaling (10^0),
# the optimization raises an error. The for loop shows how scaling by 10^i
# for i=1,...,6 makes the fitting procedure work correctly.
if(FALSE)
{
  set.seed(1234)
  x2 <- rnorm(nsample, 1e-4, 2e-4)
  for(i in 6:0)
    cat(i, try(mgedist(x2*10^i,"cauchy")$estimate, silent=TRUE), "\n")
  
  # 6 19.86906 122.0622 
  # 5 1.986906 12.20622 
  # 4 0.1986906 1.220622 
  # 3 0.01986906 0.1220622 
  # 2 0.001986906 0.01220622 
  # 1 NA NA 
  # 0 NA NA 
}


# (5) scaling problem
#
if(FALSE)
{
  x <- c(-0.00707717, -0.000947418, -0.00189753, 
         -0.000474947, -0.00190205, -0.000476077, 0.00237812, 0.000949668, 
         0.000474496, 0.00284226, -0.000473149, -0.000473373, 0, 0, 0.00283688, 
         -0.0037843, -0.0047506, -0.00238379, -0.00286807, 0.000478583, 
         0.000478354, -0.00143575, 0.00143575, 0.00238835, 0.0042847, 
         0.00237248, -0.00142281, -0.00142484, 0, 0.00142484, 0.000948767, 
         0.00378609, -0.000472478, 0.000472478, -0.0014181, 0, -0.000946522, 
         -0.00284495, 0, 0.00331832, 0.00283554, 0.00141476, -0.00141476, 
         -0.00188947, 0.00141743, -0.00236351, 0.00236351, 0.00235794, 
         0.00235239, -0.000940292, -0.0014121, -0.00283019, 0.000472255, 
         0.000472032, 0.000471809, -0.0014161, 0.0014161, -0.000943842, 
         0.000472032, -0.000944287, -0.00094518, -0.00189304, -0.000473821, 
         -0.000474046, 0.00331361, -0.000472701, -0.000946074, 0.00141878, 
         -0.000945627, -0.00189394, -0.00189753, -0.0057143, -0.00143369, 
         -0.00383326, 0.00143919, 0.000479272, -0.00191847, -0.000480192, 
         0.000960154, 0.000479731, 0, 0.000479501, 0.000958313, -0.00383878, 
         -0.00240674, 0.000963391, 0.000962464, -0.00192586, 0.000481812, 
         -0.00241138, -0.00144963)
  
  
  #only i == 0, no scaling, should not converge.
  for(i in 6:0)
    cat(i, try(mgedist(x*10^i,"cauchy")$estimate, silent=TRUE), "\n")
  
  # 6 -283.1442 1185.903 
  # 5 -28.31442 118.5903 
  # 4 -2.831442 11.85903 
  # 3 -0.2831442 1.185903 
  # 2 -0.02831442 0.1185903 
  # 1 -0.002831442 0.01185903
  # 0 NA NA 
}


# (6) test error messages
#

dnorm2 <- pnorm2 <- function(x, a)
  "NA"
x <- rexp(10)

#should get a one-line error 
res <- mgedist(x, "norm2", start=list(a=1))
#as in 
attr(try(log("a"), silent=TRUE), "condition")


try(mgedist(c(serving, "a"), "gamma"))
try(mgedist(c(serving, NA), "gamma"))
try(mgedist(c(serving, Inf), "gamma"))
try(mgedist(c(serving, -Inf), "gamma"))
try(mgedist(c(serving, NaN), "gamma"))


# (7) test the component optim.message

x <- rnorm(1000)
#change parameter to obtain unsuccessful convergence
mgedist(x, "norm", control=list(maxit=2), start=list(mean=1e5, sd=1), optim.method="L-BFGS-B", lower=0)

# (8) test bounds

x <- rnorm(1000)
mgedist(x, "norm", optim.method="L-BFGS-B", lower=c(-Inf, 0)) #optim and L-BFGS-B
mgedist(x, "norm", optim.method="Nelder", lower=c(-Inf, 0))

# (9) numerical issue with log(1-p)

obs <- c(rep(1, 14), 10)
try(mgedist(obs, "weibull", gof="AD", control=list(trace=1, REPORT=1)))

# (10) large sample size issue

if(FALSE)
{
  set.seed(123)
  obs <- rlnorm(1e6, 3, 2)
  for(i in 2:6)
    cat(i, try(mgedist(obs[1:10^i], "lnorm", control=list(trace=0, REPORT=1))$estimate, silent=TRUE), "\n")
  
  # 2 3.143734 1.819549 
  # 3 3.023688 1.955416 
  # 4 2.995646 1.993392 
  # 5 3.002682 2.000394 
  # 6 2.999036 1.999887 
  # 7 3.000222 1.999981 
}

Try the fitdistrplus package in your browser

Any scripts or data that you put into this service are public.

fitdistrplus documentation built on April 3, 2025, 7:35 p.m.