Nothing
require("fitdistrplus")
nbboot <- 100
nbboot <- 10
nsample <- 100
nsample <- 10
visualize <- FALSE # TRUE for manual tests with visualization of results
# (1) basic fit of a gamma distribution by maximum likelihood estimation
#
data(groundbeef)
serving <- groundbeef$serving
fitg <- fitdist(serving, "gamma")
summary(fitg)
plot(fitg)
cdfcomp(fitg, addlegend=FALSE)
#check names
names(fitdist(serving, "gamma", optim.method="Brent", lower=0, upper=10, fix.arg=list(shape=2))$estimate)
names(fitdist(serving, "gamma", optim.method="Nelder-Mead")$estimate)
names(fitdist(serving, "gamma", optim.method="BFGS")$estimate)
# names(fitdist(serving, "gamma", optim.method="CG", start=list(shape=4, rate=1/20))$estimate)
if(visualize) { # check ERROR on aarch64-apple-darwin20.4.0 (64-bit) (2021/05/12)
set.seed(1234)
names(fitdist(serving, "gamma", optim.method="L-BFGS-B", lower=0)$estimate)
}
# (7) custom optimization function
#
#create the sample
set.seed(1234)
mysample <- rexp(nsample, 5)
mystart <- list(rate=8)
res1 <- fitdist(mysample, dexp, start= mystart, optim.method="Nelder-Mead")
#show the result
summary(res1)
#the warning tell us to use optimise, because the Nelder-Mead is not adequate.
#to meet the standard 'fn' argument and specific name arguments, we wrap optimize,
myoptimize <- function(fn, par, ...)
{
res <- optimize(f=fn, ..., maximum=FALSE)
#assume the optimization function minimize
standardres <- c(res, convergence=0, value=res$objective,
par=res$minimum, hessian=NA)
return(standardres)
}
#call fitdist with a 'custom' optimization function
res2 <- fitdist(mysample, dexp, start=mystart, custom.optim=myoptimize,
interval=c(0, 100))
#show the result
summary(res2)
# (8) custom optimization function - another example with the genetic algorithm
#
if(any(installed.packages()[,"Package"] == "rgenoud") && visualize)
{
#set a sample
fit1 <- fitdist(serving, "gamma")
summary(fit1)
#wrap genoud function rgenoud package
mygenoud <- function(fn, par, ...)
{
require("rgenoud")
res <- genoud(fn, starting.values=par, ...)
standardres <- c(res, convergence=0, counts=NULL)
return(standardres)
}
#call fitdist with a 'custom' optimization function
fit2 <- fitdist(serving, "gamma", custom.optim=mygenoud, nvars=2, start=as.list(fit1$estimate),
Domains=cbind(c(0, 0), c(10, 10)), boundary.enforcement=1,
print.level=0, hessian=TRUE)
summary(fit2)
}
# (11) Fit of a Pareto distribution by numerical moment matching estimation
#
if(any(installed.packages()[,"Package"] == "actuar") && visualize)
{
require("actuar")
#simulate a sample
set.seed(1234)
x4 <- rpareto(nsample, 6, 2)
#empirical raw moment
memp <- function(x, order)
ifelse(order == 1, mean(x), sum(x^order)/length(x))
#fit
fP <- fitdist(x4, "pareto", method="mme", order=c(1, 2), memp="memp",
start=list(shape=10, scale=10), lower=1, upper=Inf)
summary(fP)
plot(fP)
}
# (14) scaling problem - too small values
#
if (visualize) # LONG TO RUN ON CRAN
{
x2 <- c(-0.00707717, -0.000947418, -0.00189753,
-0.000474947, -0.00190205, -0.000476077, 0.00237812, 0.000949668,
0.000474496, 0.00284226, -0.000473149, -0.000473373, 0, 0, 0.00283688,
-0.0037843, -0.0047506, -0.00238379, -0.00286807, 0.000478583,
0.000478354, -0.00143575, 0.00143575, 0.00238835, 0.0042847,
0.00237248, -0.00142281, -0.00142484, 0, 0.00142484, 0.000948767,
0.00378609, -0.000472478, 0.000472478, -0.0014181, 0, -0.000946522,
-0.00284495, 0, 0.00331832, 0.00283554, 0.00141476, -0.00141476,
-0.00188947, 0.00141743, -0.00236351, 0.00236351, 0.00235794,
0.00235239, -0.000940292, -0.0014121, -0.00283019, 0.000472255,
0.000472032, 0.000471809, -0.0014161, 0.0014161, -0.000943842,
0.000472032, -0.000944287, -0.00094518, -0.00189304, -0.000473821,
-0.000474046, 0.00331361, -0.000472701, -0.000946074, 0.00141878,
-0.000945627, -0.00189394, -0.00189753, -0.0057143, -0.00143369,
-0.00383326, 0.00143919, 0.000479272, -0.00191847, -0.000480192,
0.000960154, 0.000479731, 0, 0.000479501, 0.000958313, -0.00383878,
-0.00240674, 0.000963391, 0.000962464, -0.00192586, 0.000481812,
-0.00241138, -0.00144963)
for(i in 6:0)
{
cat("\nscaling", 10^i, "\n")
res <- try(mledist(x2*10^i, "cauchy"), silent=TRUE)
if(inherits(res, "try-error"))
print(res)
else
{
cat("estimate\n")
print(res$estimate)
cat("Hessian\n")
print(res$hessian)
}
}
}
# (15) scaling problem - too big values
#
if (visualize) # LONG TO RUN ON CRAN
{
x1 <- c(
1401928684, 1413455609, 1432458425, 1436910475, 1494883250, 1565770323, 1577486458,
1568908053, 1606424896, 1632264979, 1780495643, 1865525923, 2035689865, 2141429306,
2335443964, 2465661689, 2563368221, 2845012431, 2949890881, 3180645942, 3309009836,
3618581152, 4109197451, 4064662257, 4028375795, 4176781983, 4303024833, 4493470109
)
for(i in 0:5)
{
cat("\nscaling", 10^(-2*i), "\n")
res <- mledist(x1*10^(-2*i), "norm")
Hm1 <- try(solve(res$hessian), silent=TRUE)
if(inherits(Hm1, "try-error"))
print(Hm1)
else
{
cat("estimate\n")
print(res$estimate)
cat("Hessian\n")
print(res$hessian)
cat("inverse Hessian\n")
print(Hm1)
}
}
fitdist(x1, "norm")
fitdist(x1*1e-6, "norm")
}
# (16) Fit of a lognormal distribution on acute toxicity values of endosulfan for
# nonarthropod invertebrates, using maximum likelihood estimation
# to estimate what is called a species sensitivity distribution
# (SSD) in ecotoxicology, followed by estimation of the 5 percent quantile value of
# the fitted distribution, what is called the 5 percent hazardous concentration (HC5)
# in ecotoxicology, with its two-sided 95 percent confidence interval calculated by
# parametric bootstrap
#
data(endosulfan)
ATV <- subset(endosulfan, group == "NonArthroInvert")$ATV
log10ATV <- log10(subset(endosulfan, group == "NonArthroInvert")$ATV)
fln <- fitdist(log10ATV, "norm")
quantile(fln, probs = 0.05)
# (17) Fit of a triangular distribution using Cramer-von Mises or
# Kolmogorov-Smirnov distance
#
if(any(installed.packages()[,"Package"] == "mc2d") && visualize)
{
set.seed(1234)
require("mc2d")
t <- rtriang(100,min=5,mode=6,max=10) # nsample not used : does not converge with a too small sample
fCvM <- fitdist(t,"triang",method="mge",start = list(min=4, mode=6,max=9),gof="CvM")
fKS <- fitdist(t,"triang",method="mge",start = list(min=4, mode=6,max=9),gof="KS")
cdfcomp(list(fCvM,fKS))
}
# (18) gumbel distribution
#
dgumbel <- function(x, a, b) 1/b*exp((a-x)/b)*exp(-exp((a-x)/b))
pgumbel <- function(q, a, b) exp(-exp((a-q)/b))
qgumbel <- function(p, a, b) a-b*log(-log(p))
data(danishuni)
fitdist(danishuni$Loss, "gumbel", start=list(a=5, b=10))
# (19) check the 'start' argument
#
if (FALSE) # NO INTEREST WITHOUT VISUALIZATION OF THE RESULT
{
#create the sample
mysample <- rexp(nsample, 5)
mystart2 <- list(rate2=8)
mystart3 <- list(8)
try( fitdist(mysample, dexp, start= mystart2, method="mle") )
try( fitdist(mysample, dexp, start= mystart3, method="mle") )
try( fitdist(mysample, dexp, start= mystart2, method="mme") )
try( fitdist(mysample, dexp, start= mystart3, method="mme") )
try( fitdist(mysample, dexp, start= mystart2, method="qme", probs=1/2) )
try( fitdist(mysample, dexp, start= mystart3, method="qme", probs=1/2) )
try( fitdist(mysample, dexp, start= mystart2, method="mge", gof="AD") )
try( fitdist(mysample, dexp, start= mystart3, method="mge", gof="AD") )
}
# (20) example with dexgauss
# would require to suggest the package gamlss.dist in the Description file
#
#if(any(installed.packages()[,"Package"] == "gamlss.dist"))
#{
# require("gamlss.dist")
# set.seed(1234)
# a=rexGAUS(100,mu=500,sigma=50,nu=75)
# fitdist(a,dexGAUS,start=list(mu=median(a),sigma=sqrt(var(a)/2),nu=sqrt(var(a)/2)))
#}
# (21) check the 'keepdata' argument
#
if (visualize) # REQUIRES VISUALIZATION OF THE RESULTS
{
#create the sample
x <- rexp(1e6, 5)
summary(x)
f1 <- fitdist(x, "exp", keepdata=FALSE)
f2 <- fitdist(x, "exp", keepdata=TRUE)
par(mfrow=c(1,2))
cdfcomp(f1)
cdfcomp(f2)
}
# (22) relevant example for zero modified geometric distribution
#
dzmgeom <- function(x, p1, p2)
{
p1 * (x == 0) + (1-p1)*dgeom(x-1, p2)
}
pzmgeom <- function(q, p1, p2)
{
p1 * (q >= 0) + (1-p1)*pgeom(q-1, p2)
}
rzmgeom <- function(n, p1, p2)
{
u <- rbinom(n, 1, 1-p1) #prob to get zero is p1
u[u != 0] <- rgeom(sum(u != 0), p2)+1
u
}
x2 <- rzmgeom(nsample, 1/2, 1/10)
table(x2)
#this is the MLE which converges almost surely and in distribution to the true value.
initp1 <- function(x) list(p1=mean(x == 0))
fitdist(x2, "zmgeom", start=list(p1=1/2, p2=1/2))
f2 <- fitdist(x2, "zmgeom", fix.arg=initp1, start=list(p2=1/2))
print(f2)
summary(f2)
f2 <- fitdist(x2, "zmgeom", fix.arg=list(p1=1/2), start=list(p2=1/2))
print(f2)
summary(f2)
# (23) check the use of weights with MLE
#
set.seed(1234)
x <- rpois(nsample, 10)
xtab <- table(x)
xval <- sort(unique(x))
f1 <- fitdist(x, "pois")
f2 <- fitdist(xval, "pois", weights = xtab)
f1$estimate
f2$estimate #should be identical
# (24) check the use of weights with other methods
#
set.seed(1234)
x <- rpois(nsample, 10)
xtab <- table(x)
xval <- sort(unique(x))
(f1 <- fitdist(x, "norm", method = "mle"))
(f2 <- fitdist(xval, "norm", weights = xtab, method = "mle"))
(f1 <- fitdist(x, "norm", method = "mme"))
(f2 <- fitdist(xval, "norm", weights = xtab, method = "mme"))
(f1 <- fitdist(x, "norm", method = "qme", probs=c(1/4, 3/4)))
(f2 <- fitdist(xval, "norm", method = "qme", weights = xtab, probs=c(1/4, 3/4) ))
fitdist(x, "norm", method="mge", gof = "CvM")
try(fitdist(xval, "norm", method="mge", gof = "CvM", weights = xtab)) # not yet developped
# (24b) check the use of weights with qme with a discrete distribution
#
set.seed(1234)
x <- rpois(nsample, 10)
xtab <- table(x)
xval <- sort(unique(x))
(f1 <- fitdist(x, "pois", method = "qme", probs=c(1/2)))
(f2 <- fitdist(xval, "pois", method = "qme", weights = xtab, probs=c(1/2) )) # similar to f1
fitdist(xval, "pois", method = "qme", weights = xtab, probs=c(1/2), optim.method="SANN", control=list(maxit=1000)) #
fitdist(x, "pois", method = "qme", probs=c(1/2), optim.method="SANN", control=list(maxit=1000)) # should be similar
# should give similar results for big samples
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.