FAQ.R
In fitdistrplus: Help to Fit of a Parametric Distribution to Non-Censored or Censored Data

## ----setup, echo=FALSE, message=FALSE, warning=FALSE--------------------------
require("fitdistrplus")
set.seed(1234)
options(digits = 3)

## ----eval=FALSE---------------------------------------------------------------
# dgumbel <- function(x, a, b) 1/b*exp((a-x)/b)*exp(-exp((a-x)/b))
# pgumbel <- function(q, a, b) exp(-exp((a-q)/b))
# qgumbel <- function(p, a, b) a-b*log(-log(p))
# data(groundbeef)
# fitgumbel <- fitdist(groundbeef$serving, "gumbel", start=list(a=10, b=10))

## ----eval=FALSE---------------------------------------------------------------
# dzmgeom <- function(x, p1, p2) p1 * (x == 0) + (1-p1)*dgeom(x-1, p2)
# pzmgeom <- function(q, p1, p2) p1 * (q >= 0) + (1-p1)*pgeom(q-1, p2)
# rzmgeom <- function(n, p1, p2)
# {
#   u <- rbinom(n, 1, 1-p1) #prob to get zero is p1
#   u[u != 0] <- rgeom(sum(u != 0), p2)+1
#   u
# }
# x2 <- rzmgeom(1000, 1/2, 1/10)
# fitdist(x2, "zmgeom", start=list(p1=1/2, p2=1/2))

## ----message=FALSE------------------------------------------------------------
data("endosulfan")
require("actuar")
fendo.B <- fitdist(endosulfan$ATV, "burr", start = list(shape1 = 0.3, 
                                                        shape2 = 1, rate = 1))
summary(fendo.B)

## ----fig.height=3.5, fig.width=7----------------------------------------------
x3 <- rlnorm(1000)
f1 <- fitdist(x3, "lnorm", method="mle") 
f2 <- fitdist(x3, "lnorm", method="mme")
par(mfrow=1:2, mar=c(4,4,2,1))
cdfcomp(list(f1, f2), do.points=FALSE, xlogscale = TRUE, main = "CDF plot")
denscomp(list(f1, f2), demp=TRUE, main = "Density plot")

## -----------------------------------------------------------------------------
c("E(X) by MME"=as.numeric(exp(f2$estimate["meanlog"]+f2$estimate["sdlog"]^2/2)), 
	"E(X) by MLE"=as.numeric(exp(f1$estimate["meanlog"]+f1$estimate["sdlog"]^2/2)), 
	"empirical"=mean(x3))
c("Var(X) by MME"=as.numeric(exp(2*f2$estimate["meanlog"]+f2$estimate["sdlog"]^2) * 
                               (exp(f2$estimate["sdlog"]^2)-1)), 
	"Var(X) by MLE"=as.numeric(exp(2*f1$estimate["meanlog"]+f1$estimate["sdlog"]^2) * 
	                             (exp(f1$estimate["sdlog"]^2)-1)), 
	"empirical"=var(x3))

## -----------------------------------------------------------------------------
set.seed(1234)
x <- rnorm(100, mean = 1, sd = 0.5)
(try(fitdist(x, "exp")))

## -----------------------------------------------------------------------------
fitdist(x[x >= 0], "exp")
fitdist(x - min(x), "exp")

## -----------------------------------------------------------------------------
set.seed(1234)
x <- rnorm(100, mean = 0.5, sd = 0.25)
(try(fitdist(x, "beta")))

## -----------------------------------------------------------------------------
fitdist(x[x > 0 & x < 1], "beta")
fitdist((x - min(x)*1.01) / (max(x) * 1.01 - min(x) * 1.01), "beta")

## ----message=FALSE, fig.height=4, fig.width=7---------------------------------
dtexp <- function(x, rate, low, upp)
{
  PU <- pexp(upp, rate=rate)
  PL <- pexp(low, rate=rate)
  dexp(x, rate) / (PU-PL) * (x >= low) * (x <= upp) 
}
ptexp <- function(q, rate, low, upp)
{
  PU <- pexp(upp, rate=rate)
  PL <- pexp(low, rate=rate)
  (pexp(q, rate)-PL) / (PU-PL) * (q >= low) * (q <= upp) + 1 * (q > upp)
}
n <- 200
x <- rexp(n); x <- x[x > .5 & x < 3]
f1 <- fitdist(x, "texp", method="mle", start=list(rate=3), fix.arg=list(low=min(x), upp=max(x)))
f2 <- fitdist(x, "texp", method="mle", start=list(rate=3), fix.arg=list(low=.5, upp=3))
gofstat(list(f1, f2))
par(mfrow=c(1,1), mar=c(4,4,2,1))
cdfcomp(list(f1, f2), do.points = FALSE, xlim=c(0, 3.5))

## ----message=FALSE, fig.height=3.5, fig.width=7-------------------------------
dtiexp <- function(x, rate, low, upp)
{
  PU <- pexp(upp, rate=rate, lower.tail = FALSE)
  PL <- pexp(low, rate=rate)
  dexp(x, rate) * (x >= low) * (x <= upp) + PL * (x == low) + PU * (x == upp)
}
ptiexp <- function(q, rate, low, upp)
  pexp(q, rate) * (q >= low) * (q <= upp) + 1 * (q > upp)
n <- 100; x <- pmax(pmin(rexp(n), 3), .5)
# the loglikelihood has a discontinous point at the solution
par(mar=c(4,4,2,1), mfrow=1:2)
llcurve(x, "tiexp", plot.arg="low", fix.arg = list(rate=2, upp=5), min.arg=0, max.arg=.5, lseq=200)
llcurve(x, "tiexp", plot.arg="upp", fix.arg = list(rate=2, low=0), min.arg=3, max.arg=4, lseq=200)

## ----fig.height=3.5, fig.width=7----------------------------------------------
(f1 <- fitdist(x, "tiexp", method="mle", start=list(rate=3, low=0, upp=20)))
(f2 <- fitdist(x, "tiexp", method="mle", start=list(rate=3), fix.arg=list(low=min(x), upp=max(x))))
gofstat(list(f1, f2))
par(mfrow=c(1,1), mar=c(4,4,2,1))
cdfcomp(list(f1, f2), do.points = FALSE, addlegend=FALSE, xlim=c(0, 3.5))
curve(ptiexp(x, 1, .5, 3), add=TRUE, col="blue", lty=3)
legend("bottomright", lty=1:3, col=c("red", "green", "blue", "black"), 
        legend=c("full MLE", "MLE fixed arg", "true CDF", "emp. CDF"))

## ----fig.height=4, fig.width=7------------------------------------------------
trueval <- c("min"=3, "max"=5)
x <- runif(n=500, trueval[1], trueval[2])

f1 <- fitdist(x, "unif")
delta <- .01
par(mfrow=c(1,1), mar=c(4,4,2,1))
llsurface(x, "unif", plot.arg = c("min", "max"), min.arg=c(min(x)-2*delta, max(x)-delta),
          max.arg=c(min(x)+delta, max(x)+2*delta), main="likelihood surface for uniform",
        loglik=FALSE)
abline(v=min(x), h=max(x), col="grey", lty=2)
points(f1$estimate[1], f1$estimate[2], pch="x", col="red")
points(trueval[1], trueval[2], pch="+", col="blue")
legend("bottomright", pch=c("+","x"), col=c("blue","red"), c("true", "fitted"))
delta <- .2
llsurface(x, "unif", plot.arg = c("min", "max"), min.arg=c(3-2*delta, 5-delta),
          max.arg=c(3+delta, 5+2*delta), main="log-likelihood surface for uniform")
abline(v=min(x), h=max(x), col="grey", lty=2)
points(f1$estimate[1], f1$estimate[2], pch="x", col="red")
points(trueval[1], trueval[2], pch="+", col="blue")
legend("bottomright", pch=c("+","x"), col=c("blue","red"), c("true", "fitted"))

## -----------------------------------------------------------------------------
dunif2 <- function(x, min, max) dunif(x, min, max)
punif2 <- function(q, min, max) punif(q, min, max)
f2 <- fitdist(x, "unif2", start=list(min=0, max=10), lower=c(-Inf, max(x)),
              upper=c(min(x), Inf))
print(c(logLik(f1), logLik(f2)), digits=7)
print(cbind(coef(f1), coef(f2)), digits=7)

## -----------------------------------------------------------------------------
x <- rbeta(1000, 3, 3)
dbeta2 <- function(x, shape, ...)
	dbeta(x, shape, shape, ...)
pbeta2 <- function(q, shape, ...)
	pbeta(q, shape, shape, ...)	
fitdist(x, "beta2", start=list(shape=1/2))

## -----------------------------------------------------------------------------
x <- rbeta(1000, .3, .3)
fitdist(x, "beta2", start=list(shape=1/2), optim.method="L-BFGS-B", lower=1e-2)	

## ----message=FALSE, fig.height=4, fig.width=6---------------------------------
require("mc2d")
x2 <- rpert(n=2e2, min=0, mode=1, max=2, shape=3/4)
eps <- sqrt(.Machine$double.eps)
f1 <- fitdist(x2, "pert", start=list(min=-1, mode=0, max=10, shape=1),
              lower=c(-Inf, -Inf, -Inf, 0), upper=c(Inf, Inf, Inf, Inf))
f2 <- fitdist(x2, "pert", start=list(mode=1, shape=1), 
              fix.arg=list(min=min(x2)-eps, max=max(x2)+eps),
              lower=c(min(x2), 0), upper=c(max(x2), Inf))
print(cbind(coef(f1), 
            c(f2$fix.arg["min"], coef(f2)["mode"], f2$fix.arg["max"], coef(f2)["shape"])), 
      digits=7)
gofstat(list(f1,f2))
par(mfrow=c(1,1), mar=c(4,4,2,1))
cdfcomp(list(f1,f2))

## ----fig.height=3, fig.width=6------------------------------------------------
set.seed(1234)
x <- rgamma(n = 100, shape = 2, scale = 1)
# fit of the good distribution
fgamma <- fitdist(x, "gamma")
# fit of a bad distribution
fexp <- fitdist(x, "exp")
g <- gofstat(list(fgamma, fexp), fitnames = c("gamma", "exp"))
par(mfrow=c(1,1), mar=c(4,4,2,1))
denscomp(list(fgamma, fexp), legendtext = c("gamma", "exp"))
# results of the tests
## chi square test (with corresponding table with theoretical and observed counts)
g$chisqpvalue
g$chisqtable
## Anderson-Darling test
g$adtest
## Cramer von  Mises test
g$cvmtest
## Kolmogorov-Smirnov test
g$kstest

## ----fig.height=3.5, fig.width=7----------------------------------------------
set.seed(1234)
x1 <- rpois(n = 100, lambda = 100)
f1 <- fitdist(x1, "norm")
g1 <- gofstat(f1)
g1$kstest

x2 <- rpois(n = 10000, lambda = 100)
f2 <- fitdist(x2, "norm")
g2 <- gofstat(f2)
g2$kstest

par(mfrow=c(1,2), mar=c(4,4,2,1))
denscomp(f1, demp = TRUE, addlegend = FALSE, main = "small sample")
denscomp(f2, demp = TRUE, addlegend = FALSE, main = "big sample")

## ----fig.height=3.5, fig.width=7----------------------------------------------
set.seed(1234)
x3 <- rpois(n = 500, lambda = 1)
f3 <- fitdist(x3, "norm")
g3 <- gofstat(f3)
g3$kstest

x4 <- rpois(n = 50, lambda = 1)
f4 <- fitdist(x4, "norm")
g4 <- gofstat(f4)
g4$kstest

par(mfrow=c(1,2), mar=c(4,4,2,1))
denscomp(f3, addlegend = FALSE, main = "big sample") 
denscomp(f4, addlegend = FALSE, main = "small sample")

## -----------------------------------------------------------------------------
g3$chisqtable
g3$chisqpvalue
g4$chisqtable
g4$chisqpvalue

## -----------------------------------------------------------------------------
set.seed(1234)
g <- rgamma(100, shape = 2, rate = 1)
(f <- fitdist(g, "gamma"))
(f0 <- fitdist(g, "exp"))
L <- logLik(f)
k <- length(f$estimate) # number of parameters of the complete distribution
L0 <- logLik(f0)
k0 <- length(f0$estimate) # number of parameters of the simplified distribution
(stat <- 2*L - 2*L0)
(critical_value <- qchisq(0.95, df = k - k0))
(rejected <- stat > critical_value)

## ----eval=FALSE---------------------------------------------------------------
# n <- 1e3
# x <- rlnorm(n)
# descdist(x)

## ----echo=FALSE, fig.width=5, fig.height=5------------------------------------
n <- 1e3
x <- rlnorm(n)
par(mar=c(4,4,2,1), mfrow=c(1,1))
descdist(x)

## ----echo=FALSE---------------------------------------------------------------
skewness.th <- function(sigma)
  (exp(sigma^2)+2)*sqrt(exp(sigma^2)-1)
kurtosis.th <- function(sigma)
  crossprod(1:3, exp(4:2*sigma^2))-3

## ----echo=FALSE, fig.width=7, fig.height=3.5----------------------------------
moment <- function(obs, k) 
  mean(scale(obs)^k)
skewness <- function(obs) 
  moment(obs, 3)
kurtosis <- function(obs) 
  moment(obs, 4)

n <- 5e6
somen <- rep(c(1, 2, 5), length=18) * rep(10^(1:6), each=3)
x <- rlnorm(n)

par(mar=c(4,4,2,1), mfrow=1:2)
plot(somen, sapply(somen, function(n) skewness(x[1:n])), type="b", ylab="skewness")
abline(a=skewness.th(1), b=0, col="grey")
plot(somen, sapply(somen, function(n) kurtosis(x[1:n])), type="b", ylab="skewness")
abline(a=kurtosis.th(1), b=0, col="grey")

## -----------------------------------------------------------------------------
dshiftlnorm <- function(x, mean, sigma, shift, log = FALSE) dlnorm(x+shift, mean, sigma, log=log)
pshiftlnorm <- function(q, mean, sigma, shift, log.p = FALSE) plnorm(q+shift, mean, sigma, log.p=log.p)
qshiftlnorm <- function(p, mean, sigma, shift, log.p = FALSE) qlnorm(p, mean, sigma, log.p=log.p)-shift
dshiftlnorm_no <- function(x, mean, sigma, shift) dshiftlnorm(x, mean, sigma, shift)
pshiftlnorm_no <- function(q, mean, sigma, shift) pshiftlnorm(q, mean, sigma, shift)

## -----------------------------------------------------------------------------
data(dataFAQlog1)
y <- dataFAQlog1
D <- 1-min(y)
f0 <- fitdist(y+D, "lnorm")
start <- list(mean=as.numeric(f0$estimate["meanlog"]),  
              sigma=as.numeric(f0$estimate["sdlog"]), shift=D)
# works with BFGS, but not Nelder-Mead
f <- fitdist(y, "shiftlnorm", start=start, optim.method="BFGS")
summary(f)

## ----error=FALSE--------------------------------------------------------------
f2 <- try(fitdist(y, "shiftlnorm_no", start=start, optim.method="BFGS"))
print(attr(f2, "condition"))

## -----------------------------------------------------------------------------
sum(log(dshiftlnorm_no(y, 0.16383978, 0.01679231, 1.17586600 )))
log(prod(dshiftlnorm_no(y, 0.16383978, 0.01679231, 1.17586600 )))
sum(dshiftlnorm(y, 0.16383978, 0.01679231, 1.17586600, TRUE ))

## ----eval=FALSE, echo=TRUE----------------------------------------------------
# double dlnorm(double x, double meanlog, double sdlog, int give_log)
# {
#     double y;
# 
# #ifdef IEEE_754
#     if (ISNAN(x) || ISNAN(meanlog) || ISNAN(sdlog))
# 	return x + meanlog + sdlog;
# #endif
#     if(sdlog <= 0) {
# 	if(sdlog < 0) ML_ERR_return_NAN;
# 	// sdlog == 0 :
# 	return (log(x) == meanlog) ? ML_POSINF : R_D__0;
#     }
#     if(x <= 0) return R_D__0;
# 
#     y = (log(x) - meanlog) / sdlog;
#     return (give_log ?
# 	    -(M_LN_SQRT_2PI   + 0.5 * y * y + log(x * sdlog)) :
# 	    M_1_SQRT_2PI * exp(-0.5 * y * y)  /	 (x * sdlog));
#     /* M_1_SQRT_2PI = 1 / sqrt(2 * pi) */
# 
# }

## ----eval=FALSE, echo=TRUE----------------------------------------------------
# -(M_LN_SQRT_2PI   + 0.5 * y * y + log(x * sdlog))

## ----eval=FALSE, echo=TRUE----------------------------------------------------
# M_1_SQRT_2PI * exp(-0.5 * y * y)  /	 (x * sdlog))

## -----------------------------------------------------------------------------
f2 <- fitdist(y, "shiftlnorm", start=start, lower=c(-Inf, 0, -min(y)), optim.method="Nelder-Mead")
summary(f2)
print(cbind(BFGS=f$estimate, NelderMead=f2$estimate))


## -----------------------------------------------------------------------------
data(dataFAQscale1)
head(dataFAQscale1)
summary(dataFAQscale1)

## -----------------------------------------------------------------------------
for(i in 6:0)
cat(10^i, try(mledist(dataFAQscale1*10^i, "cauchy")$estimate), "\n")

## -----------------------------------------------------------------------------
data(dataFAQscale2)
head(dataFAQscale2)
summary(dataFAQscale2)

## -----------------------------------------------------------------------------
for(i in 0:5)
cat(10^(-2*i), try(mledist(dataFAQscale2*10^(-2*i), "cauchy")$estimate), "\n")

## ----scalenormal, echo=TRUE, warning=FALSE------------------------------------
set.seed(1234)
x <- rnorm(1000, 1, 2)
fitdist(x, "norm", lower=c(-Inf, 0))

## ----shapeburr, echo=TRUE, warning=FALSE--------------------------------------
x <- rburr(1000, 1, 2, 3)
fitdist(x, "burr", lower=c(0, 0, 0), start=list(shape1 = 1, shape2 = 1, 
  rate = 1))

## ----probgeom, echo=TRUE, warning=FALSE---------------------------------------
x <- rgeom(1000, 1/4)
fitdist(x, "geom", lower=0, upper=1)

## ----shiftexp, echo=TRUE, warning=FALSE---------------------------------------
dsexp <- function(x, rate, shift)
  dexp(x-shift, rate=rate)
psexp <- function(x, rate, shift)
  pexp(x-shift, rate=rate)
rsexp <- function(n, rate, shift)
  rexp(n, rate=rate)+shift
x <- rsexp(1000, 1/4, 1)
fitdist(x, "sexp", start=list(rate=1, shift=0), upper= c(Inf, min(x)))

## ----message=FALSE------------------------------------------------------------
require("GeneralizedHyperbolic")
myoptim <- function(fn, par, ui, ci, ...)
{
  res <- constrOptim(f=fn, theta=par, method="Nelder-Mead", ui=ui, ci=ci, ...)
  c(res, convergence=res$convergence, value=res$objective, par=res$minimum, hessian=res$hessian)
}
x <- rnig(1000, 3, 1/2, 1/2, 1/4)
ui <- rbind(c(0,1,0,0), c(0,0,1,0), c(0,0,1,-1), c(0,0,1,1))
ci <- c(0,0,0,0)
fitdist(x, "nig", custom.optim=myoptim, ui=ui, ci=ci, start=list(mu = 0, delta = 1, alpha = 1, beta = 0))

## ----fig.height=3.5, fig.width=7----------------------------------------------
pgeom(0:3, prob=1/2)
qgeom(c(0.3, 0.6, 0.9), prob=1/2)
par(mar=c(4,4,2,1), mfrow=1:2)
curve(pgeom(x, prob=1/2), 0, 10, n=301, main="c.d.f.")
curve(qgeom(x, prob=1/2), 0, 1, n=301, main="q.f.")

## -----------------------------------------------------------------------------
x <- c(0, 0, 0, 0, 1, 1, 3, 2, 1, 0, 0)
median(x[-1]) #sample size 10
median(x) #sample size 11

## ----fig.height=4, fig.width=7------------------------------------------------
x <- rgeom(100, 1/3)
L2 <- function(p)
  (qgeom(1/2, p) - median(x))^2
L2(1/3) #theoretical value
par(mfrow=c(1,1), mar=c(4,4,2,1))
curve(L2(x), 0.10, 0.95, xlab=expression(p), ylab=expression(L2(p)), main="squared differences", n=301)

## -----------------------------------------------------------------------------
fitdist(x, "geom", method="qme", probs=1/2, start=list(prob=1/2), control=list(trace=1, REPORT=1))
fitdist(x, "geom", method="qme", probs=1/2, start=list(prob=1/20), control=list(trace=1, REPORT=1))

## -----------------------------------------------------------------------------
fitdist(x, "geom", method="qme", probs=1/2, optim.method="SANN", start=list(prob=1/20))
fitdist(x, "geom", method="qme", probs=1/2, optim.method="SANN", start=list(prob=1/2))

## ----fig.height=4, fig.width=7------------------------------------------------
x <- rpois(100, lambda=7.5)
L2 <- function(lam)
  (qpois(1/2, lambda = lam) - median(x))^2
par(mfrow=c(1,1), mar=c(4,4,2,1))
curve(L2(x), 6, 9, xlab=expression(lambda), ylab=expression(L2(lambda)), main="squared differences", n=201)

## -----------------------------------------------------------------------------
fitdist(x, "pois", method="qme", probs=1/2, start=list(lambda=2))
fitdist(x, "pois", method="qme", probs=1/2, optim.method="SANN", start=list(lambda=2))

## -----------------------------------------------------------------------------
#NB: using the logical vector condition is the optimal way to compute pdf and cdf
dtgamma <- function(x, shape, rate, low, upp)
{
  PU <- pgamma(upp, shape = shape, rate = rate)
  PL <- pgamma(low, shape = shape, rate = rate)
  dgamma(x, shape, rate) / (PU - PL) * (x >= low) * (x <= upp) 
}
ptgamma <- function(q, shape, rate, low, upp)
{
  PU <- pgamma(upp, shape = shape, rate = rate)
  PL <- pgamma(low, shape = shape, rate = rate)
  (pgamma(q, shape, rate) - PL) / (PU - PL) * (q >= low) * (q <= upp) + 1 * (q > upp)
}

## -----------------------------------------------------------------------------
rtgamma <- function(n, shape, rate, low=0, upp=Inf, maxit=10)
{
  stopifnot(n > 0)
  if(low > upp)
    return(rep(NaN, n))
  PU <- pgamma(upp, shape = shape, rate = rate)
  PL <- pgamma(low, shape = shape, rate = rate)
  #simulate directly expected number of random variate
  n2 <- n/(PU-PL)
  x <- rgamma(n, shape=shape, rate=rate)
  x <- x[x >= low & x <= upp]
  i <- 0 
  while(length(x) < n && i < maxit)
  {
    n2 <- (n-length(x))/(PU-PL)
    y <- rgamma(n2, shape=shape, rate=rate)
    x <- c(x, y[y >= low & y <= upp])
    i <- i+1
  }
  x[1:n]
}

## -----------------------------------------------------------------------------
n <- 100 ; shape <- 11 ; rate <- 3 ; x0 <- 5
x <- rtgamma(n, shape = shape, rate = rate, low=x0)

## -----------------------------------------------------------------------------
fit.NM.2P <- fitdist(
  data = x,
  distr = "tgamma",
  method = "mle",
  start = list(shape = 10, rate = 10),
  fix.arg = list(upp = Inf, low=x0),
  lower = c(0, 0), upper=c(Inf, Inf))
fit.NM.3P <- fitdist(
  data = x,
  distr = "tgamma",
  method = "mle",
  start = list(shape = 10, rate = 10, low=1),
  fix.arg = list(upp = Inf),
  lower = c(0, 0, -Inf), upper=c(Inf, Inf, min(x)))

## ----echo=FALSE---------------------------------------------------------------
showpar <- function(matpar)
{
  matpar <- rbind(matpar,
                  "mean sq. error"= apply(matpar, 2, function(x) mean( (x - matpar[, "true value"])^2 )),
                  "rel. error"= apply(matpar, 2, function(x) mean( abs(x - matpar[, "true value"])/matpar[, "true value"] ))
  )
  matpar
}
matpar <- cbind(
    "fit3P" = coef(fit.NM.3P),
    "fit2P" = c(coef(fit.NM.2P), x0),
    "true value" = c(shape, rate, x0))
showpar(matpar)

## ----fig.height=4, fig.width=7, echo=FALSE------------------------------------
par(mar=c(4,4,2,1), mfrow=c(1,1))
cdfcomp(list(fit.NM.2P, fit.NM.3P), do.points = FALSE, addlegend = FALSE)
curve(ptgamma(x, shape=shape, rate=rate, low=x0, upp=Inf), add=TRUE, col="blue", lty=3)
legend("bottomright", lty=c(1,1,2,3), col=1:4, bty="n",
       c("empirical", "2-param optimization", "3-param optimization", "theoretical"))

## ----echo=FALSE, warning=FALSE, message=FALSE, fig.height=3.5, fig.width=7----
#make trace in dtgamma
dtgamma <- function(x, shape, rate, low, upp)
{
  PU <- pgamma(upp, shape = shape, rate = rate)
  PL <- pgamma(low, shape = shape, rate = rate)
  if(myfile != "")
    cat(shape, rate, "\n", file=myfile, append = TRUE)
  dgamma(x, shape, rate) / (PU - PL) * (x >= low) * (x <= upp) 
}
myfile <- "MLE-NelderMead-dataset.txt"
fit.NM.2P <- fitdist(
  data = x,
  distr = "tgamma",
  method = "mle",
  start = list(shape = 10, rate = 10),
  fix.arg = list(low = x0, upp = Inf),
  #control=list(trace=1, REPORT=1),
  optim.method="Nelder-Mead",
  lower = c(0, 0), upper=c(Inf, Inf))

myfile <- "MLE-BFGS-dataset.txt"
fit.BFGS.2P <- fitdist(
  data = x,
  distr = "tgamma",
  method = "mle",
  start = list(shape = 10, rate = 10),
  fix.arg = list(low = x0, upp = Inf),
  optim.method="L-BFGS-B",
  lower = c(1e-2, 1e-2))

myfile <- "" #stop put traces in txt files
MLE.BFGS.dataset.z.iter <- read.table("MLE-BFGS-dataset.txt", sep =" ")[, 1:2]
MLE.NM.dataset.z.iter <- read.table("MLE-NelderMead-dataset.txt", sep =" ")[, 1:2]
maxarg <- rbind(apply(MLE.BFGS.dataset.z.iter, 2, max), apply(MLE.NM.dataset.z.iter, 2, max))
maxarg <- apply(maxarg, 2, max)

par(mar=c(4,4,2,1), mfrow=1:2)
#BFGS
llsurface(x, "tgamma", fix.arg = list(low = x0, upp = Inf), plot.arg=c("shape", "rate"),
          min.arg=c(0, 0), max.arg=maxarg, main="BFGS iterates")
points(shape, rate, pch="O", col="red")
#add BFGS iterates
points(MLE.BFGS.dataset.z.iter, col="grey", pch="+")
points(coef(fit.BFGS.2P)["shape"], coef(fit.BFGS.2P)["rate"], col="black", pch="+")

#Nelder Mead
llsurface(x, "tgamma", fix.arg = list(low = x0, upp = Inf), plot.arg=c("shape", "rate"),
          min.arg=c(0, 0), max.arg=maxarg, main="Nelder-Mead iterates")
points(shape, rate, pch="O", col="red")
#add NM iterates
points(MLE.NM.dataset.z.iter, col="grey", pch="+")
points(coef(fit.NM.2P)["shape"], coef(fit.NM.2P)["rate"], col="black", pch="+")

#remove traces
invisible(file.remove("MLE-NelderMead-dataset.txt"))
invisible(file.remove("MLE-BFGS-dataset.txt"))

## ----echo=FALSE, fig.width=4, fig.height=4------------------------------------
getrate <- function(shape)
{
  myfit <- mledist(data = x,
    distr = "tgamma",
    start = list(rate = 1),
    fix.arg = list(low = x0, upp = Inf, shape = shape))
  c("rate"=myfit$estimate, "loglik"=myfit$loglik)
}
shapehat <- seq(.1, 15, by= .1)
ratellhat <- t(sapply(shapehat, getrate))
par(mar=c(4,4,2,1), mfrow=c(1,1))
plot(shapehat, ratellhat[, "loglik"], type="l", ylab="fitted log-likelihood", xlab="shape")
points(shape, ratellhat[which(shapehat == shape), "loglik"], col="red", pch="O")
legend("bottom", lty=c(1, NA), col=c("black", "red"), c("fitted log-lik", "true value"),
       pch=c(NA, "O"), bty="n")

## -----------------------------------------------------------------------------
fit.gamma <- fitdist(
  data = x-x0,
  distr = "gamma",
  method = "mle")

## ----echo=FALSE---------------------------------------------------------------
matpar <- cbind(
    "fit3P" = coef(fit.NM.3P),
    "fit2P orig. data" = c(coef(fit.NM.2P), x0),
    "fit2P shift data" = c(coef(fit.gamma), x0),
    "true value" = c(shape, rate, x0))
showpar(matpar)

## ----echo=FALSE---------------------------------------------------------------
x <- rtgamma(n*10, shape = shape, rate = rate, low=x0)
fit.NM.2P <- fitdist(
  data = x,
  distr = "tgamma",
  method = "mle",
  start = list(shape = 10, rate = 10),
  fix.arg = list(upp = Inf, low=x0),
  lower = c(0, 0), upper=c(Inf, Inf))
fit.NM.3P <- fitdist(
  data = x,
  distr = "tgamma",
  method = "mle",
  start = list(shape = 10, rate = 10, low=1),
  fix.arg = list(upp = Inf),
  lower = c(0, 0, -Inf), upper=c(Inf, Inf, min(x)))
matpar <- cbind(
    "fit3P" = coef(fit.NM.3P),
    "fit2P orig. data" = c(coef(fit.NM.2P), x0),
    "true value" = c(shape, rate, x0))
showpar(matpar)

## ----fig.height=4, fig.width=4, warning = FALSE-------------------------------
set.seed(1234)
n <- rnorm(30, mean = 10, sd = 2)
fn <- fitdist(n, "norm")
bn <- bootdist(fn)
bn$CI
fn$estimate + cbind("estimate"= 0, "2.5%"= -1.96*fn$sd, "97.5%"= 1.96*fn$sd)
par(mfrow=c(1,1), mar=c(4,4,2,1))
llplot(fn, back.col = FALSE)

## ----fig.height=4, fig.width=4, warning = FALSE-------------------------------
set.seed(1234)
g <- rgamma(30, shape = 0.1, rate = 10)
fg <- fitdist(g, "gamma")
bg <- bootdist(fg)
bg$CI
fg$estimate + cbind("estimate"= 0, "2.5%"= -1.96*fg$sd, "97.5%"= 1.96*fg$sd)
par(mfrow=c(1,1), mar=c(4,4,2,1))
llplot(fg, back.col = FALSE)

## ----fig.height=4, fig.width=7, warning = FALSE-------------------------------
data(salinity)
log10LC50 <- log10(salinity)
fit <- fitdistcens(log10LC50, "norm", control=list(trace=1))
# Bootstrap 
bootsample <- bootdistcens(fit, niter = 101)
#### We used only 101 iterations in that example to limit the calculation time but
#### in practice you should take at least 1001 bootstrap iterations
# Calculation of the quantile of interest (here the 5 percent hazard concentration)
(HC5 <- quantile(bootsample, probs = 0.05))
# visualizing pointwise confidence intervals on other quantiles
par(mfrow=c(1,1), mar=c(4,4,2,1))
CIcdfplot(bootsample, CI.output = "quantile", CI.fill = "pink", xlim = c(0.5,2), main = "")

## -----------------------------------------------------------------------------
exposure <- 1.2
# Bootstrap sample of the PAF at this exposure
PAF <- pnorm(exposure, mean = bootsample$estim$mean, sd = bootsample$estim$sd)
# confidence interval from 2.5 and 97.5 percentiles
quantile(PAF, probs = c(0.025, 0.975))

## ----fig.height=4, fig.width=7, warning = FALSE-------------------------------
f.ln.MME <- fitdist(rlnorm(1000), "lnorm", method = "mme", order = 1:2)
# Bootstrap 
b.ln.50 <- bootdist(f.ln.MME, niter = 50)
b.ln.100 <- bootdist(f.ln.MME, niter = 100)
b.ln.200 <- bootdist(f.ln.MME, niter = 200)
b.ln.500 <- bootdist(f.ln.MME, niter = 500)

d1 <- density(b.ln.50, b.ln.100, b.ln.200, b.ln.500)
plot(d1)

## ----fig.height=7, fig.width=7, warning = FALSE-------------------------------
data(groundbeef)
serving <- groundbeef$serving
fit <- fitdist(serving, "gamma")
par(mfrow = c(2,2), mar = c(4, 4, 1, 1))
denscomp(fit, addlegend = FALSE, main = "", xlab = "serving sizes (g)", fitcol = "orange")
qqcomp(fit, addlegend = FALSE, main = "", fitpch = 16, fitcol = "grey", line01lty = 2)
cdfcomp(fit, addlegend = FALSE, main = "", xlab = "serving sizes (g)", fitcol = "orange", lines01 = TRUE)
ppcomp(fit, addlegend = FALSE, main = "", fitpch = 16, fitcol = "grey", line01lty = 2)

## ----fig.height= 4, fig.width= 7, warning = FALSE-----------------------------
require("ggplot2")
fitW <- fitdist(serving, "weibull")
fitln <- fitdist(serving, "lnorm")
fitg <- fitdist(serving, "gamma")
dcomp <- denscomp(list(fitW, fitln, fitg), legendtext = c("Weibull", "lognormal", "gamma"),
    xlab = "serving sizes (g)", xlim = c(0, 250), 
    fitcol = c("red", "green", "orange"), fitlty = 1, fitlwd = 1:3, 
    xlegend = "topright", plotstyle = "ggplot", addlegend = FALSE)
dcomp + ggplot2::theme_minimal() + ggplot2::ggtitle("Ground beef fits")

## ----fig.height= 6, fig.width= 7, warning = FALSE-----------------------------
data(endosulfan)
ATV <- subset(endosulfan, group == "NonArthroInvert")$ATV
taxaATV <- subset(endosulfan, group == "NonArthroInvert")$taxa
f <- fitdist(ATV, "lnorm")
cdfcomp(f, xlogscale = TRUE, main = "Species Sensitivty Distribution", 
    xlim = c(1, 100000), name.points = taxaATV, 
    addlegend = FALSE, plotstyle = "ggplot")

## -----------------------------------------------------------------------------
dtoy <- data.frame(left = c(NA, 2, 4, 6, 9.7, 10), right = c(1, 3, 7, 8, 9.7, NA))
dtoy

## -----------------------------------------------------------------------------
exitage <- c(81.1,78.9,72.6,67.9,60.1,78.3,83.4,66.9,74.8,80.5,75.6,67.1,
             75.3,82.8,70.1,85.4,74,70,71.6,76.5)
death <- c(0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0)

## -----------------------------------------------------------------------------
svdata <- Surv2fitdistcens(exitage, event=death)

## ----fig.height= 4, fig.width= 7----------------------------------------------
flnormc <- fitdistcens(svdata, "lnorm")
fweic <- fitdistcens(svdata, "weibull")
par(mfrow=c(1,1), mar=c(4,4,2,1))
cdfcompcens(list(fweic, flnormc), xlim=range(exitage), xlegend = "topleft")

## ----fig.height= 3.5, fig.width= 7--------------------------------------------
par(mfrow = c(1,2), mar = c(3, 4, 3, 0.5))
plotdistcens(dtoy, NPMLE = FALSE)
data(smokedfish)
dsmo <-  log10(smokedfish)
plotdistcens(dsmo, NPMLE = FALSE)

## ----fig.height= 7, fig.width= 7----------------------------------------------
par(mfrow = c(2, 2),  mar = c(3, 4, 3, 0.5))
# Turnbull algorithm with representation of middle points of equivalence classes
plotdistcens(dsmo, NPMLE.method = "Turnbull.middlepoints", xlim = c(-1.8, 2.4))
# Turnbull algorithm with representation of equivalence classes as intervals
plotdistcens(dsmo, NPMLE.method = "Turnbull.intervals")
# Wang algorithm with representation of equivalence classes as intervals
plotdistcens(dsmo, NPMLE.method = "Wang")

## ----echo = FALSE, fig.height= 6, fig.width= 7--------------------------------
d <- data.frame(left = c(NA, 2, 4, 6, 9.5, 10), right = c(1, 3, 7, 8, 9.5, NA))
addbounds <- function(d)
{
  xbounds <- c(d$left, d$right)
  xboundsnotNA <- xbounds[!is.na(xbounds)]
  abline(v = xboundsnotNA, col = "grey")
}
addLR <- function(d)
{
  Lbounds <- d$left[!is.na(d$left)]
  Rbounds <- d$right[!is.na(d$right)]
  range <- range(c(Lbounds,Rbounds)) 
  eps <- (range[2] - range[1]) * 0.01
  text(x = Lbounds-eps, y = 0.05, labels = "L", col = "red", cex = 0.75)
  text(x = Rbounds+eps, y = 0.05, labels = "R", col = "red", cex = 0.75)
}
addeq <- function(deq)
{
  left <- deq$left
  left[is.na(left)] <- -100
  right <- deq$right
  right[is.na(right)] <- 100
  rect(left, -2, right, 2, density = 10)
}
par(mfrow = c(2,1), mar = c(2, 4, 3, 0.5))
# First step
plotdistcens(d, NPMLE = FALSE, lwd = 2, col = "blue", main = "Step 1 : identification of equivalence classes")
addbounds(d)
addLR(d)
deq <- data.frame(left = c(NA, 2, 6, 9.5, 10), right = c(1, 3, 7,9.5, NA))
addeq(deq)
# Second step
plotdistcens(d, lwd = 2, main = "Step 2 : estimation of mass probabilities")

## -----------------------------------------------------------------------------
fnorm <- fitdistcens(dsmo,"norm")
flogis <- fitdistcens(dsmo,"logis")
# comparison of AIC values
summary(fnorm)$aic
summary(flogis)$aic

## ----fig.height= 7, fig.width= 7----------------------------------------------
par(mar = c(2, 4, 3, 0.5))
plot(fnorm)

## ----fig.height= 4, fig.width= 4----------------------------------------------
par(mfrow=c(1,1), mar=c(4,4,2,1))
cdfcompcens(list(fnorm, flogis), fitlty = 1)
qqcompcens(list(fnorm, flogis))
ppcompcens(list(fnorm, flogis))

## ----fig.height= 4, fig.width= 7----------------------------------------------
qqcompcens(list(fnorm, flogis), lwd = 2, plotstyle = "ggplot",
  fitcol = c("red", "green"), fillrect = c("pink", "lightgreen"),
  legendtext = c("normal distribution", "logistic distribution"))
Any scripts or data that you put into this service are public.
fitdistrplus documentation built on April 3, 2025, 7:35 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
fitdistrplus
Help to Fit of a Parametric Distribution to Non-Censored or Censored Data

inst/doc/FAQ.R
In fitdistrplus: Help to Fit of a Parametric Distribution to Non-Censored or Censored Data

Try the fitdistrplus package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

fitdistrplus Help to Fit of a Parametric Distribution to Non-Censored or Censored Data

inst/doc/FAQ.R In fitdistrplus: Help to Fit of a Parametric Distribution to Non-Censored or Censored Data

Try the fitdistrplus package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

fitdistrplus
Help to Fit of a Parametric Distribution to Non-Censored or Censored Data

inst/doc/FAQ.R
In fitdistrplus: Help to Fit of a Parametric Distribution to Non-Censored or Censored Data