## The complete and the restricted log-likelihoods
complete_log_likelihood <- function (mu, theta, p, x, z, i, x_i, t) {
log_density <- ditnb(x = x, mu = mu, theta = theta, p = 0, i = i, t = t, return_log = TRUE)
if (p == 0) {
res <- (1 - z) * log_density
}
else {
res <- z * log(p) * sum(as.numeric(x_i)) + (1 - z) * (log(1 - p) + log_density)
}
return(sum(res))
}
restricted_log_likelihood <- function(pars, x, p, i, t) {
mu <- pars[1]
theta <- pars[2]
log_likelihood <- ditnb(x = x, mu = mu, theta = theta, p = p, i = i, t = t, return_log = TRUE)
return(-sum(log_likelihood))
}
#' Control function for the \link{em_itnb} function.
#'
#' @description Creates a list of default options.
#'
#' @param trace Numeric (>= 0): showing a trace every \code{trace} number of iterations.
#' @param tolerance Numeric: Convergence tolerance.
#' @param iteration_min Numeric (>= 0): The minimum number of allowed iterations.
#' @param iteration_max Numeric (>= \code{iteration_min}): The maximum number of allowed iterations.
#' @param save_trace TRUE/FALSE: should the entire trace be stored?
#'
#' @return A list of default arguments for the \link{em_itnb} function.
#' @export
em_itnb_control <- function(trace = 0L, tolerance = 1e-6, iteration_min = 5L, iteration_max = 10000L, save_trace = FALSE) {
if (!is.numeric(trace)) {
stop("'trace' has to be numeric.")
}
trace <- ceiling(trace)
if (!is.numeric(tolerance)) {
stop("'tolerance' has to be numeric.")
}
if (tolerance <= .Machine$double.eps) {
stop("'tolerance' has to be bigger than two times the machine epsilon.")
}
if (!is.numeric(iteration_min)) {
stop("'iteration_min' has to be numeric.")
}
iteration_min <- ceiling(iteration_min)
if (!is.numeric(iteration_max)) {
stop("'iteration_min' has to be numeric.")
}
iteration_max <- ceiling(iteration_max)
if (iteration_min > iteration_max) {
stop("'iteration_max' has be larger than 'iteration_min'.")
}
if (!is.logical(save_trace)) {
stop("'save_trace' has to be logical.")
}
res <- list(trace = trace, tolerance = tolerance, iteration_max = iteration_max,
iteration_min = iteration_min, save_trace = save_trace)
return(res)
}
#' @title itnb parameter optimisation
#'
#' @description Parameter optimisation for data generated by a r.v. following an itnb distribution using the ENM algorithm.
#'
#' @param x Vector: Observed counts.
#' @param i Numeric: The inflation point.
#' @param t Numeric: The truncation point.
#' @param control List: A control object, see \link{em_itnb_control} for details.
#'
#' @return An object of class \link{itnb-object}.
#'
#' @export
em_itnb <- function(x, i, t, control = list()) {
UseMethod("em_itnb")
}
#' @rdname em_itnb
#' @method em_itnb numeric
#'
#' @example inst/examples/em_itnb_example.R
#'
#' @export
em_itnb.numeric <- function(x, i, t, control = list()) {
control <- do.call(em_itnb_control, control)
if (!is.numeric(i)) {
stop("'i' has to be numeric.")
}
i <- ceiling(i)
if (!is.numeric(t)) {
stop("'t' has to be numeric.")
}
t <- ceiling(t)
x_i <- x == i
p <- mean(x_i)
mu <- mean(x[!x_i])
var_x <- var(x[!x_i])
theta <- ifelse(var_x > mu, mu^2 / (var_x - mu), 100)
z_star <- (p * as.numeric(x_i)) /
(p * as.numeric(x_i) + (1 - p) * ditnb(x = x, mu = mu, theta = theta, p = 0, i = i, t = t, return_log = FALSE))
complete_log_likelihood <- complete_log_likelihood(
mu = mu, theta = theta, p = p, x = x,
z = z_star, i = i, x_i = x_i, t = t
)
not_converged <- TRUE
if (control$trace > 0) {
cat("Iteration :", 0, "\t Current log-likelihood:", complete_log_likelihood, "\t Absolute change in log-likelihood:", NA, "\n")
cat("\t Parameters (Initial): ", "\t mu =", mu, "\t theta =", theta, "\t p =", p, "\n")
}
if (control$save_trace) {
trace_list <- vector("list", control$iteration_max)
trace_list[[1]] <- data.frame(
Iteration = 0,
mu = mu,
theta = theta,
p = p,
LogLikelihood = complete_log_likelihood,
AbsoluteChange = NA
)
}
j = 1
while (not_converged) {
## E-step
z_star <- (p * as.numeric(x_i)) /
(p * as.numeric(x_i) + (1 - p) * ditnb(x = x, mu = mu, theta = theta, p = 0, i = i, t = t, return_log = FALSE))
## M-step
p <- sum(z_star * as.numeric(x_i)) / sum(1 - z_star * (1 - as.numeric(x_i)))
pars <- optim(
par = c(mu, theta),
fn = restricted_log_likelihood,
x = x,
p = p,
i = i,
t = t,
method = "L-BFGS-B",
lower = c(t, .Machine$double.eps),
upper = c(Inf, Inf),
control = list(trace = FALSE)
)$par
mu <- pars[1]
theta <- pars[2]
## Updating convergence results
complete_log_likelihood_old <- complete_log_likelihood
complete_log_likelihood <- complete_log_likelihood(mu = mu, theta = theta, p = p, x = x, z = z_star, i = i, x_i = x_i, t = t)
change_log_likelihood <- abs(complete_log_likelihood - complete_log_likelihood_old)
not_converged <- ifelse(j < control$iteration_min, TRUE, (change_log_likelihood > control$tolerance) && (j < control$iteration_max))
## Tracing
if ((control$trace > 0) & (((j %% control$trace) == 0) || (!not_converged))) {
cat("Iteration :", j, "\t Current log-likelihood:", complete_log_likelihood, "\t Absolute change in log-likelihood:", change_log_likelihood, "\n")
cat("\t Parameters: ", "\t mu =", mu, "\t theta =", theta, "\t p =", p, "\n")
}
if (control$save_trace) {
trace_list[[j + 1]] <- data.frame(
Iteration = j,
mu = mu,
theta = theta,
p = p,
LogLikelihood = complete_log_likelihood,
AbsoluteChange = change_log_likelihood
)
}
j = j + 1
}
returned_trace <- NULL
if (control$save_trace) {
trace_list <- trace_list[seq_len(j)]
returned_trace <- do.call("rbind", trace_list)
}
res <- list(
n = length(x),
x = x,
i = i,
t = t,
mu = mu,
theta = theta,
p = p,
log_likelihood = complete_log_likelihood,
converged = !not_converged,
trace = returned_trace
)
class(res) <- "itnb"
return(res)
}
#' Confidence envelopes of itnb-object.
#'
#' @description Simulated confidence envelopes of the parameters estimated by the \link{em_itnb} function.
#'
#' @param object An \link{itnb-object}.
#' @param level Numeric: The confidence level. If left as \code{NULL} all parametric bootstrap simulations are returned.
#' @param trace TRUE/FALSE: should a trace be shown?
#' @param nr_simulations Numeric: The number of simulations used to create the confidence envelopes.
#' @param parametric TRUE/FALSE: should the envelopes be simulated using the parametric bootstrap?
#' @param plot TRUE/FALSE: should a density plot of the parameters be returned?
#'
#' @return If \code{level = NULL} a matrix with bootstrap simulations, otherwise a matrix of lower and upper confidence limits for each parameter.
#'
#' @export
simulate_ci <- function(object, level = 0.95, trace = TRUE, nr_simulations = 2000, parametric = FALSE, plot = FALSE) {
UseMethod("simulate_ci")
}
#' @rdname simulate_ci
#' @method simulate_ci itnb
#'
#' @example inst/examples/simulation_itnb_example.R
#'
#' @export
simulate_ci.itnb <- function(object, level = 0.95, trace = TRUE, nr_simulations = 2000, parametric = FALSE, plot = FALSE) {
##
n <- object$n
x <- object$x
i <- object$i
t <- object$t
##
mu <- object$mu
theta <- object$theta
p <- object$p
##
mu_e <- rep(NA, nr_simulations)
theta_e <- rep(NA, nr_simulations)
p_e <- rep(NA, nr_simulations)
if (trace) {
pb <- progress_bar$new(
format = paste0(ifelse(parametric, "Parametric", "Non-parametric"), " bootstrap (samples = ", nr_simulations, ")", ": [:bar] :percent Eta: :eta"),
total = nr_simulations,
clear = FALSE,
width = 120
)
}
for (j in seq_len(nr_simulations)) {
if (trace)
pb$tick()
if (parametric) {
x_j <- ritnb(n = n, mu = mu, theta = theta, p = p, i = i, t = t)
}
else {
x_j <- x[sample(n, n, replace = TRUE)]
}
pars_j <- em_itnb(x = x_j, i = i, t = t)
mu_e[j] <- pars_j$mu
theta_e[j] <- pars_j$theta
p_e[j] <- pars_j$p
}
if (is.null(level)) {
complete_data_frame <- cbind(mu_e, theta_e, p_e)
colnames(complete_data_frame) <- c("mu", "theta", "p")
res_data_frame <- complete_data_frame
}
else {
alpha <- (1 - level) / 2
quantiles_mu <- quantile(mu_e, probs = c(alpha, 1 - alpha))
quantiles_theta <- quantile(theta_e, probs = c(alpha, 1 - alpha))
quantiles_p <- quantile(p_e, probs = c(alpha, 1 - alpha))
confint_data_frame <- rbind(quantiles_mu, quantiles_theta, quantiles_p)
rownames(confint_data_frame) <- c("mu", "theta", "p")
res_data_frame <- confint_data_frame
}
if (plot) {
par(mfrow = c(3, 1))
hist(mu_e, breaks = "fd", xlab = bquote(mu), ylab = "Density", probability = TRUE, main = paste(ifelse(parametric, "Parametric", "Non-parametric"), "bootstrap samples"), cex.lab = 1.5, cex.main = 1.5);
if (!is.null(level)) abline(v = quantiles_mu[1], col = "dodgerblue2", lwd = 2); abline(v = quantiles_mu[2], col = "dodgerblue2", lwd = 2)
hist(theta_e, breaks = "fd", xlab = bquote(theta), ylab = "Density", probability = TRUE, main = paste(ifelse(parametric, "Parametric", "Non-parametric"), "bootstrap samples"), cex.lab = 1.5, cex.main = 1.5)
if (!is.null(level)) abline(v = quantiles_theta[1], col = "dodgerblue2", lwd = 2); abline(v = quantiles_theta[2], col = "dodgerblue2", lwd = 2)
hist(p_e, breaks = "fd", xlab = bquote(pi), ylab = "Density", probability = TRUE, main = paste(ifelse(parametric, "Parametric", "Non-parametric"), "bootstrap samples"), cex.lab = 1.5, cex.main = 1.5)
if (!is.null(level)) abline(v = quantiles_p[1], col = "dodgerblue2", lwd = 2); abline(v = quantiles_p[2], col = "dodgerblue2", lwd = 2)
par(mfrow = c(1, 1))
}
return(res_data_frame)
}
#' Plot parameter trace of an \link{itnb-object}
#'
#' @description A function plotting the EM parameter trace returned by the \link{em_itnb} function. Note: the function can only be used if the argument \code{save_trace} in the \link{em_itnb_control} function was set to \code{TRUE}.
#'
#' @param x An \link{itnb-object}.
#' @param ... Additional arguments passed to the \link[graphics]{plot} function.
#'
#' @export
plot.itnb <- function(x, ...) {
if (is.null(x$trace))
stop("The 'trace' tibble was not found. Set 'save_trace = TRUE' in the 'em_itnb_control' function, and re-run the optimisation routine.")
itnb_trace <- x$trace[-1,]
par(mfrow = c(2, 2))
plot(itnb_trace$Iteration, itnb_trace$LogLikelihood, type = "l", xlab = "Iteration", ylab = "Log-likelihood", ...)
plot(itnb_trace$Iteration, itnb_trace$mu, type = "l", xlab = "Iteration", ylab = bquote(mu), ...)
plot(itnb_trace$Iteration, itnb_trace$theta, type = "l", xlab = "Iteration", ylab = bquote(theta), ...)
plot(itnb_trace$Iteration, itnb_trace$p, type = "l", xlab = "Iteration", ylab = bquote(pi), ...)
par(mfrow = c(1, 1))
return(invisible(NULL))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.