R/em-itnb.R

Defines functions plot.itnb simulate_ci.itnb simulate_ci em_itnb.numeric em_itnb em_itnb_control restricted_log_likelihood

Documented in em_itnb em_itnb_control em_itnb.numeric plot.itnb simulate_ci simulate_ci.itnb

## The complete and the restricted log-likelihoods
complete_log_likelihood <- function (mu, theta, p, x, z, i, x_i, t) {
    log_density <- ditnb(x = x, mu = mu, theta = theta, p = 0, i = i, t = t, return_log = TRUE)

    if (p == 0) {
        res <- (1 - z) * log_density
    }
    else {
        res <- z * log(p) * sum(as.numeric(x_i)) + (1 - z) * (log(1 - p) + log_density)
    }

    return(sum(res))
}

restricted_log_likelihood <- function(pars, x, p, i, t) {
    mu <- pars[1]
    theta <- pars[2]

    log_likelihood <- ditnb(x = x, mu = mu, theta = theta, p = p, i = i, t = t, return_log = TRUE)
    return(-sum(log_likelihood))
}

#' Control function for the \link{em_itnb} function.
#'
#' @description Creates a list of default options.
#'
#' @param trace Numeric (>= 0): showing a trace every \code{trace} number of iterations.
#' @param tolerance Numeric: Convergence tolerance.
#' @param iteration_min Numeric (>= 0): The minimum number of allowed iterations.
#' @param iteration_max Numeric (>= \code{iteration_min}): The maximum number of allowed iterations.
#' @param save_trace TRUE/FALSE: should the entire trace be stored?
#'
#' @return A list of default arguments for the \link{em_itnb} function.
#' @export
em_itnb_control <- function(trace = 0L, tolerance = 1e-6, iteration_min = 5L, iteration_max = 10000L, save_trace = FALSE) {
    if (!is.numeric(trace)) {
        stop("'trace' has to be numeric.")
    }
    trace <- ceiling(trace)

    if (!is.numeric(tolerance)) {
        stop("'tolerance' has to be numeric.")
    }

    if (tolerance <= .Machine$double.eps) {
        stop("'tolerance' has to be bigger than two times the machine epsilon.")
    }

    if (!is.numeric(iteration_min)) {
        stop("'iteration_min' has to be numeric.")
    }
    iteration_min <- ceiling(iteration_min)

    if (!is.numeric(iteration_max)) {
        stop("'iteration_min' has to be numeric.")
    }
    iteration_max <- ceiling(iteration_max)

    if (iteration_min > iteration_max) {
        stop("'iteration_max' has be larger than 'iteration_min'.")
    }

    if (!is.logical(save_trace)) {
        stop("'save_trace' has to be logical.")
    }

    res <- list(trace = trace, tolerance = tolerance, iteration_max = iteration_max,
                iteration_min = iteration_min, save_trace = save_trace)
    return(res)
}

#' @title itnb parameter optimisation
#'
#' @description Parameter optimisation for data generated by a r.v. following an itnb distribution using the ENM algorithm.
#'
#' @param x Vector: Observed counts.
#' @param i Numeric: The inflation point.
#' @param t Numeric: The truncation point.
#' @param control List: A control object, see \link{em_itnb_control} for details.
#'
#' @return An object of class \link{itnb-object}.
#'
#' @export
em_itnb <- function(x, i, t, control = list()) {
    UseMethod("em_itnb")
}

#' @rdname em_itnb
#' @method em_itnb numeric
#'
#' @example inst/examples/em_itnb_example.R
#'
#' @export
em_itnb.numeric <- function(x, i, t, control = list()) {
    control <- do.call(em_itnb_control, control)

    if (!is.numeric(i)) {
        stop("'i' has to be numeric.")
    }
    i <- ceiling(i)

    if (!is.numeric(t)) {
        stop("'t' has to be numeric.")
    }
    t <- ceiling(t)

    x_i <- x == i
    p <- mean(x_i)
    mu <- mean(x[!x_i])
    var_x <- var(x[!x_i])

    theta <- ifelse(var_x > mu, mu^2 / (var_x - mu), 100)
    z_star <- (p * as.numeric(x_i)) /
        (p * as.numeric(x_i) + (1 - p) * ditnb(x = x, mu = mu, theta = theta, p = 0, i = i, t = t, return_log = FALSE))

    complete_log_likelihood <- complete_log_likelihood(
        mu = mu, theta = theta, p = p, x = x,
        z = z_star, i = i, x_i = x_i, t = t
    )

    not_converged <- TRUE
    if (control$trace > 0) {
        cat("Iteration :", 0, "\t Current log-likelihood:", complete_log_likelihood, "\t Absolute change in log-likelihood:", NA, "\n")
        cat("\t Parameters (Initial): ", "\t mu =", mu, "\t theta =", theta, "\t p =", p, "\n")
    }

    if (control$save_trace) {
        trace_list <- vector("list", control$iteration_max)
        trace_list[[1]] <- data.frame(
            Iteration = 0,
            mu = mu,
            theta = theta,
            p = p,
            LogLikelihood = complete_log_likelihood,
            AbsoluteChange = NA
        )
    }

    j = 1
    while (not_converged) {
        ## E-step
        z_star <- (p * as.numeric(x_i)) /
            (p * as.numeric(x_i) + (1 - p) * ditnb(x = x, mu = mu, theta = theta, p = 0, i = i, t = t, return_log = FALSE))

        ## M-step
        p <- sum(z_star * as.numeric(x_i)) / sum(1 - z_star * (1 - as.numeric(x_i)))

        pars <- optim(
            par = c(mu, theta),
            fn = restricted_log_likelihood,
            x = x,
            p = p,
            i = i,
            t = t,
            method = "L-BFGS-B",
            lower = c(t, .Machine$double.eps),
            upper = c(Inf, Inf),
            control = list(trace = FALSE)
        )$par

        mu <- pars[1]
        theta <- pars[2]

        ## Updating convergence results
        complete_log_likelihood_old <- complete_log_likelihood
        complete_log_likelihood <- complete_log_likelihood(mu = mu, theta = theta, p = p, x = x, z = z_star, i = i, x_i = x_i, t = t)
        change_log_likelihood <- abs(complete_log_likelihood - complete_log_likelihood_old)

        not_converged <- ifelse(j < control$iteration_min, TRUE, (change_log_likelihood > control$tolerance) && (j < control$iteration_max))

        ## Tracing
        if ((control$trace > 0) & (((j %% control$trace) == 0) || (!not_converged))) {
            cat("Iteration :", j, "\t Current log-likelihood:", complete_log_likelihood, "\t Absolute change in log-likelihood:", change_log_likelihood, "\n")
            cat("\t Parameters: ", "\t mu =", mu, "\t theta =", theta, "\t p =", p, "\n")
        }

        if (control$save_trace) {
            trace_list[[j + 1]] <- data.frame(
                Iteration = j,
                mu = mu,
                theta = theta,
                p = p,
                LogLikelihood = complete_log_likelihood,
                AbsoluteChange = change_log_likelihood
            )
        }

        j = j + 1
    }

    returned_trace <- NULL
    if (control$save_trace) {
        trace_list <- trace_list[seq_len(j)]
        returned_trace <- do.call("rbind", trace_list)
    }

    res <- list(
        n = length(x),
        x = x,
        i = i,
        t = t,
        mu = mu,
        theta = theta,
        p = p,
        log_likelihood = complete_log_likelihood,
        converged = !not_converged,
        trace = returned_trace
    )

    class(res) <- "itnb"
    return(res)
}

#' Confidence envelopes of itnb-object.
#'
#' @description Simulated confidence envelopes of the parameters estimated by the \link{em_itnb} function.
#'
#' @param object An \link{itnb-object}.
#' @param level Numeric: The confidence level. If left as \code{NULL} all parametric bootstrap simulations are returned.
#' @param trace TRUE/FALSE: should a trace be shown?
#' @param nr_simulations Numeric: The number of simulations used to create the confidence envelopes.
#' @param parametric TRUE/FALSE: should the envelopes be simulated using the parametric bootstrap?
#' @param plot TRUE/FALSE: should a density plot of the parameters be returned?
#'
#' @return If \code{level = NULL} a matrix with bootstrap simulations, otherwise a matrix of lower and upper confidence limits for each parameter.
#'
#' @export
simulate_ci <- function(object, level = 0.95, trace = TRUE, nr_simulations = 2000, parametric = FALSE, plot = FALSE) {
    UseMethod("simulate_ci")
}

#' @rdname simulate_ci
#' @method simulate_ci itnb
#'
#' @example inst/examples/simulation_itnb_example.R
#'
#' @export
simulate_ci.itnb <- function(object, level = 0.95, trace = TRUE, nr_simulations = 2000, parametric = FALSE, plot = FALSE) {
    ##
    n <- object$n
    x <- object$x
    i <- object$i
    t <- object$t

    ##
    mu <- object$mu
    theta <- object$theta
    p <- object$p

    ##
    mu_e <- rep(NA, nr_simulations)
    theta_e <- rep(NA, nr_simulations)
    p_e <- rep(NA, nr_simulations)

    if (trace) {
        pb <- progress_bar$new(
            format = paste0(ifelse(parametric, "Parametric", "Non-parametric"), " bootstrap (samples = ", nr_simulations, ")", ": [:bar] :percent Eta: :eta"),
            total = nr_simulations,
            clear = FALSE,
            width = 120
        )
    }

    for (j in seq_len(nr_simulations)) {
        if (trace)
            pb$tick()

        if (parametric) {
            x_j <- ritnb(n = n, mu = mu, theta = theta, p = p, i = i, t = t)
        }
        else {
            x_j <- x[sample(n, n, replace = TRUE)]
        }

        pars_j <- em_itnb(x = x_j, i = i, t = t)

        mu_e[j] <- pars_j$mu
        theta_e[j] <- pars_j$theta
        p_e[j] <- pars_j$p
    }

    if (is.null(level)) {
        complete_data_frame <- cbind(mu_e, theta_e, p_e)
        colnames(complete_data_frame) <- c("mu", "theta", "p")

        res_data_frame <- complete_data_frame
    }
    else {
        alpha <- (1 - level) / 2

        quantiles_mu <- quantile(mu_e, probs = c(alpha, 1 - alpha))
        quantiles_theta <- quantile(theta_e, probs = c(alpha, 1 - alpha))
        quantiles_p <- quantile(p_e, probs = c(alpha, 1 - alpha))

        confint_data_frame <- rbind(quantiles_mu, quantiles_theta, quantiles_p)
        rownames(confint_data_frame) <- c("mu", "theta", "p")

        res_data_frame <- confint_data_frame
    }

    if (plot) {
        par(mfrow = c(3, 1))
        hist(mu_e, breaks = "fd", xlab = bquote(mu), ylab = "Density", probability = TRUE, main = paste(ifelse(parametric, "Parametric", "Non-parametric"), "bootstrap samples"), cex.lab = 1.5, cex.main = 1.5);
        if (!is.null(level)) abline(v = quantiles_mu[1], col = "dodgerblue2", lwd = 2); abline(v = quantiles_mu[2], col = "dodgerblue2", lwd = 2)

        hist(theta_e, breaks = "fd", xlab = bquote(theta), ylab = "Density", probability = TRUE, main = paste(ifelse(parametric, "Parametric", "Non-parametric"), "bootstrap samples"), cex.lab = 1.5, cex.main = 1.5)
        if (!is.null(level)) abline(v = quantiles_theta[1], col = "dodgerblue2", lwd = 2); abline(v = quantiles_theta[2], col = "dodgerblue2", lwd = 2)

        hist(p_e, breaks = "fd", xlab = bquote(pi), ylab = "Density", probability = TRUE, main = paste(ifelse(parametric, "Parametric", "Non-parametric"), "bootstrap samples"), cex.lab = 1.5, cex.main = 1.5)
        if (!is.null(level)) abline(v = quantiles_p[1], col = "dodgerblue2", lwd = 2); abline(v = quantiles_p[2], col = "dodgerblue2", lwd = 2)

        par(mfrow = c(1, 1))
    }

    return(res_data_frame)
}

#' Plot parameter trace of an \link{itnb-object}
#'
#' @description A function plotting the EM parameter trace returned by the \link{em_itnb} function. Note: the function can only be used if the argument \code{save_trace} in the \link{em_itnb_control} function was set to \code{TRUE}.
#'
#' @param x An \link{itnb-object}.
#' @param ... Additional arguments passed to the \link[graphics]{plot} function.
#'
#' @export
plot.itnb <- function(x, ...) {
    if (is.null(x$trace))
        stop("The 'trace' tibble was not found. Set 'save_trace = TRUE' in the 'em_itnb_control' function, and re-run the optimisation routine.")

    itnb_trace <- x$trace[-1,]

    par(mfrow = c(2, 2))
    plot(itnb_trace$Iteration, itnb_trace$LogLikelihood, type = "l", xlab = "Iteration", ylab = "Log-likelihood", ...)

    plot(itnb_trace$Iteration, itnb_trace$mu, type = "l", xlab = "Iteration", ylab = bquote(mu), ...)

    plot(itnb_trace$Iteration, itnb_trace$theta, type = "l", xlab = "Iteration", ylab = bquote(theta), ...)

    plot(itnb_trace$Iteration, itnb_trace$p, type = "l", xlab = "Iteration", ylab = bquote(pi), ...)
    par(mfrow = c(1, 1))

    return(invisible(NULL))
}
svilsen/itnb documentation built on Sept. 7, 2024, 1:30 a.m.