Nothing
#' Confidence Intervals using Profile Likelihood
#'
#' Calculates confidence intervals for one or more parameters in a fitted
#' model object. A function that returns the log-likelihood must be supplied,
#' either directly via the argument `loglik` or using a [`logLikFn`] S3
#' generic.
#'
#' @param object A fitted model object. This object must have a `coef` S3
#' method. If `faster = TRUE` then it must also have a `vcov` S3 method.
#' @param loglik A named function that returns the log-likelihood based on
#' input parameter values and data. The first argument must be the vector of
#' model parameters. If the likelihood is zero for any observation in the
#' data then the function should return `-Inf.`
#'
#' Alternatively, `loglik` does not need to be supplied if a [`logLikFn`] S3
#' method has been created for `object`. The `profileCI` package provides
#' `logLikFn.glm`, which is used in an example in **Examples**.
#' @param ... Further arguments to be passed to `loglik`.
#' @param parm A vector specifying the parameters for which confidence
#' intervals are calculated, either a vector of numbers or a vector of names.
#' The default, `which = "all"`, produces confidence intervals for all the
#' parameters.
#' @param level The confidence level required. A numeric scalar in (0, 1).
#' @param profile A logical scalar. If `TRUE` then confidence intervals
#' based on a profile log-likelihood are returned. If `FALSE` then intervals
#' based on approximate large sample normal theory, which are symmetric about
#' the MLE, are returned.
#' @param mult A positive numeric scalar. Controls the increment by which the
#' parameter of interest is increased/decreased when profiling above/below
#' its MLE. The increment is `mult * se / 100` where `se` is the estimated
#' standard error of the estimator of the parameter. Decreasing `mult`
#' profiles at more points but will be slower.
#' @param faster A logical scalar. If `faster = TRUE` then the profiling of the
#' log-likelihood is in search of a lower (upper) confidence limit is
#' started at the corresponding symmetric lower (upper) confidence limit.
#' @param epsilon Only relevant if `profile = TRUE`. A numeric vector of values
#' that determine the accuracy of the confidence limits. `epsilon` is
#' recycled to the length of the parameter vector `parm`.
#'
#' * If `epsilon[i] > 0` then this value is passed as the argument `epsilon`
#' to the [`itp::itp`] function, which estimates the parameter values for
#' which the profile log-likelihood for parameter `i` drops to the value
#' that defines the confidence limits, once profiling has been successful
#' in finding an interval within which this value lies.
#'
#' * If `epsilon[i] < 0` quadratic interpolation is used, which will tend to
#' be faster.
#'
#' * If `epsilon[i] = 0` then linear interpolation is used, which will be
#' faster still.
#' @param optim_args A list of further arguments (other than `par` and `fn`) to
#' pass to `[stats::optim]`.
#' @details The default, `epsilon = -1`, should work well enough in most
#' circumstances, but to achieve a specific accuracy set `epsilon` to be
#' a small positive value, for example, `epsilon = 1e-4`.
#'
#' The defaults `mult = 32` and `faster = TRUE` are designed to calculate
#' confidence intervals fairly quickly. If the object returned from
#' `profileCI` is plotted, using [`plot.profileCI`], then we will not obtain
#' a smooth plot of a profile log-likelihood. Setting `faster = FALSE` and
#' reducing `mult`, perhaps to `8` or `16` should produce a smoother plot.
#' @return An object of class `c("profileCI", "matrix", "array")`. A numeric
#' matrix with 2 columns giving the lower and upper confidence limits for
#' each parameter. These columns are labelled as `(1-level)/2` and
#' `1-(1-level)/2`, expressed as a percentage, by default `2.5%` and `97.5%`.
#' The row names are the names of the parameters supplied in `parm`.
#'
#' If `profile = TRUE` then the returned object has extra attributes `crit`,
#' `level` and `for_plot`. The latter is a named list of length equal to the
#' number of parameters. Each component is a 2-column numeric matrix. The
#' first column contains values of the parameter and the second column the
#' corresponding values of the profile log-likelihood. The profile
#' log-likelihood is equal to the attribute `crit` at the limits of the
#' confidence interval. The attribute `level` is the input argument `level`.
#' If `faster = FALSE` or `epsilon > 0` then the attributes `lower_pars` and
#' `upper_pars` are lists that provide, for each profiling, the values of the
#' parameters for the last maximisation of the log-likelihood.
#' @return A matrix with columns giving the object
#' `c("profileCI", "matrix", "array")`.
#' @seealso [`plot.profileCI`] and [`print.profileCI`].
#' @examples
#' ## From example(glm)
#' counts <- c(18, 17, 15, 20, 10, 20, 25, 13, 12)
#' outcome <- gl(3, 1, 9)
#' treatment <- gl(3, 3)
#' glm.D93 <- glm(counts ~ outcome + treatment, family = poisson())
#' confint(glm.D93)
#' confint.default(glm.D93)
#'
#' # A logLikFn.glm S3 method is provided in profileCI so we do not need to
#' # supply loglik explicitly
#' prof <- profileCI(glm.D93)
#' prof
#'
#' # Supplying a Poisson GLM log-likelihood explicitly
#' poisson_loglik <- function(pars) {
#' lambda <- exp(model.matrix(glm.D93) %*% pars)
#' loglik <- stats::dpois(x = glm.D93$y, lambda = lambda, log = TRUE)
#' return(sum(loglik))
#' }
#' # This will be a bit slower than profile.glm() because glm.fit() is fast
#' prof <- profileCI(glm.D93, loglik = poisson_loglik)
#' prof
#' plot(prof, parm = 1)
#' plot(prof, parm = "outcome2")
#'
#' # Supplying a more general Poisson GLM log-likelihood
#' poisson_loglik_2 <- function(pars, glm_object) {
#' lambda <- exp(model.matrix(glm_object) %*% pars)
#' loglik <- stats::dpois(x = glm_object$y, lambda = lambda, log = TRUE)
#' return(sum(loglik))
#' }
#' prof <- profileCI(glm.D93, loglik = poisson_loglik_2, glm_object = glm.D93)
#' prof
#' @export
profileCI <- function(object, loglik, ..., parm = "all", level = 0.95,
profile = TRUE, mult = 32, faster = TRUE, epsilon = -1,
optim_args = list()) {
# If loglik is missing then check whether object has a logLikFn method
# If it does then use it, otherwise throw an error
if (missing(loglik)) {
find_logLikFn <- function(x) {
return(!is.null(utils::getS3method("logLikFn", x, optional = TRUE)))
}
has_logLikFnMethod <- sapply(class(object), FUN = find_logLikFn)
if (any(has_logLikFnMethod)) {
loglik <- function(pars) {
return(logLikFn(object, pars = pars))
}
} else {
stop("If \"object\" has no logLikFn method then loglik must be supplied")
}
}
# Check and set parm
cf <- coef(object)
if (is.null(names(cf))) {
names(cf) <- paste0("par", 1:length(cf))
}
parm_names <- names(cf)
if (is.character(parm)) {
if (!all(is.element(parm, c(parm_names, "all")))) {
p_message <- paste0("''", parm_names, "''", collapse = ",")
stop("Character, ''parm'' must be ''all'', or a subset of ", p_message)
}
} else {
if (!all(is.element(parm, 1:length(cf)))) {
p_message <- paste0(1:length(cf), collapse = ",")
stop("Numeric, ''parm'' must be ''all'', or a subset of ", p_message)
}
}
if (length(parm) == 1 && parm == "all") {
parm <- parm_names
} else if (is.numeric(parm)) {
parm <- parm_names[parm]
}
# Logical vector indicating which parameters to include
which_parm <- is.element(parm_names, parm)
# Check the input confidence level
if (level <= 0 | level >= 1) {
stop("''level'' must be in (0, 1)")
}
# Use the vcov method to calculate symmetric CIs
# If profile = FALSE then we return these
# If faster = TRUE then we pass these intervals to faster_profile_ci()
if (!profile | faster) {
z_val <- stats::qnorm(1 - (1 - level) / 2)
mles <- coef(object)[parm]
ses <- sqrt(diag(vcov(object)))[parm]
sym_lower <- mles - z_val * ses
sym_upper <- mles + z_val * ses
ci_mat <- cbind(sym_lower, sym_upper)
rownames(ci_mat) <- parm
low <- paste0(100 * (1 - level)/ 2, "%")
up <- paste0(100 - 100 * (1 - level)/ 2, "%")
colnames(ci_mat) <- c(low, up)
ci_sym_mat <- ci_mat
} else {
ci_mat <- matrix(NA, ncol = 2, nrow = length(parm))
}
# If profile = FALSE then return the symmetric intervals
if (!profile) {
rownames(ci_mat) <- parm
attr(ci_mat, "interval_type") <- "symmetric"
attr(ci_mat, "level") <- level
class(ci_mat) <- c("profileCI", "matrix", "array")
return(ci_mat)
}
# The number of parameters
n_parm <- length(parm)
# Force epsilon to have length equal to the number of parameters
epsilon <- rep_len(epsilon, n_parm)
# Extract the parameter numbers to include
parm_numbers <- (1:length(parm_names))[which_parm]
# An empty list in which to store the profile log-likelihood values
# Likewise for the values of the parameters at the confidence limits
for_plot <- list()
lower_pars <- list()
upper_pars <- list()
# Calculate the estimated standard errors (to set inc below)
ses <- sqrt(diag(vcov(object)))[parm]
# Set up the negated log-likelihood function
negated_loglik_fn <- function(parm, ...) {
return(-loglik(parm, ...))
}
# Loop over all parameters
# It is possible for stats::option() to throw an error, particularly if mult
# is large. This may reflect poor starting values. in an attempt to avoid
# this problem, we reduce (halve) mult iteratively down to the minimum value
# min_mult. If no fit is successful then we return NAs.
min_mult <- 1
save_mult <- mult
for (i in 1:n_parm) {
success <- FALSE
if (faster) {
while (mult >= min_mult) {
# Set inc based on the estimated standard errors
inc <- mult * ses / 100
conf_list <- faster_profile_ci(object = object,
negated_loglik_fn = negated_loglik_fn,
which = parm_numbers[i],
which_name = parm[i],
level = level, mle = coef(object),
ci_sym_mat = ci_sym_mat,
inc = inc[i], epsilon = epsilon[i], ...)
if (!is.null(conf_list$optim_error)) {
mult <- mult / 2
} else {
success <- TRUE
mult <- min_mult - 1
}
}
} else {
while (mult >= min_mult) {
# Set inc based on the estimated standard errors
inc <- mult * ses / 100
conf_list <- profile_ci(object = object,
negated_loglik_fn = negated_loglik_fn,
which = parm_numbers[i], level = level,
mle = coef(object), inc = inc[i],
epsilon = epsilon[i], ...)
if (!is.null(conf_list$optim_error)) {
mult <- mult / 2
} else {
success <- TRUE
mult <- min_mult - 1
}
}
}
if (success) {
ci_mat[i, ] <- conf_list$par_prof[c(1, 3)]
colnames(conf_list$for_plot)[1] <- paste0(parm[i], "_values")
for_plot[[i]] <- conf_list$for_plot
lower_pars[[i]] <- conf_list$lower_pars
upper_pars[[i]] <- conf_list$upper_pars
} else {
ci_mat[i, ] <- matrix(NA, 1, 2)
for_plot[[i]] <- NA
lower_pars[[i]] <- NA
upper_pars[[i]] <- NA
}
# Reset mult
mult <- save_mult
}
names(for_plot) <- parm
# Format the matrix to be returned
low <- paste0(100 * (1 - level)/ 2, "%")
up <- paste0(100 - 100 * (1 - level)/ 2, "%")
colnames(ci_mat) <- c(low, up)
rownames(ci_mat) <- parm
attr(ci_mat, "for_plot") <- for_plot
attr(ci_mat, "crit") <- conf_list$crit
attr(ci_mat, "interval_type") <- "profile"
attr(ci_mat, "level") <- level
if (any(epsilon > 0) || !faster) {
prof_names <- paste0("when profiling for ", parm_names)
names(lower_pars) <- prof_names[which_parm]
names(upper_pars) <- prof_names[which_parm]
}
attr(ci_mat, "lower_pars") <- lower_pars
attr(ci_mat, "upper_pars") <- upper_pars
class(ci_mat) <- c("profileCI", "matrix", "array")
return(ci_mat)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.