# NOTE(review): stray top-level token "Nothing" commented out; a bare symbol
# here would error ("object 'Nothing' not found") when the file is sourced.
# ============================== conf_region ===============================
#' Two-dimensional confidence regions
#'
#' Calculates the (profile, if necessary) loglikelihood for a pair of
#' parameters from which confidence regions can be plotted using
#' \code{\link{plot.confreg}}.
#'
#' @param object An object of class \code{"chandwich"} returned by
#' \code{adjust_loglik}.
#' @param which_pars A vector of length 2 specifying the 2 (unfixed)
#' parameters for which confidence region is required.
#' Can be either a numeric vector, specifying indices of the components
#' of the \strong{full} parameter vector, or a character vector of
#' parameter names, which must be a subset of those supplied in
#' \code{par_names} in the call to \code{\link{adjust_loglik}} that
#' produced \code{object}.
#'
#' \code{which_pars} must not have any parameters in common with
#' \code{attr(object, "fixed_pars")}. \code{which_pars} must not contain
#' all of the unfixed parameters, i.e. there is no point in profiling over
#' all the unfixed parameters.
#'
#' If \code{which_pars} is not supplied but the current model has exactly
#' two free parameters, i.e. \code{attr(object, "p_current") = 2} then
#'   \code{which_pars} is set to \code{attr(object, "free_pars")}.
#' @param range1,range2 Numeric vectors of length 2. Respective ranges (of
#' the form \code{c(lower, upper)}) of values of \code{which_pars[1]} and
#' \code{which_pars[2]} over which to profile.
#' Missing values in \code{range1} and/or \code{range2} are
#' filled in using \code{conf} and \code{mult}. See below for details.
#' @param conf A numeric scalar in (0, 100). The highest confidence level
#' of interest. This is only relevant if \code{range1} and/or \code{range2}
#' are not completely specified. In that event \code{conf} is used,
#' in combination with \code{mult}, to try to set up the grid of parameter
#' values to include the largest confidence region of interest.
#' @param mult A numeric vector of length 1 or the same length as
#' \code{which_pars}.
#' The search for the profile loglikelihood-based confidence limits is
#' conducted over the corresponding symmetric confidence intervals
#' (based on approximate normal theory), extended
#' by a factor of the corresponding component of \code{mult}.
#' @param num A numeric vector of length 1 or 2. The numbers of values at which
#' to evaluate the profile loglikelihood either side of the MLE.
#' \code{num[i]} relates to \code{which_pars[i]}. If \code{num} has length
#' 1 then \code{num} is replicated to have length 2.
#' @param type A character scalar. The argument \code{type} to the function
#' returned by \code{\link{adjust_loglik}}, that is, the type of adjustment
#' made to the independence loglikelihood function.
#' @param ... Further arguments to be passed to \code{\link[stats]{optim}}.
#' These may include \code{gr}, \code{method}, \code{lower}, \code{upper}
#' or \code{control}. Any arguments that are not appropriate for
#' \code{\link[stats]{optim}}, i.e. not in
#' \code{methods::formalArgs(stats::optim)},
#' will be removed without warning.
#' @return An object of class "confreg", a list with components
#' \item{grid1, grid2}{Numeric vectors. Respective values of
#' \code{which_pars[1]} and \code{which_pars[2]} in the grid over which
#' the (profile) loglikelihood is evaluated. }
#'   \item{max_loglik}{A numeric scalar. The value of
#' the loglikelihood at its maximum.}
#'   \item{prof_loglik}{A 2\code{num[1]} + 1 by 2\code{num[2]} + 1
#' numeric matrix containing the values of the (profile) loglikelihood.}
#' \item{type}{A character scalar. The input \code{type}.}
#' \item{which_pars}{A numeric or character vector. The input
#' \code{which_pars}. If the \code{which_pars} was numeric then
#' it is supplemented by the parameter names, if these are available
#' in \code{object}.}
#' \item{name}{A character scalar. The name of the model,
#' stored in \code{attr(object, "name")}.}
#' @seealso \code{\link{adjust_loglik}} to adjust a user-supplied
#' loglikelihood function.
#' @seealso \code{\link{conf_intervals}} for confidence intervals for
#' individual parameters.
#' @seealso \code{\link{compare_models}} to compare nested models using an
#' (adjusted) likelihood ratio test.
#' @seealso \code{\link{plot.confreg}}.
#' @examples
#' # -------------------------- GEV model, owtemps data -----------------------
#' # ------------ following Section 5.2 of Chandler and Bate (2007) -----------
#'
#' gev_loglik <- function(pars, data) {
#' o_pars <- pars[c(1, 3, 5)] + pars[c(2, 4, 6)]
#' w_pars <- pars[c(1, 3, 5)] - pars[c(2, 4, 6)]
#' if (isTRUE(o_pars[2] <= 0 | w_pars[2] <= 0)) return(-Inf)
#' o_data <- data[, "Oxford"]
#' w_data <- data[, "Worthing"]
#' check <- 1 + o_pars[3] * (o_data - o_pars[1]) / o_pars[2]
#' if (isTRUE(any(check <= 0))) return(-Inf)
#' check <- 1 + w_pars[3] * (w_data - w_pars[1]) / w_pars[2]
#' if (isTRUE(any(check <= 0))) return(-Inf)
#' o_loglik <- log_gev(o_data, o_pars[1], o_pars[2], o_pars[3])
#' w_loglik <- log_gev(w_data, w_pars[1], w_pars[2], w_pars[3])
#' return(o_loglik + w_loglik)
#' }
#'
#' # Initial estimates (method of moments for the Gumbel case)
#' sigma <- as.numeric(sqrt(6 * diag(var(owtemps))) / pi)
#' mu <- as.numeric(colMeans(owtemps) - 0.57722 * sigma)
#' init <- c(mean(mu), -diff(mu) / 2, mean(sigma), -diff(sigma) / 2, 0, 0)
#'
#' # Log-likelihood adjustment of the full model
#' par_names <- c("mu[0]", "mu[1]", "sigma[0]", "sigma[1]", "xi[0]", "xi[1]")
#' large <- adjust_loglik(gev_loglik, data = owtemps, init = init,
#' par_names = par_names)
#'
#' \donttest{
#' # Plots like those in Figure 4 of Chandler and Bate (2007)
#' # (a)
#' which_pars <- c("mu[0]", "mu[1]")
#' reg_1 <- conf_region(large, which_pars = which_pars)
#' reg_none_1 <- conf_region(large, which_pars = which_pars, type = "none")
#' plot(reg_1, reg_none_1)
#' # (b)
#' which_pars <- c("sigma[0]", "sigma[1]")
#' reg_2 <- conf_region(large, which_pars = which_pars)
#' reg_none_2 <- conf_region(large, which_pars = which_pars, type = "none")
#' plot(reg_2, reg_none_2)
#' # (c)
#' # Note: the naive and bivariate model contours are reversed in the paper
#' which_pars <- c("sigma[0]", "xi[0]")
#' reg_3 <- conf_region(large, which_pars = which_pars)
#' reg_none_3 <- conf_region(large, which_pars = which_pars, type = "none")
#' plot(reg_3, reg_none_3)
#' }
#'
#' # --------- Misspecified Poisson model for negative binomial data ----------
#'
#' # ... following Section 5.1 of the "Object-Oriented Computation of Sandwich
#' # Estimators" vignette of the sandwich package
#' # https://cran.r-project.org/web/packages/sandwich/vignettes/sandwich-OOP.pdf
#'
#' # Simulate data
#' set.seed(123)
#' x <- rnorm(250)
#' y <- rnbinom(250, mu = exp(1 + x), size = 1)
#' # Fit misspecified Poisson model
#' fm_pois <- glm(y ~ x + I(x^2), family = poisson)
#' summary(fm_pois)$coefficients
#'
#' # Contributions to the independence loglikelihood
#' pois_glm_loglik <- function(pars, y, x) {
#' log_mu <- pars[1] + pars[2] * x + pars[3] * x ^ 2
#' return(dpois(y, lambda = exp(log_mu), log = TRUE))
#' }
#' pars <- c("alpha", "beta", "gamma")
#' # Linear model (gamma fixed at 0)
#' pois_lin <- adjust_loglik(pois_glm_loglik, y = y, x = x, par_names = pars,
#' fixed_pars = "gamma")
#' pois_vertical <- conf_region(pois_lin)
#' pois_none <- conf_region(pois_lin, type = "none")
#' plot(pois_none, pois_vertical, conf = c(50, 75, 95, 99), col = 2:1, lwd = 2,
#' lty = 1)
#' @export
conf_region <- function(object, which_pars = NULL, range1 = c(NA, NA),
                        range2 = c(NA, NA), conf = 95, mult = 2,
                        num = c(10, 10),
                        type = c("vertical", "cholesky", "spectral", "none"),
                        ...) {
  type <- match.arg(type)
  # Keep only the arguments in ... that stats::optim() can accept; any others
  # are dropped silently, as documented
  optim_args <- list(...)
  optim_names <- names(optim_args)
  ok <- optim_names %in% methods::formalArgs(stats::optim)
  optim_args <- optim_args[ok]
  # Adjust conf to make it more applicable to the marginal intervals used to
  # set the default grid on which the profile loglikelihood is calculated:
  # conf_for_search = 100 * sqrt(conf / 100), i.e. each marginal level is the
  # square root of the joint level
  conf_for_search <- sqrt(conf) * 10
  num <- rep_len(num, 2)
  # If which_pars has not been supplied and there are exactly 2 free
  # parameters in the current model then set which_pars to these 2 parameters
  if (is.null(which_pars) && attr(object, "p_current") == 2) {
    which_pars <- attr(object, "free_pars")
  }
  n_which_pars <- length(which_pars)
  if (n_which_pars != 2) {
    stop("which_pars must be a vector of length 2")
  }
  if (length(range1) != 2) {
    stop("range1 must be a vector of length 2")
  }
  if (length(range2) != 2) {
    stop("range2 must be a vector of length 2")
  }
  # Ensure that fully-specified ranges are of the form c(lower, upper)
  if (all(!is.na(range1))) {
    range1 <- sort(range1)
  }
  if (all(!is.na(range2))) {
    range2 <- sort(range2)
  }
  n_mult <- length(mult)
  if (n_mult != 1 && n_mult != n_which_pars) {
    stop("mult must have length 1 or the same length as which_pars")
  } else if (n_mult == 1) {
    mult <- rep(mult, n_which_pars)
  }
  # Fixed parameters and parameter names
  fixed_pars <- attr(object, "fixed_pars")
  full_par_names <- attr(object, "full_par_names")
  p <- attr(object, "p_full")
  # If which_pars is a character vector then convert it to the corresponding
  # numeric indices of the full parameter vector, retaining the names
  if (is.character(which_pars)) {
    if (is.null(full_par_names)) {
      stop("which_pars can be character only if par_names is supplied")
    }
    if (!all(which_pars %in% full_par_names)) {
      stop("which_pars is not a subset of ", deparse(full_par_names))
    }
    temp <- which_pars
    which_pars <- which(full_par_names %in% which_pars)
    names(which_pars) <- temp
  } else {
    if (!is.null(full_par_names)) {
      names(which_pars) <- full_par_names[which_pars]
    }
  }
  if (any(which_pars %in% fixed_pars)) {
    stop("which_pars & attr(object,''fixed_pars'') have parameters in common")
  }
  # Indices of the free (non-fixed) parameters
  if (is.null(fixed_pars)) {
    free_pars <- seq_len(p)
  } else {
    free_pars <- seq_len(p)[-fixed_pars]
  }
  res_mle <- attr(object, "res_MLE")
  res_SE <- res_adjSE <- numeric(p)
  res_SE[free_pars] <- attr(object, "SE")
  res_adjSE[free_pars] <- attr(object, "adjSE")
  z_val <- stats::qnorm(1 - (1 - conf_for_search / 100) / 2)
  which_mle <- res_mle[which_pars]
  lower <- c(range1[1], range2[1])
  upper <- c(range1[2], range2[2])
  # Use the unadjusted SEs if no adjustment is made, adjusted SEs otherwise
  if (type == "none") {
    the_SEs <- res_SE[which_pars]
  } else {
    the_SEs <- res_adjSE[which_pars]
  }
  # Fill in any missing (NA) range limits using symmetric normal-theory
  # intervals extended by the corresponding component of mult
  if (!is.finite(lower[1])) {
    lower[1] <- which_mle[1] - mult[1] * z_val * the_SEs[1]
  }
  if (!is.finite(upper[1])) {
    upper[1] <- which_mle[1] + mult[1] * z_val * the_SEs[1]
  }
  if (!is.finite(lower[2])) {
    lower[2] <- which_mle[2] - mult[2] * z_val * the_SEs[2]
  }
  if (!is.finite(upper[2])) {
    upper[2] <- which_mle[2] + mult[2] * z_val * the_SEs[2]
  }
  #
  # 2D profile loglikelihood
  #
  # Set up a grid of values of the parameters in which_pars, with the MLE
  # at position num[i] + 1 in each margin
  temp1 <- seq(lower[1], which_mle[1], length.out = num[1] + 1)
  temp2 <- seq(which_mle[1], upper[1], length.out = num[1] + 1)[-1]
  grid1 <- c(temp1, temp2)
  temp1 <- seq(lower[2], which_mle[2], length.out = num[2] + 1)
  temp2 <- seq(which_mle[2], upper[2], length.out = num[2] + 1)[-1]
  grid2 <- c(temp1, temp2)
  leng1 <- length(grid1)
  leng2 <- length(grid2)
  # Matrix to store the profile loglikelihood values
  z <- matrix(nrow = leng1, ncol = leng2)
  # MLE of all free parameters, i.e. excluding fixed parameters and which_pars
  theta_start <- res_mle[-c(fixed_pars, which_pars)]
  #
  # To avoid illegal starting values, move from the MLE downwards and
  # then back up, at each stage using neighbouring estimates as
  # starting values.
  #
  # In which positions are the MLEs in the grid?
  mle_idx1 <- num[1] + 1
  mle_idx2 <- num[2] + 1
  # seq_len() (rather than 1:(mle_idx - 1)) keeps this correct if num = 0
  eval_order1 <- c(rev(seq_len(mle_idx1 - 1)), mle_idx1:(2 * num[1] + 1))
  eval_order2 <- c(rev(seq_len(mle_idx2 - 1)), mle_idx2:(2 * num[2] + 1))
  start_array <- array(dim = c(length(theta_start), leng1, leng2))
  start_array[, mle_idx1, mle_idx2] <- theta_start
  message("Waiting for profiling to be done...")
  utils::flush.console()
  for (i in eval_order1) {
    i_nbr <- max(i - 1, 1):(i + 1)
    i_nbr <- i_nbr[i_nbr <= leng1]
    for (j in eval_order2) {
      j_nbr <- max(j - 1, 1):(j + 1)
      j_nbr <- j_nbr[j_nbr <= leng2]
      # Start from the mean of the solutions at neighbouring grid points,
      # falling back on the MLE-based starting value if none are available
      theta <- rowMeans(start_array[, i_nbr, j_nbr], na.rm = TRUE)
      if (any(is.na(theta))) {
        theta <- theta_start
      }
      prof_vals <- c(grid1[i], grid2[j])
      prof_args <- c(list(object = object, prof_pars = which_pars,
                          prof_vals = prof_vals, init = theta, type = type),
                     optim_args)
      zz <- try(do.call(profile_loglik, prof_args), silent = TRUE)
      if (inherits(zz, "try-error")) {
        # Record a failed optimisation as NA rather than aborting the grid
        z[i, j] <- NA
      } else {
        z[i, j] <- zz
        start_array[, i, j] <- attr(zz, "free_pars")
        # Remember a good starting value for the next pass over grid2
        if (j == mle_idx2) {
          theta_start <- theta
        }
      }
    }
  }
  conf_region_list <- list(grid1 = grid1, grid2 = grid2, prof_loglik = z,
                           max_loglik = attr(object, "max_loglik"),
                           type = type, which_pars = which_pars,
                           name = attr(object, "name"))
  class(conf_region_list) <- "confreg"
  return(conf_region_list)
}
# ============================== conf_intervals ===============================
#' Confidence intervals
#'
#' Calculates confidence intervals for individual parameters.
#'
#' @param object An object of class \code{"chandwich"} returned by
#' \code{adjust_loglik}.
#' @param which_pars A vector specifying the (unfixed) parameters for which
#' confidence intervals are required. Can be either a numeric vector,
#' specifying indices of the components of the \strong{full} parameter
#' vector, or a character vector of parameter names, which must be a subset
#' of those supplied in \code{par_names} in the call to
#' \code{\link{adjust_loglik}} that produced \code{object}.
#'
#' \code{which_pars} must not have any parameters in common with
#' \code{attr(object, "fixed_pars")}. \code{which_pars} must not contain
#' all of the unfixed parameters, i.e. there is no point in profiling over
#' all the unfixed parameters.
#'
#' If missing, all parameters are included.
#' @param init A numeric vector of initial estimates of the values of the
#' parameters that are not fixed and are not in \code{which_pars}.
#' Should have length \code{attr(object, "p_current") - length(which_pars)}.
#' If \code{init} is \code{NULL} or is of the wrong length then the
#' relevant components from the MLE stored in \code{object} are used.
#' @param conf A numeric scalar in (0, 100). Confidence level for the
#' intervals.
#' @param mult A numeric vector of length 1 or the same length as
#' \code{which_pars}.
#' The search for the profile loglikelihood-based confidence limits is
#' conducted over the corresponding symmetric confidence intervals
#' (based on approximate normal theory), extended
#' by a factor of the corresponding component of \code{mult}.
#' @param num A numeric scalar. The number of values at which to evaluate the
#' profile loglikelihood either side of the MLE. Increasing \code{num}
#' increases the accuracy of the confidence limits, but the code will take
#' longer to run.
#' @param type A character scalar. The argument \code{type} to the function
#' returned by \code{\link{adjust_loglik}}, that is, the type of adjustment
#' made to the independence loglikelihood function.
#' @param profile A logical scalar. If \code{FALSE} then only intervals based
#' on approximate large sample normal theory, which are symmetric about the
#' MLE, are returned (in \code{sym_CI}) and \code{prof_CI} in the returned
#' object will contain \code{NA}s.
#' @param ... Further arguments to be passed to \code{\link[stats]{optim}}.
#' These may include \code{gr}, \code{method}, \code{lower}, \code{upper}
#' or \code{control}.
#' @details Calculates (profile, if necessary) likelihood-based confidence
#' intervals for individual parameters, and also provides symmetric intervals
#' based on a normal approximation to the sampling distribution of the
#' estimator. See also the S3 confint method
#' \code{\link{confint.chandwich}}.
#' @return An object of class "confint", a list with components
#' \item{conf}{The argument \code{conf}.}
#' \item{cutoff}{A numeric scalar. For values inside the
#' confidence interval the profile loglikelihood lies above
#' \code{cutoff}.}
#' \item{parameter_vals, prof_loglik_vals}{\code{2 * num + 1} by
#' \code{length(which_pars)} numeric matrices.
#' Column i of \code{parameter_vals} contains the profiled values of
#' parameter \code{which_par[i]}. Column i of \code{prof_loglik_vals}
#' contains the corresponding values of the profile loglikelihood.}
#' \item{sym_CI, prof_CI}{\code{length(which_pars)}
#' by 2 numeric matrices. Row i of \code{sym_CI} (\code{prof_CI})
#' contains the symmetric (profile loglikelihood-based) confidence
#' intervals for parameter \code{which_pars[i]}.} If a value in
#' \code{prof_CI} is \code{NA} then this means that the search for the
#'   confidence limit did not extend far enough. A remedy is to increase
#' the value of \code{mult}, or the relevant component of \code{mult},
#' and perhaps also increase \code{num}.
#' \item{max_loglik}{The value of the adjusted loglikelihood
#' at its maximum, stored in \code{object$max_loglik}.}
#' \item{type}{The argument \code{type} supplied in the call
#' to \code{conf_intervals}, i.e. the type of loglikelihood adjustment.}
#' \item{which_pars}{The argument \code{which_pars}.}
#' \item{name}{A character scalar. The name of the model,
#' stored in \code{attr(object, "name")}.}
#' \item{p_current}{The number of free parameters in the current model.}
#' \item{fixed_pars, fixed_at}{\code{attr(object, "fixed_pars")} and
#' \code{attr(object, "fixed_at")}, the arguments \code{fixed_pars} and
#' \code{fixed_at} to \code{\link{adjust_loglik}}, if these were
#' supplied.}
#' @seealso \code{\link{confint.chandwich}} S3 confint method for objects
#' of class \code{"chandwich"} returned from \code{\link{adjust_loglik}}.
#' @seealso \code{\link{adjust_loglik}} to adjust a user-supplied
#' loglikelihood function.
#' @seealso \code{\link{summary.chandwich}} for maximum likelihood estimates
#' and unadjusted and adjusted standard errors.
#' @seealso \code{\link{plot.chandwich}} for plots of one-dimensional adjusted
#' loglikelihoods.
#' @seealso \code{\link{conf_region}} for a confidence region for
#' a pair of parameters.
#' @seealso \code{\link{compare_models}} to compare nested models using an
#' (adjusted) likelihood ratio test.
#' @seealso \code{\link{plot.confint}}, \code{\link{print.confint}}.
#' @examples
#' # ------------------------- Binomial model, rats data ----------------------
#'
#' # Contributions to the independence loglikelihood
#' binom_loglik <- function(prob, data) {
#' if (prob < 0 || prob > 1) {
#' return(-Inf)
#' }
#' return(dbinom(data[, "y"], data[, "n"], prob, log = TRUE))
#' }
#' rat_res <- adjust_loglik(loglik = binom_loglik, data = rats, par_names = "p")
#'
#' # 95% likelihood-based confidence intervals, vertically adjusted
#' ci <- conf_intervals(rat_res)
#' plot(ci)
#' # Unadjusted
#' conf_intervals(rat_res, type = "none")
#'
#' # -------------------------- GEV model, owtemps data -----------------------
#' # ------------ following Section 5.2 of Chandler and Bate (2007) -----------
#'
#' gev_loglik <- function(pars, data) {
#' o_pars <- pars[c(1, 3, 5)] + pars[c(2, 4, 6)]
#' w_pars <- pars[c(1, 3, 5)] - pars[c(2, 4, 6)]
#' if (isTRUE(o_pars[2] <= 0 | w_pars[2] <= 0)) return(-Inf)
#' o_data <- data[, "Oxford"]
#' w_data <- data[, "Worthing"]
#' check <- 1 + o_pars[3] * (o_data - o_pars[1]) / o_pars[2]
#' if (isTRUE(any(check <= 0))) return(-Inf)
#' check <- 1 + w_pars[3] * (w_data - w_pars[1]) / w_pars[2]
#' if (isTRUE(any(check <= 0))) return(-Inf)
#' o_loglik <- log_gev(o_data, o_pars[1], o_pars[2], o_pars[3])
#' w_loglik <- log_gev(w_data, w_pars[1], w_pars[2], w_pars[3])
#' return(o_loglik + w_loglik)
#' }
#'
#' # Initial estimates (method of moments for the Gumbel case)
#' sigma <- as.numeric(sqrt(6 * diag(var(owtemps))) / pi)
#' mu <- as.numeric(colMeans(owtemps) - 0.57722 * sigma)
#' init <- c(mean(mu), -diff(mu) / 2, mean(sigma), -diff(sigma) / 2, 0, 0)
#'
#' # Log-likelihood adjustment of the full model
#' par_names <- c("mu[0]", "mu[1]", "sigma[0]", "sigma[1]", "xi[0]", "xi[1]")
#' large <- adjust_loglik(gev_loglik, data = owtemps, init = init,
#' par_names = par_names)
#'
#' # 95% likelihood-based confidence intervals, vertically adjusted
#' large_v <- conf_intervals(large, which_pars = c("xi[0]", "xi[1]"))
#' large_v
#' plot(large_v)
#' plot(large_v, which_par = "xi[1]")
#' \donttest{
#' # Unadjusted
#' large_none <- conf_intervals(large, which_pars = c("xi[0]", "xi[1]"),
#' type = "none")
#' large_none
#' plot(large_v, large_none)
#' plot(large_v, large_none, which_par = "xi[1]")
#' }
#'
#' # --------- Misspecified Poisson model for negative binomial data ----------
#'
#' # ... following Section 5.1 of the "Object-Oriented Computation of Sandwich
#' # Estimators" vignette of the sandwich package
#' # https://cran.r-project.org/web/packages/sandwich/vignettes/sandwich-OOP.pdf
#'
#' # Simulate data
#' set.seed(123)
#' x <- rnorm(250)
#' y <- rnbinom(250, mu = exp(1 + x), size = 1)
#' # Fit misspecified Poisson model
#' fm_pois <- glm(y ~ x + I(x^2), family = poisson)
#' summary(fm_pois)$coefficients
#'
#' # Contributions to the independence loglikelihood
#' pois_glm_loglik <- function(pars, y, x) {
#' log_mu <- pars[1] + pars[2] * x + pars[3] * x ^ 2
#' return(dpois(y, lambda = exp(log_mu), log = TRUE))
#' }
#' pars <- c("alpha", "beta", "gamma")
#' pois_quad <- adjust_loglik(pois_glm_loglik, y = y, x = x, par_names = pars)
#' conf_intervals(pois_quad)
#' @export
conf_intervals <- function(object, which_pars = NULL, init = NULL, conf = 95,
                           mult = 1.5, num = 10,
                           type = c("vertical", "cholesky", "spectral",
                                    "none"),
                           profile = TRUE, ...) {
  type <- match.arg(type)
  # Fixed parameters, values at which they are fixed and parameter names
  fixed_pars <- attr(object, "fixed_pars")
  fixed_at <- attr(object, "fixed_at")
  full_par_names <- attr(object, "full_par_names")
  # If which_pars is not supplied then set it to all (unfixed) parameters
  p <- attr(object, "p_full")
  if (is.null(which_pars)) {
    if (is.null(fixed_pars)) {
      which_pars <- seq_len(p)
    } else {
      which_pars <- seq_len(p)[-fixed_pars]
    }
  }
  n_which_pars <- length(which_pars)
  n_mult <- length(mult)
  if (n_mult != 1 && n_mult != n_which_pars) {
    stop("mult must have length 1 or the same length as which_pars")
  } else if (n_mult == 1) {
    mult <- rep(mult, n_which_pars)
  }
  # If which_pars is a character vector then convert it to the corresponding
  # numeric indices of the full parameter vector, retaining the names
  if (is.character(which_pars)) {
    if (is.null(full_par_names)) {
      stop("which_pars can be character only if par_names is supplied")
    }
    if (!all(which_pars %in% full_par_names)) {
      stop("which_pars is not a subset of ", deparse(full_par_names))
    }
    temp <- which_pars
    which_pars <- which(full_par_names %in% which_pars)
    names(which_pars) <- temp
  } else {
    if (!is.null(full_par_names)) {
      names(which_pars) <- full_par_names[which_pars]
    }
  }
  if (any(which_pars %in% fixed_pars)) {
    stop("which_pars & attr(object,''fixed_pars'') have parameters in common")
  }
  # Symmetric confidence intervals based on approximate normal theory
  if (is.null(fixed_pars)) {
    free_pars <- seq_len(p)
  } else {
    free_pars <- seq_len(p)[-fixed_pars]
  }
  res_mle <- attr(object, "res_MLE")
  res_SE <- res_adjSE <- numeric(p)
  res_SE[free_pars] <- attr(object, "SE")
  res_adjSE[free_pars] <- attr(object, "adjSE")
  z_val <- stats::qnorm(1 - (1 - conf / 100) / 2)
  which_mle <- res_mle[which_pars]
  # Use the unadjusted SEs if no adjustment is made, adjusted SEs otherwise
  if (type == "none") {
    the_SEs <- res_SE[which_pars]
  } else {
    the_SEs <- res_adjSE[which_pars]
  }
  sym_lower <- which_mle - z_val * the_SEs
  sym_upper <- which_mle + z_val * the_SEs
  # Range over which to search for the profile-based limits: the symmetric
  # intervals extended by the corresponding component of mult
  lower <- which_mle - mult * z_val * the_SEs
  upper <- which_mle + mult * z_val * the_SEs
  sym_CI <- cbind(sym_lower, sym_upper)
  colnames(sym_CI) <- c("lower", "upper")
  # Confidence intervals using the profile loglikelihood
  prof_CI <- matrix(NA, nrow = n_which_pars, ncol = 2)
  colnames(prof_CI) <- c("lower", "upper")
  parameter_vals <- matrix(NA, nrow = 2 * num + 1, ncol = n_which_pars)
  prof_loglik_vals <- matrix(-Inf, nrow = 2 * num + 1, ncol = n_which_pars)
  if (!is.null(full_par_names)) {
    rownames(prof_CI) <- full_par_names[which_pars]
    colnames(parameter_vals) <- full_par_names[which_pars]
    colnames(prof_loglik_vals) <- full_par_names[which_pars]
  }
  max_loglik <- attr(object, "max_loglik")
  # Values inside the confidence interval have profile loglik above cutoff
  cutoff <- max_loglik - stats::qchisq(conf / 100, 1) / 2
  # Only profile if profile = TRUE
  if (profile) {
    # Insert the MLEs and the maximised loglikelihood values
    parameter_vals[num + 1, ] <- which_mle
    prof_loglik_vals[num + 1, ] <- rep(max_loglik, n_which_pars)
    message("Waiting for profiling to be done...")
    utils::flush.console()
    for (i in seq_along(which_pars)) {
      # MLE not including the profiled parameter and any fixed parameters
      sol <- res_mle[-c(which_pars[i], fixed_pars)]
      # Lower tail: step away from the MLE until the cutoff is crossed,
      # warm-starting each optimisation from the previous solution
      par_vals <- seq(from = which_mle[i], to = lower[i],
                      length.out = num + 1)[-1]
      crossed <- FALSE
      j <- 1
      while (!crossed && j <= num) {
        opt <- profile_loglik(object, prof_pars = which_pars[i],
                              prof_vals = par_vals[j],
                              init = sol, type = type, ...)
        sol <- attr(opt, "free_pars")
        parameter_vals[num - j + 1, i] <- par_vals[j]
        prof_loglik_vals[num - j + 1, i] <- opt
        crossed <- opt < cutoff
        j <- j + 1
      }
      # Reset initial estimates before searching the upper tail
      sol <- res_mle[-c(which_pars[i], fixed_pars)]
      # Upper tail ...
      par_vals <- seq(from = which_mle[i], to = upper[i],
                      length.out = num + 1)[-1]
      crossed <- FALSE
      j <- 1
      while (!crossed && j <= num) {
        opt <- profile_loglik(object, prof_pars = which_pars[i],
                              prof_vals = par_vals[j],
                              init = sol, type = type, ...)
        sol <- attr(opt, "free_pars")
        parameter_vals[num + j + 1, i] <- par_vals[j]
        prof_loglik_vals[num + j + 1, i] <- opt
        crossed <- opt < cutoff
        j <- j + 1
      }
      # Use linear interpolation to estimate the confidence limits
      y <- prof_loglik_vals[, i]
      x <- parameter_vals[, i]
      # Find where the profile loglikelihood crosses cutoff
      temp <- diff(y - cutoff > 0)
      # Lower limit: first upward crossing.  Taking a single crossing guards
      # against multiple crossings caused by numerical noise, which would
      # otherwise make c(low, up) the wrong length for prof_CI[i, ]
      z <- which(temp == 1)
      if (length(z) == 0) {
        low <- NA
      } else {
        z <- z[1]
        low <- x[z] + (cutoff - y[z]) * (x[z + 1] - x[z]) / (y[z + 1] - y[z])
      }
      # Upper limit: last downward crossing
      z <- which(temp == -1)
      if (length(z) == 0) {
        up <- NA
      } else {
        z <- z[length(z)]
        up <- x[z] + (cutoff - y[z]) * (x[z + 1] - x[z]) / (y[z + 1] - y[z])
      }
      prof_CI[i, ] <- c(low, up)
    }
  }
  conf_list <- list(conf = conf, cutoff = cutoff,
                    parameter_vals = parameter_vals,
                    prof_loglik_vals = prof_loglik_vals, sym_CI = sym_CI,
                    prof_CI = prof_CI, max_loglik = max_loglik,
                    type = type, which_pars = which_pars,
                    name = attr(object, "name"),
                    p_current = attr(object, "p_current"))
  if (!is.null(fixed_pars)) {
    conf_list <- c(conf_list, list(fixed_pars = fixed_pars,
                                   fixed_at = fixed_at))
  }
  class(conf_list) <- "confint"
  return(conf_list)
}
# ============================== profile_loglik ===============================
#' Profile loglikelihood
#'
#' Calculates the profile loglikelihood for a subset of the model parameters.
#' This function is provided primarily so that it can be called by
#' \code{\link{conf_intervals}} and \code{\link{conf_region}}.
#'
#' @param object An object of class \code{"chandwich"} returned by
#' \code{adjust_loglik}.
#' @param prof_pars A vector specifying the subset of the (unfixed) parameters
#' over which to profile. Can be either a numeric vector, specifying indices
#' of the components of the \strong{full} parameter vector, or a character
#' vector of parameter names, which must be a subset of those supplied in
#' \code{par_names} in the call to \code{\link{adjust_loglik}} that produced
#' \code{object}.
#'
#' \code{prof_pars} must not have any parameters in common with
#' \code{attr(object, "fixed_pars")}. \code{prof_pars} must not contain
#' all of the unfixed parameters, i.e. there is no point in profiling over
#' all of the unfixed parameters.
#' @param prof_vals A numeric vector. Values of the parameters in
#' \code{prof_pars}. If \code{prof_vals = NULL} then the MLEs stored
#' in \code{object} of the parameters \code{prof_pars} are used.
#' @param init A numeric vector of initial estimates of the values of the
#' parameters that are not fixed and are not in \code{prof_pars}.
#' Should have length \code{attr(object, "p_current") - length(prof_pars)}.
#' If \code{init} is \code{NULL} or is of the wrong length then the
#' relevant components from the MLE stored in \code{object} are used.
#' @param type A character scalar. The argument \code{type} to the function
#' returned by \code{\link{adjust_loglik}}, that is, the type of adjustment
#' made to the independence loglikelihood function.
#' @param ... Further arguments to be passed to \code{\link[stats]{optim}}.
#' These may include \code{gr}, \code{method}, \code{lower}, \code{upper}
#' or \code{control}.
#' @return A numeric vector of length 1. The value of the profile
#' loglikelihood. The returned object has the attribute \code{"free_pars"}
#' giving the optimal values of the parameters that remain after the
#' parameters \code{prof_pars} and \code{attr(object, "fixed_pars")} have
#' been removed from the full parameter vector. If there are no such
#' parameters, which happens if an attempt is made to profile over
#' \emph{all} non-fixed parameters, then this attribute is not present and
#' the value returned is calculated using the function \code{object}.
#' @seealso \code{\link{adjust_loglik}} to adjust a user-supplied
#' loglikelihood function.
#' @seealso \code{\link{conf_intervals}} for confidence intervals for
#' individual parameters.
#' @seealso \code{\link{conf_region}} for a confidence region for
#' a pair of parameters.
#' @examples
#' # -------------------------- GEV model, owtemps data -----------------------
#' # ------------ following Section 5.2 of Chandler and Bate (2007) -----------
#'
#' gev_loglik <- function(pars, data) {
#' o_pars <- pars[c(1, 3, 5)] + pars[c(2, 4, 6)]
#' w_pars <- pars[c(1, 3, 5)] - pars[c(2, 4, 6)]
#' if (isTRUE(o_pars[2] <= 0 | w_pars[2] <= 0)) return(-Inf)
#' o_data <- data[, "Oxford"]
#' w_data <- data[, "Worthing"]
#' check <- 1 + o_pars[3] * (o_data - o_pars[1]) / o_pars[2]
#' if (isTRUE(any(check <= 0))) return(-Inf)
#' check <- 1 + w_pars[3] * (w_data - w_pars[1]) / w_pars[2]
#' if (isTRUE(any(check <= 0))) return(-Inf)
#' o_loglik <- log_gev(o_data, o_pars[1], o_pars[2], o_pars[3])
#' w_loglik <- log_gev(w_data, w_pars[1], w_pars[2], w_pars[3])
#' return(o_loglik + w_loglik)
#' }
#'
#' # Initial estimates (method of moments for the Gumbel case)
#' sigma <- as.numeric(sqrt(6 * diag(var(owtemps))) / pi)
#' mu <- as.numeric(colMeans(owtemps) - 0.57722 * sigma)
#' init <- c(mean(mu), -diff(mu) / 2, mean(sigma), -diff(sigma) / 2, 0, 0)
#'
#' # Log-likelihood adjustment of the full model
#' par_names <- c("mu[0]", "mu[1]", "sigma[0]", "sigma[1]", "xi[0]", "xi[1]")
#' large <- adjust_loglik(gev_loglik, data = owtemps, init = init,
#' par_names = par_names)
#'
#' # Profile loglikelihood for xi1, evaluated at xi1 = 0
#' profile_loglik(large, prof_pars = "xi[1]", prof_vals = 0)
#'
#' # Model with xi1 fixed at 0
#' medium <- adjust_loglik(larger = large, fixed_pars = "xi[1]")
#' # Profile loglikelihood for xi0, evaluated at xi0 = -0.1
#' profile_loglik(medium, prof_pars = "xi[0]", prof_vals = -0.1)
#' @export
profile_loglik <- function(object, prof_pars = NULL, prof_vals = NULL,
                           init = NULL, type = c("vertical", "cholesky",
                                                 "spectral", "none"), ...) {
  type <- match.arg(type)
  if (is.null(prof_pars)) {
    stop("prof_pars must be supplied")
  }
  fixed_pars <- attr(object, "fixed_pars")
  fixed_at <- attr(object, "fixed_at")
  full_par_names <- attr(object, "full_par_names")
  # Convert a character prof_pars to numeric indices of the full parameter
  # vector, retaining the parameter names where they are available
  if (is.character(prof_pars)) {
    if (is.null(full_par_names)) {
      stop("prof_pars can be character only if par_names is supplied")
    }
    if (!all(prof_pars %in% full_par_names)) {
      stop("prof_pars is not a subset of ", deparse(full_par_names))
    }
    temp <- prof_pars
    prof_pars <- which(full_par_names %in% prof_pars)
    names(prof_pars) <- temp
  } else {
    if (!is.null(full_par_names)) {
      names(prof_pars) <- full_par_names[prof_pars]
    }
  }
  # Indices of the parameters that remain free, i.e. are maximized over
  free_pars <- (1:attr(object, "p_full"))[-c(fixed_pars, prof_pars)]
  if (!is.null(full_par_names)) {
    names(free_pars) <- full_par_names[free_pars]
  }
  if (any(prof_pars %in% fixed_pars)) {
    stop("prof_pars & attr(object,''fixed_pars'') have parameters in common")
  }
  # The MLE (including any fixed parameters) of the full parameter vector
  full_mle <- attr(object, "res_MLE")
  # By default, evaluate the profile loglikelihood at the MLE of prof_pars
  if (is.null(prof_vals)) {
    prof_vals <- full_mle[prof_pars]
  }
  p_r <- length(free_pars)
  # If, after removing the parameters over which we wish to profile, there are
  # no free parameters, i.e. we are profiling over *all* unfixed parameters
  # then just use the adjusted loglikelihood given by type
  if (p_r == 0) {
    to_return <- do.call(object, list(prof_vals, type = type))
    return(to_return)
  }
  # Initial estimate: the MLE with fixed_pars set at the values in fixed_at
  if (is.null(init) || length(init) != p_r) {
    init <- full_mle[free_pars]
  }
  # Extract arguments to be passed to optim()
  optim_args <- list(...)
  # Use "BFGS", unless the user has chosen a method, or if they have
  # (inappropriately) chosen "Nelder-Mead" when p_r = 1.
  # && (not &): these are scalar conditions inside if ().
  if (is.null(optim_args$method)) {
    optim_args$method <- "BFGS"
  } else if (p_r == 1 && optim_args$method == "Nelder-Mead") {
    optim_args$method <- "BFGS"
  }
  # Set up a function to perform the profiling
  # Fixed_pars are dealt with inside the function returned by adjust_loglik()
  p <- attr(object, "p_current")
  neg_prof_loglik <- function(x) {
    pars <- numeric(p)
    # Interleave the profiled values (prof_vals) and the free values (x) into
    # their correct positions in the current parameter vector
    pars[rank(c(prof_pars, free_pars))] <- c(prof_vals, x)
    return(-do.call(object, list(pars, type = type)))
  }
  # L-BFGS-B and Brent don't like Inf or NA or NaN: wrap the objective so
  # that any non-finite value is capped at a large finite value
  if (optim_args$method %in% c("L-BFGS-B", "Brent")) {
    big_finite_val <- 10 ^ 10
    raw_neg_prof_loglik <- neg_prof_loglik
    neg_prof_loglik <- function(x) {
      check <- raw_neg_prof_loglik(x)
      if (!is.finite(check)) {
        check <- big_finite_val
      }
      return(check)
    }
  }
  for_optim <- c(list(par = init, fn = neg_prof_loglik), optim_args)
  temp <- do.call(stats::optim, for_optim)
  # Return the profile loglikelihood value (NOT negated), with the values of
  # the free parameters at the optimum attached as an attribute
  to_return <- -temp$value
  attr(to_return, "free_pars") <- temp$par
  return(to_return)
}
#' Confidence intervals for model parameters
#'
#' \code{confint} method for objects of class \code{"chandwich"}.
#' Computes confidence intervals for one or more model parameters based
#' on an object returned from \code{\link{adjust_loglik}}.
#'
#' @param object An object of class \code{"chandwich"}, returned by
#' \code{\link{adjust_loglik}}.
#' @param parm A vector specifying the (unfixed) parameters for which
#' confidence intervals are required.
#' If missing, all parameters are included.
#'
#' Can be either a numeric vector,
#' specifying indices of the components of the \strong{full} parameter
#' vector, or a character vector of parameter names, which must be a subset
#' of those supplied in \code{par_names} in the call to
#' \code{\link{adjust_loglik}} that produced \code{object}.
#'
#' \code{parm} must not have any parameters in common with
#' \code{attr(object, "fixed_pars")}. \code{parm} must not contain
#' all of the unfixed parameters, i.e. there is no point in profiling over
#' all the unfixed parameters.
#' @param level The confidence level required. A numeric scalar in (0, 1).
#' @param type A character scalar. The argument \code{type} to the function
#' returned by \code{\link{adjust_loglik}}, that is, the type of adjustment
#' made to the independence loglikelihood function.
#' @param profile A logical scalar. If \code{TRUE} then confidence intervals
#' based on an (adjusted) profile loglikelihood are returned. If
#' \code{FALSE} then intervals based on approximate large sample normal
#' theory, which are symmetric about the MLE, are returned.
#' @param ... Further arguments to be passed to \code{\link{conf_intervals}}.
#' @details For details see the documentation for the function
#' \code{\link{conf_intervals}}, on which \code{confint.chandwich} is based.
#' @return A matrix with columns giving lower and upper confidence limits for
#' each parameter. These will be labelled as (1 - level)/2 and
#' 1 - (1 - level)/2 in \% (by default 2.5\% and 97.5\%).
#' The row names are the names of the model parameters,
#' if these are available.
#' @seealso The underlying function \code{\link{conf_intervals}}. If you would
#' like to plot the profile loglikelihood function for a parameter then call
#' \code{\link{conf_intervals}} directly and then use the associated plot
#' method. Note that in \code{conf_intervals()} a parameter choice is
#' specified using an argument called \code{which_pars}, not \code{parm}.
#' @seealso \code{\link{conf_region}} for a confidence region for
#' pairs of parameters.
#' @seealso \code{\link{compare_models}} for an adjusted likelihood ratio test
#' of two models.
#' @seealso \code{\link{adjust_loglik}} to adjust a user-supplied
#' loglikelihood function.
#' @examples
#' # -------------------------- GEV model, owtemps data -----------------------
#' # ------------ following Section 5.2 of Chandler and Bate (2007) -----------
#'
#' gev_loglik <- function(pars, data) {
#' o_pars <- pars[c(1, 3, 5)] + pars[c(2, 4, 6)]
#' w_pars <- pars[c(1, 3, 5)] - pars[c(2, 4, 6)]
#' if (isTRUE(o_pars[2] <= 0 | w_pars[2] <= 0)) return(-Inf)
#' o_data <- data[, "Oxford"]
#' w_data <- data[, "Worthing"]
#' check <- 1 + o_pars[3] * (o_data - o_pars[1]) / o_pars[2]
#' if (isTRUE(any(check <= 0))) return(-Inf)
#' check <- 1 + w_pars[3] * (w_data - w_pars[1]) / w_pars[2]
#' if (isTRUE(any(check <= 0))) return(-Inf)
#' o_loglik <- log_gev(o_data, o_pars[1], o_pars[2], o_pars[3])
#' w_loglik <- log_gev(w_data, w_pars[1], w_pars[2], w_pars[3])
#' return(o_loglik + w_loglik)
#' }
#'
#' # Initial estimates (method of moments for the Gumbel case)
#' sigma <- as.numeric(sqrt(6 * diag(var(owtemps))) / pi)
#' mu <- as.numeric(colMeans(owtemps) - 0.57722 * sigma)
#' init <- c(mean(mu), -diff(mu) / 2, mean(sigma), -diff(sigma) / 2, 0, 0)
#'
#' # Log-likelihood adjustment of the full model
#' par_names <- c("mu[0]", "mu[1]", "sigma[0]", "sigma[1]", "xi[0]", "xi[1]")
#' large <- adjust_loglik(gev_loglik, data = owtemps, init = init,
#' par_names = par_names)
#' confint(large)
#' confint(large, profile = FALSE)
#' @export
confint.chandwich <- function(object, parm, level = 0.95,
                              type = c("vertical", "cholesky", "spectral",
                                       "none"),
                              profile = TRUE, ...) {
  if (!inherits(object, "chandwich")) {
    stop("use only with \"chandwich\" objects")
  }
  type <- match.arg(type)
  # Delegate the calculations to conf_intervals(), translating level (a
  # proportion) to conf (a percentage) and parm to which_pars.  Only pass
  # which_pars when the user supplied parm.
  ci_args <- list(object = object, conf = 100 * level, type = type,
                  profile = profile, ...)
  if (!missing(parm)) {
    ci_args$which_pars <- parm
  }
  res <- do.call(conf_intervals, ci_args)
  # Choose profile-loglikelihood-based intervals or symmetric (large sample
  # normal theory) intervals, as requested
  ci <- if (profile) res$prof_CI else res$sym_CI
  # Label the columns with the lower and upper tail probabilities, in %
  tail_probs <- (1 - level) / 2
  tail_probs <- c(tail_probs, 1 - tail_probs)
  colnames(ci) <- paste(round(100 * tail_probs, 1), "%")
  return(ci)
}
# NOTE: the three lines below are boilerplate from a documentation-hosting
# website (extraction residue), not R code; commented out so the file parses.
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.