#' Bootstrap Confidence Intervals for Linear Regression Error Variances
#'
#' Uses bootstrap methods to compute approximate confidence intervals for
#' error variances in a heteroskedastic linear regression model based on
#' an auxiliary linear variance model (ALVM) or auxiliary nonlinear variance
#' model (ANLVM).
#'
#' @details \eqn{B} resampled versions of the original linear regression model
#' (which can be accessed using \code{object$ols}) are generated using a
#' nonparametric bootstrap method that is suitable for heteroskedastic
#' linear regression models, namely either the pairs bootstrap or the wild
#' bootstrap (bootstrapping residuals is \emph{not} suitable). Depending on
#' the class of \code{object}, either an ALVM or an ANLVM is fit to each of
#' the bootstrapped regression models. The distribution of the \eqn{B}
#' bootstrap estimates of each error variance \eqn{\omega_i},
#' \eqn{i=1,2,\ldots,n}, is used to construct an approximate confidence
#' interval for \eqn{\omega_i}. This is done using one of three methods.
#' The first is the percentile interval, which simply takes the empirical
#' \eqn{\alpha/2} and \eqn{1-\alpha/2} quantiles of the \eqn{i}th bootstrap
#' variance estimates. The second is the Bias-Corrected and accelerated
#' (BCa) method as described in \insertCite{Efron93;textual}{skedastic},
#' which is intended to improve on the percentile interval (although
#' simulations have not found it to yield better coverage probabilities).
#' The third is the naive standard normal interval, which takes
#' \eqn{\hat{\omega}_i \pm z_{1-\alpha/2} \widehat{\mathrm{SE}}}, where
#' \eqn{\widehat{\mathrm{SE}}} is the standard deviation of the \eqn{B}
#' bootstrap estimates of \eqn{\omega_i}. By default, the expansion
#' technique described in \insertCite{Hesterberg15;textual}{skedastic} is
#' also applied; evidence from simulations suggests that this \emph{does}
#' improve coverage probabilities.
#'
#' Technically, the hyperparameters of the ALVM, such as \eqn{\lambda} (for a
#' penalised polynomial or thin-plate spline model) or \eqn{n_c} (for a
#' clustering model) should be re-tuned every time the ALVM is fitted to
#' another bootstrapped regression model. However, due to the computational
#' cost, this is not done by \code{avm.ci} unless \code{retune} is set to
#' \code{TRUE}.
#'
#' When obtained from ALVMs, bootstrap estimates of \eqn{\omega_i} that fall on
#' the constraint boundary (i.e., are estimated to be near 0) are ignored
#' by default; there is an attempt to obtain \code{Brequired} bootstrap
#' estimates of each \eqn{\omega_i} that do not fall on the constraint
#' boundary. This fine-tuning can be turned off by setting the
#' \code{rm_onconstraint} argument to \code{FALSE}; the amount of effort
#' put into obtaining non-boundary estimates is controlled using the
#' \code{Bextra} argument. When ANLVMs are used, the default behaviour is to
#' try to obtain \code{Brequired} bootstrap estimates of \eqn{\omega} where
#' the Gauss-Newton algorithm applied for quasi-likelihood estimation
#' has converged, and ignore estimates obtained from non-convergent cases.
#' This behaviour can be toggled using the \code{rm_nonconverged} argument.
#'
#' @param object An object of class \code{"alvm.fit"} or of class
#' \code{"anlvm.fit"}, containing information on a fitted ALVM or ANLVM
#' @param bootobject An object of class \code{"bootlm"}, containing information
#' on a set of \eqn{B} bootstrapped versions of a linear regression model,
#' obtained by a nonparametric bootstrap method suitable for heteroskedastic
#' linear models. If set to \code{NULL} (the default), it is generated by
#' calling \code{\link{bootlm}}.
#' @param bootavmobject An object of class \code{"bootavm"}, containing
#' information on an ALVM or ANLVM fit to \eqn{B} bootstrapped linear
#' regression models. If set to \code{NULL} (the default), it is generated
#' by calling the non-exported function \code{bootavm}.
#' @param jackobject An object of class \code{"jackavm"}, containing
#' information on ALVMs or ANVLMs fit to jackknife versions of a linear
#' regression model. If set to \code{NULL} (the default), it is generated
#' by calling the non-exported function \code{jackavm}.
#' @param bootCImethod A character specifying the method to use when computing
#' the approximate bootstrap confidence interval. The default, \code{"pct"},
#' corresponds to the percentile interval. \code{"bca"} corresponds to the
#' Bias-Corrected and accelerated (BCa) modification of the percentile
#' interval. \code{"stdnorm"} corresponds to a naive standard normal
#' interval with bootstrap standard error estimates.
#' @param bootsampmethod A character specifying the method to use for
#' generating nonparametric bootstrap linear regression models. Corresponds
#' to the \code{sampmethod} argument of \code{\link{bootlm}} and defaults
#' to \code{"pairs"}. \strong{Warning:} in simulations, bootstrap intervals
#' computed using the wild bootstrap have shown very poor coverage
#' probabilities. Ignored unless \code{bootobject} is \code{NULL}.
#' @param Brequired An integer indicating the number of bootstrap regression
#' models that should be used to compute the bootstrap confidence intervals.
#' The default behaviour is to base the interval estimates only on bootstrap
#' ALVM variance estimates that are not on the constraint boundary or on
#' bootstrap ANLVMs where the estimation algorithm converged. Consequently,
#' if this is not the case for all of the first \code{Brequired} bootstrap
#' models, additional bootstrap models are used (up to a maximum of
#' \code{Bextra}). Defaults to \code{1000L}.
#' @param Bextra An integer indicating the maximum number of additional
#' bootstrap models that should be fitted in an attempt to obtain
#' \code{Brequired} appropriate sets of bootstrap variance estimates, as
#' explained above under \code{Brequired}. Defaults to \code{500L}.
#' Ignored if \code{rm_on_constraint} is set to \code{FALSE} (for an ALVM)
#' or if \code{rm_nonconverged} is set to \code{FALSE} (for an ANLVM).
#' @param conf.level A double representing the confidence level \eqn{1-\alpha};
#' must be between 0 and 1. Defaults to \code{0.95}.
#' @param expand A logical specifying whether to implement the expansion
#' technique described in \insertCite{Hesterberg15;textual}{skedastic}.
#' Defaults to \code{TRUE}.
#' @param retune A logical specifying whether to re-tune hyperparameters and
#' re-select features each time an ALVM or (in the case of feature
#' selection) ANLVM is fit to a bootstrapped regression model. If
#' \code{FALSE} (the default), the hyperparameter value and selected
#' features from the ALVM fit to the original model are reused in every
#' bootstrap model. Setting to \code{TRUE} is more theoretically sound but
#' increases computation time substantially.
#' @param qtype A numeric corresponding to the \code{type} argument of
#' \code{\link[stats]{quantile}}. Defaults to \code{6}.
#' @param rm_on_constraint A logical specifying whether to exclude
#' bootstrapped ALVMs from the interval estimation method where the ALVM
#' parameter estimate falls on the constraint boundary. Defaults to
#' \code{TRUE}.
#' @param rm_nonconverged A logical specifying whether to exclude bootstrapped
#' ANLVMs from the interval estimation method where the optimisation
#' algorithm used in quasi-likelihood estimation of the ANLVM parameter did
#' not converge. Defaults to \code{TRUE}.
#' @param jackknife_point A logical specifying whether to replace the point
#' estimates of the error variances \eqn{\omega} with jackknife estimates
#' based only on the leave-one-out auxiliary models where the parameter
#' estimates do not lie on the constraint boundary (in the ALVM case) or
#' where the quasi-likelihood estimation algorithm converged (in the ANLVM
#' case). Defaults to \code{FALSE}.
#' @param ... Other arguments to pass to non-exported helper functions
#'
#' @inheritParams bootlm
#'
#' @return An object of class \code{"avm.ci"}, containing the following:
#' \itemize{
#' \item \code{climits}, an \eqn{n\times 2} matrix with lower confidence
#' limits in the first column and upper confidence limits in the second
#' \item \code{var.est}, a vector of length \eqn{n} of point estimates
#' \eqn{\hat{\omega}} of the error variances. This is the same vector
#' passed within \code{object}, unless \code{jackknife_point} is
#' \code{TRUE}.
#' \item \code{conf.level}, corresponding to the eponymous argument
#' \item \code{bootCImethod}, corresponding to the eponymous argument
#' \item \code{bootsampmethod}, corresponding to the eponymous argument or
#' otherwise extracted from \code{bootobject}
#' }
#'
#' @references{\insertAllCited{}}
#' @importFrom Rdpack reprompt
#' @export
#' @seealso \code{\link{alvm.fit}}, \code{\link{anlvm.fit}},
#' \insertCite{Efron93;textual}{skedastic}
#'
#' @examples
#' mtcars_lm <- lm(mpg ~ wt + qsec + am, data = mtcars)
#' myalvm <- alvm.fit(mtcars_lm, model = "cluster")
#' # Brequired would of course not be so small in practice
#' ci.alvm <- avm.ci(myalvm, Brequired = 5)
avm.ci <- function(object, bootobject = NULL,
bootavmobject = NULL, jackobject = NULL,
bootCImethod = c("pct", "bca", "stdnorm"),
bootsampmethod = c("pairs", "wild"),
Bextra = 500L, Brequired = 1000L,
conf.level = 0.95, expand = TRUE,
retune = FALSE, resfunc = c("identity", "hccme"),
qtype = 6, rm_on_constraint = TRUE, rm_nonconverged = TRUE,
jackknife_point = FALSE, ...) {
if (!(inherits(object, "alvm.fit") || inherits(object, "anlvm.fit")))
stop("object should be of class alvm.fit or anlvm.fit")
bootCImethod <- match.arg(bootCImethod, c("pct", "bca", "stdnorm"))
X <- stats::model.matrix(object$ols)
y <- stats::model.response(stats::model.frame(object$ols))
n <- nrow(X)
p <- ncol(X)
if (length(conf.level) != 1 || conf.level <= 0 || conf.level >= 1) {
stop("Invalid conf.level")
}
var.est <- unname(object$var.est)
if (is.null(bootavmobject)) {
if (is.null(bootobject)) {
bootsampmethod <- match.arg(bootsampmethod, c("pairs", "wild"))
bootsampmethod2return <- bootsampmethod
resfunc <- match.arg(resfunc, c("identity", "hccme"))
bootobject <- bootlm(object = object$ols, sampmethod = bootsampmethod,
B = Brequired + Bextra, resfunc = resfunc, ...)
} else {
if (!(inherits(bootobject, "bootlm")))
stop("bootobject should be of class bootlm")
Bextra <- length(bootobject) - Brequired
bootsampmethod2return <- bootobject$sampmethod
}
bootavmobject <- bootavm(object = object, bootobject = bootobject,
retune = retune, returnall = FALSE, rm_on_constraint = rm_on_constraint,
rm_nonconverged = rm_nonconverged, Brequired = Brequired, ...)
} else {
# else just use bootavmobject that was passed
if (!inherits(bootavmobject, "bootavm"))
stop("bootavmobject must be an object of class \"bootavm\"")
}
B <- nrow(bootavmobject$var.ests)
probs <- sort(1 + c(-1, 1) * conf.level) / 2
if (expand) probs <- stats::pnorm(stats::qt(probs, n - 1) *
sqrt(n / (n - 1)))
if (bootCImethod %in% c("pct", "stdnorm")) {
probs2 <- matrix(data = rep(probs, each = n), nrow = 2,
byrow = TRUE)
} else if (bootCImethod == "bca") {
z0hat <- vapply(1:n, function(i) {
stats::qnorm(sum(bootavmobject$var.ests[, i] < var.est[i]) / B)
}, 0)
if (is.null(jackobject)) {
jackobject <- jackavm(object = object,
retune = retune,
returnwhat = "all", ...)
} else {
# jackobject was passed
}
omegahatdot <- rowMeans(jackobject$jackvarests)
ahat <- vapply(1:n, function(i) {
sum(vapply(1:n, function(j) {
(omegahatdot[i] - jackobject$jackvarests[i, j]) ^ 3
}, 0)) / (6 * sum(vapply(1:n, function(j) {
(omegahatdot[i] - jackobject$jackvarests[i, j]) ^ 2
}, 0)) ^ (3 / 2))
}, 0)
probs2 <- sapply(1:n, function(i) {
vapply(probs, function(p) {
if (is.infinite(z0hat[i])) {
ifelse(p < 0.5, 0, 1)
} else {
stats::pnorm(z0hat[i] + (z0hat[i] + stats::qnorm(p)) /
(1 - ahat[i] * (z0hat[i] + stats::qnorm(p))))
}
}, 0)
}, simplify = "array")
}
if (!jackknife_point) {
newvar.est <- var.est
} else {
if (is.null(jackobject)) {
jackobject <- jackavm(object = object,
retune = retune,
returnwhat = "all", ...)
}
newvar.est <- jackpoint(jackobject = jackobject,
constol = object$constol, ...)
}
if (bootCImethod %in% c("pct", "bca", "stdnorm")) {
if (bootCImethod %in% c("pct", "bca")) {
climits <- t(sapply(1:n, function(i) {
stats::quantile(bootavmobject$var.ests[, i], probs = probs2[, i],
type = qtype, names = FALSE)
}, simplify = "array"))
} else if (bootCImethod == "stdnorm") {
climits <- t(sapply(1:n, function(i) {
SEest <- stats::sd(bootavmobject$var.ests[, i])
c(max(0, newvar.est[i] - stats::qnorm(probs2[2, i]) * SEest),
newvar.est[i] + stats::qnorm(probs2[2, i]) * SEest)
}, simplify = "array"))
}
result <- list("climits" = climits, "var.est" = newvar.est,
"conf.level" = conf.level, "bootCImethod" = bootCImethod,
"bootsampmethod" = bootsampmethod2return)
}
class(result) <- "avm.ci"
result
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.