R/wilcox_keselman.R
In skedastic: Handling Heteroskedasticity in the Linear Regression Model

Documented in wilcox_keselman

#' Wilcox and Keselman's Test for Heteroskedasticity in a Linear Regression Model
#'
#' This function implements the nonparametric test of
#'    \insertCite{Wilcox06;textual}{skedastic} for testing for heteroskedasticity
#'    in a simple linear regression model, and extends it to the multiple linear
#'    regression model.
#'
#' @param gammapar A double value between 0 and 0.5 exclusive specifying the
#'    quantile value \eqn{gamma}. Defaults to 0.2.
#' @param B An integer specifying the number of nonparametric bootstrap samples
#'    to use to estimate standard error(s) of the quantile difference(s).
#'    Defaults to \code{500L}.
#' @param seed An integer specifying a seed to pass to
#'    \code{\link[base]{set.seed}} for random number generation. This allows
#'    reproducibility of bootstrap results. The value \code{NA}
#'    results in not setting a seed.
#' @param rqwarn A logical specifying whether warnings generated by
#'    \code{\link[quantreg]{rq.fit}} (such as 'Solution may be nonunique')
#'    should be printed (\code{TRUE}) or suppressed (\code{FALSE}). Defaults
#'    to \code{FALSE}.
#' @param matchWRS A logical specifying whether bootstrap samples should be
#'    generated in the exact same manner as in the \code{qhomtv2} function in
#'    \href{https://github.com/nicebread/WRS}{WRS} package. If \code{TRUE}, and
#'    \code{seed} is set to \code{2} and \code{B} to \code{100} and
#'    \code{p.adjust.method} to \code{"none"}, results will
#'    be identical to those of the default settings of \code{qhomtv2}.
#' @param p.adjust.method A character specifying the family-wise error rate
#'    method to use in adjusting \eqn{p}-values (if it is a multiple linear
#'    regression model). The value is passed to \code{\link[stats]{p.adjust}}.
#'    By default no adjustment is made.
#'
#' @inheritParams breusch_pagan
#'
#' @return An object of \code{\link[base]{class}} \code{"htest"}. If object is
#'    not assigned, its attributes are displayed in the console as a
#'    \code{\link[tibble]{tibble}} using \code{\link[broom]{tidy}}.
#' @references{\insertAllCited{}}
#' @importFrom Rdpack reprompt
#' @export
#' @seealso Rand R. Wilcox's package
#'    \href{https://github.com/nicebread/WRS}{WRS} on Github; in particular
#'    the functions \code{qhomt} and \code{qhomtv2}, which implement this
#'    method for simple and multiple linear regression respectively.
#'
#' @examples
#' mtcars_lm <- lm(mpg ~ wt + qsec + am, data = mtcars)
#' wilcox_keselman(mtcars_lm)
#'

wilcox_keselman <- function(mainlm, gammapar = 0.2, B = 500L,
                    p.adjust.method = "none", seed = NA, rqwarn = FALSE,
                    matchWRS = FALSE, statonly = FALSE) {

  if (!requireNamespace("quantreg", quietly = TRUE)) {
    stop(
      "Package \"quantreg\" must be installed to use this function.",
      call. = FALSE
    )
  }
  processmainlm(m = mainlm)

  hasintercept <- columnof1s(X)
  if (hasintercept[[1]]) {
    nonintercept <- 2:p
  } else {
    nonintercept <- 1:p
  }

  n <- nrow(X)
  if (gammapar <= 0 || gammapar >= 0.5) stop("gammapar must be between 0 and 0.5 exclusive.")
  gammapar <- c(gammapar, 1 - gammapar)

  if (!is.na(seed)) set.seed(seed)

  if (matchWRS) {
    set.seed(2)
    message("Since matchWRS set to TRUE, PRNG seed has been set to 2")
    bootsamp <- as.data.frame(t(matrix(sample(n, size = n * B, replace = TRUE),
                                       nrow = B)))
  } else {
    bootsamp <- as.data.frame(replicate(B, sample(1:n, n, replace = TRUE)))
    hassing <- TRUE
    while (hassing == TRUE) {
      checksing <- vapply(bootsamp, function(b)
        is.singular.mat(crossprod(X[b, ])), NA)
      if (all(!checksing)) {
        hassing <- FALSE
      } else {
        bootsamp[which(checksing)] <- replicate(sum(checksing),
                      sample(1:n, n, replace = TRUE))
      }
    }
  }

  warnfunc <- ifelse(rqwarn, identity, suppressWarnings)

  if (is.null(y)) stop("Response vector required but not extractable from `mainlm` argument")
  bootbetahat_gamma <- warnfunc(sapply(bootsamp, function(b) sapply(gammapar,
              function(q) quantreg::rq.fit(x = X[b, ], y = y[b],
              tau = q)$coefficients[nonintercept]), simplify = "array"))

  betahat_gamma <- warnfunc(sapply(gammapar, function(q) quantreg::rq.fit(x = X,
                      y = y, tau = q)$coefficients[nonintercept]))

  if (length(nonintercept) == 1) {
    dstar <- bootbetahat_gamma[1, ] - bootbetahat_gamma[2, ]
    sdstar <- sqrt(1 / (B - 1) * sum((dstar - mean(dstar)) ^ 2))
    d <- betahat_gamma[[1]] - betahat_gamma[[2]]
  } else {
    dstar <- bootbetahat_gamma[, 1, ] - bootbetahat_gamma[, 2, ]
    sdstar <- vapply(1:length(nonintercept), function(j) sqrt(1 / (B - 1) *
              sum((dstar[j, ] - mean(dstar[j, ])) ^ 2)), NA_real_)
    d <- betahat_gamma[, 1] - betahat_gamma[, 2]
  }

  teststat <- unname(d / sdstar)
  if (statonly) return(teststat)
  pval <- 2 * stats::pnorm(abs(teststat), lower.tail = FALSE)
  if (!(p.adjust.method == "none")) {
    pval <- stats::p.adjust(p = pval, method = p.adjust.method)
  }

  rval <- structure(list(statistic = teststat, p.value = pval,
                         parameter = gammapar[1],
               null.value = 0, alternative = "two.sided"),
               class = "htest")
  broom::tidy(rval)
}