R/ols-breusch-pagan-test.R

#' Breusch pagan test
#'
#' @description
#' Test for constant variance. It assumes that the error terms are normally
#' distributed.
#'
#' @param model An object of class \code{lm}.
#' @param fitted.values Logical; if TRUE, use fitted values of regression model.
#' @param rhs Logical; if TRUE, specifies that tests for heteroskedasticity be
#' performed for the right-hand-side (explanatory) variables of the fitted
#' regression model.
#' @param multiple Logical; if TRUE, specifies that multiple testing be performed.
#' @param p.adj Adjustment for p value, the following options are available:
#' bonferroni, holm, sidak and none.
#' @param vars Variables to be used for heteroskedasticity test.
#'
#' @details
#' Breusch Pagan Test was introduced by Trevor Breusch and Adrian Pagan in 1979.
#' It is used to test for heteroskedasticity in a linear regression model.
#' It test whether variance of errors from a regression is dependent on the
#' values of a independent variable.
#'
#' \itemize{
#' \item Null Hypothesis: Equal/constant variances
#' \item Alternative Hypothesis: Unequal/non-constant variances
#' }
#'
#' Computation
#'
#' \itemize{
#'   \item Fit a regression model
#'   \item Regress the squared residuals from the above model on the independent variables
#'   \item Compute \eqn{nR^2}. It follows a chi square distribution with p -1 degrees of
#'         freedom, where p is the number of independent variables, n is the sample size and
#' 				 \eqn{R^2} is the coefficient of determination from the regression in step 2.
#' }
#'
#' @return \code{ols_test_breusch_pagan} returns an object of class \code{"ols_test_breusch_pagan"}.
#' An object of class \code{"ols_test_breusch_pagan"} is a list containing the
#' following components:
#'
#' \item{bp}{breusch pagan statistic}
#' \item{p}{p-value of \code{bp}}
#' \item{fv}{fitted values of the regression model}
#' \item{rhs}{names of explanatory variables of fitted regression model}
#' \item{multiple}{logical value indicating if multiple tests should be performed}
#' \item{padj}{adjusted p values}
#' \item{vars}{variables to be used for heteroskedasticity test}
#' \item{resp}{response variable}
#' \item{preds}{predictors}
#'
#' @references
#' T.S. Breusch & A.R. Pagan (1979), A Simple Test for Heteroscedasticity and
#' Random Coefficient Variation. Econometrica 47, 1287–1294
#'
#' Cook, R. D.; Weisberg, S. (1983). "Diagnostics for Heteroskedasticity in Regression". Biometrika. 70 (1): 1–10.
#'
#' @section Deprecated Function:
#' \code{ols_bp_test()} has been deprecated. Instead use \code{ols_test_breusch_pagan()}.
#'
#' @family heteroskedasticity tests
#'
#' @examples
#' # model
#' model <- lm(mpg ~ disp + hp + wt + drat, data = mtcars)
#'
#' # use fitted values of the model
#' ols_test_breusch_pagan(model)
#'
#' # use independent variables of the model
#' ols_test_breusch_pagan(model, rhs = TRUE)
#'
#' # use independent variables of the model and perform multiple tests
#' ols_test_breusch_pagan(model, rhs = TRUE, multiple = TRUE)
#'
#' # bonferroni p value adjustment
#' ols_test_breusch_pagan(model, rhs = TRUE, multiple = TRUE, p.adj = 'bonferroni')
#'
#' # sidak p value adjustment
#' ols_test_breusch_pagan(model, rhs = TRUE, multiple = TRUE, p.adj = 'sidak')
#'
#' # holm's p value adjustment
#' ols_test_breusch_pagan(model, rhs = TRUE, multiple = TRUE, p.adj = 'holm')
#'
#' @importFrom stats anova
#'
#' @export
#'
ols_test_breusch_pagan <- function(model, fitted.values = TRUE, rhs = FALSE, multiple = FALSE,
                        p.adj = c("none", "bonferroni", "sidak", "holm"), vars = NA) UseMethod("ols_test_breusch_pagan")

#' @export
#'
ols_test_breusch_pagan.default <- function(model, fitted.values = TRUE, rhs = FALSE, multiple = FALSE,
                                p.adj = c("none", "bonferroni", "sidak", "holm"), vars = NA) {

  check_model(model)
  check_logic(fitted.values)
  check_logic(rhs)
  check_logic(multiple)

  suppressWarnings(
    if (!is.na(vars[1])) {
      check_modelvars(model, vars)
      fitted.values <- FALSE
    }
  )

  method <- match.arg(p.adj)
  p.adj <- match.arg(p.adj)
  check_options(p.adj)
  l      <- ols_prep_avplot_data(model)
  n      <- nrow(l)

  response <-
    l %>%
    names() %>%
    extract(1)

  predictors <-
    l %>%
    names() %>%
    extract(-1)

  if (fitted.values) {
    vars <- NA

    if (rhs) {
      if (multiple) {

        start <- bp_case_one(l, model)
        loops <- bp_case_loop(start$np, start$nam, start$l)
        inter <- bp_case_inter(start$l, start$np, loops$tstat)
        bp    <- inter$bp
        p     <- bp_case_adj(method, loops$pvals, start$np, inter$ps)

      } else {
        result <- bp_case_2(l, model)
        bp     <- result$bp
        p      <- pchisq(bp, df = result$df, lower.tail = FALSE)
      }
    } else {
      bp <- bp_case_3(model)
      p  <- pchisq(bp, df = 1, lower.tail = FALSE)
    }
  } else {
    if (multiple) {
      if (rhs) {
        start <- bp_case_one(l, model)
        loops <- bp_case_loop(start$np, start$nam, start$l)
        inter <- bp_case_inter(start$l, start$np, loops$tstat)
        bp    <- inter$bp
        p     <- bp_case_adj(method, loops$pvals, start$np, inter$ps)

      } else {
        len_vars <- length(vars)

        if (len_vars > 1) {
          start   <- bp_case_one(l, model)
          len_var <- length(vars)
          loops   <- bp_case_loop(len_var, vars, start$l)
          inter   <- bp_case_5_inter(l, model, vars, loops$tstat)
          bp      <- inter$bp
          p       <- bp_case_adj(method, loops$pvals, inter$np, inter$ps)
        } else {
          result <- bp_case_7(l, model, vars)
          bp     <- result$bp
          p      <- pchisq(bp, df = result$df, lower.tail = FALSE)
        }
      }
    } else {
      if (rhs) {
        result <- bp_case_6(l, model)
        bp     <- result$bp
        p      <- pchisq(bp, df = result$df, lower.tail = FALSE)
      } else {
        result <- bp_case_7(l, model, vars)
        bp     <- result$bp
        p      <- pchisq(bp, df = result$df, lower.tail = FALSE)
      }
    }
  }

  out <- list(
    bp       = bp,
    p        = p,
    fv       = fitted.values,
    rhs      = rhs,
    multiple = multiple,
    padj     = method,
    vars     = vars,
    resp     = response,
    preds    = predictors
  )

  class(out) <- "ols_test_breusch_pagan"

  return(out)
}

#' @export
#' @rdname ols_test_breusch_pagan
#' @usage NULL
#'
ols_bp_test <- function(model, fitted.values = TRUE, rhs = FALSE, multiple = FALSE,
                        p.adj = c("none", "bonferroni", "sidak", "holm"), vars = NA) {
  .Deprecated("ols_test_breusch_pagan()")
}


#' @export
#'
print.ols_test_breusch_pagan <- function(x, ...) {
  print_bp_test(x)
}

#' @description
#' Computes breusch pagan statistics and degrees of freedom when:
#' * fit = TRUE
#' * rhs = TRUE
#' * multiple = TRUE
#'
#' @param l A tibble created using `avplots_data()`.
#' @param model An object of class \code{lm}.
#'
#' @noRd
#'
bp_case_2 <- function(l, model) {

  n         <- model_rows(model)
  var_resid <- residual_var(model, n)
  ind       <- ind_bp(model, var_resid)

  df <-
    l %>%
    select(-1) %>%
    ncol()

  l %<>%
    select(-1) %>%
    bind_cols(ind)

  bp <- bp_model(l)

  list(bp = bp, df = df)

}

#' @description
#' Computes breusch pagan statistics and degrees of freedom when:
#' * fit = TRUE
#' * rhs = FALSE
#'
#' @param model An object of class \code{lm}.
#'
#' @noRd
#'
bp_case_3 <- function(model) {

  `Sum Sq`     <- NULL
  pred         <- fitted(model)
  scaled_resid <- resid_scaled(model, pred)

  lm(scaled_resid ~ pred) %>%
    anova() %>%
    use_series(`Sum Sq`) %>%
    extract(1) %>%
    divide_by(2)

}

#' @description
#' Computes breusch pagan statistics and degrees of freedom when:
#' * fit = FALSE
#' * rhs = TRUE
#' * multiple = FALSE
#'
#' @param l A tibble created using `avplots_data()`.
#' @param model An object of class \code{lm}.
#'
#' @noRd
#'
bp_case_6 <- function(l, model) {

  n         <- nrow(l)
  var_resid <- residual_var(model, n)
  ind       <- ind_bp(model, var_resid)

  np <-
    l %>%
    names() %>%
    extract(-1) %>%
    length()

  bp <-
    l %>%
    bind_cols(ind) %>%
    select(-1) %>%
    bp_model()

  list(bp = bp, df = np)

}

#' @description
#' Computes breusch pagan statistics and degrees of freedom when:
#' * fit = FALSE
#' * rhs = FALSE
#' * multiple = FALSE
#'
#' @importFrom rlang !!! syms
#' @importFrom magrittr subtract
#'
#' @param l A tibble created using `avplots_data()`.
#' @param model An object of class \code{lm}.
#' @param vars Variables to be used for the test.
#'
#' @noRd
#'
bp_case_7 <- function(l, model, vars) {

  n         <- nrow(l)
  var_resid <- residual_var(model, n)
  ind       <- ind_bp(model, var_resid)

  l %<>%
    select(!!! syms(vars)) %>%
    bind_cols(ind)

  bp <- bp_model(l)

  nd <-
    l %>%
    ncol() %>%
    subtract(1)

  list(bp = bp, df = nd)

}

#' @description Fit model using the columns in the data set.
#'
#' @param l A tibble created using `avplots_data()`.
#'
#' @noRd
#'
bp_model <- function(l) {

  l %>%
    lm(ind ~ ., data = .) %>%
    bp_fit()

}

bp_fit <- function(l) {

  l %>%
    fitted() %>%
    raise_to_power(2) %>%
    sum() %>%
    divide_by(2)

}

ind_bp <- function(model, var_resid) {

  model %>%
    residuals() %>%
    raise_to_power(2) %>%
    divide_by(var_resid) %>%
    subtract(1) %>%
    tibble() %>%
    set_colnames("ind")

}

#' @description
#' Computes breusch pagan statistics and degrees of freedom when:
#' * fit = TRUE
#' * rhs = TRUE
#' * multiple = TRUE
#'
#' @param l A tibble created using `avplots_data()`.
#' @param model An object of class \code{lm}.
#'
#' @noRd
#'
bp_case_one <- function(l, model) {

  nam <-
    l %>%
    names() %>%
    extract(-1)

  np        <- length(nam)
  n         <- nrow(l)
  var_resid <- residual_var(model, n)
  ind       <- ind_bp(model, var_resid)

  l %<>%
    bind_cols(ind)

  list(np = np, nam = nam, l = l)

}

#' @description
#' Computes breusch pagan statistic and p values when multiple = TRUE
#'
#' @noRd
#'
bp_case_loop <- function(np, nam, l) {

  tstat <- c()
  pvals <- c()

  for (i in seq_len(np)) {

    form <- as.formula(paste("ind ~", nam[i]))

    tstat[i] <-
      l %>%
      lm(form, data = .) %>%
      bp_fit()

    pvals[i] <- pchisq(tstat[i], df = 1, lower.tail = FALSE)

  }

  list(tstat = tstat, pvals = pvals)

}

#' @description
#' Computes breusch pagan statistic and p values when multiple = FALSE.
#'
#' @noRd
#'
bp_case_inter <- function(l, np, tstat) {

  comp <-
    l %>%
    select(-1) %>%
    lm(ind ~ ., data = .) %>%
    bp_fit()

  ps <- pchisq(comp, df = np, lower.tail = FALSE)

  bp <-
    comp %>%
    prepend(tstat)

  list(bp = bp, ps = ps)

}

#' @description Computes adjusted p values.
#'
#' @noRd
#'
bp_case_adj <- function(method, pvals, np, ps) {

  if (method == "bonferroni") {

    bpvals <- pmin(1, pvals * np)
    p      <- c(bpvals, ps)

  } else if (method == "sidak") {

    spvals <- pmin(1, 1 - (1 - pvals) ^ np)
    p      <- c(spvals, ps)

  } else if (method == "holm") {

    j <-
      pvals %>%
      length() %>%
      seq_len(.) %>%
      rev()

    h <-
      pvals %>%
      order() %>%
      order()

    pvals_sort <-
      pvals %>%
      sort() %>%
      multiply_by(j)

    pholms <-
      pmin(1, pvals_sort) %>%
      extract(h)

    p <- c(pholms, ps)

  } else {

    p <- c(pvals, ps)

  }

  return(p)

}

#' @description Computes breusch pagan statistic and p values when:
#' * fit = FALSE
#' * rhs = FALSE
#' * multiple = FALSE
#'
#' @noRd
#'
bp_case_5_inter <- function(l, model, vars, tstat) {

  n         <- nrow(l)
  var_resid <- residual_var(model, n)
  ind       <- ind_bp(model, var_resid)

  l %<>%
    select(-1) %>%
    select(!!! syms(vars)) %>%
    bind_cols(ind)

  np <-
    l %>%
    ncol() %>%
    subtract(1)

  ps <-
    l %>%
    bp_model() %>%
    pchisq(df = np, lower.tail = FALSE)

  bp <-
    l %>%
    bp_model() %>%
    prepend(tstat)

  list(bp = bp, ps = ps, np = np)

}
cmlopera/olsrr documentation built on May 26, 2019, 10:34 a.m.