In jlopezper/whitestrap: White Test and Bootstrapped White Test for Heteroskedasticity

knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.path = "man/figures/README-",
  # out.width = "100%"
  fig.width = 8, 
  fig.height = 5.5
)

whitestrap

This package presents the White's Test of Heterocedasticity and a bootstrapped version of it, developed under the methodology of Jeong, J., Lee, K. (1999) (see references for further details).

Installation

You can install the development version from GitHub with:

# install.packages("devtools")
devtools::install_github("jlopezper/whitestrap")

Example

This is an example which shows you how you can use the white_test and white_test_boot functions:

library(whitestrap)

set.seed(123)
# Let's simulate some heteroscedastic data
n <- 100
y <- 1:n
sd <- runif(n, min = 0, max = 4)
error <- rnorm(n, 0, sd*y)
X <- y + error
df <- data.frame(y, X)
# OLS model
fit <- lm(y ~ X, data = df)

# White's test and Bootstrapped White's test
white_test(fit)
white_test_boot(fit)

In either case, the returned object is a list with the value of the statistical test and the p-value of the test. For the bootstrapped version, the number of bootstrap samples is also provided.

names(white_test(fit))
names(white_test_boot(fit))

Comparison between the original and the bootstrap version

One way to compare the results of both tests is through simulations. The following plot shows the distribution of 500 simulations where the p-value of both tests is computed. The data used for this purpose were artificially generated to be heterocedastic, so low p-values are desirable.

load("./R/sysdata.rda")

library(ggplot2)
simulation_data <- tidyr::gather(simulation_data, -sample_size, key='ptypes', value='values')
simulation_data <- dplyr::mutate(simulation_data, sample_size = paste0('sample size: ', sample_size))
simulation_data$sample_size <- factor(simulation_data$sample_size, 
                                      levels = c("sample size: 20",
                                                 "sample size: 30",
                                                 "sample size: 40",
                                                 "sample size: 50",
                                                 "sample size: 60",
                                                 "sample size: 80",
                                                 "sample size: 100",
                                                 "sample size: 200",
                                                 "sample size: 300"))

ggplot(simulation_data, aes(x=values, fill=ptypes)) +
  geom_density(alpha=0.4, color=NA) +
  theme_minimal() +
  facet_wrap(sample_size~., scales="free") +
  scale_fill_brewer(palette="Set2", labels = c('P-value', 'Bootstrapped p-value')) +
  labs(x = 'P-values',
       title = "P-values from White's test and Bootstrapped White's test",
       subtitle = "500 simulations with data suffering from heteroscedasticity",
       fill = "") +
  theme(strip.text.x = element_text(size = 11),
        plot.title = element_text(size=15),
        plot.subtitle = element_text(size=13),
        legend.text = element_text(size=11),
        axis.text  = element_text(size=11),
        axis.title  = element_text(size=11),
        legend.position = 'bottom',
        panel.border = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.line = element_line(colour = "black"))

If we look at the cumulative distribution functions of both p-value distributions, we see that in small samples the bootstrapped test returns smaller p-values with higher probability.

# create ECDF of data
ggplot(simulation_data, aes(x = values, group = ptypes, color = ptypes))+
  stat_ecdf(size=.5) +
  facet_wrap(sample_size~.,scales="free") +
  scale_color_brewer(palette="Set2", labels = c('P-value', 'Bootstrapped p-value'))  +
  theme_minimal() +
  theme(legend.position ="bottom") +
  xlab("P-Value") +
  ylab("CDF")  +
  labs(title = "CDF of p-values from White's test and Bootstrapped White's test",
       subtitle = "500 simulations with data suffering from heteroscedasticity",
       color = "") +
  theme(strip.text.x = element_text(size = 11),
        plot.title = element_text(size=15),
        plot.subtitle = element_text(size=13),
        legend.text = element_text(size=11),
        axis.text  = element_text(size=11),
        axis.title  = element_text(size=11),
        legend.position = 'bottom',
        panel.border = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.line = element_line(colour = "black"))

In order to check for differences between the two distributions, a two-sample Kolmogorov–Smirnov test is run. In this case, we'll test whether one distribution stochastically dominates another, so the test will be run for both alternative sides (CDF (BW) > CDF (W) and CDF (BW) < CDF (W)). We see from the results that CDF (BW) statistically outperforms CDF (W) for samples < 60. No differences are appreciated with samples greater than or equal to 60.

load("./R/sysdata.rda")

ks_less <- sapply(unique(simulation_data$sample_size), function (x) {
  ttt <-  dplyr::filter(simulation_data, sample_size == x)
  ks.test(ttt$pvalues_boot, ttt$pvalues, alternative = 'less', exact = TRUE)$p.value
})

ks_greater <- sapply(unique(simulation_data$sample_size), function (x) {
    ttt <-  dplyr::filter(simulation_data, sample_size == x)
    ks.test(ttt$pvalues_boot, ttt$pvalues, alternative = 'greater', exact = TRUE)$p.value
  })


ddd <- data.frame(
  sample_size = unique(simulation_data$sample_size),
  p_values_less = ks_less,
  p_values_greater = ks_greater
)

ddd <- tidyr::gather(ddd, -sample_size, key='ptypes', value='values')
ggplot(ddd, aes(x=as.factor(sample_size), y=values, group = ptypes)) +
  geom_line(aes(color = ptypes)) +
  geom_point(aes(color = ptypes)) +
  scale_color_brewer(palette="Set2", labels = c('CDF (BW) > CDF (W)', 'CDF (BW) < CDF (W)'))  +
  theme_minimal() +
  labs(x = 'Sample size',
       y = 'P-value',
       color = "Alternative hypothesis",
       title = 'Two-sample Kolmogorov–Smirnov test',
       caption = "W: White's test; BW: Bootstrapped White's test") +
  theme(strip.text.x = element_text(size = 11),
        plot.title = element_text(size=15),
        plot.subtitle = element_text(size=13),
        legend.title = element_text(size=11.5),
        legend.text = element_text(size=11),
        axis.text  = element_text(size=11),
        axis.title  = element_text(size=11),
        legend.position = 'bottom',
        panel.border = element_blank(),
        # panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.line = element_line(colour = "black"))

References

Jeong, J., & Lee, K. (1999). Bootstrapped White’s test for heteroskedasticity in regression models. Economics Letters, 63(3), 261-267.
White, H. (1980). A Heteroskedasticity-Consistent Covariance Matrix Estimator and a Direct Test for Heteroskedasticity. Econometrica, 48(4), 817-838.
Wooldridge, Jeffrey M., 1960-. (2012). Introductory econometrics : a modern approach. Mason, Ohio, South-Western Cengage Learning.

jlopezper/whitestrap documentation built on June 6, 2020, 5:46 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

jlopezper/whitestrap
White Test and Bootstrapped White Test for Heteroskedasticity

In jlopezper/whitestrap: White Test and Bootstrapped White Test for Heteroskedasticity

whitestrap

Installation

Example

Comparison between the original and the bootstrap version

References

R Package Documentation

Browse R Packages

We want your feedback!

jlopezper/whitestrap White Test and Bootstrapped White Test for Heteroskedasticity

In jlopezper/whitestrap: White Test and Bootstrapped White Test for Heteroskedasticity

whitestrap

Installation

Example

Comparison between the original and the bootstrap version

References

R Package Documentation

Browse R Packages

We want your feedback!

jlopezper/whitestrap
White Test and Bootstrapped White Test for Heteroskedasticity