This vignette will assess the accuracy and efficiency of the checknormality implementations of the Shapiro-Wilk Test

knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
library(checknormality)
library (bench)
library (dplyr)
library (ggbeeswarm)
set.seed(800)

print_accuracy_results <- function (dat, num) {
  trial1 <- stats::shapiro.test(dat)
  trial2 <- sw_test(dat, approach = "original")
  trial3 <- sw_test(dat, approach = "modified")
  trial4 <- sw_test(dat, approach = "royston", use_c = F)
  trial5 <- sw_test(dat, approach = "royston", use_c = T)

  toprint <- data.frame (
    description = c("stats", "checknormality_original", "checknormality_modified",
                    "checknormality_royston_R", "checknormality_royston_Rcpp"),
    trial_values = c(trial1[[num]], trial2[[num]], trial3[[num]], trial4[[num]], 
                     trial5[[num]])) 

  t1 <- rep (toprint$trial_values[1], each = 5)
  t2 <- toprint$trial_values

  toreturn <- cbind (toprint, "compared to stats" = mapply (all.equal, t1, t2)) %>%
    as.data.frame() 

  return (toreturn)
}

print_accuracy_results_roy <- function (dat, num) {
  trial1 <- stats::shapiro.test(dat)
  trial2 <- sw_test(dat, approach = "royston", use_c = F)
  trial3 <- sw_test(dat, approach = "royston", use_c = T)

  toprint <- data.frame (
    description = c("stats", "checknormality_royston_R", "checknormality_royston_Rcpp"),
    trial_values = c(trial1[[num]], trial2[[num]], trial3[[num]])) 

  t1 <- rep (toprint$trial_values[1], each = 3)
  t2 <- toprint$trial_values

  toreturn <- cbind (toprint, "compared to stats" = mapply (all.equal, t1, t2)) %>%
    as.data.frame() 

  return (toreturn)
}

print_efficiency_results <- function (dat, num, cutoff) {

  toreturn <- bench::mark (round (stats::shapiro.test(dat)[[num]], cutoff),
  round (checknormality::sw_test(dat, approach = "original")[[num]], cutoff),
  round (checknormality::sw_test(dat, approach = "modified")[[num]], cutoff),
  round (checknormality::sw_test(dat, approach = "royston", use_c = F)[[num]], cutoff),
  round (checknormality::sw_test(dat, approach = "royston", use_c = T)[[num]], cutoff))

  return (toreturn)
}

print_efficiency_results_roy <- function (dat, num, cutoff) {

  toreturn <- bench::mark (round (stats::shapiro.test(dat)[[num]], cutoff),
  round (checknormality::sw_test(dat, approach = "royston", use_c = F)[[num]], cutoff),
  round (checknormality::sw_test(dat, approach = "royston", use_c = T)[[num]], cutoff))

  return (toreturn)
}

Small Sample (n = 40)

Assessing accuracy

set1 <- rnorm (40, 0, 1)

# to compare the W test statistic
results1Wa <- print_accuracy_results (set1, 1)
knitr::kable(results1Wa,
             caption = "Comparing W Test Statistic Across Implementations")
# to compare the p value
results1Pa <- print_accuracy_results (set1, 2)
knitr::kable(results1Pa,
             caption = "Comparing P-Value Across Implementations")

Assessing Efficiency

#to compare efficiency
results1We <- print_efficiency_results(set1, 1, 0.005)
plot (results1We)

Medium Sample (n = 400, only Royston approach is used)

Assessing accuracy

set2 <- rnorm (400, 0, 1)

# to compare the W test statistic
results2Wa <- print_accuracy_results_roy (set2, 1)
knitr::kable(results2Wa,
             caption = "Comparing W Test Statistic Across Implementations")
# to compare the p value
results2Pa <- print_accuracy_results_roy (set2, 2)
knitr::kable(results2Pa,
             caption = "Comparing P-Value Across Implementations")
#to compare efficiency
results2We <- print_efficiency_results_roy(set2, 1, 0.0004)
plot (results2We)

Large Sample (n = 4000, only Royston approach is used)

set3 <- rnorm (4000, 0, 1)

# to compare the W test statistic
results3Wa <- print_accuracy_results_roy (set3, 1)
knitr::kable(results3Wa,
             caption = "Comparing W Test Statistic Across Implementations")
# to compare the p value
results3Pa <- print_accuracy_results_roy (set3, 2)
knitr::kable(results3Pa,
             caption = "Comparing P-Value Across Implementations")
#to compare efficiency
results3We <- print_efficiency_results_roy (set3, 1)

plot (results3We)

Conclusions



chrsshn/checknormality documentation built on Dec. 31, 2020, 10:01 p.m.