# R/auto_normalize.r

#' @title auto_normalize
#'
#' @description Applies a Box-Cox transform (via \code{apply_bcskew0}) to a
#'   numeric vector and gathers the evidence needed to judge whether the
#'   transform helped: side-by-side density plots and the D'Agostino skewness,
#'   Anscombe-Glynn kurtosis and Shapiro-Wilk tests, each computed on both the
#'   original and the transformed values.
#'
#' @importFrom magrittr "%>%"
#' @importFrom moments agostino.test anscombe.test
#' @importFrom ggpubr annotate_figure ggarrange ggline
#' @importFrom stats density shapiro.test
#' @export
#'
#' @param x the Numeric vector to transform
#' @return a named list with three elements:
#'   \describe{
#'     \item{transformed}{the result of \code{apply_bcskew0(x)}}
#'     \item{plot}{a two-panel figure with the density of \code{x} and of the
#'       transformed values}
#'     \item{stats}{a data.frame with one row per test and the columns
#'       \code{Test}, \code{Origional} and \code{Transformed}, holding the
#'       formatted statistic and p-value}
#'   }
#'
#' @author Mark Newman, \email{mark@trinetteandmark.com}
#' @keywords macros
#' @family normalization
#'
#' @examples
#'   \dontshow{
#'     library(moments)
#'     library(ggpubr)
#'     library(stats)
#'     library(mnmacros)}
#'   model <-
#'     testdata.apply_bcskew0 %>%
#'     auto_normalize()
#'   model$stats
#'   model$plot %>% plot()
#'   model$transformed %>% head()
#'
auto_normalize <- function(x) {
  # Fail early with a clear message: density() and the normality tests below
  # all reject non-numeric input anyway, just less legibly.
  if (!is.numeric(x)) {
    stop("`x` must be a numeric vector", call. = FALSE)
  }

  bc0 <- x %>% apply_bcskew0()

  # Kernel density estimates of the original and the transformed values.
  # The column names X1 / X2 are what the ggline() calls below refer to.
  d1 <- x %>% density()
  d2 <- bc0 %>% density()
  d1b <- data.frame(X1 = d1$x, X2 = d1$y)
  d2b <- data.frame(X1 = d2$x, X2 = d2$y)

  p <- ggarrange(
    ggline(
      d1b,
      x = "X1",
      y = "X2",
      plot_type = "l",
      ylab = "Density",
      xlab = sprintf("Bandwidth = %.3f", d1$bw),
      title = "Original",
      numeric.x.axis = TRUE
    ),
    ggline(
      d2b,
      x = "X1",
      y = "X2",
      plot_type = "l",
      # The y axis label is only shown on the left-hand panel.
      ylab = FALSE,
      xlab = sprintf("Bandwidth = %.3f", d2$bw),
      title = "Box-Cox Transform",
      numeric.x.axis = TRUE
    ),
    ncol = 2,
    nrow = 1
  ) %>%
    annotate_figure(top = sprintf("Density Plot (n = %s)", length(x)))

  # The same three tests on the original (os*) and transformed (ts*) values.
  os1 <- x %>% agostino.test()
  os2 <- x %>% anscombe.test()
  os3 <- x %>% shapiro.test()
  ts1 <- bc0 %>% agostino.test()
  ts2 <- bc0 %>% anscombe.test()
  ts3 <- bc0 %>% shapiro.test()

  # Laid out as (test name, original, transformed) triples so that
  # matrix(byrow = TRUE, ncol = 3) yields one row per test.
  stats <- c(
    "agostino",
    sprintf("skew = %.3f (p = %.3f)", os1$statistic["skew"], os1$p.value),
    sprintf("skew = %.3f (p = %.3f)", ts1$statistic["skew"], ts1$p.value),
    "anscombe",
    sprintf("kurt = %.3f (p = %.3f)", os2$statistic["kurt"], os2$p.value),
    sprintf("kurt = %.3f (p = %.3f)", ts2$statistic["kurt"], ts2$p.value),
    "shapiro",
    sprintf("W = %.3f (p = %.3f)", os3$statistic["W"], os3$p.value),
    sprintf("W = %.3f (p = %.3f)", ts3$statistic["W"], ts3$p.value)
  ) %>%
    matrix(byrow = TRUE, ncol = 3) %>%
    data.frame()
  # NOTE: the "Origional" spelling is kept on purpose; it is part of the
  # returned data.frame's shape and callers may index by it.
  colnames(stats) <- c("Test", "Origional", "Transformed")

  list(transformed = bc0, plot = p, stats = stats)
}
# markanewman/AnalysisMacros documentation built on May 22, 2019, 2:41 p.m.