feR: Basic functions that I use all the time

Documented in is.normal normal.test

#' normal.test
#'
#' Test if a sample is normal or not based on the best available test,
#' the default is Shapiro-Wilks but if conditions are not met then
#' Lilliefor's correction for Kolmogorov-Smirnof is used instead
#'
#' It also gives a boolean `is.normal` value based on p.sig
#'
#' @param x vector with numeric data.
#' @param digits sumber of decimal values (default = 2).
#' @param p.sig p value under witch we consider the test statistically significant (default = 0.05).
#' @param p.sig.small p value defined as "small" (default = 0.01).
#' @param p.sig.very.small p value defined as "very small", this value will be some
#'                         times represented as p<0.001 (default = 0.001).
#' @param stop.on.error if the function should stop on errors or just print them and go on (default=TRUE).
#' @param show.error wether to show an error (default=TRUE).
#'
#' @return a feR.normality that is a data.frame with:
#'  + is.normal: (bool) TRUE if p.value is below p.sig
#'  + p.exact.value: p value for the test
#'  + test: (character) test used -> SW for Shapiro-Wilks, Lillie(KS) for Lilliefor's correction for Kolmogorov-Smirnof test
#'  + statistic: (num) value of the statistic corresponding to the test performed
#'  + p.value: (num) p value rounded to "digits" decimal places
#'
#' @examples
#'
#' feR::normal.test(mtcars$mpg)
#'
#' @export
normal.test <- function(x, x.name=NULL, digits = 2,
                        p.sig = 0.05, p.sig.small = 0.01, p.sig.very.small = 0.001,
                        stop.on.error = TRUE, show.error = TRUE, DEBUG = FALSE) {



  x <- x[!is.na(x)]
  n.valid = length(x)

  nor.test.error = FALSE
  if (n.valid > 3 & n.valid < 5000) {
    nor.test = "SW"
    norm = tryCatch(shapiro.test(x),
                    error = function(e) {

                      if (stop.on.error == TRUE) {
                        stop(e)
                      } else {

                        if (show.error) print(e)
                      }

                      NA
                    })


  } else if (n.valid > 4) {
    nor.test = "Lillie (KS)"
    norm = tryCatch(nortest::lillie.test(x),
                error = function(e) {
                              if (stop.on.error) stop(e)
                              if (show.error) {
                                print(e)
                                cat("\n")
                              }
                              NA
                            })

  } else nor.test.error = TRUE




  if (!nor.test.error) {
    p.value = norm$p.value
    norm.stat = norm$statistic
    nor.test.NA = FALSE
    if (length(norm) <= 1) if (is.na(norm)) nor.test.NA = TRUE

  } else nor.test.NA = TRUE


  if (nor.test.NA) {
    p.value = NA
    norm.stat = NA
    nor.test = "n too low"
    nor.test.error = TRUE
  }


  # cat("\n Norm...5")
  if (!nor.test.error) is.normal = p.value > p.sig
  else is.normal = FALSE
  is.normal <- data.frame(is.normal = is.normal)
  is.normal$p.value = p.value
  is.normal$test = nor.test
  is.normal$statistic = norm.stat

  if ( (p.value <= p.sig.very.small) & !nor.test.error)  p.norm <- paste0(" <",p.sig.very.small)
  else p.norm <- round(p.value, digits = digits + 1)

  is.normal$p.value <- p.norm

  class(is.normal) = append("feR_normality", class(is.normal))
  attr(is.normal,"p.sig") = p.sig
  return(is.normal)
}


#' @export
is.feR_normality <- function(obj) {
  return("feR_normality" %in% class(obj))
}

#' is.normal
#'
#' Evaluates if a vector 'x' is follows a normal distribution or not
#'
#' If 'y' parameter is passed 'x' will be divided in as many groups as levels has 'y' and
#' return TRUE only if all groups of 'x' pass the normality test
#'
#' @param x numeric vector
#' @param y (default NULL) factor vector to be used as groups
#' @param p.sig (default=0.05) value under wich p has to be to reject normality
#'
#' @return (bool) TRUE if the vector is normal, FALSE in any other case
#' @examples
#'
#' is.normal(mtcars$mpg)
#'
#' is.normal(mtcars$mpg, y = mtcars$vs)
#'
#' @export
is.normal <- function(x, y=NULL, p.sig = 0.05, stop.on.error = TRUE) {

  UseMethod("is.normal",x)
}

#' @export
is.normal.feR_normality <- function(obj, stop.on.error = TRUE) {
  normal <- FALSE
  if ("p.sig" %in% names(attributes(obj))) p.sig = attr(obj,"p.sig")
  else p.sig = 0.05

  if (!is.na(obj$p.value)) if (obj$p.value < p.sig) normal <- TRUE
  return(normal)
}


#' @export
is.normal.feR_desc_num <- function(obj, stop.on.error = TRUE) {
  normal <- FALSE
  if ("p.sig" %in% names(attributes(obj))) p.sig = attr(obj,"p.sig")
  else p.sig = 0.05

  if (sum(is.na(obj$norm.p.value)) > 0) return(normal)

  if (all(obj$p.value > p.sig)) normal <- TRUE
  return(normal)

}


#' @export
is.normal.feR_describe_numeric <- is.normal.feR_desc_num

#' @export
is.normal.feR_describe_numeric_list <- is.normal.feR_desc_num



#' @export
is.normal.default <- function(x, y=NULL, p.sig = 0.05, stop.on.error = TRUE) {

  if (is.null(y)) {
    r <- feR::normal.test(x, p.sig = p.sig)
    return(r$is.normal)
  } else {
    if (!is.factor(y)) y <- factor(y)
    d <- data.frame(x = x,y = y)
    d <- na.omit(d)


    for(g in levels(d$y)) {
      n.x <- d$x[d$y==g]
      if (!is.normal(n.x, p.sig = p.sig)) return(FALSE)
    }
    return(TRUE)
  }
}