R/summ.R

Defines functions summ summ.default summ.numeric summ.list summ.data.frame

Documented in summ summ.data.frame summ.default summ.list summ.numeric

#' @title Number Summary for numerical data
#'
#' @description
#'
#' \code{summ} generates seven summary statistics for numerical data and tests
#' for normality on the fly.
#'
#' @param x numeric object, list or data frame
#' @param y a factor or character object
#' @param data a data frame object (Optional)
#' @param rnd specify rounding of numbers. See \code{\link{round}}.
#' @param na.rm A logical value to specify missing values, <NA> in the table
#' @param print.table logical value to display formatted outputs
#' @param ... optional arguments
#'
#' @import stats
#' @details
#' \code{summ} reports number of observations in the dataset, missing data,
#' seven number summary statistics, coefficient of variation (CV.) and normality
#' test.
#' Normality test is perfomed by Shapiro-Wilk Normality Test. See more at
#' \code{\link{shapiro.test}}.
#'
#' CV = Standard Deviation / MEAN * 100
#'
#' \strong{ANNOTATIONS}
#'
#' Obs. = observation
#'
#' NA. = missing data
#'
#' Mean = Mean value
#'
#' Std.Dev = Standard deviation
#'
#' Median = Median value
#'
#' Q1 = First quartile or percentile
#'
#' Q3 = Third quartile or percentile
#'
#' Min = Minimum value
#'
#' Max = Maximum value
#'
#' Normal.test = test by Shapiro-Wilk Normality Test
#'
#' @seealso \code{\link{summBy}}, \code{\link{tab}}, \code{\link{xtab}}
#' @keywords number summary, statistics, descriptive, five number
#' @author Myo Minn Oo (Email: \email{dr.myominnoo@@gmail.com} |
#' Website: \url{https://myominnoo.github.io/})
#' @examples
#' \dontrun{
#' # single variable
#' summ(age, infert)
#' summ(infert$age)
#'
#' # multiple variable
#' summ(c(age, case, parity), infert)
#'
#' # whole dataframe
#' summ(infert)
#' summ(iris)
#' summ(mtcars)
#'
#' # subset: one category and one number
#' infert.new <- keep(infert, education, age)
#' summ(infert.new)
#' }


#' @export
summ <- function(x, data = NULL, rnd = 1, na.rm = FALSE,
                 print.table = TRUE)
{
  arguments <- as.list(match.call())
  x.name <- (deparse(substitute(x)))
  x.name <- unlist(strsplit(gsub("^c\\(|\\)$", "", x.name), ","))

  catch <- tryCatch(is.data.frame(x), error=function(e) {})
  if (!is.null(data))
    x <- eval(substitute(x), data)
  if (is.null(catch)) catch <- FALSE
  if (catch) x <- data.frame()
  if (length(x.name) > 1) x <- list()
  UseMethod("summ", x)
}


#' @rdname summ
#' @export
summ.default <- function(...)
{
  stop("... Wrong data type ...")
}

#' @rdname summ
#' @export
summ.numeric <- function(x, data = NULL, rnd = 1, na.rm = FALSE,
                         print.table = TRUE)
{
  x.name <- deparse(substitute(x))
  if (!is.null(data)) {
    x <- eval(substitute(x), data)
  }

  len <- ifelse(na.rm, length(x[!is.na(x)]), length(x))
  na <- length(x[is.na(x)])
  na.rm <- TRUE
  mu <- mean(x, na.rm = na.rm)
  std <- sd(x, na.rm = na.rm)
  cv <- std / mu * 100
  q <- round(quantile(x, probs = c(0, .25, .5, .75, 1), na.rm = na.rm), rnd)
  v <- round(c(mu, std, cv, q), rnd)

  pvalue <- tryCatch({
    suppressWarnings(shapiro.test(x)$p.value)
  }, error = function(err) {
    return(NA)
  })
  pvalue <- sprintf(pvalue, fmt = '%#.5f')

  f <- data.frame(Obs. = len, NA. = na, Mean = v[1], Std.Dev = v[2],
                  Median = v[6], Q1 = v[5], Q3 = v[7],
                  Min = v[4], Max = v[8],
                  CV. = v[3], Normal.test = pvalue,
                  stringsAsFactors = FALSE)
  row.names(f) <- x.name

  x.lbl <- attr(x, "label")
  x.lbl <- ifelse(is.null(x.lbl), "NULL", x.lbl)
  texts <- paste("Number Summary: ", x.name, "\n",
                 "label: ", paste0(x.lbl), sep = "", collapse = "")

  if (print.table) printText(f, texts, "label: ")

  invisible(f)
}


#' @rdname summ
#' @export
summ.list <- function(x, data = NULL, rnd = 1, na.rm = FALSE,
                      print.table = TRUE)
{
  arguments <- as.list(match.call())
  x.name <- deparse(substitute(x))
  x.name <- unlist(strsplit(gsub("^c\\(|\\)$", "", x.name), ","))
  x.name <- gsub(" ", "", x.name)

  data.list <- NULL
  if (is.null(data)) {
    data <- lapply(x.name, function(z) eval(parse(text = z)))
    data <- as.data.frame(data)
    names(data) <- x.name
  } else {
    data <- data[, x.name]
    names(data) <- x.name
  }

  f <- do.call(rbind, lapply(data, function(z){
    summ.numeric(z, na.rm = na.rm, rnd = rnd, print.table = FALSE) }))

  x.lbl <- (sapply(x.name, function(z) attr(data[, z], "label")))
  if (print.table) {
    texts <- paste0("Number Summary: ", paste(x.name, collapse = " | "), collapse = "")
    printText(f, texts)

    if (!is.null(unlist(x.lbl))) {
      printMsg(paste0("labels: "))
      for (i in 1:length(x.name)) {
        printMsg(paste0(x.name[i], ": ", x.lbl[i], collapse = ""))
      }
    }
  }

  invisible(f)
}

#' @rdname summ
#' @export
summ.data.frame <- function(x, data = NULL, rnd = 1, na.rm = FALSE,
                            print.table = TRUE)
{
  data <- x
  vars <- names(x)
  type.numeric <- c("int", "double", "numeric")

  vars.type <- sapply(vars, function(z) class(unlist(x[ , z])))
  vars.names <- vars[(vars.type %in% type.numeric)]
  data <- data[, vars.names]

  if (is.data.frame(data)) {
    if (ncol(data) == 0)
      stop("... no numerical variables found ...")
    names.invalid <- grep("^([[:alpha:]]|[.][._[:alpha:]])[._[:alnum:]]*$",
                          vars.names, value = TRUE, invert = TRUE)
    if (length(names.invalid) > 0) {
      vars.names[vars.names %in% names.invalid] <- paste0("v", names.invalid)
      names(data) <- vars.names
    }
  }

  if (is.data.frame(data)) {
    f <- do.call(rbind, lapply(data, function(z){
      summ.numeric(z, na.rm = na.rm, rnd = rnd, print.table = FALSE) }))
    x.lbl <- lapply(data, function(z) attr(z, "label"))
  } else {
    f <- summ.numeric(data, na.rm = na.rm, rnd = rnd, print.table = FALSE)
    row.names(f) <- vars.names
    x.lbl <- attr(data, "label")
  }

  if (print.table) {
    if (is.data.frame(data)) {
      texts <- paste0("Number Summary: ",
                      paste(vars.names, collapse = " | "),
                      collapse = "")
      printText(f, texts)

      x.lbl <- (sapply(vars.names, function(z) attr(data[, z], "label")))
      if (!is.null(unlist(x.lbl))) {
        printMsg(paste0("labels: "))
        for (i in 1:length(vars.names)) {
          printMsg(paste0(vars.names[i], ": ", x.lbl[i], collapse = ""))
        }
      }

    } else {
      x.lbl <- ifelse(is.null(x.lbl), "NULL", x.lbl)
      texts <- paste0("Number Summary: ", vars.names, "\n",
                      "label: ", paste0(x.lbl), collapse = "")
      printText(f, texts, "label: ")
    }
  }

  invisible(f)
}
myominnoo/mStats_beta documentation built on Feb. 29, 2020, 8:17 a.m.