R/isum.by.R

Defines functions isum.by

Documented in isum.by

#' @title Number summary statistics by groups
#' @description
#' \code{isum.by} generates seven number summary statistics and tests normality
#' on the fly grouped by a categorical variable.
#' @param x a numeric object
#' @param by a factor or character object
#' @param data a data frame object (Optional)
#' @param rnd specify rounding of numbers. See \code{\link{round}}.
#' @param na.rm A logical value to specify missing values, <NA> in the table
#' @details
#' Normality test is perfomed by Shapiro-Wilk Normality Test. See more at
#' \code{\link{shapiro.test}}.
#'
#' If the second variable has two levels, it performs either Student's t-test
#' \code{\link{t.test}} or Wilcoxon test (Mann-Whitney's test)
#' \code{\link{wilcox.test}}. If more than two levels, ANOVA
#' \code{\link{aov}} or Kruskal-Wallis rank sum test \code{\link{kruskal.test}}
#' is carried out to test the difference between different groups.
#' @import stats
#' @seealso \code{\link{isum}}, \code{\link{tab}}, \code{\link{xtab}}
#' @keywords number summary
#' @author Myo Minn Oo (Email: \email{dr.myominnoo@@gmail.com} |
#' Website: \url{https://myominnoo.github.io/})
#' @examples
#' str(infert)
#' isum.by(infert$age, infert$education)
#' isum.by(age, education, data = infert)
#' isum.by(age, spontaneous, data = infert)
#' isum.by(age, case, data = infert)

#' @export
isum.by <- function(x, by, data = NULL, rnd = 1, na.rm = FALSE)
{
  if (!is.null(data)) {
    arguments <- as.list(match.call())
    x <- eval(substitute(x), data)
    by <- eval(substitute(by), data)
    x.name <- arguments$x
    by.name <- arguments$by
  } else {
    x.name <- deparse(substitute(x))
    by.name <- deparse(substitute(by))
  }
  if (na.rm) {
    l <- as.character(unique(na.omit(by)))
    x <- x[!is.na(x)]
  } else
    l <- as.character(unique(by))
  l <- ifelse(is.na(l), "<NA>", l)
  i <- NA
  f <- do.call(
    rbind,
    lapply(l, function(z) {
      if (z == "<NA>") {i = x[which(is.na(by))]} else {i = x[which(by == z)]}
      len = length(i)
      na = length(i[is.na(i)])
      mu <- mean(i, na.rm = TRUE)
      std <- sd(i, na.rm = TRUE)
      q <- quantile(i, probs = c(0, .25, .5, .75, 1), na.rm = TRUE)
      v <- round(c(mu, std, q), rnd)
      pvalue <- tryCatch({
        suppressWarnings(shapiro.test(i)$p.value)
      }, error = function(err) {
        return(NA)
      })
      pvalue <- ifelse(pvalue < 0.00001, "< 0.00001", round(pvalue, 5))
      f <- data.frame(Obs. = len, NA. = na, Mean = v[1], Std.Dev = v[2],
                      Median = v[5], Q1 = v[4], Q3 = v[6],
                      Min = v[3], Max = v[7], Normality = pvalue,
                      stringsAsFactors = FALSE)
      row.names(f) <- z
      f
    })
  )

  pvalue <- tryCatch({
    suppressWarnings(shapiro.test(i)$p.value)
  }, error = function(err) {
    return(NA)
  })
  pvalue <- ifelse(pvalue < 0.00001, "< 0.00001", round(pvalue, 5))
  f <- rbind(
    f,
    "-" = rep("-", 10),
    combined = c(length(x), length(x[is.na(x)]),
                 round(mean(x, na.rm = TRUE), rnd),
                 round(sd(x, na.rm = TRUE), rnd),
                 round(quantile(x, probs = c(0, .25, .5, .75, 1), na.rm = TRUE), rnd),
                 pvalue)
  )

  if (length(l) > 2) {
    pvalue <- tryCatch({
      suppressWarnings(summary(aov(x ~ by))[[1]][1,5])
    }, error = function(err) {
      return(NA)})
    pvalue.name <- 'ANOVA'

    pvalue <- c(
      pvalue,
      tryCatch({
        suppressWarnings(kruskal.test(x ~ by)$p.value)
      }, error = function(err) {
        return(NA)})
    )
    pvalue.name <- c(pvalue.name, 'Kruskal-Wallis')

  } else {
    pvalue <- tryCatch({
      suppressWarnings(t.test(x ~ by)$p.value)
    }, error = function(err) {
      return(NA)
    })
    pvalue.name <- 't-test'

    pvalue <- c(
      pvalue,
      tryCatch({
        suppressWarnings(wilcox.test(x ~ by)$p.value)
      }, error = function(err) {
        return(NA)})
    )
    pvalue.name <- c(pvalue.name, 'Wilcoxon')
  }

  cat("\nNumber summary: ", x.name, " x ", by.name, "\np-value (",
      pvalue.name[1], ") : ", pvalue[1], "\np-value (",
      pvalue.name[2], ") : ", pvalue[2], "\n\n", sep = "")
  return(f)
}
myominnoo/stats2 documentation built on Nov. 4, 2019, 8:33 p.m.