stats2: Tools of statistical analysis and data management

Documented in xtab

#' @title Cross tabulation
#' @description
#' \code{xtab} generates cross tabulation of two variables.
#' @param x a factor object
#' @param y a factor object
#' @param data a data frame object (Optional)
#' @param row.pct a logical value: if NULL, a default table without any percentages
#' is produced. If TRUE, row percentages are shown and if FALSE, column percentages.
#' @param na.rm A logical value to specify missing values, <NA> in the table
#' @param rnd specify rounding of numbers. See \code{\link{round}}.
#' @details
#' Exploring data before jumping into complex analysis is always a necessity.
#' The first step of an analysis is always to summarize and display data.
#'
#' \code{tab}
#' produce two-way table of frequencies
#'
#' \strong{References:}
#' \enumerate{
#'   \item Essential Medical Statistics, Betty R. Kirkwood & Jonathan A.C. Sterne,
#'   Second Edition. Chapter 3
#'   \item An Introduction to MEdical Statistics, Martin Bland, Thrid Edition,
#'   Chapter 4
#' }
#'
#' @seealso \code{\link{tab}}
#' @keywords two-by-two table, 2x2 table, two-way table
#' @author Myo Minn Oo (Email: \email{dr.myominnoo@@gmail.com} |
#' Website: \url{https://myominnoo.github.io/})
#' @examples
#' str(infert)
#' xtab(infert$education, infert$induced)
#' xtab(education, induced, infert)
#' xtab(case, induced, infert)
#' xtab(spontaneous, induced, infert)
#'
#' xtab(spontaneous, induced, infert, row.pct = FALSE)
#' xtab(spontaneous, induced, infert, rnd = 2)
#' xtab(spontaneous, induced, infert)

#' @export
xtab <- function(x, y, data = NULL, row.pct = TRUE, na.rm = FALSE, rnd = 1)
{
  if (!is.null(data)) {
    arguments <- as.list(match.call())
    x <- eval(substitute(x), data)
    y <- eval(substitute(y), data)
    x.name <- arguments$x
    y.name <- arguments$y
  } else {
    x.name <- deparse(substitute(x))
    if (!is.null(y)) y.name <- deparse(substitute(y))
  }
  na.rm <- ifelse(na.rm, "no", "ifany")
  row.pct <- ifelse(is.null(row.pct), "none",
                    ifelse(row.pct, "row",
                           ifelse(!row.pct, "column", NULL)))

  # get tables
  t <- table(x, y, useNA = na.rm)
  t.c <- rbind(t, Total = colSums(t))
  t.r <- cbind(t, Total = rowSums(t))
  t.f <- cbind(t.c, Total = rowSums(t.c))

  p.r <- round(t.f / rowSums(t.c) * 100, rnd)
  p.c <- round(t(t(t.f) / colSums(t.r)) * 100, rnd)

  t.c.p <- NULL; t.r.p <- NULL
  n.c.p <- NULL; n.r.p <- NULL # names for headers
  for (i in seq_len(ncol(t.f)))
  {
    t.c.p <- cbind(t.c.p, cbind(t.f[,i], p.c[,i]))
    n.c.p <- c(n.c.p, c(colnames(t.f)[i], "(c%)"))
    t.r.p <- cbind(t.r.p, cbind(t.f[,i], p.r[,i]))
    n.r.p <- c(n.r.p, c(colnames(t.f)[i], "(r%)"))
  }
  colnames(t.c.p) <- n.c.p
  colnames(t.r.p) <- n.r.p
  names(attributes(t.f)$dimnames) <- c(x.name, y.name)
  names(attributes(t.c.p)$dimnames) <- c(x.name, y.name)
  names(attributes(t.r.p)$dimnames) <- c(x.name, y.name)

  f <- switch(row.pct,
              none = t.f,
              row = t.r.p,
              column = t.c.p)

  if (na.rm == "no") {
    data <- data.frame(cbind(x = x, y = y))
    data <- na.omit(data)
    x <- data$x
    y <- data$y
  }

  pvalue <- tryCatch({
    suppressWarnings(chisq.test(x, y, correct = FALSE)$p.value)
  }, error = function(err) {
    return(NA)
  })
  pvalue <- c(
    pvalue,
    tryCatch({
      suppressWarnings(fisher.test(x, y, simulate.p.value = FALSE)$p.value)
    }, error = function(err) {
      return(NA)
    })
  )
  pvalue <- ifelse(pvalue < 0.00001, "< 0.00001", round(pvalue, 5))
  pvalue.name <- c("Chi-squared Test", "Fisher's Exact Test")

  cat(paste0("\nCross-tabulation: ", x.name, " x ", y.name, "\n",
             "Note: ", row.pct, " percentages",
             "\np-value (", pvalue.name[1], ") : ", pvalue [1],
             "\np-value (", pvalue.name[2], ") : ", pvalue [2], "\n\n"))

  return(f)
}