Nothing
# modified.chisq.test.R
#
# Author: Xuye Luo, Mingzhou Song
#
# Modified:
# December 20, 2025. Updated documentation
#
# December 14, 2025. Renamed the method
# name to improve clarity
#
# December 11, 2025
#
#' @title Zero-Tolerant Pearson's Chi-squared Test for Contingency Tables
#'
#' @description Performs Pearson's chi-squared test
#' \insertCite{pearson1900}{Upsilon}
#' on contingency tables, slightly modified
#' to handle rows or columns of all zeros.
#'
#' @references
#' \insertRef{pearson1900}{Upsilon}
#'
#' @details
#' This test is useful if \emph{p}-value must be returned
#' on a contingency table with valid non-negative counts,
#' where the build-in R implementation of
#' \code{\link[stats]{chisq.test}} could return \code{NA}
#' as \emph{p}-value, regardless of a pattern being
#' strong or weak. See Examples.
#'
#' Unlike \code{\link[stats]{chisq.test}}, this
#' function handles tables with empty rows or columns (where
#' expected values are 0) by calculating the test
#' statistic over non-zero entries only. This prevents
#' the result from becoming \code{NA}, while giving
#' meaningful \emph{p}-values.
#'
#' @note This function only takes contingency table
#' as input. It does not support goodness-of-fit
#' test on vectors.
#' It does \strong{not} offer an option
#' to apply Yates's continuity correction
#' on 2 \eqn{\times} 2 tables.
#'
#' @param x a matrix or data frame of floating or integer
#' numbers to specify a contingency table. Entries
#' must be non-negative.
#'
#' @param log.p a logical. If \code{TRUE},
#' the \emph{p}-value is calculated in
#' closed form to \strong{natural logarithm} of \emph{p}-value
#' to improve numerical precision when
#' \emph{p}-value approaches zero.
#' Defaults to \code{FALSE}.
#'
#' @return A list with class \code{"htest"} containing:
#' \item{statistic}{the chi-squared test statistic (calculated ignoring entries of 0-expected count).}
#' \item{parameter}{the degrees of freedom.}
#' \item{p.value}{the \emph{p}-value by the test.}
#' \item{estimate}{Cramér's \emph{V} statistic.}
#' \item{observed}{the observed counts.}
#' \item{expected}{the expected counts under the null hypothesis.}
#'
#' @importFrom stats pchisq
#' @export
#'
#' @examples
#' library("Upsilon")
#'
#' # A table with a dominant function and an empty column
#' x <- matrix(
#' c(0, 3, 0,
#' 3, 0, 0),
#' nrow = 2, byrow = TRUE)
#' print(x)
#'
#' # Standard chisq.test fails or returns NA warning
#' chisq.test(x)
#'
#' # Modified chi-squared test is significant:
#' modified.chisq.test(x)
modified.chisq.test <- function(x, log.p = FALSE) {
METHOD <- "Zero-tolerant chi-squared test"
DNAME <- deparse(substitute(x))
x <- as.matrix(x)
# Calculate totals
n <- sum(x)
nr <- nrow(x)
nc <- ncol(x)
k <- min(nr, nc)
# Calculate Expected values matrix
sr <- rowSums(x)
sc <- colSums(x)
E <- outer(sr, sc, "*") / n
# Initialize variables
STATISTIC <- 0
ESTIMATE <- 0
PARAMETER <- (nr - 1L) * (nc - 1L)
# Empty table, insufficient dimensions, or zero total count
if (n == 0 || k <= 1) {
PVAL <- if (log.p) 0 else 1
} else {
# Use na.rm=TRUE to ignore division by zero errors where E=0
term <- (x - E)^2 / E
STATISTIC <- sum(term, na.rm = TRUE)
# Calculate Cramer's V
# Avoid division by zero in effect size calculation
denom <- n * (k - 1)
if (denom > 0) {
ESTIMATE <- sqrt(STATISTIC / denom)
}
PVAL <- stats::pchisq(STATISTIC, PARAMETER, lower.tail = FALSE, log.p = log.p)
}
names(STATISTIC) <- "X-squared"
names(ESTIMATE) <- "Cram\uE9r's V"
names(PARAMETER) <- "df"
names(PVAL) <- "p.value"
structure(
list(
statistic = STATISTIC,
estimate = ESTIMATE,
parameter = PARAMETER,
p.value = PVAL,
method = METHOD,
data.name = DNAME,
observed = x,
expected = E
),
class = "htest"
)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.