Nothing
# modified.gtest.R
#
# Author: Xuye Luo, Mingzhou Song
#
# Modified:
# December 20, 2025. Updated documentation
#
# December 14, 2025. Renamed the method
# name to improve clarity
#
# December 11, 2025
#
#' @title Zero-Tolerant G-Test for Contingency Tables
#'
#' @description Performs \emph{G}-test
#' \insertCite{WOOLF:1957aa}{Upsilon}
#' on contingency tables, slightly modified
#' to handle rows or columns of all zeros.
#'
#' @references
#' \insertRef{WOOLF:1957aa}{Upsilon}
#'
#' @details
#' This test is useful if a \emph{p}-value must be returned
#' on a contingency table with valid non-negative counts,
#' where other implementations of \emph{G}-test could
#' return \code{NA} as the \emph{p}-value, regardless of a
#' pattern being strong or weak.
#'
#' This function handles tables with empty rows
#' or columns (where expected values are 0) by
#' calculating the test statistic over non-zero
#' entries only. This prevents the result from
#' becoming \code{NA}, while giving meaningful
#' \emph{p}-values.
#'
#' @param x a matrix or data frame of floating or integer
#' numbers to specify a contingency table. Entries
#' must be non-negative.
#'
#' @param log.p a logical. If \code{TRUE},
#' the \emph{p}-value is calculated in
#' closed form to \strong{natural logarithm} of \emph{p}-value
#' to improve numerical precision when
#' \emph{p}-value approaches zero.
#' Defaults to \code{FALSE}.
#'
#' @return A list with class \code{"htest"} containing:
#' \item{statistic}{the \emph{G} statistic (log-likelihood ratio).}
#' \item{parameter}{the degrees of freedom.}
#' \item{p.value}{the \emph{p}-value of the test.}
#' \item{estimate}{the value of mutual information.}
#' \item{method}{a character string indicating the method used.}
#' \item{data.name}{a character string, name of the input data.}
#' \item{observed}{the observed counts.}
#' \item{expected}{the expected counts under the null hypothesis.}
#'
#' @examples
#' library("Upsilon")
#'
#' # Create a sparse table with empty rows/cols
#' x <- matrix(
#' c(0, 3, 0,
#' 3, 0, 0),
#' nrow = 2, byrow = TRUE
#' )
#' print(x)
#' # Perform the modified G-test
#' modified.gtest(x)
#' @importFrom stats pchisq
#' @export
modified.gtest <- function(x, log.p = FALSE) {
METHOD <- "Zero-tolerant G-Test"
DNAME <- deparse(substitute(x))
x <- as.matrix(x)
# Basic properties
n <- sum(x)
nr <- nrow(x)
nc <- ncol(x)
k <- min(nr, nc)
# empty table or insufficient dimensions
if (n == 0 || k <= 1) {
STATISTIC <- 0
ESTIMATE <- 0
PARAMETER <- (nr - 1L) * (nc - 1L)
PVAL <- if (log.p) 0 else 1
E <- x
} else {
# Calculate Expected Frequencies
sr <- rowSums(x)
sc <- colSums(x)
E <- outer(sr, sc, "*") / n
term <- x * log(x / E)
STATISTIC <- 2 * sum(term, na.rm = TRUE)
# Degrees of Freedom
PARAMETER <- (nr - 1L) * (nc - 1L)
# P-value
PVAL <- stats::pchisq(STATISTIC, PARAMETER, lower.tail = FALSE, log.p = log.p)
# Mutual Information: I = G / 2N
ESTIMATE <- STATISTIC / (2 * n)
}
names(STATISTIC) <- "Log-likelihood Ratio (G)"
names(PARAMETER) <- "df"
names(PVAL) <- "p.value"
names(ESTIMATE) <- "Mutual Information"
structure(
list(
statistic = STATISTIC,
parameter = PARAMETER,
estimate = ESTIMATE,
p.value = PVAL,
method = METHOD,
data.name = DNAME,
observed = x,
expected = E
),
class = "htest"
)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.