# R/fast.gtest.R

# Defines functions fast.gtest

# Documented in fast.gtest

# fast.gtest.R
#
# Author: Xuye Luo, Joe Song
# 
# Updated:
#
# December 20, 2025
#   Updated documentation
#
# December 11, 2025

#' @title Fast Zero-Tolerant G-Test of Association
#'
#' @description Performs a fast zero-tolerant
#' \emph{G}-test \insertCite{WOOLF:1957aa}{Upsilon}
#' to evaluate association between observations
#' from two categorical variables.
#'
#' @details Both inputs are converted with \code{as.factor()} and passed to
#' the compiled routine, which supplies the test statistic, the sample size
#' and the table dimensions. The degrees of freedom are
#' \code{(nr - 1) * (nc - 1)} and the \emph{p}-value is the upper tail of the
#' chi-squared distribution. The reported estimate is the statistic divided
#' by twice the sample size.
#'
#' @references
#' \insertRef{WOOLF:1957aa}{Upsilon}
#'
#' @inheritParams fast.upsilon.test
#' @inherit fast.upsilon.test note
#'
#' @return A list with class \code{"htest"} containing the following components:
#' \item{statistic}{the \emph{G}-test statistic (Likelihood Ratio Chi-squared statistic).}
#' \item{parameter}{the degrees of freedom.}
#' \item{p.value}{the \emph{p}-value of the test.}
#' \item{estimate}{the mutual information between the two variables.}
#' \item{method}{a character string indicating the method used.}
#' \item{data.name}{a single character string giving the names of the data.}
#' \item{observed}{the paired input observations, combined column-wise
#'   with \code{cbind(x, y)}.}
#'
#' @examples
#' library("Upsilon")
#' weather <- c(
#'   "rainy", "sunny", "rainy", "sunny", "rainy"
#' )
#' mood <- c(
#'   "wistful", "upbeat", "upbeat", "upbeat", "wistful"
#' )
#'
#' fast.gtest(weather, mood)
#'
#' # The result is equivalent to:
#' modified.gtest(table(weather, mood))
#' @importFrom stats pchisq
#' @export
fast.gtest <- function(x, y, log.p = FALSE) {

  METHOD <- "G-test"

  # deparse() splits long expressions into several strings; collapse each
  # argument to one string so that data.name is always length one, even when
  # the caller passes an inline expression instead of a variable name.
  x_label <- paste(deparse(substitute(x), width.cutoff = 500L), collapse = " ")
  y_label <- paste(deparse(substitute(y), width.cutoff = 500L), collapse = " ")
  DNAME <- paste(x_label, "and", y_label)

  # Basic validation: observations are paired, so lengths must agree.
  if (length(x) != length(y)) {
    stop("Vectors 'x' and 'y' must have the same length.")
  }

  # Call C++ function. The returned list is read below for the components
  # `statistic`, `n`, `nr` and `nc`.
  gtest_list <- gtest_cpp(as.factor(x), as.factor(y))

  STATISTIC <- gtest_list$statistic
  # Coerce all counts to double so the arithmetic below never runs in
  # integer mode.
  n  <- as.numeric(gtest_list$n)
  nr <- as.numeric(gtest_list$nr)
  nc <- as.numeric(gtest_list$nc)

  PARAMETER <- (nr - 1) * (nc - 1)
  PVAL <- stats::pchisq(
    STATISTIC, PARAMETER,
    lower.tail = FALSE, log.p = log.p
  )

  # Estimate is Mutual Information: I(X;Y) = G / 2N
  ESTIMATE <- STATISTIC / (2 * n)

  names(STATISTIC) <- "Likelihood Ratio G"
  names(PARAMETER) <- "df"
  names(PVAL)      <- "p.value"
  names(ESTIMATE)  <- "Mutual Information"

  structure(
    list(
      statistic = STATISTIC,
      estimate  = ESTIMATE,
      parameter = PARAMETER,
      p.value   = PVAL,
      method    = METHOD,
      data.name = DNAME,
      # NOTE(review): this stores the raw paired inputs, not a table of
      # counts as stats::chisq.test() does -- confirm this is intended.
      observed  = cbind(x, y)
    ),
    class = "htest"
  )
}

# Try the Upsilon package in your browser

# Any scripts or data that you put into this service are public.

# Upsilon documentation built on March 7, 2026, 5:07 p.m.