Nothing
# fast.upsilon.test.R
#
# Author: Xuye Luo, Joe Song
#
# Updated:
#
# December 20, 2025
# - Modified the code to allow the input categorical
# variable to be of any vector type that is to be
# converted to factor vector type internally.
# - Rewrite examples.
# - Updated documentation.
#
# December 11, 2025
#' @importFrom Rcpp sourceCpp
#' @useDynLib Upsilon, .registration=TRUE
#'
#' @title Fast Upsilon Test of Association
#' between Two Categorical Variables
#'
#' @description
#' Performs a fast Upsilon test
#' \insertCite{luo2021upsilon}{Upsilon}
#' to evaluate association between
#' observations from two categorical variables.
#'
#' @inherit upsilon.test details
#'
#' @note The test uses an internal
#' hash table, instead of a matrix, to
#' store the contingency table. Savings in
#' both runtime and memory can be
#' substantial if the contingency table is
#' sparse and large. The test is implemented
#' in C++, to give an additional layer of
#' speedup over an R implementation.
#'
#' @param x a vector to
#' specify observations of the first
#' categorical variable. The vector can be of
#' numeric, character, or logical type.
#' \code{NA} values must be removed or
#' replaced before calling the function.
#' @param y a vector to specify observations of
#' the second categorical variable.
#' Must not contain \code{NA} values and
#' must be of the same length as \code{x}.
#' @param log.p a logical. If \code{TRUE},
#' the \strong{natural logarithm} of the \emph{p}-value
#' is calculated in closed form and returned,
#' to improve numerical precision when the
#' \emph{p}-value approaches zero.
#' Defaults to \code{FALSE}.
#'
#' @return A list with class \code{"htest"} containing the following components:
#' \item{statistic}{the Upsilon test statistic.}
#' \item{parameter}{the degrees of freedom.}
#' \item{p.value}{the \emph{p}-value of the test, or its natural
#' logarithm if \code{log.p = TRUE}.}
#' \item{estimate}{the effect size derived from the Upsilon statistic.}
#' \item{method}{a character string indicating the method used.}
#' \item{data.name}{a character string giving the name of input data.}
#' \item{observed}{a two-column matrix of the input observations,
#' formed by \code{cbind(x, y)}.}
#'
#' @references
#' \insertRef{luo2021upsilon}{Upsilon}
#' @importFrom stats pchisq
#' @export
#'
#' @examples
#' library("Upsilon")
#'
#' weather <- c(
#' "rainy", "sunny", "rainy", "sunny", "rainy"
#' )
#' mood <- c(
#' "wistful", "upbeat", "upbeat", "upbeat", "wistful"
#' )
#'
#' fast.upsilon.test(weather, mood)
#'
#' # The result is equivalent to:
#' upsilon.test(table(weather, mood))
fast.upsilon.test <- function(x, y, log.p = FALSE) {
  METHOD <- "Upsilon Test"

  # Capture the argument expressions for the 'data.name' label.
  # deparse() splits a long expression into several strings, so each label
  # is collapsed to guarantee that 'data.name' is a single character string.
  x_label <- paste(
    deparse(substitute(x), width.cutoff = 500L),
    collapse = " "
  )
  y_label <- paste(
    deparse(substitute(y), width.cutoff = 500L),
    collapse = " "
  )
  DNAME <- paste(x_label, "and", y_label)

  # Basic validation
  if (length(x) != length(y)) {
    stop("Vectors 'x' and 'y' must have the same length.")
  }
  # as.factor() encodes NA as a missing level code rather than a category,
  # so reject NA here instead of passing missing codes to the C++ routine.
  if (anyNA(x) || anyNA(y)) {
    stop("Vectors 'x' and 'y' must not contain NA values.")
  }

  # Call C++ function; inputs of any vector type are converted to factors.
  upsilon_list <- upsilon_cpp(as.factor(x), as.factor(y))
  STATISTIC <- upsilon_list$statistic

  # Coerce all three counts to double so the arithmetic below is carried
  # out consistently in floating point.
  n <- as.numeric(upsilon_list$n)
  nr <- as.numeric(upsilon_list$nr)
  nc <- as.numeric(upsilon_list$nc)

  # Calculate Effect Size: the statistic scaled by n * nr * nc / 4.
  ESTIMATE <- sqrt(STATISTIC / (n * nr * nc / 4))

  # Degrees of freedom and upper-tail chi-squared p-value
  # (returned on the natural-log scale when log.p = TRUE).
  PARAMETER <- (nr - 1) * (nc - 1)
  PVAL <- stats::pchisq(
    STATISTIC,
    PARAMETER,
    lower.tail = FALSE,
    log.p = log.p
  )

  names(STATISTIC) <- "Upsilon"
  names(ESTIMATE) <- "Effect size"
  names(PARAMETER) <- "df"
  names(PVAL) <- "p.value"

  structure(
    list(
      statistic = STATISTIC,
      estimate = ESTIMATE,
      parameter = PARAMETER,
      p.value = PVAL,
      method = METHOD,
      data.name = DNAME,
      # The raw inputs bound column-wise, kept as in the original interface.
      observed = cbind(x, y)
    ),
    class = "htest"
  )
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.