Nothing
# upsilon.gof.test.R
#
# Author: Xuye Luo, Joe Song
#
# Updated:
# December 20, 2025.
# - Updated the documentation
#
# December 12, 2025
#' @title Upsilon Goodness-of-Fit Test for Count Data
#'
#' @description (FOR INTERNAL USE ONLY) Performs the Upsilon Goodness-of-Fit test to determine if a sample of
#' observed counts fits a specified probability distribution.
#' The Upsilon statistic uses a specific normalization (dividing by the average expected count)
#' which differs from the standard Pearson's Chi-squared test.
#'
#' @param x A numeric vector representing observed counts. Must be non-negative.
#' @param p A numeric vector of probabilities of the same length as \code{x}.
#' Defaults to a uniform distribution (1/length(x)).
#' @param rescale.p Logical. If \code{TRUE} (default), \code{p} is rescaled to sum to 1.
#' If \code{FALSE}, \code{p} must sum to 1, otherwise an error is raised.
#'
#' @param log.p a logical. If \code{TRUE},
#' the \emph{p}-value is calculated in
#' closed form to \strong{natural logarithm} of \emph{p}-value
#' to improve numerical precision when
#' \emph{p}-value approaches zero.
#' Defaults to \code{FALSE}.
#'
#' @return A list with class \code{"htest"} containing:
#' \item{statistic}{The Upsilon test statistic.}
#' \item{parameter}{The degrees of freedom (k - 1).}
#' \item{p.value}{The p-value of the test.}
#' \item{estimate}{The effect size.}
#' \item{method}{A character string indicating the method used.}
#' \item{data.name}{A character string giving the name(s) of the data.}
#' \item{observed}{The observed counts.}
#' \item{expected}{The expected counts.}
#' \item{residuals}{The Pearson residuals.}
#' \item{p.normalized}{The probability vector used (after rescaling if applicable).}
#'
#' @importFrom stats pchisq
#' @export
#'
#' @examples
#' library("Upsilon")
#'
#' # Test against uniform distribution
#' counts <- c(10, 20, 30)
#' upsilon.gof.test(counts)
#' @keywords internal
upsilon.gof.test <- function(
x,
p = rep(1/length(x), length(x)),
rescale.p = TRUE,
log.p = FALSE)
{
METHOD <- "Upsilon Goodness-of-Fit Test"
DNAME <- deparse(substitute(x))
x <- as.vector(x)
# Input Validation
if (any(x < 0, na.rm = TRUE)) {
stop("Observed counts 'x' must be non-negative.")
}
if (length(x) != length(p)) {
stop("Observed counts 'x' and probabilities 'p' must have the same length.")
}
# Rescaling Logic
p_sum <- sum(p)
if (rescale.p) {
if (abs(p_sum - 1) > 1e-9) {
p <- p / p_sum
}
} else {
if (abs(p_sum - 1) > 1e-9) {
stop("Probabilities 'p' must sum to 1 when rescale.p = FALSE.")
}
}
n <- sum(x)
k <- length(x)
PARAMETER <- k - 1L
# Handle empty data case
if (n == 0) {
STATISTIC <- 0
ESTIMATE <- 0
PVAL <- if (log.p) 0 else 1
E <- x
RESIDUALS <- x
} else {
# Calculate Expected Counts
E <- n * p
term <- (x - E)^2 / (n / k)
STATISTIC <- sum(term, na.rm = TRUE)
max_statistic <- k * n * (1 + sum(p^2) - 2 * min(p))
# Effect Size
ESTIMATE <- if (max_statistic > 0) sqrt(STATISTIC / max_statistic) else 0
# P-value and Residuals
PVAL <- stats::pchisq(STATISTIC, PARAMETER, lower.tail = FALSE, log.p = log.p)
RESIDUALS <- ifelse(E > 0, (x - E) / sqrt(E), 0)
}
names(STATISTIC) <- "Upsilon"
names(ESTIMATE) <- "Effect size"
names(PARAMETER) <- "df"
names(PVAL) <- "p.value"
structure(
list(
statistic = STATISTIC,
estimate = ESTIMATE,
parameter = PARAMETER,
p.value = PVAL,
method = METHOD,
data.name = DNAME,
observed = x,
expected = E,
residuals = RESIDUALS,
p.normalized = p
),
class = "htest"
)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.