R/sstn.R
In sstn: Self-Similarity Test for Normality

Documented in sstn

#' @importFrom stats integrate rnorm sd
#'
#' @title Self-Similarity Test for Normality (SSTN)
#'
#' @description The SSTN is a statistical test for assessing whether a given sample originates
#' from a normal distribution. It is based on the iterative application of the
#' empirical characteristic function and compares it to the characteristic function
#' of the standard normal distribution. A Monte Carlo procedure is used to obtain
#' the empirical distribution of the test statistic under the null hypothesis.
#'
#' @param x Numeric vector of observations \eqn{(x_1, \dots, x_n)} drawn from
#'   a distribution with finite variance. Must have length \eqn{n \ge 2}.
#' @param B Integer. Number of Monte Carlo samples. Default is 500.
#' @param grid_length Integer. Number of grid points \eqn{H} for the evaluation
#'   of the standardized characteristic function. Default is 10.
#' @param t_max Positive numeric. Upper bound of the grid \eqn{t_H}, typically 4
#'   to cover the effective support of the normal characteristic function.
#'   Default is 4.
#' @param M_max Integer. Maximum number of iterations \eqn{N}. Default is 100.
#' @param beta Positive numeric. Weighting parameter in the discrepancy measure.
#'   Controls the decay rate of the exponential weight \eqn{\exp(-\beta t^2)}.
#'   Default is 0.5.
#' @param seed Optional integer. Random seed for reproducibility of Monte Carlo samples.
#'   Default is NULL (no fixed seed).
#' @param verbose Logical. If TRUE (default), prints a summary of the test results
#'   including the number of summands, test statistic, and \eqn{p}-value.
#'
#' @return An invisible list with the following components:
#' \item{test_statistic}{Numeric. The observed value \eqn{d_{M}} of the SSTN test statistic.}
#' \item{null_distribution}{Numeric vector of length \code{B}. Test statistics
#'   from the Monte Carlo samples under the null hypothesis.}
#' \item{number_summands}{Integer. The determined number of summands \eqn{M}.}
#' \item{p_value}{Numeric. The \eqn{p}-value of the test.}
#'
#' @author Akin Anarat \email{akin.anarat@hhu.de}
#'
#' @references
#' Anarat A. and Schwender, H. (2025). A normality test based on self-similarity. Submitted.
#'
#' @export
#'
#' @examples
#' set.seed(123)
#' # Sample from standard normal (null hypothesis true)
#' x <- rnorm(100)
#' res <- sstn(x)
#' res$p_value
#'
#' # Sample from Gamma distribution (null hypothesis false)
#' y <- rgamma(100, 1)
#' res2 <- sstn(y)
#' res2$p_value

sstn <- function(x, B = 500, grid_length = 10, t_max = 4, M_max = 100,
                 beta = 0.5, seed = NULL, verbose = TRUE) {

  if (!is.null(seed)) set.seed(seed)

  n <- length(x)
  if (n < 2) {
    stop("Condition violated: n >= 2")
  }

  if (beta <= 0) {
    stop("Condition violated: beta > 0")
  }

  mean_x <- mean(x)
  sd_x <- sd(x)

  z <- (x - mean_x) / sd_x * sqrt(n / (n - 1))

  H <- grid_length
  t <- seq(t_max / (2 * H - 1), t_max, length.out = H)
  dz <- (2 * t_max / H)

  phi_norm <- exp(-t^2 / 2)
  w <- exp(-beta * t^2)

  c <- integrate(function(x) (1 - exp(-x^2 / 2)) * exp(-beta * x^2), -10, 10)$value
  eps <- c / n

  m <- 1
  dist <- Inf
  deltas <- numeric(M_max)

  while (dist > eps && m <= M_max) {
    tm <- outer(z, t / sqrt(m))
    phi_z <- colMeans(exp(1i * tm))^m
    dist <- 2 * sum(w * abs(phi_norm - phi_z)^2) * dz
    deltas[m] <- dist
    m <- m + 1
  }

  M <- m - 1
  D <- sum(deltas)

  xb_mat <- matrix(rnorm(B * n), nrow = B)

  row_means <- rowMeans(xb_mat)
  row_sds   <- apply(xb_mat, 1, sd)

  zb_mat <- (xb_mat - row_means) / row_sds * sqrt(n / (n - 1))

  D_b <- apply(zb_mat, 1, function(zb) {
    deltas_b <- numeric(M)
    for (m in seq_len(M)) {
      tm_b <- outer(zb, t / sqrt(m))
      phi_zb <- colMeans(exp(1i * tm_b))^m
      deltas_b[m] <- 2 * sum(w * abs(phi_norm - phi_zb)^2) * dz
    }
    sum(deltas_b)
  })

  p_value <- mean(D_b >= D)

  if (verbose) {
    cat("SSTN\n")
    cat("--------------------------------------------------\n")
    cat(sprintf("Number of summands: M = %d\n", M))
    cat(sprintf("Test statistic: %.5f\n", D))
    cat(sprintf("p-value: %.5f\n", p_value))
  }

  invisible(list(
    test_statistic = D,
    null_distribution = D_b,
    number_summands = M,
    p_value = p_value
  ))
}