Nothing
#' NNS ANOVA: Nonparametric Analysis of Variance
#'
#' Performs a distribution-free ANOVA using partial-moment statistics to assess
#' differences between control and treatment groups. Depending on the setting of
#' \code{means.only}, the procedure tests either differences in central tendency
#' (means or medians) or differences across the full empirical distributions.
#'
#' The key output is the \code{Certainty} metric, a calibrated probability in
#' \eqn{[0, 1]} representing the likelihood that the groups being compared are
#' the *same* with respect to the chosen comparison mode:
#' \itemize{
#' \item If \code{means.only = TRUE}: \code{Certainty} is the probability that
#' the group \emph{means} (or medians, if \code{medians = TRUE}) are the same.
#' \item If \code{means.only = FALSE}: \code{Certainty} is the probability that
#' the two \emph{entire distributions} are the same.
#' }
#'
#' This makes \code{Certainty} the conceptual inverse of a classical p-value.
#' A *low* Certainty (e.g., < 0.10) indicates strong evidence of difference,
#' while a *high* Certainty (e.g., > 0.90) indicates strong evidence of similarity.
#'
#' @param control Numeric vector of control group observations
#' @param treatment Numeric vector of treatment group observations
#' @param means.only Logical; \code{FALSE} (default) uses full distribution analysis. Set \code{TRUE} for mean-only comparison
#' @param medians Logical; \code{FALSE} (default) uses means. Set \code{TRUE} for median-based analysis
#' @param confidence.interval Numeric [0,1]; confidence level for effect size bounds (e.g., 0.95)
#' @param tails Character; specifies CI tail(s): "both", "left", or "right"
#' @param pairwise logical; \code{FALSE} (default) Returns pairwise certainty tests when set to \code{pairwise = TRUE}.
#' @param robust logical; \code{FALSE} (default) Generates 100 independent random permutations to test results, and returns / plots 95 percent confidence intervals along with robust central tendency of all results for pairwise analysis only.
#' @param plot Logical; \code{TRUE} (default) generates distribution plot
#'
#' @return Returns a list containing:
#' \itemize{
#' \item \code{Control_Statistic}: Mean/median of control group
#' \item \code{Treatment_Statistic}: Mean/median of treatment group
#' \item \code{Grand_Statistic}: Grand mean/median
#' \item \code{Control_CDF}: CDF value at grand statistic (control)
#' \item \code{Treatment_CDF}: CDF value at grand statistic (treatment)
#' \item \code{Certainty}: Probability that the groups are the \emph{same}
#' (means-only or full distribution depending on \code{means.only}).
#' \item \code{Effect_Size_LB}: Lower bound of treatment effect (if CI requested)
#' \item \code{Effect_Size_UB}: Upper bound of treatment effect (if CI requested)
#' \item \code{Confidence_Level}: Confidence level used (if CI requested)
#' }
#'
#'
#'
#' @author Fred Viole, OVVO Financial Systems
#' @references Viole, F. and Nawrocki, D. (2013) "Nonlinear Nonparametric Statistics: Using Partial Moments" (ISBN: 1490523995)
#'
#' Viole, F. (2017) "Continuous CDFs and ANOVA with NNS" \doi{10.2139/ssrn.3007373}
#'
#' @examples
#' \dontrun{
#' ### Binary analysis and effect size
#' set.seed(123)
#' x <- rnorm(100) ; y <- rnorm(100)
#' NNS.ANOVA(control = x, treatment = y)
#'
#' ### Two variable analysis with no control variable
#' A <- cbind(x, y)
#' NNS.ANOVA(A)
#'
#' ### Medians test
#' NNS.ANOVA(A, means.only = TRUE, medians = TRUE)
#'
#' ### Multiple variable analysis with no control variable
#' set.seed(123)
#' x <- rnorm(100) ; y <- rnorm(100) ; z <- rnorm(100)
#' A <- cbind(x, y, z)
#' NNS.ANOVA(A)
#'
#' ### Different length vectors used in a list
#' x <- rnorm(30) ; y <- rnorm(40) ; z <- rnorm(50)
#' A <- list(x, y, z)
#' NNS.ANOVA(A)
#' }
#' @export
NNS.ANOVA <- function(
control,
treatment,
means.only = FALSE,
medians = FALSE,
confidence.interval = 0.95,
tails = "Both",
pairwise = FALSE,
plot = TRUE,
robust = FALSE
){
# normalize tails to lower case for downstream functions
tails <- tolower(tails)
if (!any(tails %in% c("left","right","both"))) {
stop("Please select tails from 'left', 'right', or 'both'")
}
if (!missing(treatment) && !is.null(treatment)) {
# with treatment
if (any(class(control) %in% c("tbl","data.table"))) control <- as.vector(unlist(control))
if (any(class(treatment) %in% c("tbl","data.table"))) treatment <- as.vector(unlist(treatment))
if (robust) {
# ---------------------------
# Robust path: resample pairs
# ---------------------------
l <- min(length(treatment), length(control))
treatment_p <- replicate(100, sample.int(l, replace = TRUE))
treatment_matrix <- matrix(treatment[treatment_p], ncol = dim(treatment_p)[2], byrow = FALSE)
treatment_matrix <- cbind(treatment[treatment_p[,1]], treatment_matrix[, rev(seq_len(ncol(treatment_matrix)))])
control_matrix <- matrix(control[treatment_p], ncol = dim(treatment_p)[2], byrow = FALSE)
full_matrix <- cbind(control[treatment_p[,1]], control_matrix, treatment[treatment_p[,1]], treatment_matrix)
nns.certainties <- sapply(
1:ncol(control_matrix),
function(g) NNS.ANOVA.bin(
control_matrix[, g],
treatment_matrix[, g],
means.only = means.only,
medians = medians,
plot = FALSE
)$Certainty
)
# Robust point estimate of certainty
robust_estimate <- gravity(nns.certainties)
# --- CI for robust certainty, honoring confidence.interval and tails
alpha <- if (tails == "both") (1 - confidence.interval) / 2 else 1 - confidence.interval
# Use degree = 0 to obtain empirical quantiles of the certainty samples
cer_lower_CI <- if (tails %in% c("both","left")) LPM.VaR(alpha, 0, nns.certainties) else NA_real_
cer_upper_CI <- if (tails %in% c("both","right")) UPM.VaR(alpha, 0, nns.certainties) else NA_real_
# optional plotting (restore red lines and red label)
if (plot) {
original.par <- par(no.readonly = TRUE); on.exit(par(original.par), add = TRUE)
par(mfrow = c(1, 2))
hist(nns.certainties, main = "NNS Certainty")
abline(v = robust_estimate, col = "red", lwd = 3)
if (tails %in% c("both","left")) abline(v = cer_lower_CI, col = "red", lwd = 2, lty = 3)
if (tails %in% c("both","right")) abline(v = cer_upper_CI, col = "red", lwd = 2, lty = 3)
mtext("Robust Certainty Estimate", side = 3, at = robust_estimate, col = "red")
}
# Compute main ANOVA (non-robust) results but DO NOT pass 'par'
base <- NNS.ANOVA.bin(
control,
treatment,
means.only = means.only,
medians = medians,
confidence.interval = confidence.interval,
plot = plot,
tails = tails
)
lvl <- round(100 * confidence.interval)
lower_label <- if (tails == "both") paste0("Lower ", lvl, "% CI") else paste0("Lower ", lvl, "% bound")
upper_label <- if (tails == "both") paste0("Upper ", lvl, "% CI") else paste0("Upper ", lvl, "% bound")
# Build return object to match prior shape (flatten base then append)
ci_vals <- numeric(0); ci_names <- character(0)
if (tails %in% c("both","left")) { ci_vals <- c(ci_vals, cer_lower_CI); ci_names <- c(ci_names, lower_label) }
if (tails %in% c("both","right")) { ci_vals <- c(ci_vals, cer_upper_CI); ci_names <- c(ci_names, upper_label) }
if (length(ci_vals)) names(ci_vals) <- ci_names
return(
as.list(c(
unlist(base),
"Robust Certainty Estimate" = robust_estimate,
ci_vals
))
)
} else {
# non-robust, binary ANOVA (already honors confidence.interval & tails)
return(
NNS.ANOVA.bin(
control,
treatment,
means.only = means.only,
medians = medians,
confidence.interval = confidence.interval,
plot = plot,
tails = tails
)
)
}
}
# ------------------------------
# Without treatment (k >= 2 vars)
# ------------------------------
if (is.list(control)) n <- length(control) else n <- ncol(control)
if (is.null(n) || n == 1)
stop("supply both 'control' and 'treatment' or a matrix-like 'control', or a list 'control'")
if (n >= 2) {
if (any(class(control) %in% c("tbl","data.table"))) {
A <- as.data.frame(control)
} else {
if (any(class(control) %in% "list")) {
A <- do.call(cbind, lapply(control, `length<-`, max(lengths(control))))
} else {
A <- control
}
}
} else {
A <- control
}
if (medians) {
mean.of.means <- mean(apply(A, 2, function(i) median(i, na.rm = TRUE)))
} else {
mean.of.means <- mean(colMeans(A, na.rm = TRUE))
}
if (!pairwise) {
# Continuous CDF for each variable from grand statistic
if (medians) {
LPM_ratio <- sapply(1:n, function(b) LPM.ratio(0, mean.of.means, na.omit(unlist(A[, b]))))
} else {
LPM_ratio <- sapply(1:n, function(b) LPM.ratio(1, mean.of.means, na.omit(unlist(A[, b]))))
}
lower.25.target <- mean(sapply(1:n, function(i) LPM.VaR(.25, 1, na.omit(unlist(A[,i])))))
upper.25.target <- mean(sapply(1:n, function(i) UPM.VaR(.25, 1, na.omit(unlist(A[,i])))))
lower.125.target <- mean(sapply(1:n, function(i) LPM.VaR(.125, 1, na.omit(unlist(A[,i])))))
upper.125.target <- mean(sapply(1:n, function(i) UPM.VaR(.125, 1, na.omit(unlist(A[,i])))))
raw.certainties <- vector("list", n - 1)
for (i in 1:(n - 1)) {
raw.certainties[[i]] <- sapply(
(i + 1):n,
function(b) NNS.ANOVA.bin(
na.omit(unlist(A[, i])),
na.omit(unlist(A[, b])),
means.only = means.only,
medians = medians,
mean.of.means = mean.of.means,
upper.25.target = upper.25.target,
lower.25.target = lower.25.target,
upper.125.target = upper.125.target,
lower.125.target = lower.125.target,
plot = FALSE
)$Certainty
)
}
# Certainty associated with samples
NNS.ANOVA.rho <- mean(unlist(raw.certainties))
# Graphs
if (plot) {
boxplot(
A,
las = 2,
ylab = "Variable",
horizontal = TRUE,
main = "NNS ANOVA",
col = c('steelblue', rainbow(n - 1))
)
abline(v = mean.of.means, col = "red", lwd = 4)
if (medians) mtext("Grand Median", side = 3, col = "red", at = mean.of.means) else mtext("Grand Mean", side = 3, col = "red", at = mean.of.means)
}
return(c("Certainty" = NNS.ANOVA.rho))
}
# pairwise = TRUE: return symmetric matrix of certainties
raw.certainties <- vector("list", n - 1)
for (i in 1:(n - 1)) {
raw.certainties[[i]] <- sapply(
(i + 1):n,
function(b) NNS.ANOVA.bin(
na.omit(unlist(A[, i])),
na.omit(unlist(A[, b])),
means.only = means.only,
medians = medians,
plot = FALSE
)$Certainty
)
}
certainties <- matrix(NA_real_, n, n)
certainties[lower.tri(certainties, diag = FALSE)] <- unlist(raw.certainties)
diag(certainties) <- 1
certainties <- pmax(certainties, t(certainties), na.rm = TRUE)
colnames(certainties) <- rownames(certainties) <- colnames(A)
if (plot) {
boxplot(
A,
las = 2,
ylab = "Variable",
horizontal = TRUE,
main = "ANOVA",
col = c('steelblue', rainbow(n - 1))
)
abline(v = mean.of.means, col = "red", lwd = 4)
if (medians) mtext("Grand Median", side = 3, col = "red", at = mean.of.means) else mtext("Grand Mean", side = 3, col = "red", at = mean.of.means)
}
return(certainties)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.