# R/symmetry.test.R In vlyubchich/lawstat: Tools for Biostatistics, Public Policy, and Law

#### Documented in symmetry.test

#' Test of Symmetry
#'
#' Perform test for symmetry about an unknown median. Users can choose among the
#' Cabilio--Masaro test \insertCite{Cabilio_Masaro_1996}{lawstat},
#' the Mira test \insertCite{Mira_1999}{lawstat},
#' or the MGG test \insertCite{Miao_etal_2006}{lawstat};
#' and between using asymptotic distribution of the respective statistics or
#' a distribution from \eqn{m}-out-of-\eqn{n} bootstrap
#' \insertCite{Lyubchich_etal_2016_symmetry}{lawstat}.
#' Additionally to the general distribution asymmetry, the function allows to test
#' for negative or positive skeweness (see the argument \code{side}).
#' \code{NA}s from the data are omitted.
#'
#' @details If the bootstrap option is used (\code{boot = TRUE}), a bootstrap
#' distribution is obtained for each candidate subsample size \eqn{m}. Then, a heuristic
#' method \insertCite{Bickel_etal_1997,Bickel_Sakov_2008}{lawstat}
#' is used for the choice of optimal \eqn{m}. Specifically, we use the Wasserstein metric
#' \insertCite{Ruschendorf_2001}{lawstat} to calculate distances between different
#' bootstrap distributions and select \eqn{m}, which corresponds to the minimal distance.
#' See \insertCite{Lyubchich_etal_2016_symmetry;textual}{lawstat} for more details.
#'
#'
#' @param x data to be tested for symmetry.
#' @param option test statistic to be applied. The options include statistic by
#' \insertCite{Miao_etal_2006;textual}{lawstat} (default),
#' \insertCite{Cabilio_Masaro_1996;textual}{lawstat},
#' and \insertCite{Mira_1999;textual}{lawstat}.
#' @param side choice from the three possible alternative hypotheses:
#' general distribution asymmetry (\code{side = "both"}, default),
#' left skewness (\code{side = "left"}), or right skewness (\code{side = "right"}).
#' @param boot logical value indicates whether \eqn{m}-out-of-\eqn{n} bootstrap will
#' be used to obtain critical values (default),
#' or asymptotic distribution of the chosen statistic.
#' @param B number of bootstrap replications to perform (default is 1000).
#' @param q scalar from 0 to 1 to define a set of possible \eqn{m} for the
#' \eqn{m}-out-of-\eqn{n} bootstrap. Default \code{q = 8/9}.
#' Possible \eqn{m} are then set as the values \code{unique(round(n*(q^j))}
#' greater than 4, where \code{n = length(x)} and \code{j = c(0:20)}.
#'
#'
#' @return A list of class \code{"htest"} with the following components:
#' \item{method}{name of the method.}
#' \item{data.name}{name of the data.}
#' \item{statistic}{value of the test statistic.}
#' \item{p.value}{\eqn{p}-value of the test.}
#' \item{alternative}{alternative hypothesis.}
#' \item{estimate}{bootstrap optimal \eqn{m} (given in the output only if bootstrap
#' was used, i.e., \code{boot = TRUE}).}
#'
#' @references
#' \insertAllCited{}
#'
#' @keywords distribution htest robust
#'
#' @author Joseph L. Gastwirth, Yulia R. Gel, Wallace Hui, Vyacheslav Lyubchich,
#' Weiwen Miao, Xingyu Wang (in alphabetical order)
#'
#' @export
#' @examples
#' data(zuni) #run ?zuni to see the data description
#' symmetry.test(zuni[,"Revenue"], boot = FALSE)
#'
symmetry.test <- function(x,
option = c("MGG", "CM", "M"),
side = c("both", "left", "right"),
boot = TRUE,
B = 1000,
q = 8 / 9)
{
DNAME <- deparse(substitute(x))
j <- c(0:20)
x <- na.omit(x)
n <- length(x)
m <- unique(round(n * (q ^ j))[I(round(n * (q ^ j)) > 4)])
x <- sort(x) - mean(x)
a <- x[x < mean(x)]
b <- x[x > mean(x)]
xx <- c(x, -a, -b)
option <- match.arg(option)
side <- match.arg(side)
if (option == "MGG") {
METHOD <- "test by Miao, Gel, and Gastwirth (2006)"
stat.function <-
function(x)
sqrt(length(x)) * (mean(x) - median(x)) / (sqrt(pi / 2) * mean(abs(x - median(x))) * sqrt(0.5707963))
}
if (option == "M") {
METHOD <- "test by Mira (1999)"
stat.function <- function(x) {
x <- sort(x)
N <- length(x)
D <-
N ^ (1 / 5) * (x[N / 2 + 0.5 * N ^ (4 / 5)] - x[N / 2 - 0.5 * N ^ (4 /
5) + 1])
g <- mean(x) - 2 / N * sum(x[x <= median(x)])
S <- 4 * var(x) + D ^ 2 - 4 * D * g
sqrt(N) * 2 * (mean(x) - median(x)) / sqrt(S)
}
}
if (option == "CM") {
METHOD <- "test by Cabilio and Masaro (1996)"
stat.function <-
function(x)
sqrt(length(x)) * (mean(x) - median(x)) / (sd(x) * sqrt(0.5707963))
}
STATISTIC <- stat.function(x)
names(STATISTIC) <- "Test statistic"
if (boot) {
BootstrapStatistic <- array(NA, c(length(m), B))
for (i in 1:length(m)) {
M <-
sapply(1:B, function(x)
sort(sample(
xx, size = m[i], replace = TRUE
)))
tmp <- sapply(1:B, function(x)
length(unique(M[, x])))
while (any(tmp == 1)) {
M[, tmp == 1] <-
sapply(which(tmp == 1), function(x)
sort(sample(
xx, size = m[i], replace = TRUE
)))
tmp <- sapply(1:B, function(x)
length(unique(M[, x])))
}
BootstrapStatistic[i, ] <- sort(apply(M, 2, stat.function))
}
Distance <-
sapply(1:(length(m) - 1), function(x)
sqrt(sum((BootstrapStatistic[(x + 1), ] - BootstrapStatistic[x, ]) ^ 2
)))
di <- which.min(Distance)
ESTIMATE <- m[di]
names(ESTIMATE) <- "bootstrap optimal m"
Tcrit <- sum(STATISTIC < BootstrapStatistic[di, ]) / B
} else {
Tcrit <- 1 - pnorm(STATISTIC)
}
if (side == "both") {
ALTERNATIVE <- "the distribution is asymmetric."
if (Tcrit < 0.5) {
P.VALUE <- Tcrit * 2
} else {
P.VALUE <- 2 * (1 - Tcrit)
}
}
if (side == "left") {
ALTERNATIVE <- "the distribution is negatively skewed."
P.VALUE <- 1 - Tcrit
}
if (side == "right") {
ALTERNATIVE <- "the distribution is positively skewed."
P.VALUE <- Tcrit
}
if (boot) {
METHOD <- paste("m-out-of-n bootstrap symmetry", METHOD)
structure(
list(
method = METHOD,
data.name = DNAME,
statistic = STATISTIC,
p.value = P.VALUE,
alternative = ALTERNATIVE,
estimate = ESTIMATE
),
class = "htest"
)
} else {
METHOD <- paste("Symmetry", METHOD)
structure(
list(
method = METHOD,
data.name = DNAME,
statistic = STATISTIC,
p.value = P.VALUE,
alternative = ALTERNATIVE
),
class = "htest"
)
}
}

vlyubchich/lawstat documentation built on April 17, 2023, 12:47 a.m.