#' Wilcoxon Signed Rank and Rank-Sum Tests
#'
#' Performs a Wilcoxon Signed Rank test on a vector of data or two paired
#' vectors of data, or performs a Wilcoxon Rank-Sum test on two unpaired
#' vectors of data.
#' @usage wilcoxon_test(x, y = NULL, alternative = c("two.sided", "less", "greater"),
#' mu = 0, paired = FALSE, exact = NULL, correct = TRUE)
#' @param x a numeric vector.
#' @param y an optional numeric vector.
#' @param alternative a string specifying the alternative hypothesis ("two.sided, "less", or "greater").
#' Only the first letter is necessary. Defaults to two sided.
#' @param mu a number representing a location or location difference to be tested for in the null hypothesis.
#' Defaults to 0.
#' @param paired a logical indicating whether to perform a paired or unpaired test.
#' Defaults to false.
#' @param exact a logical indicating whether to compute an exact p-value or an approximation.
#' @param correct A logical indicating whether to use a continuity correction or not.
#' @details If either y is not supplied or both x and y are supplied and paired is true,
#' then a Wilcoxon signed rank test is performed. The null hypothesis is that
#' the median of x (or x-y in the paired case) is equal to mu.
#'
#' If both x and y are supplied and paired is false, then a Wilcoxon rank-sum
#' (Mann-Whitney) test is performed. The null hypothesis is that the medians of
#' x and y differ by a location shift of mu. The alternative hypothesis "greater"
#' refers to x being shifted to the right of y.
#'
#' If exact is true, an exact p-value will be computed using the built-in
#' psignrank() or pwilcox() distribution functions. If exact is false, the
#' p-value is computed using a normal approximation. If exact is not supplied,
#' exact will default to true if the sample size is greater than 50 and there
#' are no ties. For the signed-rank test, there must also be no zeroes. Otherwise,
#' exact will be set to false.
#'
#' Warning: the built in function pwilcox() uses large amounts of memory and stack,
#' so if exact is true and one of the samples is large (thousands or more), the
#' function can crash R. To prevent this, exact should be set to false instead.
#'
#' The correct parameter is only relevant if exact is false. If correct is true,
#' a continuity correction will be applied in order to perform the normal approximation.
#' The correction is a shifting of the test statistic by 0.5.
#'
#' Unlike the built in function wilcox.test(), this function does not support
#' calculating confidence intervals, or taking in a formula as an argument.
#'
#' @return A list of class "htest" consisting of the following:
#' \itemize{
#' \item statistic - the value of the test statistic
#' \item parameter - for this test, NULL
#' \item p.value - the p-value
#' \item null.value - the value of mu
#' \item alternative - the alternative hypothesis
#' \item method - the type of test performed
#' \item data.name - the name(s) of the supplied data
#' }
#' @examples
#' x <- rnorm(50)
#' y1 <- rnorm(50)
#' y2 <- rnorm(100)
#' y3 <- rnorm(5000)
#' wilcoxon_test(x) # One-sample Wilcoxon signed-rank test
#' wilcoxon_test(x, alternative = "g", mu = 2) # Alternative hypothesis is that the median of x is greater than 2
#' wilcoxon_test(x, y1, paired = TRUE) # Paired Wilcoxon signed-rank test
#' \dontrun{
#' wilcoxon_test(x, y2, paired = TRUE) # Error, cannot perform paired test if vectors have different lengths
#' }
#' wilcoxon_test(x, y1, paired = FALSE) # Two-sample Wilcoxon rank-sum test
#' wilcoxon_test(x, y2, paired = FALSE, exact = TRUE) # Calculates exact p-value from Wilcoxon rank-sum distribution
#' wilcoxon_test(x, y2, paired = FALSE, exact = FALSE, correct = FALSE) # Uses normal approximation to calculate p-value, without continuity correction
#' wilcoxon_test(x, y2, paired = FALSE, exact = FALSE, correct = TRUE) # Same as above, but with continuity correction
#' \dontrun{
#' wilcoxon_test(x, y3, exact = TRUE) # y3 is too large, do not run with exact = T, could crash R
#' }
#' wilcoxon_test(x, y3, exact = FALSE) # Ok to run
#' @export
wilcoxon_test <- function(x, y = NULL, alternative = c("two.sided", "less", "greater"),
mu = 0, paired = FALSE, exact = NULL, correct = TRUE){
alternative <- match.arg(alternative) # Input must correspond to one of the options
data.name <- paste(deparse(substitute(x)), "and", deparse(substitute(y)))
names(mu) <- "location shift"
correction <- 0
if(paired){
if(is.null(y)){
stop("'y' is missing for paired test")
}
else if(length(x) != length(y)){
stop("'x' and 'y' must have the same length")
}
else{
x <- x - y # Paired test is equivalent to one-sample test on x - y
y <- NULL
}
}
if(is.null(y)){ # If y is null or paired = T, perform a Wilcoxon signed rank test
method <- "Wilcoxon signed rank test"
if(!paired){
data.name <- deparse(substitute(x))
names(mu) <- "location"
}
x <- x - mu
zero <- any(x == 0)
if(zero){ # Remove zeroes, cannot perform exact test anymore
x <- x[x != 0]
}
n <- length(x)
ties <- n > length(unique(abs(x))) # If there are ties, cannot perform exact test anymore
ranks <- rank(abs(x))
signed_ranks <- ranks * sign(x)
W <- sum(signed_ranks[signed_ranks > 0])
names(W) <- "V"
if(is.null(exact)){
exact <- n < 50 & !ties & !zero # Use exact test for eligible small samples
}
if(exact & !ties & !zero){
if(alternative == "two.sided"){
if(W <= n * (n + 1) / 4){ # Check if W is smaller or greater than median
p.value <- 2 * psignrank(W, n)
}
else{
p.value <- 2 - 2 * psignrank(W - 1, n) # Subtract 1 because distribution is discrete
}
}
else if(alternative == "less"){
p.value <- psignrank(W, n)
}
else if(alternative == "greater"){
p.value <- 1 - psignrank(W - 1, n)
}
}
else if(exact){
if(ties){
warning("cannot compute exact p-value with ties")
}
if(zero){
warning("cannot compute exact p-value with zeroes")
}
exact <- FALSE
}
if(!exact){ # If we can't use exact distribution, approximate with normal
diff_W <- W - n * (n + 1) / 4
var_W <- n * (n + 1) * (2 * n + 1) / 24
numties <- table(ranks)
var_W_reduction <- sum((numties ^ 3 - numties)) / 48 # Variance correction for ties
var_W <- var_W - var_W_reduction
if(correct){ # continuity correction
if(alternative == "two.sided"){
correction <- -sign(diff_W) / 2
}
else if(alternative == "less"){
correction <- 0.5
}
else if(alternative == "greater"){
correction <- -0.5
}
diff_W <- diff_W + correction
method <- paste(method, "with continuity correction")
}
z <- diff_W / sqrt(var_W)
if(alternative == "two.sided"){
p.value <- 2 - 2 * pnorm(abs(z))
}
else if(alternative == "less"){
p.value <- pnorm(z)
}
else if(alternative == "greater"){
p.value <- 1 - pnorm(z)
}
}
}
else{ # If y is given and paired = F, perform a Wilcoxon rank-sum (Mann-Whitney) test.
method <- "Wilcoxon rank sum test"
x <- x - mu
nx <- length(x)
ny <- length(y)
pooled <- c(x, y)
ties <- nx + ny > length(unique(pooled))
pooled_ranks <- rank(pooled)
W <- sum(pooled_ranks[1:nx]) - nx * (nx + 1) / 2
names(W) <- "W"
if(is.null(exact)){
exact <- nx < 50 & ny < 50 & !ties
}
if(exact & !ties){
if(alternative == "two.sided"){
if(W <= nx * ny / 2){ # Median is nx * ny / 2
p.value <- 2 * pwilcox(W, nx, ny)
}
else{
p.value <- 2 - 2 * pwilcox(W - 1, nx, ny)
}
}
else if(alternative == "less"){
p.value <- pwilcox(W, nx, ny)
}
else if(alternative == "greater"){
p.value <- 1 - pwilcox(W - 1, nx, ny)
}
}
else if(exact){
warning("cannot compute exact p-value with ties")
exact <- FALSE
}
if(!exact){
diff_W <- W - nx * ny / 2
var_W <- nx * ny * (nx + ny + 1) / 12
numties <- table(pooled_ranks)
var_W_reduction <- nx * ny * sum(numties ^ 3 - numties) / (12 * (nx + ny) * (nx + ny - 1))
var_W <- var_W - var_W_reduction
if(correct){ # continuity correction
if(alternative == "two.sided"){
correction <- -sign(diff_W) / 2
}
else if(alternative == "less"){
correction <- 0.5
}
else if(alternative == "greater"){
correction <- -0.5
}
diff_W <- diff_W + correction
method <- paste(method, "with continuity correction")
}
z <- diff_W / sqrt(var_W)
if(alternative == "two.sided"){
p.value <- 2 - 2 * pnorm(abs(z))
}
else if(alternative == "less"){
p.value <- pnorm(z)
}
else if(alternative == "greater"){
p.value <- 1 - pnorm(z)
}
}
}
p.value <- unname(p.value)
wilcoxon_list <- list(statistic = W, parameter = NULL, p.value = p.value,
null.value = mu, alternative = alternative,
method = method, data.name = data.name)
class(wilcoxon_list) <- "htest"
return(wilcoxon_list)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.