# R/wilcoxon.R In uwIntroStats: Descriptive Statistics, Inference, Regression, and Plotting in an Introductory Statistics Course

#### Documented in wilcoxon

```## Perform a wilcoxon test
## Args: y           - the first variable
##       x           - the second variable
##       alternative - the alternative hypothesis
##       mu          - the null hypothesis
##       paired      - paired test?
##       exact       - exact binomial?
##       correct     - continuity correction?
##       conf.int    - return CI?
##       conf.level  - default confidence level
## Returns:
## Version: 2015 05 25
wilcoxon <- function(y, x = NULL, alternative = "two.sided",
mu = 0, paired = FALSE, exact = NULL, correct = FALSE, conf.int = FALSE,
conf.level = 0.95) {
wilcoxon.do <-
function (y, x, alternative,
mu, paired, exact, correct, conf.int,
conf.level,...)
{
if (!missing(mu) && ((length(mu) > 1L) || !is.finite(mu)))
stop("'mu' must be a single number")
if (conf.int) {
if (!((length(conf.level) == 1L) && is.finite(conf.level) &&
(conf.level > 0) && (conf.level < 1)))
stop("'conf.level' must be a single number between 0 and 1")
}
if (!is.numeric(x))
stop("'x' must be numeric")
if (!is.null(y)) {
if (!is.numeric(y))
stop("'y' must be numeric")
DNAME <- paste(deparse(substitute(x)), "and", deparse(substitute(y)))
if (paired) {
if (length(x) != length(y))
stop("'x' and 'y' must have the same length")
OK <- complete.cases(x, y)
x <- x[OK] - y[OK]
y <- NULL
}
else {
x <- x[is.finite(x)]
y <- y[is.finite(y)]
}
}
else {
DNAME <- deparse(substitute(x))
if (paired)
stop("'y' is missing for paired test")
x <- x[is.finite(x)]
}
if (length(x) < 1L)
stop("not enough (finite) 'x' observations")
CORRECTION <- 0
if (is.null(y)) {
METHOD <- "Wilcoxon signed rank test"
x <- x - mu
ZEROES <- any(x == 0)
nZeroes <- sum(x==0)
sumZeroes <- 0
if (ZEROES)
x <- x[x != 0]
n <- as.double(length(x))
if (is.null(exact))
exact <- (n < 50)
r <- rank(abs(x))
signs <- sign(x)
nPos <- sum(signs>0)
nNeg <- sum(signs<0)
sumPos <- sum(r[x>0])
sumNeg <- sum(r[x<0])
STATISTIC <- setNames(sum(r[x > 0]), "V")
TIES <- length(r) != length(unique(r))
ePos <- n*(n+1)/4
if (exact && !TIES && !ZEROES) {# no adjustment necessary
PVAL <- switch(alternative, two.sided = {
p <- if (STATISTIC > (n * (n + 1)/4)) psignrank(STATISTIC -
1, n, lower.tail = FALSE) else psignrank(STATISTIC,
n)
min(2 * p, 1)
}, greater = psignrank(STATISTIC - 1, n, lower.tail = FALSE),
less = psignrank(STATISTIC, n))
z <- 2*(STATISTIC - ePos)/sqrt(n*(n+1)*(2*n+1)/6)
if (conf.int) {
x <- x + mu
alpha <- 1 - conf.level
diffs <- outer(x, x, "+")
diffs <- sort(diffs[!lower.tri(diffs)])/2
cint <- switch(alternative, two.sided = {
qu <- qsignrank(alpha/2, n)
if (qu == 0) qu <- 1
ql <- n * (n + 1)/2 - qu
achieved.alpha <- 2 * psignrank(trunc(qu) -
1, n)
c(diffs[qu], diffs[ql + 1])
}, greater = {
qu <- qsignrank(alpha, n)
if (qu == 0) qu <- 1
achieved.alpha <- psignrank(trunc(qu) - 1,
n)
c(diffs[qu], +Inf)
}, less = {
qu <- qsignrank(alpha, n)
if (qu == 0) qu <- 1
ql <- n * (n + 1)/2 - qu
achieved.alpha <- psignrank(trunc(qu) - 1,
n)
c(-Inf, diffs[ql + 1])
})
if (achieved.alpha - alpha > alpha/2) {
warning("requested conf.level not achievable")
conf.level <- 1 - signif(achieved.alpha, 2)
}
attr(cint, "conf.level") <- conf.level
ESTIMATE <- c(`(pseudo)median` = median(diffs))
}
}
else {# need to adjust for ties
NTIES <- table(r)
z <- STATISTIC - n * (n + 1)/4
SIGMA <- sqrt(n * (n + 1) * (2 * n + 1)/24 - sum(NTIES^3 -
NTIES)/48)
if (correct) {
CORRECTION <- switch(alternative, two.sided = sign(z) *
0.5, greater = 0.5, less = -0.5)
METHOD <- paste(METHOD, "with continuity correction")
}
z <- (z - CORRECTION)/SIGMA
PVAL <- switch(alternative, less = pnorm(z), greater = pnorm(z,
lower.tail = FALSE), two.sided = 2 * min(pnorm(z),
pnorm(z, lower.tail = FALSE)))
if (conf.int) {# calculate confidence interval if requested
x <- x + mu
alpha <- 1 - conf.level
mumin <- min(x)
mumax <- max(x)
wdiff <- function(d, zq) {
xd <- x - d
xd <- xd[xd != 0]
nx <- length(xd)
dr <- rank(abs(xd))
zd <- sum(dr[xd > 0]) - nx * (nx + 1)/4
NTIES.CI <- table(dr)
SIGMA.CI <- sqrt(nx * (nx + 1) * (2 * nx +
1)/24 - sum(NTIES.CI^3 - NTIES.CI)/48)
if (SIGMA.CI == 0)
stop("cannot compute confidence interval when all observations are tied",
call. = FALSE)
CORRECTION.CI <- if (correct) {
switch(alternative, two.sided = sign(zd) *
0.5, greater = 0.5, less = -0.5)
}
else 0
(zd - CORRECTION.CI)/SIGMA.CI - zq
}
cint <- switch(alternative, two.sided = {
repeat {
mindiff <- wdiff(mumin, zq = qnorm(alpha/2,
lower.tail = FALSE))
maxdiff <- wdiff(mumax, zq = qnorm(alpha/2))
if (mindiff < 0 || maxdiff > 0) alpha <- alpha *
2 else break
}
if (1 - conf.level < alpha * 0.75) {
conf.level <- 1 - alpha
warning("requested conf.level not achievable")
}
l <- uniroot(wdiff, c(mumin, mumax), tol = 1e-04,
zq = qnorm(alpha/2, lower.tail = FALSE))\$root
u <- uniroot(wdiff, c(mumin, mumax), tol = 1e-04,
zq = qnorm(alpha/2))\$root
c(l, u)
}, greater = {
repeat {
mindiff <- wdiff(mumin, zq = qnorm(alpha,
lower.tail = FALSE))
if (mindiff < 0) alpha <- alpha * 2 else break
}
if (1 - conf.level < alpha * 0.75) {
conf.level <- 1 - alpha
warning("requested conf.level not achievable")
}
l <- uniroot(wdiff, c(mumin, mumax), tol = 1e-04,
zq = qnorm(alpha, lower.tail = FALSE))\$root
c(l, +Inf)
}, less = {
repeat {
maxdiff <- wdiff(mumax, zq = qnorm(alpha))
if (maxdiff > 0) alpha <- alpha * 2 else break
}
if (1 - conf.level < alpha * 0.75) {
conf.level <- 1 - alpha
warning("requested conf.level not achievable")
}
u <- uniroot(wdiff, c(mumin, mumax), tol = 1e-04,
zq = qnorm(alpha))\$root
c(-Inf, u)
})
attr(cint, "conf.level") <- conf.level
correct <- FALSE
ESTIMATE <- c(`(pseudo)median` = uniroot(wdiff,
c(mumin, mumax), tol = 1e-04, zq = 0)\$root)
}
if (exact && TIES) {
warning("cannot compute exact p-value with ties")
if (conf.int)
warning("cannot compute exact confidence interval with ties")
}
if (exact && ZEROES) {
warning("cannot compute exact p-value with zeroes")
if (conf.int)
warning("cannot compute exact confidence interval with zeroes")
}
}
}
else {
if (length(y) < 1L)
stop("not enough 'y' observations")
METHOD <- "Wilcoxon rank sum test"
r <- rank(c(x - mu, y))
n.x <- as.double(length(x))
n.y <- as.double(length(y))
rankSumY <- sum(r[seq(from=length(x)+1, to=length(x)+length(y))]) #- n.y * (n.y + 1)/2
rankSumX <- sum(r[seq_along(x)]) #- n.x * (n.x + 1)/2
unadjVar <- (n.x * n.y/12) * ((n.x + n.y + 1))
expected <- (rankSumY+rankSumX)/2
if (is.null(exact))
exact <- (n.x < 50) && (n.y < 50)
STATISTIC <- c(W = sum(r[seq_along(x)]) - n.x * (n.x +
1)/2)
TIES <- (length(r) != length(unique(r)))
if (exact && !TIES) { #no need for correction
z <- STATISTIC - n.x * n.y/2
if (correct) {
CORRECTION <- switch(alternative, two.sided = sign(z) *
0.5, greater = 0.5, less = -0.5)
METHOD <- paste(METHOD, "with continuity correction")
}
z <- (z - CORRECTION)/sqrt(adjVar)

PVAL <- switch(alternative, two.sided = {
p <- if (STATISTIC > (n.x * n.y/2)) pwilcox(STATISTIC -
1, n.x, n.y, lower.tail = FALSE) else pwilcox(STATISTIC,
n.x, n.y)
min(2 * p, 1)
}, greater = {
pwilcox(STATISTIC - 1, n.x, n.y, lower.tail = FALSE)
}, less = pwilcox(STATISTIC, n.x, n.y))
if (conf.int) {
alpha <- 1 - conf.level
diffs <- sort(outer(x, y, "-"))
cint <- switch(alternative, two.sided = {
qu <- qwilcox(alpha/2, n.x, n.y)
if (qu == 0) qu <- 1
ql <- n.x * n.y - qu
achieved.alpha <- 2 * pwilcox(trunc(qu) - 1,
n.x, n.y)
c(diffs[qu], diffs[ql + 1])
}, greater = {
qu <- qwilcox(alpha, n.x, n.y)
if (qu == 0) qu <- 1
achieved.alpha <- pwilcox(trunc(qu) - 1, n.x,
n.y)
c(diffs[qu], +Inf)
}, less = {
qu <- qwilcox(alpha, n.x, n.y)
if (qu == 0) qu <- 1
ql <- n.x * n.y - qu
achieved.alpha <- pwilcox(trunc(qu) - 1, n.x,
n.y)
c(-Inf, diffs[ql + 1])
})
if (achieved.alpha - alpha > alpha/2) {
warning("Requested conf.level not achievable")
conf.level <- 1 - achieved.alpha
}
attr(cint, "conf.level") <- conf.level
ESTIMATE <- c(`difference in location` = median(diffs))
}
}
else { #need to adjust for ties
NTIES <- table(r)
tiedAdjVar <- sum(NTIES^3 - NTIES)/((n.x + n.y) * (n.x + n.y - 1))
z <- STATISTIC - n.x * n.y/2
SIGMA <- sqrt((n.x * n.y/12) * ((n.x + n.y + 1) -
sum(NTIES^3 - NTIES)/((n.x + n.y) * (n.x + n.y -
1))))
if (correct) {
CORRECTION <- switch(alternative, two.sided = sign(z) *
0.5, greater = 0.5, less = -0.5)
METHOD <- paste(METHOD, "with continuity correction")
}
z <- (z - CORRECTION)/SIGMA
PVAL <- switch(alternative, less = pnorm(z), greater = pnorm(z,
lower.tail = FALSE), two.sided = 2 * min(pnorm(z),
pnorm(z, lower.tail = FALSE)))
if (conf.int) {
alpha <- 1 - conf.level
mumin <- min(x) - max(y)
mumax <- max(x) - min(y)
wdiff <- function(d, zq) {
dr <- rank(c(x - d, y))
NTIES.CI <- table(dr)
dz <- (sum(dr[seq_along(x)]) - n.x * (n.x +
1)/2 - n.x * n.y/2)
CORRECTION.CI <- if (correct) {
switch(alternative, two.sided = sign(dz) *
0.5, greater = 0.5, less = -0.5)
}
else 0
SIGMA.CI <- sqrt((n.x * n.y/12) * ((n.x + n.y +
1) - sum(NTIES.CI^3 - NTIES.CI)/((n.x + n.y) *
(n.x + n.y - 1))))
if (SIGMA.CI == 0)
stop("cannot compute confidence interval when all observations are tied",
call. = FALSE)
(dz - CORRECTION.CI)/SIGMA.CI - zq
}
root <- function(zq) {
f.lower <- wdiff(mumin, zq)
if (f.lower <= 0)
return(mumin)
f.upper <- wdiff(mumax, zq)
if (f.upper >= 0)
return(mumax)
uniroot(wdiff, c(mumin, mumax), f.lower = f.lower,
f.upper = f.upper, tol = 1e-04, zq = zq)\$root
}
cint <- switch(alternative, two.sided = {
l <- root(zq = qnorm(alpha/2, lower.tail = FALSE))
u <- root(zq = qnorm(alpha/2))
c(l, u)
}, greater = {
l <- root(zq = qnorm(alpha, lower.tail = FALSE))
c(l, +Inf)
}, less = {
u <- root(zq = qnorm(alpha))
c(-Inf, u)
})
attr(cint, "conf.level") <- conf.level
correct <- FALSE
ESTIMATE <- c(`difference in location` = uniroot(wdiff,
c(mumin, mumax), tol = 1e-04, zq = 0)\$root)
}
if (exact && TIES) {
warning("cannot compute exact p-value with ties")
if (conf.int)
warning("cannot compute exact confidence intervals with ties")
}
}
}
names(mu) <- if (paired || !is.null(y))
"location shift"
else "location"
RVAL <- list(statistic = STATISTIC, parameter = NULL, p.value = as.numeric(PVAL),
null.value = mu, alternative = alternative, method = METHOD,
data.name = DNAME)
if("signed" %in% strsplit(METHOD, " ")[[1]]){# create formatted matrices for printing signed rank test
## create matrix of observed and expected values
printMat <- matrix(c(nPos, sumPos, ePos), nrow=1)
colnames(printMat) <- c("obs", "sum ranks", "expected")
printMat <- rbind(printMat, matrix(c(nNeg, sumNeg, ePos), nrow=1))
printMat <- rbind(printMat, matrix(c(nZeroes, sumZeroes, 0), nrow=1))
printMat <- rbind(printMat, apply(printMat, 2, sum))
rownames(printMat) <- c("positive", "negative", "zero", "all")
## matrix of variances
colnames(vars) <- " "
## format z and pvalue
if(conf.int){
inf <- matrix(c(STATISTIC, format(as.numeric(PVAL), digits=5), paste("[", cint[1],", ", cint[2],"]", sep=""), ESTIMATE), nrow=1)
inf <- rbind(inf, matrix(c(format(z, digits=5), format(1-pnorm(z), digits=5), " ", " "), nrow=1))
colnames(inf) <- c("Test Statistic", "p-value", "CI", "Point Estimate")
} else {
inf <- matrix(c(STATISTIC, format(as.numeric(PVAL), digits=5)), nrow=1)
inf <- rbind(inf, matrix(c(format(z, digits=5), format(1-pnorm(z), digits=5)), nrow=1))
colnames(inf) <- c("Test Statistic", "p-value")
}
rownames(inf) <- c(names(STATISTIC), "Z")
hyps <- matrix(c(mu, alternative),nrow=1)
colnames(hyps) <- c("H0", "Ha")
rownames(hyps) <- "Hypothesized Median"
} else{ # formatted matrices for printing rank sum test
printMat <- matrix(c(n.y, rankSumY, expected), nrow=1)
colnames(printMat) <- c("obs", "rank sum", "expected")
printMat <- rbind(printMat, matrix(c(n.x, rankSumX, expected), nrow=1))
printMat <- rbind(printMat, apply(printMat, 2, sum))
rownames(printMat) <- c("Y", "X", "combined")
## matrix of variances
colnames(vars) <- " "
## format z and pvalue
if(conf.int){
inf <- matrix(c(STATISTIC, format(as.numeric(PVAL), digits=5), paste("[", cint[1],", ", cint[2],"]", sep=""), ESTIMATE), nrow=1)
inf <- rbind(inf, matrix(c(format(z, digits=5), format(1-pnorm(z), digits=5), " ", " "), nrow=1))
colnames(inf) <- c("Test Statistic", "p-value", "CI", "Point Estimate")
} else {
inf <- matrix(c(STATISTIC, format(as.numeric(PVAL), digits=5)), nrow=1)
inf <- rbind(inf, matrix(c(format(z, digits=5), format(1-pnorm(z), digits=5)), nrow=1))
colnames(inf) <- c("Test Statistic", "p-value")
}
rownames(inf) <- c(names(STATISTIC), "Z")
hyps <- matrix(c(mu, alternative),nrow=1)
colnames(hyps) <- c("H0", "Ha")
rownames(hyps) <- "Hypothesized Median"
}
RVAL <- c(RVAL, list(table=printMat, vars=vars, hyps=hyps, inf=inf))

if (conf.int)
RVAL <- c(RVAL, list(conf.int = cint, estimate = ESTIMATE))
invisible(RVAL)
}

wilcox.obj <- wilcoxon.do(x=y,y=x,alternative=alternative,mu=mu,paired=paired,exact=exact,correct=correct,conf.int=conf.int,conf.level=conf.level)
wilcox.obj\$call <- match.call()
class(wilcox.obj) <- "wilcoxon"
wilcox.obj
}
```

## Try the uwIntroStats package in your browser

Any scripts or data that you put into this service are public.

uwIntroStats documentation built on Oct. 10, 2018, 5:04 p.m.