Prentice (Friedman/Wilcoxon/Kruskal) Rank Sum Test
Description
Performs a generalized Friedman rank sum test with replicated blocked data or, as special cases, a KruskalWallis rank sum test on data following a oneway layout or a Wilcoxon rank sum test following a oneway layout with only two groups.
Usage
1 2 3 4 5 6 7 8  prentice.test(y, groups, blocks = NULL,
score = "rank", blkwght = "prentice", condvar = TRUE,
alternative = "two.sided", mu = 0, paired = FALSE,
exact = NULL, correct = FALSE, df = 1, warn = 0, optim = TRUE)
mu.wilcox.test(y, groups, blocks = NULL, score = "rank",
paired = FALSE, exact = TRUE, correct = TRUE, ...)
mu.kruskal.test(y, groups, blocks, ... )
mu.friedman.test(y, groups, blocks, ... )

Arguments
y 
a numeric vector of data values,

groups 
factor or category object of the same length as 
blocks 
factor or category object of the same length as 
score 
character or function object, giving the score function to be
used. If

blkwght 
character object indicating the weights to apply to
see Wittkowski (1988) and AlvoCabilio (2005) for details. 
condvar 
if 
df 
if 
alternative 
character string, one of 
mu 
a single number representing the value of the mean or difference in means specified by the null hypothesis. 
paired 
if 
exact 
if 
correct 
if 
warn 
no warnings will be given if 
optim 
if 
... 
further arguments to be passed to or from methods. 
Details
prentice.test
is approximately twice as fast as
friedman.test
or kruskal.test
. In some cases, the
KruskalWallis test reduces to the Wilcoxon Ranksum test.
Thus, prentice.test
allows the additional parameters
mu
, paired
, exact
, and correct
,
to be entered, and passed. To ensure consistency of the results
between wilcox.test
and kruskal.test
,
the default for correct
is FALSE
in either case.
Value
A list with class "htest"
containing the following components:
statistic 
the value of chisquared statistic,
with 
parameter 
the degrees of freedom of the asymptotic chisquared distribution
associated with 
p.value 
the asymptotic or exact pvalue of the test. 
method 
a character string giving the name of the method used. 
data.name 
a character string (vector of length 1) containing the actual names
of the input arguments 
Null Hypothesis
The null hypothesis is that for any two observations chosen randomly from the same block, the probability that the first is larger than the second is the same as the probability that it is smaller.
Test Assumptions
The errors are assumed to be independent and identically distributed. The returned p.value should be interpreted carefully. It is only a largesample approximation whose validity increases with the size of the smallest of the groups and/or the number of blocks.
Algorithm
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48  prentice.test < function(
y,
groups,
blocks = NULL,
score = "rank", # NULL: y already scored
blkwght = "prentice", # block weights
<...>)
{
<...>
m < xTable(blocks,groups)
<...>
p < dim(m)[2]1 # planned number of groups
y.ok < <...>
y < y [y.ok]
groups < groups[y.ok]
blocks < blocks[y.ok]
M < xTable(blocks,groups)
<...>
mi < rowSums(m)
Mi < rowSums(M)
Wi < switch(tolower(blkwght),
prentice = (mi+1),
klotz = (Mi+1),
skillingsmack = sqrt(Mi+1),
rai = (Mi+1)/Mi,
<...>)
Bijk < Wi[blocks]
Tijk < Centered(
Score(FUNByIdx(y,blocks,wRank,na.any=FALSE)/(Mi[blocks]+1)),
blocks, Mi) * Bijk
T1 < qapply(Tijk,groups,sum)
A0i2 < (1/(Mi1))*qapply(Tijk^2,blocks,sum)
V0 < structure(dim=c(P,P), A0i2 %*% (
t(apply(M,1, function(x,P) diag(x)))  (1/Mi) *
t(apply(M,1,MC(function(x) outer1(x),list(outer1=outer1))))))
V1 < ginv(V0)
W < as.numeric(T1 %*% V1 %*% T1)
df.W < attr(V1,"rank")
p.W < 1  pchisq(W, df.W)
}

Author(s)
Knut M. Wittkowski kmw@rockefeller.edu
References
Friedman, M. (1937) Journal of the American Statistical Association, 32: 675701.
Lehmann, E. L. (1951) Annals of Mathematical Statistics, 22: 165179.
Kruskal, W. H. and Wallis, W. A. (1952) Journal of the American Statistical Association, 47: 583631.
Hajek, J. and Sidak, Z. (1967) Theory of rank tests, New York, NY: Academic.
Hollander, M. and Wolfe, D. A. (1973). Nonparametric Statistical Methods. New York, NY: John Wiley.
Lehmann, E. L. (1975). Nonparametrics: Statistical Methods Based on Ranks. Oakland, CA: HoldenDay.
Prentice, M. J. (1979) Biometrika, 66: 167170.
Wittkowski, K. M. (1988) Journal of the American Statistical Association, 83: 11631170.
Alvo, M. and Cabilio, P. (2005) Canadian Journal of StatisticsRevue Canadienne De Statistique, 33: 115129.
Wittkowski, K. M. (1992) Journal of the American Statistical Association, 87: 258.
Wittkowski, K. M. (1998) Biometrics, 54: 789¡§C791.
Randles, H. R. (2001) The American Statistician, 55: 96101.
Wittkowski, K. M., Lee, E., Nussbaum, R., Chamian, F. N. and Krueger, J. G. (2004) Statistics in Medicine, 23: 15791592.
See Also
wilcox.test
,
kruskal.test
,
friedman.test
,
rank
,
aov
Examples
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83  # friedman.test examples
treatments < factor(rep(c("Trt1", "Trt2", "Trt3"), each=4))
people < factor(rep(c("Subj1", "Subj2", "Subj3", "Subj4"), 3))
y < c(0.73,0.76,0.46,0.85,0.48,0.78,0.87,0.22,0.51,0.03,0.39,0.44)
print( friedman.test(y, treatments, people))
print(mu.friedman.test(y, treatments, people))
# Now suppose the data is in the form of a matrix,
# rows are people and columns are treatments.
# Generate 'ymat' and the factor objects:
ymat < matrix(c(0.73,0.76,0.46,0.85,0.48,0.78,0.87,0.22,0.51,
0.03,0.39,0.44), ncol=3)
bl < factor(as.vector(row(ymat)))
gr < factor(as.vector(col(ymat)))
print( friedman.test(ymat, gr, bl)) # same answer as above
print(mu.friedman.test(ymat, gr, bl))
# kruskal.test examples
# Data from Hollander and Wolfe (1973), p. 116
holl.y < c(2.9,3.0,2.5,2.6,3.2,3.8,2.7,4.0,2.4,2.8,3.4,3.7,2.2,2.0)
holl.grps < factor(c(1,1,1,1,1,2,2,2,2,3,3,3,3,3),
labels=c("Normal Subjects","Obstr. Airway Disease","Asbestosis"))
print( kruskal.test(holl.y, holl.grps))
print(mu.kruskal.test(holl.y, holl.grps))
# Now suppose the data is in the form of a table already,
# with groups in columns; note this implies that group
# sizes are the same.
tab.data < matrix(c(.38,.58,.15,.72,.09,.66,.52,.02,.59,.94,
.24,.94,.08,.97,.47,.92,.59,.77), ncol=3)
tab.data
y2 < as.vector(tab.data)
gr < factor(as.vector(col(tab.data))) # Groups are columns
print( kruskal.test(y2, gr))
print(mu.kruskal.test(y2, gr))
# wilcox.test examples
x < c(8.2, 9.4, 9.6, 9.7, 10.0, 14.5, 15.2, 16.1, 17.6, 21.5)
y < c(4.2, 5.2, 5.8, 6.4, 7.0, 7.3, 10.1, 11.2, 11.3, 11.5)
print( wilcox.test(x,y))
print(mu.wilcox.test(x,y))
print( wilcox.test(x,y, exact=FALSE))
print(mu.wilcox.test(x,y, exact=FALSE))
print( wilcox.test(x,y, exact=FALSE, correct=FALSE))
print(mu.wilcox.test(x,y, exact=FALSE, correct=FALSE))
xy < c(x,y)
groups < c(rep(1,length(x)),rep(2,length(y)))
print(prentice.test(xy,groups,exact=FALSE, correct=FALSE))
# compare speed
if (is.R()) sys.time < function (...) system.time(...)
n < 1000
data < runif(30*n)
grps < c(rep(1,10*n),rep(2,8*n),rep(3,12*n))
print(sys.time( kruskal.test( data,grps) ))
print(sys.time( mu.kruskal.test( data,grps,optim=FALSE) ))
print(sys.time( prentice.test(data,grps) ))
data < runif(600)
grps < rep(1:6,each=100)
blks < rep(1:100,length.out=length(data))
print(sys.time( friedman.test(data,grps,blks) ))
print(sys.time( mu.friedman.test(data,grps,blks,optim=FALSE) ))
print(sys.time( prentice.test(data,grps,blks) ))
data < runif(50000)
grps < rep(1:2,each=25000)
Wx < data[grps==1]
Wy < data[grps==2]
print(sys.time( wilcox.test(Wx,Wy) ))
print(sys.time( mu.wilcox.test(Wx,Wy,optim=FALSE) ))
print(sys.time( prentice.test(data,grps) ))
