Nothing
# mode: var is used to examine variance estimators, test is used to perform testing
# comb2: use combine to compute stat, use wmw to compute permutation distribution
mod.wmw.test=function(X,Y, alternative = c("two.sided", "less", "greater"), correct = TRUE, perm=NULL, mc.rep=1e4, method=c("combine","comb2","fp","wmw","fplarge","nsm3"), verbose=FALSE
, mode=c("test","var"), useC=TRUE # develeopment parameters
) {
alternative <- match.arg(alternative)
method <- match.arg(method)
mode <- match.arg(mode)
# if not converted to double, .Call may throw errors for integer X/Y
X=as.double(X[!is.na(X)])
Y=as.double(Y[!is.na(Y)])
m=length(X)
n=length(Y)
N=m+n
if(!correct) .corr=0 else .corr=switch(alternative, "two.sided" = 2, "greater" = -1, "less" = 1)
if(useC) {
z=.Call("mod_wmw_test", X, Y, .corr, switch(method,"wmw"=0,"fp"=1,"fplarge"=2,"combine"=3,"comb2"=3,"nsm3"=4), as.integer(1), as.integer(1))
} else {
#the following R implementation may give a different result under i386 software on 64-bit machine, b/c C is always 64 bit, but R is 32 bit
z=compute.Z(X,Y, alternative, correct, ifelse(method=="comb2","combine",method));
}
# for developing manuscript
if(mode=="var") return(compute.Z(X,Y, alternative, correct, ifelse(method=="comb2","combine",method), return.all=TRUE))
p.method=NULL
if(is.null(perm)) {
if (min(m,n)>=20) p.method="asymptotic"
} else if (!perm) {
p.method="asymptotic"
}
if(is.null(p.method)) {
if (choose(N,m)<=mc.rep) p.method="exact" else p.method="Monte Carlo"
}
if(verbose) print(p.method)
if (p.method=="asymptotic") {
# one sided p values are opposite to wilcox.test because the way U is computed here as the rank of Y's
pval=switch(alternative,
"two.sided" = 2 * pnorm(abs(z), lower.tail=FALSE),
"less" = pnorm(z, lower.tail=FALSE), # X<Y
"greater" = pnorm(z, lower.tail=TRUE) #X>Y
)
} else {
xy=c(X,Y)
if(p.method=="exact"){
indexes=combn(1:N, m)
if (useC) {
z.mc=.Call("mod_wmw_test", X, Y, .corr, switch(method,"wmw"=0,"fp"=1,"fplarge"=2,"combine"=3,"comb2"=0,"nsm3"=4), as.integer(0), indexes)
} else {
z.mc=apply(indexes, 2, function(ind.x) {
ind.y=setdiff(1:N, ind.x)
tmp=compute.Z(xy[ind.x], xy[ind.y], alternative=alternative, correct=correct, method=ifelse(method=="comb2","wmw",method))
tmp
})
}
#print(sort(z.mc))
## numerical instability
#t=2075
#print((z-z.mc[t]))
#print(which(z==z.mc))
} else if (p.method=="Monte Carlo") {
#### save rng state before set.seed in order to restore before exiting this function
save.seed <- try(get(".Random.seed", .GlobalEnv), silent=TRUE)
if (inherits(save.seed,"try-error")) { set.seed(1); save.seed <- get(".Random.seed", .GlobalEnv) }
set.seed(1)
if(useC) {
z.mc=.Call("mod_wmw_test", X, Y, .corr, switch(method,"wmw"=0,"fp"=1,"fplarge"=2,"combine"=3,"comb2"=0,"nsm3"=4), as.integer(mc.rep), as.integer(1))
} else {
z.mc=sapply(1:mc.rep, function(b) {
ind.x=sample(1:N, m)
ind.y=setdiff(1:N,ind.x)
compute.Z(xy[ind.x], xy[ind.y], alternative=alternative, correct=correct, method=ifelse(method=="comb2","wmw",method))
})
}
#### restore rng state
assign(".Random.seed", save.seed, .GlobalEnv)
}
numerical.stability.margin=1e-10 # e.g. in C, ((6.6)/(100))+((0.45)/(25)) != ((6.84)/(100))+((0.39)/(25)), some permutations should have identical stat as the obs., but end up not
pval=switch(alternative,
"two.sided" = mean(z.mc>=abs(z)*(1-numerical.stability.margin)) + mean(z.mc<= -abs(z)*(1-numerical.stability.margin)),
"less" = mean(z.mc>=z*(1-sign(z)*numerical.stability.margin)), # X<Y
"greater" = mean(z.mc<=z*(1+sign(z)*numerical.stability.margin)) #X>Y
)
}
pval
}
# R implementation, for debugging use
# ties are properly handled for WMW variance, but not properly handled for estimated Var
compute.Z=function(X,Y, alternative, correct, method, return.all=FALSE) {
m=length(X)
n=length(Y)
N=m+n
r <- rank(c(X, Y))
U=(sum(r[(m+1):N]) - n*(n+1)/2)/m/n
correction <- switch(alternative,
"two.sided" = sign(U-0.5) * 0.5/m/n,
"greater" = 0.5/m/n,
"less" = -0.5/m/n
)
if(correct) U=U-correction
# V_mn
var.wmw=1/12/m + 1/12/n + 1/12/m/n
# ties
NTIES <- table(r)
var.wmw <- var.wmw - sum(NTIES^3-NTIES)/(12*m*n*N*(N - 1))
# estimated var
Fx=ecdf(X); Fy=ecdf(Y)
a=var(Fy(X)); b=var(Fx(Y))
var.exact=a/m*(1-1/n) + b/n*(1-1/m) + mean(Fy(X))/m * mean(Fx(Y))/n
var.large=a/m + b/n # large sample approx
var.nsm3=a/m*(1-1/m) + b/n*(1-1/n) + mean(Fy(X))/m * mean(Fx(Y))/n
# combined test
var.3=min(var.wmw, var.exact)
z=(U-0.5)/sqrt(switch(method, "fp"=var.exact, "fplarge"=var.large, "combine"=var.3, "wmw"=var.wmw, "nsm3"=var.nsm3))
#print(z)
if(return.all) return(c(U=U, var.wmw=var.wmw, var.large=var.large, var.exact=var.exact, var.nsm3=var.nsm3, z=z)) else return(z)
}
mod.wsr.test=function(X,Y, debug=FALSE) {
m=length(X)
n=length(Y)
N=m+n
delta=Y-X
abs.delta=abs(delta)
sgn.delta=sign(delta)
pos.delta=ifelse(delta>0,1,0)
Fdelta=ecdf(delta)
h.1=1-Fdelta(-delta)
# # alternatively
# h.1.f=function(i) (sum(delta+delta[i]>0) - sum(delta[i]>0))/(length(delta)-1)
# h.1=numeric(m)
# for (i in 1:m) h.1[i]=h.1.f(i)
r.1 <- rank(abs.delta)
W.plus = mean(pos.delta*r.1)
W.plus.2=mean(sgn.delta*r.1) # definition in van der Vaart
var.W.plus = m * var(h.1) # var(W.plus.2) is 4 x var(W.plus)
var.W.plus.0 = (m+1)*(2*m+1)/(24*m)
if(debug) {
return(c(W.plus=W.plus, var.W.plus=var.W.plus, var.W.plus.0=var.W.plus.0))
}
# p value
pchisq( (W.plus-m/4)^2/var.W.plus, df=1, lower.tail=FALSE)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.