R/ks.R

ks <- function(x, y, w = FALSE, sig = TRUE, alpha = .05) {
  #  Compute the two-sample Kolmogorov-Smirnov test statistic.
  #
  #  Arguments:
  #    x, y   The two samples. Missing values are automatically removed.
  #    w      w=TRUE computes the weighted version of the statistic instead.
  #    sig    sig=TRUE indicates that the exact significance level is to be
  #           computed.
  #    alpha  Level used for the approximate critical value of the
  #           unweighted test. The weighted critical value computed below
  #           does not use alpha (NOTE(review): its constants are presumably
  #           calibrated for the .05 level -- confirm).
  #
  #  If there are ties, the reported significance level is exact when
  #  using the unweighted test, but for the weighted test the reported
  #  level is too high (a warning is issued in that case).
  #
  #  This function uses the functions ecdf, kstiesig, kssig and kswsig,
  #  which are defined elsewhere in the package. ecdf is called as
  #  ecdf(sample, value), i.e. it is the package's own helper and not
  #  stats::ecdf.
  #
  #  Returns a list with components
  #    test     the value of the test statistic,
  #    critval  the approximate critical value,
  #    p.value  the significance level if sig=TRUE, otherwise NA.
  #
  x <- x[!is.na(x)]
  y <- y[!is.na(y)]
  if (length(x) == 0 || length(y) == 0) {
    stop("x and y must each contain at least one non-missing value")
  }
  w <- as.logical(w)
  sig <- as.logical(sig)
  siglevel <- NA

  # Sample sizes as doubles so that products cannot overflow integer range.
  n_x <- as.numeric(length(x))
  n_y <- as.numeric(length(y))

  z <- sort(c(x, y))  # Pool and sort the observations
  n_z <- as.numeric(length(z))

  # Scalar flag: TRUE when any pooled value occurs more than once.
  # (A vector from duplicated() cannot be used in `&&` / `if`, and its
  # first element is always FALSE.)
  tie <- anyDuplicated(z) > 0

  # Evaluate both empirical cdfs once at every pooled observation; the
  # weighted statistic below reuses them.
  cdf_x <- vapply(z, function(val) ecdf(x, val), numeric(1), USE.NAMES = FALSE)
  cdf_y <- vapply(z, function(val) ecdf(y, val), numeric(1), USE.NAMES = FALSE)
  v <- abs(cdf_x - cdf_y)
  stat <- max(v)

  # Approximate critical value for the unweighted test at level alpha.
  crit <- sqrt(0 - log(alpha / 2) * (n_x + n_y) / (2 * n_x * n_y))

  if (!w && sig) {
    siglevel <- if (tie) {
      kstiesig(x, y, stat)
    } else {
      kssig(length(x), length(y), stat)
    }
  }

  if (w) {
    crit <- (max(n_x, n_y) - 5) * .48 / 95 + 2.58 + abs(n_x - n_y) * .44 / 95
    if (n_x > 100 || n_y > 100) {
      warning(
        "When either sample size is\n",
        "greater than 100, the approximate critical value can be inaccurate. It is\n",
        "recommended that the exact significance level be computed."
      )
    }
    # Scale each difference by the standard deviation implied by the pooled
    # empirical cdf. At the largest pooled value this is 0/0; those entries
    # are dropped before taking the maximum.
    pooled <- (n_x * cdf_x + n_y * cdf_y) / n_z
    v <- v / sqrt(pooled * (1. - pooled))
    v <- v[!is.na(v)]
    stat <- max(v) * sqrt(n_x * n_y / n_z)
    if (sig) siglevel <- kswsig(length(x), length(y), stat)
    if (tie && sig) {
      warning(
        "Ties were detected. The reported significance level of the\n",
        "weighted Kolmogorov-Smirnov test statistic is not exact."
      )
    }
  }
  list(test = stat, critval = crit, p.value = siglevel)
}
musto101/wilcox_R documentation built on May 23, 2019, 10:52 a.m.