# R/kuiper.2samp.R In kuiper.2samp: Two-Sample Kuiper Test

```#'  2-sample Kuiper Test Function: performs Kuiper Test for two sets samples of observations

#' @param x an array of sample observations
#' @param y the other array of sample observations
#'
#' @return Kuiper test statistic and p-value
#'
#' @examples
#' kuiper.2samp(rnorm(1e3),rnorm(1e3))

#' @note
#'  The computation of the p-value takes references from Paltani(2004)
#'  which states that the functions (in the set of four formulas) never underestimates the false positive probability
#'  however it can be a bit high when the sample size in the range of 40 to 50
#'  a factor of 1.5 is quoted at the 1e-7 level

#' @references
#'  Kuiper, N. H. (1960). "Tests concerning random points on a circle". Proceedings of the Koninklijke Nederlandse Akademie van Wetenschappen, Series A. 63: 38-47.
#'  Paltani, S., "Searching for periods in X-ray observations using Kuiper's test. Application to the ROSAT PSPC archive", Astronomy and Astrophysics, v.240, p.789-790, 2004.

#' @export
kuiper.2samp<-function(x,y)
{
#identify and remove na in x data
x<-x[!is.na(x)]

#calculate length of x data
nx<-length(x)
if(nx<1)
stop("Not enough 'x' data")
pval<-NULL

#identify and remove na in y data
y<-y[!is.na(y)]

#calculate length of y data
ny<-length(y)
if(ny<1)
stop("Not Enough'y' data")

#obtain the rank of x and y combine series
r<-rank(c(x,y),ties.method="min")

#create CDF of x and y respectively
c<-c(x,y)
csort<-sort(c)
c.min<-min(c(x,y))
c.max<-max(c(x,y))

#The CDF of x and y seperately with first row - the order number, and second row - the frequency (%)
x1<-rbind(sort(x),seq(1/nx,1,by=1/nx))
y1<-rbind(sort(y),seq(1/ny,1,by=1/ny))

#combine CDF_x and CDF_y to the combined scale c(x,y)
temp<-matrix(0,2,nx+ny)

for (i in (1:(nx+ny))){
temp[1,i]<-ifelse(sum(csort[i]==sort(x))>0,x1[2,which.max(csort[i]==sort(x))],max(temp[1,1:i]))
temp[2,i]<-ifelse(sum(csort[i]==sort(y))>0,y1[2,which.max(csort[i]==sort(y))],max(temp[2,1:i]))
}

#Kuiper statistics calculates the sum of D+ and D- in absoulute value
kp<-abs(max(temp[1,]-temp[2,]))+abs(max(temp[2,]-temp[1,]))

#Calculation of p-value according to S.Paltani (2004)
n<-nx*ny/(nx+ny)
if (kp<0|kp>2){
stop("The test statistic much be in the range of (0,2) by definition of the Kuiper Test")
} else if (kp<2/n){

#(4) in S.Paltani (2004)
PVAL<-1-factorial(n)*(kp-1/n)^(n-1)
} else if (kp<3/n){

#(5) in S.Paltani (2004)
k<--(n*kp-1)/2
r<-sqrt(k^2-(n*kp-2)/2)
a<--k+r
b<--k-r
PVAL<-1-factorial(n-1)*(b^(n-1)*(1-a)-a^(n-1)*(1-b))/(n^(n-2)*(b-a))
} else if ((kp>0.5&&n%%2==0)|(kp>(n-1)/(2*n)&&n%%2==1)){

#(6) in S.Paltani (2004)
temp<-as.integer(floor(n*(1-kp)))+1
c<-matrix(0,ncol=1,nrow=temp)
#Loop for calculating the sum (any other structure better than loop in here?)
for (t in 0:temp){
y<-kp+t/n
tt<-y^(t-3)*(y^3*n-y^2*t*(3-2/n)/n-t*(t-1)*(t-2)/n^2)
p_temp<-choose(n,t)*(1-kp-t/n)^(n-t-1)*tt
c[t]<-p_temp
}
PVAL<-sum(c)
} else {

#(3) in S.Paltani (2004)
z<-kp*sqrt(n)
s1<-0
term<-1e-12
abs<-1e-100
for (m in 1:1e+08){
t1<-2*(4*m^2*z^2-1)*exp(-2*m^2*z^2)
s<-s1
s1<-s1+t1
if ((abs(s1-s)/(abs(s1)+abs(s))<term)|(abs(s1-s)<abs)) break
}
s2<-0
for (m in 1:1e+08){
t2<-m^2*(4*m^2*z^2-3)*exp(-2*m^2*z^2)
s<-s2
s2<-s2+t2
if ((abs(s2-s)/(abs(s2)+abs(s)))<term|(abs(s1-s)<abs)) break
PVAL<-s1-8*kp/(3*sqrt(n))*s2
}
}

#Kuiper statistic and p-value as the outputs
output<-list(Kuiper.statistic=kp, p.value=PVAL)
return(output)
}
```

## Try the kuiper.2samp package in your browser

Any scripts or data that you put into this service are public.

kuiper.2samp documentation built on May 2, 2019, 7:20 a.m.