R/testRNG.R

Defines functions permut stirlingDividedByK stirling coll.test.sparse coll.test order.test poker.test serial.test freq.test gap.test

Documented in coll.test coll.test.sparse freq.test gap.test order.test permut poker.test serial.test stirling

## 
# @file  testRNG.R
# @brief R file for all RNG tests
#
# @author Christophe Dutang
#
#
# Copyright (C) 2009, Christophe Dutang, 
# All rights reserved.
#
# The new BSD License is applied to this software.
# Copyright (c) 2009 Christophe Dutang. 
# Christophe Dutang, see http://dutangc.free.fr
# All rights reserved.
#
#      Redistribution and use in source and binary forms, with or without
#      modification, are permitted provided that the following conditions are
#      met:
#      
#          - Redistributions of source code must retain the above copyright
#          notice, this list of conditions and the following disclaimer.
#          - Redistributions in binary form must reproduce the above
#          copyright notice, this list of conditions and the following
#          disclaimer in the documentation and/or other materials provided
#          with the distribution.
#          - Neither the names of its contributors may be used to endorse or promote 
#          products derived from this software without specific prior written
#          permission.
#     
#      THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
#      "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
#      LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
#      A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
#      OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
#      SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
#      LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
#      DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
#      THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
#      (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
#      OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#  
#
#############################################################################
### Tests of random numbers generated by computers
###
###			R functions
### 

#gap test
gap.test <- function(u, lower = 0, upper = 1/2, echo = TRUE)
{
  
  # compute gaps such as g_i = 1 if u_i > max or u_i < min, 0 otherwise 
  gap <-   ( ! ( ( u <= upper ) & ( u >= lower ) ) ) * 1 
  n <- length( u )
  p <- upper - lower
  
  #find index of zeros 
  indexzero <- ( gap == 1 ) * 1:n
  indexzero <- indexzero[ indexzero != 0 ]
  indexzero <- c(0, indexzero, n+1)
  lindzero <- length( indexzero )
  
  #compute sizes of zero lengths
  lengthsize <- indexzero[2:lindzero] - indexzero[2:lindzero-1] -1
  lengthsize <- lengthsize[lengthsize != 0]
  maxlen <- max( lengthsize )
  maxlen <- max( maxlen, floor( ( log( 10^(-1) ) - 2*log( 1-p ) - log( n ) ) / log( p ) ) )
  
  
  #compute observed and theoretical frequencies
  obsnum <- sapply( 1:maxlen, function(t) sum( lengthsize == t ) )
  expnum <- (1-p)^2*p^( 1:maxlen ) * n
  
  #compute chisquare statistic
  residu <-  (obsnum - expnum) / sqrt(expnum)
  stat <- sum( residu^2 )
  pvalue <- pchisq( stat, maxlen - 1, lower.tail = FALSE)
  
  options(digits=2)    
  if( echo )
  {
    cat("\n\t\t\t Gap test\n")
    cat("\nchisq stat = ", stat, ", df = ",maxlen-1, ", p-value = ", pvalue, "\n", sep="")
    cat("\n\t\t (sample size : ",length(u),")\n\n", sep="")
    cat("length\tobserved freq\t\ttheoretical freq\n")
    for(i in 1:maxlen)
      cat(i,"\t\t\t", obsnum[i],"\t\t\t", expnum[i],"\n")
  }
  
  res <- list( statistic = stat, parameter = maxlen-1, 
               p.value = pvalue, observed = obsnum, 
               expected = expnum, residuals = residu ) 
  return( invisible( res ) )
}

#frequency test
freq.test <- function(u, seq = 0:15, echo = TRUE)
{
  #compute integers such as min(seq) <= integernum[i] < max(seq)
  integernum <- floor( u * length( seq ) + min( seq ) )
  
  # observed numbers equal to seq[i]
  obsnum <- sapply( seq, function(x) sum( integernum == x ) )
  
  # expected number equal to seq[i]
  expnum <- length( u ) / length( seq )    
  
  #compute chisquare statistic
  residu <-  (obsnum - expnum) / sqrt(expnum)
  stat <- sum( residu^2 )
  pvalue <- pchisq( stat, length( seq ) - 1, lower.tail = FALSE)
  
  options(digits=2)    
  if( echo )
  {
    cat("\n\t\t\t Frequency test\n")
    cat("\nchisq stat = ", stat, ", df = ",length(seq)-1, ", p-value = ", pvalue, "\n", sep="")
    cat("\n\t\t (sample size : ",length(u),")\n\n", sep="")
    cat("\tobserved number\t",obsnum,"\n")
    cat("\texpected number\t",expnum,"\n")    
  } 
  
  res <- list( statistic = stat, parameter = length( seq ) -1, 
               p.value = pvalue, observed = obsnum, 
               expected = expnum, residuals =residu ) 
  return( invisible( res ) )
}

#serial test
serial.test <- function(u , d = 8, echo = TRUE)
{
  if(length(u) / d - as.integer( length(u) / d ) > 0)
    stop("the length of 'u' must be a multiple of d")
  
  #compute pairs in {0, ..., d-1}
  pair <- matrix( floor( u * d ), length( u ) / 2, 2)
  
  #compute u_i * d + u_i+1 in {0, ..., d^2-1}
  poly <- pair[ ,1] * d + pair[ ,2]
  
  #compute numbers
  obsnum <- sapply( 0 : ( d^2 - 1 ), function(x) sum( poly == x ) )
  expnum <- length( u ) / ( 2 * d^2 )    
  
  #compute chisquare statistic
  residu <-  (obsnum - expnum) / sqrt(expnum) 
  stat <- sum( residu^2 )
  pvalue <- pchisq( stat, d^2 - 1, lower.tail = FALSE)
  
  options(digits=2)    
  if( echo )
  {
    cat("\n\t\t\t Serial test\n")
    cat("\nchisq stat = ", stat, ", df = ",d^2-1, ", p-value = ", pvalue, "\n", sep="")
    cat("\n\t\t (sample size : ",length(u),")\n\n", sep="")
    cat("\tobserved number\t",obsnum,"\n")
    cat("\texpected number\t",expnum,"\n")    
  } 
  
  res <- list( statistic = stat, parameter = d^2 -1, 
               p.value = pvalue, observed = obsnum, 
               expected = expnum, residuals = residu) 
  return( invisible( res ) )
}


#poker.test 
poker.test <- function(u , nbcard = 5, echo = TRUE)
{
  if(length(u) / nbcard - as.integer( length(u) / nbcard ) > 0)
    stop("the length of 'u' must be a multiple of nbcard")
  
  #number of lines  
  nbl <- length( u ) / nbcard
  #compute "nbcard-hands" in {0, ..., k-1}
  hands <- matrix( as.integer( floor( u * nbcard ) ), nbl, nbcard) 
  
  
  #compute observed hands
  #implemented in src/testrng.c
  obshands <- .Call(CF_doPokerTest, hands, nbl, nbcard)
  
  #compute expected hands
  fact <- vector("numeric", nbcard+1)
  fact[1] <- 1
  for(i in 2 : (nbcard+1) )
    fact[i] <- fact[i-1] * (i-1)
  #fact contains 0!,1!, 2! ... (2*nbcard)!
  ind <- 1 : nbcard
  stirlingNum <- stirling( nbcard )[-1]
  exphands <- 1 / nbcard^nbcard * fact[nbcard+1] / fact[nbcard - ind +1] * stirlingNum[ ind ] * nbl
  
  
  stat <- sum( (obshands - exphands) ^ 2 / exphands )
  pvalue <- pchisq( stat, nbcard - 1, lower.tail = FALSE)
  
  options(digits=2)
  if( echo )
  {
    cat("\n\t\t\t Poker test\n")
    cat("\nchisq stat = ", stat, ", df = ",nbcard-1, ", p-value = ", pvalue, "\n", sep="")
    cat("\n\t\t (sample size : ",length(u),")\n\n", sep="")
    cat("\tobserved number\t",obshands,"\n")
    cat("\texpected number\t",exphands,"\n")    
  } 
  
  
  res <- list( statistic = stat, parameter = nbcard-1, 
               p.value = pvalue, observed = obshands, 
               expected = exphands, residuals = (obshands - exphands) ^ 2 / exphands ) 
  return( invisible( res ) )
}

#order test 
order.test <- function(u, d = 3, echo = TRUE)
{
  if(d > 8)
    stop("too long to compute this exponential cost problem.")
  if(d < 2)
    stop("wrong argument 'd'.")
  
  if(length(u) / d - as.integer( length(u) / d ) > 0)
    stop("the length of 'u' must be a multiple of d")
  
  # store u in a matrix
  if(is.vector(u))
    u <- matrix(u, length(u) / d, d)
  if(!is.matrix(u))
    stop("wrong argument u")
  
  
  # compute observed numbers (much faster to do it manually)
  factOfD <- factorial(d)
  obsnum <- vector("numeric", length=factOfD)
  if(d == 2)
  {
    obsnum[1] <- sum(u[,1] < u[,2])
    obsnum[2] <- sum(u[,2] < u[,1])
  }
  if(d == 3)
  {
    obsnum[1] <- sum(u[,1] < u[,2] & u[,2] <u[,3])
    obsnum[2] <- sum(u[,1] < u[,3] & u[,3] <u[,2])
    obsnum[3] <- sum(u[,2] < u[,1] & u[,1] <u[,3])
    obsnum[4] <- sum(u[,2] < u[,3] & u[,3] <u[,1])
    obsnum[5] <- sum(u[,3] < u[,2] & u[,2] <u[,1])
    obsnum[6] <- sum(u[,3] < u[,1] & u[,1] <u[,2])
  }
  if(d == 4)
  {
    obsnum[1] <- sum(u[,1] < u[,2] & u[,2] <u[,3] & u[,3] <u[,4])
    obsnum[2] <- sum(u[,1] < u[,3] & u[,3] <u[,2] & u[,2] <u[,4])
    obsnum[3] <- sum(u[,2] < u[,1] & u[,1] <u[,3] & u[,3] <u[,4])
    obsnum[4] <- sum(u[,2] < u[,3] & u[,3] <u[,1] & u[,1] <u[,4])
    obsnum[5] <- sum(u[,3] < u[,2] & u[,2] <u[,1] & u[,1] <u[,4])
    obsnum[6] <- sum(u[,3] < u[,1] & u[,1] <u[,2] & u[,2] <u[,4])
    
    obsnum[7] <- sum(u[,1] < u[,2] & u[,2] <u[,4] & u[,4] <u[,3])
    obsnum[8] <- sum(u[,1] < u[,3] & u[,3] <u[,4] & u[,4] <u[,2])
    obsnum[9] <- sum(u[,2] < u[,1] & u[,1] <u[,4] & u[,4] <u[,3])
    obsnum[10] <- sum(u[,2] < u[,3] & u[,3] <u[,4] & u[,4] <u[,1])
    obsnum[11] <- sum(u[,3] < u[,2] & u[,2] <u[,4] & u[,4] <u[,1])
    obsnum[12] <- sum(u[,3] < u[,1] & u[,1] <u[,4] & u[,4] <u[,2])
    
    obsnum[13] <- sum(u[,1] < u[,4] & u[,4] <u[,2] & u[,2] <u[,3])
    obsnum[14] <- sum(u[,1] < u[,4] & u[,4] <u[,3] & u[,3] <u[,2])
    obsnum[15] <- sum(u[,2] < u[,4] & u[,4] <u[,1] & u[,1] <u[,3])
    obsnum[16] <- sum(u[,2] < u[,4] & u[,4] <u[,3] & u[,3] <u[,1])
    obsnum[17] <- sum(u[,3] < u[,4] & u[,4] <u[,2] & u[,2] <u[,1])
    obsnum[18] <- sum(u[,3] < u[,4] & u[,4] <u[,1] & u[,1] <u[,2])
    
    obsnum[19] <- sum(u[,4] < u[,1] & u[,1] <u[,2] & u[,2] <u[,3])
    obsnum[20] <- sum(u[,4] < u[,1] & u[,1] <u[,3] & u[,3] <u[,2])
    obsnum[21] <- sum(u[,4] < u[,2] & u[,2] <u[,1] & u[,1] <u[,3])
    obsnum[22] <- sum(u[,4] < u[,2] & u[,2] <u[,3] & u[,3] <u[,1])
    obsnum[23] <- sum(u[,4] < u[,3] & u[,3] <u[,2] & u[,2] <u[,1])
    obsnum[24] <- sum(u[,4] < u[,3] & u[,3] <u[,1] & u[,1] <u[,2])
  }
  
  if(d == 5)
  {
    obsnum[1] <- sum(u[,1] < u[,2] & u[,2] <u[,3] & u[,3] <u[,4] & u[,4] <u[,5])
    obsnum[2] <- sum(u[,1] < u[,3] & u[,3] <u[,2] & u[,2] <u[,4] & u[,4] <u[,5])
    obsnum[3] <- sum(u[,2] < u[,1] & u[,1] <u[,3] & u[,3] <u[,4] & u[,4] <u[,5])
    obsnum[4] <- sum(u[,2] < u[,3] & u[,3] <u[,1] & u[,1] <u[,4] & u[,4] <u[,5])
    obsnum[5] <- sum(u[,3] < u[,2] & u[,2] <u[,1] & u[,1] <u[,4] & u[,4] <u[,5])
    obsnum[6] <- sum(u[,3] < u[,1] & u[,1] <u[,2] & u[,2] <u[,4] & u[,4] <u[,5])
    
    obsnum[7] <- sum(u[,1] < u[,2] & u[,2] <u[,4] & u[,4] <u[,3] & u[,3] <u[,5])
    obsnum[8] <- sum(u[,1] < u[,3] & u[,3] <u[,4] & u[,4] <u[,2] & u[,2] <u[,5])
    obsnum[9] <- sum(u[,2] < u[,1] & u[,1] <u[,4] & u[,4] <u[,3] & u[,3] <u[,5])
    obsnum[10] <- sum(u[,2] < u[,3] & u[,3] <u[,4] & u[,4] <u[,1] & u[,1] <u[,5])
    obsnum[11] <- sum(u[,3] < u[,2] & u[,2] <u[,4] & u[,4] <u[,1] & u[,1] <u[,5])
    obsnum[12] <- sum(u[,3] < u[,1] & u[,1] <u[,4] & u[,4] <u[,2] & u[,2] <u[,5])
    
    obsnum[13] <- sum(u[,1] < u[,4] & u[,4] <u[,2] & u[,2] <u[,3] & u[,3] <u[,5])
    obsnum[14] <- sum(u[,1] < u[,4] & u[,4] <u[,3] & u[,3] <u[,2] & u[,2] <u[,5])
    obsnum[15] <- sum(u[,2] < u[,4] & u[,4] <u[,1] & u[,1] <u[,3] & u[,3] <u[,5])
    obsnum[16] <- sum(u[,2] < u[,4] & u[,4] <u[,3] & u[,3] <u[,1] & u[,1] <u[,5])
    obsnum[17] <- sum(u[,3] < u[,4] & u[,4] <u[,2] & u[,2] <u[,1] & u[,1] <u[,5])
    obsnum[18] <- sum(u[,3] < u[,4] & u[,4] <u[,1] & u[,1] <u[,2] & u[,2] <u[,5])
    
    obsnum[19] <- sum(u[,4] < u[,1] & u[,1] <u[,2] & u[,2] <u[,3] & u[,3] <u[,5])
    obsnum[20] <- sum(u[,4] < u[,1] & u[,1] <u[,3] & u[,3] <u[,2] & u[,2] <u[,5])
    obsnum[21] <- sum(u[,4] < u[,2] & u[,2] <u[,1] & u[,1] <u[,3] & u[,3] <u[,5])
    obsnum[22] <- sum(u[,4] < u[,2] & u[,2] <u[,3] & u[,3] <u[,1] & u[,1] <u[,5])
    obsnum[23] <- sum(u[,4] < u[,3] & u[,3] <u[,2] & u[,2] <u[,1] & u[,1] <u[,5])
    obsnum[24] <- sum(u[,4] < u[,3] & u[,3] <u[,1] & u[,1] <u[,2] & u[,2] <u[,5])
    
    
    obsnum[25] <- sum(u[,1] < u[,2] & u[,2] <u[,3] & u[,3] <u[,5] & u[,5] <u[,4])
    obsnum[26] <- sum(u[,1] < u[,3] & u[,3] <u[,2] & u[,2] <u[,5] & u[,5] <u[,4])
    obsnum[27] <- sum(u[,2] < u[,1] & u[,1] <u[,3] & u[,3] <u[,5] & u[,5] <u[,4])
    obsnum[28] <- sum(u[,2] < u[,3] & u[,3] <u[,1] & u[,1] <u[,5] & u[,5] <u[,4])
    obsnum[29] <- sum(u[,3] < u[,2] & u[,2] <u[,1] & u[,1] <u[,5] & u[,5] <u[,4])
    obsnum[30] <- sum(u[,3] < u[,1] & u[,1] <u[,2] & u[,2] <u[,5] & u[,5] <u[,4])
    
    obsnum[31] <- sum(u[,1] < u[,2] & u[,2] <u[,4] & u[,4] <u[,5] & u[,5] <u[,3])
    obsnum[32] <- sum(u[,1] < u[,3] & u[,3] <u[,4] & u[,4] <u[,5] & u[,5] <u[,2])
    obsnum[33] <- sum(u[,2] < u[,1] & u[,1] <u[,4] & u[,4] <u[,5] & u[,5] <u[,3])
    obsnum[34] <- sum(u[,2] < u[,3] & u[,3] <u[,4] & u[,4] <u[,5] & u[,5] <u[,1])
    obsnum[35] <- sum(u[,3] < u[,2] & u[,2] <u[,4] & u[,4] <u[,5] & u[,5] <u[,1])
    obsnum[36] <- sum(u[,3] < u[,1] & u[,1] <u[,4] & u[,4] <u[,5] & u[,5] <u[,2])
    
    obsnum[37] <- sum(u[,1] < u[,4] & u[,4] <u[,2] & u[,2] <u[,5] & u[,5] <u[,3])
    obsnum[38] <- sum(u[,1] < u[,4] & u[,4] <u[,3] & u[,3] <u[,5] & u[,5] <u[,2])
    obsnum[39] <- sum(u[,2] < u[,4] & u[,4] <u[,1] & u[,1] <u[,5] & u[,5] <u[,3])
    obsnum[40] <- sum(u[,2] < u[,4] & u[,4] <u[,3] & u[,3] <u[,5] & u[,5] <u[,1])
    obsnum[41] <- sum(u[,3] < u[,4] & u[,4] <u[,2] & u[,2] <u[,5] & u[,5] <u[,1])
    obsnum[42] <- sum(u[,3] < u[,4] & u[,4] <u[,1] & u[,1] <u[,5] & u[,5] <u[,2])
    
    obsnum[43] <- sum(u[,4] < u[,1] & u[,1] <u[,2] & u[,2] <u[,5] & u[,5] <u[,3])
    obsnum[44] <- sum(u[,4] < u[,1] & u[,1] <u[,3] & u[,3] <u[,5] & u[,5] <u[,2])
    obsnum[45] <- sum(u[,4] < u[,2] & u[,2] <u[,1] & u[,1] <u[,5] & u[,5] <u[,3])
    obsnum[46] <- sum(u[,4] < u[,2] & u[,2] <u[,3] & u[,3] <u[,5] & u[,5] <u[,1])
    obsnum[47] <- sum(u[,4] < u[,3] & u[,3] <u[,2] & u[,2] <u[,5] & u[,5] <u[,1])
    obsnum[48] <- sum(u[,4] < u[,3] & u[,3] <u[,1] & u[,1] <u[,5] & u[,5] <u[,2])
    
    
    obsnum[49] <- sum(u[,1] < u[,2] & u[,2] <u[,5] & u[,5] <u[,3] & u[,3] <u[,4])
    obsnum[50] <- sum(u[,1] < u[,3] & u[,3] <u[,5] & u[,5] <u[,2] & u[,2] <u[,4])
    obsnum[51] <- sum(u[,2] < u[,1] & u[,1] <u[,5] & u[,5] <u[,3] & u[,3] <u[,4])
    obsnum[52] <- sum(u[,2] < u[,3] & u[,3] <u[,5] & u[,5] <u[,1] & u[,1] <u[,4])
    obsnum[53] <- sum(u[,3] < u[,2] & u[,2] <u[,5] & u[,5] <u[,1] & u[,1] <u[,4])
    obsnum[54] <- sum(u[,3] < u[,1] & u[,1] <u[,5] & u[,5] <u[,2] & u[,2] <u[,4])
    
    obsnum[55] <- sum(u[,1] < u[,2] & u[,2] <u[,5] & u[,5] <u[,4] & u[,4] <u[,3])
    obsnum[56] <- sum(u[,1] < u[,3] & u[,3] <u[,5] & u[,5] <u[,4] & u[,4] <u[,2])
    obsnum[57] <- sum(u[,2] < u[,1] & u[,1] <u[,5] & u[,5] <u[,4] & u[,4] <u[,3])
    obsnum[58] <- sum(u[,2] < u[,3] & u[,3] <u[,5] & u[,5] <u[,4] & u[,4] <u[,1])
    obsnum[59] <- sum(u[,3] < u[,2] & u[,2] <u[,5] & u[,5] <u[,4] & u[,4] <u[,1])
    obsnum[60] <- sum(u[,3] < u[,1] & u[,1] <u[,5] & u[,5] <u[,4] & u[,4] <u[,2])
    
    obsnum[61] <- sum(u[,1] < u[,4] & u[,4] <u[,5] & u[,5] <u[,2] & u[,2] <u[,3])
    obsnum[62] <- sum(u[,1] < u[,4] & u[,4] <u[,5] & u[,5] <u[,3] & u[,3] <u[,2])
    obsnum[63] <- sum(u[,2] < u[,4] & u[,4] <u[,5] & u[,5] <u[,1] & u[,1] <u[,3])
    obsnum[64] <- sum(u[,2] < u[,4] & u[,4] <u[,5] & u[,5] <u[,3] & u[,3] <u[,1])
    obsnum[65] <- sum(u[,3] < u[,4] & u[,4] <u[,5] & u[,5] <u[,2] & u[,2] <u[,1])
    obsnum[66] <- sum(u[,3] < u[,4] & u[,4] <u[,5] & u[,5] <u[,1] & u[,1] <u[,2])
    
    obsnum[67] <- sum(u[,4] < u[,1] & u[,1] <u[,5] & u[,5] <u[,2] & u[,2] <u[,3])
    obsnum[68] <- sum(u[,4] < u[,1] & u[,1] <u[,5] & u[,5] <u[,3] & u[,3] <u[,2])
    obsnum[69] <- sum(u[,4] < u[,2] & u[,2] <u[,5] & u[,5] <u[,1] & u[,1] <u[,3])
    obsnum[70] <- sum(u[,4] < u[,2] & u[,2] <u[,5] & u[,5] <u[,3] & u[,3] <u[,1])
    obsnum[71] <- sum(u[,4] < u[,3] & u[,3] <u[,5] & u[,5] <u[,2] & u[,2] <u[,1])
    obsnum[72] <- sum(u[,4] < u[,3] & u[,3] <u[,5] & u[,5] <u[,1] & u[,1] <u[,2])
    
    
    obsnum[73] <- sum(u[,1] < u[,5] & u[,5] <u[,2] & u[,2] <u[,3] & u[,3] <u[,4])
    obsnum[74] <- sum(u[,1] < u[,5] & u[,5] <u[,3] & u[,3] <u[,2] & u[,2] <u[,4])
    obsnum[75] <- sum(u[,2] < u[,5] & u[,5] <u[,1] & u[,1] <u[,3] & u[,3] <u[,4])
    obsnum[76] <- sum(u[,2] < u[,5] & u[,5] <u[,3] & u[,3] <u[,1] & u[,1] <u[,4])
    obsnum[77] <- sum(u[,3] < u[,5] & u[,5] <u[,2] & u[,2] <u[,1] & u[,1] <u[,4])
    obsnum[78] <- sum(u[,3] < u[,5] & u[,5] <u[,1] & u[,1] <u[,2] & u[,2] <u[,4])
    
    obsnum[79] <- sum(u[,1] < u[,5] & u[,5] <u[,2] & u[,2] <u[,4] & u[,4] <u[,3])
    obsnum[80] <- sum(u[,1] < u[,5] & u[,5] <u[,3] & u[,3] <u[,4] & u[,4] <u[,2])
    obsnum[81] <- sum(u[,2] < u[,5] & u[,5] <u[,1] & u[,1] <u[,4] & u[,4] <u[,3])
    obsnum[82] <- sum(u[,2] < u[,5] & u[,5] <u[,3] & u[,3] <u[,4] & u[,4] <u[,1])
    obsnum[83] <- sum(u[,3] < u[,5] & u[,5] <u[,2] & u[,2] <u[,4] & u[,4] <u[,1])
    obsnum[84] <- sum(u[,3] < u[,5] & u[,5] <u[,1] & u[,1] <u[,4] & u[,4] <u[,2])
    
    obsnum[85] <- sum(u[,1] < u[,5] & u[,5] <u[,4] & u[,4] <u[,2] & u[,2] <u[,3])
    obsnum[86] <- sum(u[,1] < u[,5] & u[,5] <u[,4] & u[,4] <u[,3] & u[,3] <u[,2])
    obsnum[87] <- sum(u[,2] < u[,5] & u[,5] <u[,4] & u[,4] <u[,1] & u[,1] <u[,3])
    obsnum[88] <- sum(u[,2] < u[,5] & u[,5] <u[,4] & u[,4] <u[,3] & u[,3] <u[,1])
    obsnum[89] <- sum(u[,3] < u[,5] & u[,5] <u[,4] & u[,4] <u[,2] & u[,2] <u[,1])
    obsnum[90] <- sum(u[,3] < u[,5] & u[,5] <u[,4] & u[,4] <u[,1] & u[,1] <u[,2])
    
    obsnum[91] <- sum(u[,4] < u[,5] & u[,5] <u[,1] & u[,1] <u[,2] & u[,2] <u[,3])
    obsnum[92] <- sum(u[,4] < u[,5] & u[,5] <u[,1] & u[,1] <u[,3] & u[,3] <u[,2])
    obsnum[93] <- sum(u[,4] < u[,5] & u[,5] <u[,2] & u[,2] <u[,1] & u[,1] <u[,3])
    obsnum[94] <- sum(u[,4] < u[,5] & u[,5] <u[,2] & u[,2] <u[,3] & u[,3] <u[,1])
    obsnum[95] <- sum(u[,4] < u[,5] & u[,5] <u[,3] & u[,3] <u[,2] & u[,2] <u[,1])
    obsnum[96] <- sum(u[,4] < u[,5] & u[,5] <u[,3] & u[,3] <u[,1] & u[,1] <u[,2])        
    
    
    obsnum[97] <- sum(u[,5] < u[,1] & u[,1] <u[,2] & u[,2] <u[,3] & u[,3] <u[,4])
    obsnum[98] <- sum(u[,5] < u[,1] & u[,1] <u[,3] & u[,3] <u[,2] & u[,2] <u[,4])
    obsnum[99] <- sum(u[,5] < u[,2] & u[,2] <u[,1] & u[,1] <u[,3] & u[,3] <u[,4])
    obsnum[100] <- sum(u[,5] < u[,2] & u[,2] <u[,3] & u[,3] <u[,1] & u[,1] <u[,4])
    obsnum[101] <- sum(u[,5] < u[,3] & u[,3] <u[,2] & u[,2] <u[,1] & u[,1] <u[,4])
    obsnum[102] <- sum(u[,5] < u[,3] & u[,3] <u[,1] & u[,1] <u[,2] & u[,2] <u[,4])
    
    obsnum[103] <- sum(u[,5] < u[,1] & u[,1] <u[,2] & u[,2] <u[,4] & u[,4] <u[,3])
    obsnum[104] <- sum(u[,5] < u[,1] & u[,1] <u[,3] & u[,3] <u[,4] & u[,4] <u[,2])
    obsnum[105] <- sum(u[,5] < u[,2] & u[,2] <u[,1] & u[,1] <u[,4] & u[,4] <u[,3])
    obsnum[106]<- sum(u[,5] < u[,2] & u[,2] <u[,3] & u[,3] <u[,4] & u[,4] <u[,1])
    obsnum[107] <- sum(u[,5] < u[,3] & u[,3] <u[,2] & u[,2] <u[,4] & u[,4] <u[,1])
    obsnum[108] <- sum(u[,5] < u[,3] & u[,3] <u[,1] & u[,1] <u[,4] & u[,4] <u[,2])
    
    obsnum[109] <- sum(u[,5] < u[,1] & u[,1] <u[,4] & u[,4] <u[,2] & u[,2] <u[,3])
    obsnum[110] <- sum(u[,5] < u[,1] & u[,1] <u[,4] & u[,4] <u[,3] & u[,3] <u[,2])
    obsnum[111] <- sum(u[,5] < u[,2] & u[,2] <u[,4] & u[,4] <u[,1] & u[,1] <u[,3])
    obsnum[112] <- sum(u[,5] < u[,2] & u[,2] <u[,4] & u[,4] <u[,3] & u[,3] <u[,1])
    obsnum[113] <- sum(u[,5] < u[,3] & u[,3] <u[,4] & u[,4] <u[,2] & u[,2] <u[,1])
    obsnum[114] <- sum(u[,5] < u[,3] & u[,3] <u[,4] & u[,4] <u[,1] & u[,1] <u[,2])
    
    obsnum[115] <- sum(u[,5] < u[,4] & u[,4] <u[,1] & u[,1] <u[,2] & u[,2] <u[,3])
    obsnum[116] <- sum(u[,5] < u[,4] & u[,4] <u[,1] & u[,1] <u[,3] & u[,3] <u[,2])
    obsnum[117] <- sum(u[,5] < u[,4] & u[,4] <u[,2] & u[,2] <u[,1] & u[,1] <u[,3])
    obsnum[118] <- sum(u[,5] < u[,4] & u[,4] <u[,2] & u[,2] <u[,3] & u[,3] <u[,1])
    obsnum[119] <- sum(u[,5] < u[,4] & u[,4] <u[,3] & u[,3] <u[,2] & u[,2] <u[,1])
    obsnum[120] <- sum(u[,5] < u[,4] & u[,4] <u[,3] & u[,3] <u[,1] & u[,1] <u[,2]) 
    
  }
  
  #when d is greather than 5, compute all the permutation recursively
  if(d > 5 && d <= 8)
  {
    mypermut <- permut(d)
    obsnum <- u[, mypermut[, 1]] < u[, mypermut[, 2]]
    #compare columns of u 
    for(i in 3:d)
    {
      obsnum <- obsnum & u[, mypermut[, i-1]] < u[, mypermut[, i]]
    }
    
    if(NCOL(obsnum) == 1)
      obsnum <- sum(obsnum)
    else
      obsnum <- colSums(obsnum)
  }
  
  # compute expected numbers
  expnum <- length(u[ ,1]) / factOfD
  
  #compute chisquare statistic
  residu <-  (obsnum - expnum) / sqrt(expnum) 
  stat <- sum( residu^2 )
  pvalue <- pchisq( stat, factOfD - 1, lower.tail = FALSE)
  
  options(digits=2)    
  if( echo )
  {
    cat("\n\t\t\t Order test\n")
    cat("\nchisq stat = ", stat, ", df = ",factOfD-1, ", p-value = ", pvalue, "\n", sep="")
    cat("\n\t\t (sample size : ",length(u),")\n\n", sep="")
    if(length(obsnum) <= 1000)
      cat("\tobserved number\t",obsnum,"\n")
    else
      cat("\tobserved number\t too many to be printed\n")
    cat("\texpected number\t",expnum,"\n")    
  } 
  
  res <- list( statistic = stat, parameter = factOfD -1, 
               p.value = pvalue, observed = obsnum, 
               expected = expnum, residuals = residu) 
  return( invisible( res ) )
}

#collision test
coll.test <- function(rand, lenSample = 2^14, segments = 2^10, tdim = 2, nbSample = 1000, 
                      echo = TRUE, ...)
{
  nbCell <- segments^tdim
  
  routine <- function()
  {
    # compute random sample
    u <- rand( tdim * lenSample, ... )
    uint <- matrix(floor( u * segments ), nrow=tdim, ncol=lenSample)
    # compute urn numbers i.e. integers in {0, ..., nbCell-1}
    num <- c(rbind(segments^(1:tdim - 1)) %*% uint)
    # compute the number of collisions
    #implemented in src/testrng.c
    NumColl <- .Call(CF_doCollisionTest, as.integer(num), lenSample, nbCell)
  }
  
  # observed numbers of collisions for 'nbSample' random samples 
  # (of length 'lenSample') in 'nbCell' urns 
  obsNumColl <- replicate( nbSample, routine())
  
  # empirical counts of collision numbers with 'hist'
  # maybe we should use table(obsNumColl)
  collMax <- max(obsNumColl)
  collMin <- min(obsNumColl)
  empNumColl <- hist(obsNumColl, (collMin-1) : collMax, plot=FALSE)$counts
  
  # theoretical counts of collision numbers
  if(lenSample / nbCell > 1/32 && lenSample <= 2^8) #exact distribution
  { 
    method <- "exact distribution"
    # compute stirling number S_n^k for k = 0 : n and n = lenSample
    stirling0toN <- stirling(lenSample)
    
    # collision c = 0 : (n-1)
    collRange <- 0:collMax
    # compute P(Collision = c) = \prod_{i=0}^{n-c-1}\frac{k-i}{k}  *  \frac{1}{k^c} *  S_n^{n-c} in two steps
    f <- function(x) 
    {   
      res <- prod( 1 - 0:(lenSample-x-1) / nbCell ) / (nbCell^x)  * stirling0toN[lenSample-x+1]
      res
    }
    expNumColl <- sapply(collRange, f)
    
    expNumColl <- expNumColl * nbSample
    
    
    
    # just prob of observable collisions i.e. c=collMin ... collMax         
    expNumColl <- expNumColl[ collMin : collMax+1 ]
  }
  else if(lenSample / nbCell > 1/32 && lenSample > 2^8) #normal approximation
  {
    stop("normal approximation not yet implemented")
    method <- "normal approximation"
  }
  else 
  {   #Poisson approximation
    method <- "Poisson approximation"
    theoLambda <- lenSample^2 / ( 2 * nbCell ) 
    expNumColl <- dpois(collMin:collMax, lambda = theoLambda ) * nbSample
  }
  
  
  #compute chisquare statistic
  residu <-  (empNumColl - expNumColl) / sqrt(expNumColl) 
  stat <- sum( residu^2 )
  dfree <- collMax - collMin + 1 - 1
  pvalue <- pchisq( stat, dfree, lower.tail = FALSE)
  
  options(digits=2)        
  if( echo )
  {
    cat("\n\t\t\t Collision test\n")
    cat("\nchisq stat = ", stat, ", df = ", dfree, ", p-value = ", pvalue, "\n", sep="")
    cat("\n",method," (sample number : ", nbSample," / sample size : ", lenSample," / cell number : ", nbCell,")\n\n", sep="")
    
    cat("\tcollision \t\t observed\t\t expected\n")
    cat("\tnumber \t\t\t count \t\t count\n")
    for(i in 1:(collMax - collMin + 1) )
      cat("\t\t", collMin+i-1,"\t\t\t", empNumColl[i],"\t\t\t", expNumColl[i],"\n")
  } 
  
  if(any(expNumColl < 5))
    warning("p-values will be approximated in the presence of low expected collision number.")
  
  res <- list( statistic = stat, parameter = dfree, 
               p.value = pvalue, observed = empNumColl, 
               expected = expNumColl, residuals = residu) 
  return( invisible( res ) )
}

coll.test.sparse <- function(rand, lenSample = 2^14, segments = 2^10, tdim = 2, 
                             nbSample = 10, ...)
{
  nbCell <- segments^tdim
  
  routine <- function()
  {
    # compute random sample
    u <- rand( tdim * lenSample, ... )
    uint <- matrix(floor( u * segments ), nrow=tdim, ncol=lenSample)
    # compute urn numbers i.e. integers in {0, ..., nbCell-1}
    num <- c(rbind(segments^(1:tdim - 1)) %*% uint)
    # compute the number of collisions
    length(num) - length(unique(num))
  }
  
  # observed numbers of collisions for 'nbSample' random samples 
  # (of length 'lenSample') in 'nbCell' urns 
  obsNumColl <- replicate( nbSample, routine())
  collMax <- max(obsNumColl)
  
  # theoretical counts of collision numbers
  if(lenSample / nbCell > 1/32 && lenSample <= 2^8) #exact distribution
  { 
    method <- "exact distribution"
    # compute stirling number S_n^k for k = 0 : n and n = lenSample
    stirling0toN <- stirling(lenSample)
    
    # collision c = 0 : (n-1)
    collRange <- 0:collMax
    # compute P(Collision = c) = \prod_{i=0}^{n-c-1}\frac{k-i}{k}  *  \frac{1}{k^c} *  S_n^{n-c} in two steps
    f <- function(x) 
    {   
      res <- prod( 1 - 0:(lenSample-x-1) / nbCell ) / (nbCell^x)  * stirling0toN[lenSample-x+1]
      res
    }
    
    probNumColl <- sapply(collRange, f)
  }
  else if(lenSample / nbCell > 1/32 && lenSample > 2^8)
  {
    stop("unsupported parameters for the collision test")
  }
  else 
  {   #Poisson approximation
    method <- "Poisson approximation"
    theoLambda <- lenSample^2 / ( 2 * nbCell ) 
    probNumColl <- dpois(0:collMax, lambda = theoLambda )
  }
  
  probNumColl <- c(probNumColl, 1 - sum(probNumColl))
  pValLeft <- cumsum(probNumColl)
  pValRight <- rev(cumsum(rev(probNumColl)))
  invLeft <- ifelse(pValLeft < 0.5, 1 - pValLeft, 0.5)
  pVal <- ifelse(pValRight < pValLeft, pValRight, invLeft)
  data.frame(observed=obsNumColl, p.value = pVal[obsNumColl + 1])
}

#compute stirling numbers of second kind i.e.
# Stirl_n^k = k * Stirl_{n-1}^k + Stirl_{n-1}^{k-1}
# and Stirl_n^1 = Stirl_n^n = 1
# e.g. 
# n = 0, returns 1
# n = 1, returns c(0,1)
# n = 2, returns c(0,1,1)
# n = 3, returns c(0,1,3,1)
# n = 4, returns c(0,1,7,6,1)...
stirling <- function(n)
{
  if( n == 0)
    res <- 1
  if(n > 0)
    res <- c(0,1)   
  
  if(n > 1)
  {
    for(i in 1:(n-1))
    {
      k <- 1:length(res) -1
      res <- c(k * res, 0) + c(0, res) 
    }
  }
  return(res)    
}


stirlingDividedByK <- function(n, cmax, cste)
{
  if( n == 0)
    res <- 1
  if(n > 0)
    res <- c(0,1)  
  
  if(n > 1)
  {
    if(cmax >= n) 
      stop("wrong cmax argument")
    for(i in 1:cmax)
    {
      k <- 1:length(res) -1
      res <- c(k * res, 0) + c(0, res) 
      #            cat("i ", i, " res ", res, "\n")
    }
    for(i in (cmax+1):(n-1))
    {
      k <- 1:length(res) -1
      res <- c(k * res, 0) + c(0, res) / cste
      #            cat("i ", i, " res ", res, "\n")
    }
  }
  return(res)    
}


#compute permutation
# e.g. 
# n=1 returns 1
# n=2 returns 
# 1 2
# 2 1
# n=3 returns
# 3 1 2
# 3 2 1
# 1 3 2
# 2 3 1
# 1 2 3
# 2 1 3
permut <- function(n)
{
  if(!is.numeric(n) || length(n) >1)
    stop("wrong argument")
  
  if(n <= 0)
    stop("wrong argument")
  
  if(n > 15)
    cat("Warning in permut: it will take a long time to compute!")
  
  if(n == 1) 
    return(matrix(1, 1, 1))
  if(n == 2)
    return(rbind(1:2, 2:1))
  
  result <- rbind(1:2, 2:1)
  
  if(n>2)
  {
    for(i in 3:n)
    {
      tempagg <- NULL
      #add i on first column
      temp <- cbind(i, result)
      
      tempagg <- rbind(tempagg, temp)
      
      #add i on the jth column
      for( j in 1:(NCOL(result)-1) )
      {   
        temp <- cbind(result[, 1:j], i, result[, (j+1):NCOL(result)] )
        tempagg <- rbind(tempagg, temp)
      }
      
      #add i on the last column
      temp <- cbind(result, i)
      
      result <- rbind(tempagg, temp)
    }
  }
  colnames(result) <- NULL
  return(as.matrix(result))
}

Try the randtoolbox package in your browser

Any scripts or data that you put into this service are public.

randtoolbox documentation built on Feb. 16, 2023, 7:18 p.m.