Rutils/maybe-not-useful/sw.test.r

#==========================================================================================#
#==========================================================================================#
#     This method performs the Shapiro-Wilk test for normality.  It doesn't check the      #
# size of the matrix for data sets bigger than 5000.  In case the vector has size less     #
# than 3, we return NA instead of crashing.                                                #
#------------------------------------------------------------------------------------------#
sw.test <<- function(x){

  #----- Keep only the finite data. -------------------------------------------------------#
  x = sort(x[is.finite(x)])
  n = length(x)
  #----------------------------------------------------------------------------------------#



  #----- Find the basic statistics for this distribution and the expected values. ---------#
  P.x    = (sequence(n)-0.5)/n
  mean.x = mean (x)
  sd.x   = sd   (x)
  E.x    = qnorm(p=P.x,mean=mean.x,sd=sd.x)
  #----------------------------------------------------------------------------------------#



  #----------------------------------------------------------------------------------------#
  #    There must be at least three valid numbers to be able to test the distribution.     #
  #----------------------------------------------------------------------------------------#
  if (n < 3){
     ans = list ( statistic     = NA
                , p.value       = NA
                , confident     = FALSE
                )#end list
     return(ans)
     warning(" Sample size is too small for a Shapiro-Wilk test...")
  }else{
     confident = n <= 5000


     #----- Find a vector with standard random distribution and same size as X. -----------#
     m  = qnorm((sequence(n)-0.375)/(n+0.25),mean=0,sd=1)
     mt = t(m)
     #-------------------------------------------------------------------------------------#



     #-------------------------------------------------------------------------------------#
     #      Decide which method to use based on the kurtosis.                              #
     #-------------------------------------------------------------------------------------#
     if (kurt(x) %>% 3){
        #----------------------------------------------------------------------------------#
        #    Use Shapiro-Francia as the sample is leptokurtic.  The Shapiro-Francia        #
        # statistic W is calculated to avoid excessive rounding errors for W close to 1    #
        # (a potential problem in very large samples).                                     #
        #----------------------------------------------------------------------------------#



        #----- Find the weigths. ----------------------------------------------------------#
        a = m / sqrt(sum(m^2))
        #----------------------------------------------------------------------------------#


        #----- Find the W statistic. ------------------------------------------------------#
        W   =   sum(a * x) ^2 / sum((x-mean.x)^2)
        #----------------------------------------------------------------------------------#


        #----------------------------------------------------------------------------------#
        #     Find the standardised W.                                                     #
        #----------------------------------------------------------------------------------#
        mu      = -1.2725 + ( 1.05210 * ( log(log(n)/n) ) )
        sigma   =  1.0308 - ( 0.26758 * ( log(log(n)) + 2/log(n) ) )
        p.value = 1.0 - 2. * abs(pnorm(q=log(1.0-W),mean=mu,sd=sigma)-0.5)
        #----------------------------------------------------------------------------------#
     }else{
        #----------------------------------------------------------------------------------#
        #    Use the traditional Shapiro-Wilk test as the sample is platykurtic.           #
        #----------------------------------------------------------------------------------#
        cc = m  / sqrt(sum(m^2))
        uu = 1. / sqrt(n)
        #----------------------------------------------------------------------------------#


        #----------------------------------------------------------------------------------#
        #    Find the weights.                                                             #
        #----------------------------------------------------------------------------------#
        a    = rep(0,times=n)
        #----- Edges.  We must check whether n is 3 or greater. ---------------------------#
        if (n == 3){
           a[  1] = 0.707106781
           a[  n] = - a[1]
        }else{
           a[  n] = ( cc[n  ] + 0.221157 * uu   - 0.147981 * uu^2 - 2.071190 * uu^3
                              + 4.434685 * uu^4 - 2.706056 * uu^5 )
           a[  1] = - a[  n]
        }#end if
        #----- Other values, fit according to the size. -----------------------------------#
        if (n < 6){
           ia     = 2
           iz     = n - 1
           phi    = ( sum(m^2) - 2 * m[n]^2 ) / ( 1 - 2 * a[n]^2 )
        }else{
           a[n-1] = ( cc[n-1] + 0.042981 * uu   - 0.293762 * uu^2 - 1.752461 * uu^3
                              + 5.682633 * uu^4 - 3.582633 * uu^5 )
           a[  2] = - a[n-1]
           ia     = 3
           iz     = n - 2
           phi    = ( ( sum(m^2) - 2. * m[  n]^2 - 2. * m[n-1]^2 )
                    / (       1. - 2. * a[  n]^2 - 2. * a[n-1]^2 ) )
        }#end if
        #----- These are the same as the Shapiro-Wilk test for small samples. -------------#
        a[ia:iz] = m[ia:iz] / sqrt(phi)
        #----------------------------------------------------------------------------------#


        #---- Find the W statistic. -------------------------------------------------------#
        W   = (sum(a*x))^2 / sum( (x-mean.x)^2)
        #----------------------------------------------------------------------------------#



        #----------------------------------------------------------------------------------#
        #     Find the derived statistics.                                                 #
        #----------------------------------------------------------------------------------#
        if (n == 3){
           p.value  = 1.0 - 2.0 * ( 1.909859 * (asin(sqrt(W)) - 1.047198) - 1.0 )
        }else if (n < 11){
           mu      =      0.54400 - 0.399780 *  n + 0.02505400 *  n^2 - 0.0006714 *  n^3
           sigma   =  exp(1.38220 - 0.778570 *  n + 0.06276700 *  n^2 - 0.0020322 *  n^3)
           gam     = -2.27300 + 0.459000 *  n
           p.value = 1.0 - 2.0 * abs(pnorm(q=-log(gam-log(1.-W)),mean=mu,sd=sigma)-0.5)
        }else{
           ln      = log(n)
           mu      =     -1.58610 - 0.310820 * ln - 0.08375100 * ln^2 + 0.00389150 * ln^3
           sigma   = exp(-0.48030 - 0.082676 * ln + 0.00303020 * ln^2 )
           p.value = 1.0 - 2.0 * abs( pnorm(q=log(1.0-W),mean=mu,sd=sigma) - 0.5 )
        }#end if
        #----------------------------------------------------------------------------------#
     }#end if


     #-------------------------------------------------------------------------------------#
     #     Return the statistics and the p-value.                                          #
     #-------------------------------------------------------------------------------------#
     ans = list ( statistic     = W
                , p.value       = p.value
                , confident     = confident
                )#end list
     return(ans)
     #-------------------------------------------------------------------------------------#
  }#end if
  #----------------------------------------------------------------------------------------#
}#end sw.test
#==========================================================================================#
#==========================================================================================#
manfredo89/ED2io documentation built on May 21, 2019, 11:24 a.m.