R/b.estimate.R

b.estimate <-
function (data, to = 1, min_points, alpha = 0.05, g = 1, r = 1)
{
   cl <- match.call()
   
   #ensure that the domain of the data is correct. Removes all duplicate observations while scaling the data to a domain of [0,1] if required
   if (round(max(data), 1) == round(2 * pi, 1))
       data <- unique(data / (2 * pi))
   data <- sort(unique(data))

   data <- c(data - 1, data, data + 1)
   close_to_min_index <- unique(findInterval(min_points, data) + 1)

   CVM_rejectionpnt <- numeric(length(close_to_min_index))
   AD_rejectionpnt <- numeric(length(close_to_min_index))
   KS_rejectionpnt <- numeric(length(close_to_min_index))
   rayleigh_rejectionpnt <- numeric(length(close_to_min_index))

   #This loop iterates the algorithm for each of the unique minimum points specified in the arguent "min_points"
   for (k in 1 : length(close_to_min_index))
   {
        #begin represents the first value in the complete interval over which uniformity is tested
        begin <- close_to_min_index[k]
        #end represents the last value in the complete interval over which uniformity is tested 
        end <- close_to_min_index[k] + (length(data) / 3)
        start <- begin
        lengte <- (end - start - 1)

        # Create empty vectors for the different GOF test statistics and p values
        ks <- numeric(trunc(lengte / g))
        ks_p <- numeric(trunc(lengte / g))
        c <- numeric(trunc(lengte / g))
        d <- numeric(trunc(lengte / g))
        asq <- numeric(trunc(lengte / g))
        asq_p <- numeric(trunc(lengte / g))
        CVM_pvalue <- numeric(trunc(lengte / g))
        rayleigh_p <- numeric(trunc(lengte / g))
        tol <- 1e-20
        
        # j represents the set number
        j <- 1
        
        #Boolean operators to establish when all GOF tests have been rejected
        CVM_rejection <- FALSE
        KS_rejection <- FALSE
        AD_rejection <- FALSE
        rayleigh_rejection <- FALSE
        rejected <- (CVM_rejection & KS_rejection & AD_rejection & rayleigh_rejection)


        while (!rejected & (j <= trunc(lengte / g)))
        {
               sn <- 0
               n <- 0
               sn_star <- 0
               star_pval <- 0
               endpoint <- start + (j * g) + 1
               n <- endpoint - start - 1
               d[j] <- n

               # The next lines contains the calculation of the test statistics and p-values for the different GOF tests.

               #Anderson-Darling test statistic and p-value calculation
               if (n < 8)
               {
                   asq[j] <- 0
                   asq_p[j] <- 0.99
               }
               else 
               {
                   sn <- ad.test(data[(start + 1) : (endpoint - 1)],punif, data[start]-tol, data[endpoint]+tol)
   asq[j] <-sn$statistic
   asq_p[j] <-sn$p.value

               }

               #Cramer-von Mises test statistic and p-value calculation (Stephens 1970, p101,p105) 
               sn <- ((data[(start + 1) : (endpoint - 1)] - data[start]) / (data[endpoint] - data[start]) - (1 : n - 0.5) / (n))^2
               c[j] <- sum(sn) + (1 / (12 * n))
               CVM_pvalue[j] <- CVM_pvall(c[j], n)

               #Kolmogorov-Smirnov test statistic and p-value calculation (ks.test function in package "stats")
               ks[j] <- ks.test(data[(start + 1) : (endpoint - 1)], punif, data[start], data[endpoint])$statistic
               ks_p[j] <- ks.test(data[(start + 1) : (endpoint - 1)], punif, data[start], data[endpoint])$p.value
               
               #Rayleigh p-value calculation
          
               rayleigh_p[j] <- rayleigh.test(circular(((data[(start + 1) : (endpoint - 1)]) - data[(start + 1)]) / (data[(endpoint - 1)] - data[(start + 1)]) * 2 * pi))$p.value
               if (rayleigh_p[j]=="NaN")
                   rayleigh_p[j]<-1

               if (j>=r)
               {
                   #Calulate the point of rejection of uniformity for each GOF test and for each selected minimum point by establishing the point where the
                   # p-value(s) is less than alpha

                   if (CVM_rejection == FALSE)
                   {
                       if (max(CVM_pvalue[(j - r + 1) : j]) < alpha)
                       {
                           CVM_rejection <- TRUE
                           if (start + 1 + ((j - r + 1) * g) <= ((length(data) / 3) * 2))
                               CVM_rejectionpnt[k] <- data[start+1+((j-r+1)*g)]
                           else
                               CVM_rejectionpnt[k] <- data[start + 1 + ((j - r + 1) * g) - (length(data) / 3)]
                       }
                   }

                   if (KS_rejection == FALSE)
                   {
                       if (max(ks_p[(j - r + 1) : j]) < alpha)
                       {
                           KS_rejection <- TRUE
                           if (start + 1 + ((j - r + 1) * g) <= ((length(data) / 3) * 2))
                               KS_rejectionpnt[k]<-data[start+1+((j-r+1)*g)]
                           else
                               KS_rejectionpnt[k]<-data[start+1+((j-r+1)*g) - (length(data)/3)]
                       }
                   }

                   if (AD_rejection == FALSE)
                   {
                       if (max(asq_p[(j - r + 1) : j]) < alpha)
                       {
                           AD_rejection <- TRUE
                           if (start + 1 + ((j - r + 1) * g) <= ((length(data) / 3) * 2))
                               AD_rejectionpnt[k] <- data[start + 1 + ((j - r + 1) * g)]
                           else
                               AD_rejectionpnt[k] <- data[start + 1 + ((j - r + 1) * g) - (length(data) / 3)]
                       }
                   }

                   if (rayleigh_rejection == FALSE)
                   {
                       if (max(rayleigh_p[(j - r + 1) : j]) < alpha)
                       {
                           rayleigh_rejection <- TRUE
                           if (start + 1 + ((j - r + 1) * g) <= ((length(data) / 3) * 2))
                               rayleigh_rejectionpnt[k] <- data[start + 1 + ((j - r + 1) * g)]
                           else
                               rayleigh_rejectionpnt[k] <- data[start + 1 + ((j - r + 1) * g) - (length(data) / 3)]

                       }

                   }
                }
#if j > rejection loop end

                rejected <- (CVM_rejection & KS_rejection & AD_rejection & rayleigh_rejection)
                j <- j + 1
        }
        # while rejection loop end

        #Clean up the vector of test statistics and p-values before the next minimum point is analysed
        if (!(is.na(which(c == 0)[1])))
            CVM_pvalue <- CVM_pvalue[1 : ((which(CVM_pvalue == 0)[1]) - 1)]
        if (!(is.na(which(d == 0)[1])))
            d <- d[1 : ((which(d == 0)[1] - 1))]
        if (!(is.na(which(ks == 0)[1])))
            ks <- ks[1 : ((which(ks == 0)[1] - 1))]
        if (!(is.na(which(ks_p == 0)[1])))
            ks_p <- ks_p[1 : ((which(ks_p == 0)[1] - 1))]
        if (!(is.na(which(asq == 0)[1])))
            asq <- asq[1 : ((which(asq == 0)[1] - 1))]
        if (!(is.na(which(asq_p == 0)[1])))
            asq_p <- asq_p[1 : ((which(asq_p == 0)[1] - 1))]
        if (!(is.na(which(rayleigh_p == 0)[1])))
            rayleigh_p<-rayleigh_p[1 : ((which(rayleigh_p==0)[1] - 1))]

   }
   #for k loop end (number of minimum points)

   #Combine results in different data structures for output
   vec <- c(mean(CVM_rejectionpnt), mean(KS_rejectionpnt), mean(AD_rejectionpnt), mean(rayleigh_rejectionpnt))
   sum <- matrix(c(mean(CVM_rejectionpnt), mean(KS_rejectionpnt), mean(AD_rejectionpnt), mean(rayleigh_rejectionpnt), median(vec)), ncol=5,
                 dimnames = list(c("b-hat"), c("Cramer von Mises", "Kolmogorov-Smirnoff", "Anderson-Darling", "Rayleigh", "MEDIAN")))
   CVM <- list(rejection = CVM_rejectionpnt, mean = mean(CVM_rejectionpnt))
   KS <- list(rejection = KS_rejectionpnt, mean = mean(KS_rejectionpnt))
   AD <- list(rejection = AD_rejectionpnt, mean = mean(AD_rejectionpnt))
   rayleigh <- list(rejection = rayleigh_rejectionpnt, mean = mean(rayleigh_rejectionpnt))
   general <- list(call = cl, Minimums = data[close_to_min_index], alpha = alpha, grow = g, nr_reject = r, kernel_function = "Epanechnikov")
   comb<-list(summary = sum, general = general)

   return(comb)

}

Try the SOPIE package in your browser

Any scripts or data that you put into this service are public.

SOPIE documentation built on March 18, 2022, 5:25 p.m.