R/circtestsonesample.R

Defines functions CircularStatisticsOneSampleTests .circularTestsOneSampleReadData .circularTestsOneSampleCheckErrors .circularTestsOneSampleComputeResults .circularTestsOneSampleComputeResultsRao .circularTestsOneSampleComputeResultsRayleigh .circularTestsOneSampleComputeResultsModifiedRayleigh .circularTestsOneSampleComputeResultsVonMises .circularTestsOneSampleComputeResultsVonMisesSub .circularTestsOneSampleCreateTable .circularTestsOneSampleFillTable .circularTestsOneSampleFillTableVonMises .normalizeData

#
# Copyright (C) 2019 Aaron Bahde and Philipp Berens, University of Tuebingen
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#

CircularStatisticsOneSampleTests <- function(jaspResults, dataset, options, ...) {
  # Get the correct period. This step is neccessary since pi is hard to specify in the GUI.
  if (options$periodGroup == "pi")
    options$period <- pi
  if (options$periodGroup == "pi_2")
    options$period <- 2 * pi

  # Set title
  #jaspResults$title <- "Test Results"

  ready <- (length(options$variables) > 0)
  
  # Read dataset
  if(ready){
    
    dataset <- .circularTestsOneSampleReadData(dataset, options)

    # Error checking
    errors <- .circularTestsOneSampleCheckErrors(dataset, options)
  }

  
  # Output tables and plots
  if(options$rao || options$rayleigh || options$modifiedRayleigh){
    if(ready)
      circularTestsOneSampleResults <- try(.circularTestsOneSampleComputeResults(jaspResults, dataset, options))
    .circularTestsOneSampleCreateTable(jaspResults, dataset, options, circularTestsOneSampleResults, ready)
  }
  if(options$vonMisesCheck){
    if(ready)
      circularTestsOneSampleVonMisesResults <- try(.circularTestsOneSampleComputeResultsVonMises(jaspResults, dataset, options))
    .circularTestsOneSampleCreateTableVonMises(jaspResults, dataset, options, circularTestsOneSampleVonMisesResults, ready)
  }
}

# Preprocessing functions ----
.circularTestsOneSampleReadData <- function(dataset, options) {
  variables <- unlist(options$variables)
  splitName <- options$splitby
  wantsSplit <- splitName != ""
  if (wantsSplit) {
    dataset <- .readDataSetToEnd(columns.as.numeric = variables, columns.as.factor = splitName)
  } else {
    dataset <- .readDataSetToEnd(columns.as.numeric = variables)
  }
  return(dataset)
}
.circularTestsOneSampleCheckErrors <- function(dataset, options){
  splitName <- options$splitby
  wantsSplit <- splitName != ""
  if(wantsSplit){
    
    # check that there is at least one level for the split factor
    .hasErrors(
      dataset              = dataset,
      perform              = "run",
      type                 = "factorLevels",
      factorLevels.target  = options$splitby,
      factorLevels.amount  = "< 1",
      exitAnalysisIfErrors = TRUE)
    
    # check that there are no infinity values or zero observations
    .hasErrors(dataset, 
             type = c('observations', 'infinity'),
             all.target = options$variables, 
             all.grouping = options$splitby,
             observations.amount = c('< 1'), 
             exitAnalysisIfErrors = TRUE)
    
    # The rao test needs at least 4 data points.
    if(options$rao)
      .hasErrors(dataset, 
                 type = c('observations'),
                 all.target = options$variables, 
                 all.grouping = options$splitby,
                 observations.amount = c('< 4'), 
                 exitAnalysisIfErrors = TRUE)
    
    # The von Mises check fails if there is zero variance in the data.
    if(options$vonMises)
      .hasErrors(dataset, 
                 type = c('variance'),
                 all.target = options$variables, 
                 all.grouping = options$splitby, 
                 exitAnalysisIfErrors = TRUE)
    
  } else {
    
    # check that there are no infinity values or zero observations
    .hasErrors(dataset, 
               type = c('observations', 'infinity'),
               all.target = options$variables, 
               observations.amount = c('< 1'), 
               exitAnalysisIfErrors = TRUE)

    # The rao test needs at least 4 data points.
    if(options$rao)
      .hasErrors(dataset, 
                 type = c('observations'),
                 all.target = options$variables,
                 observations.amount = c('< 4'), 
                 exitAnalysisIfErrors = TRUE)
    
    # The von Mises check fails if there is zero variance in the data.
    if(options$vonMises)
      .hasErrors(dataset, 
                 type = c('variance'),
                 all.target = options$variables, 
                 exitAnalysisIfErrors = TRUE)
  }
  
  # check for reasonable period and that the data does not exceed a tolerable concentration
  # (It might happen that the user forgets to specify the correct period 
  # which leads to data that can be very concentrated when normalized to the unit circle, i.e. almost zero circular variance).
  # This can cause time outs in the circular package.
  .oneSampleTestsCheckForReasonablePeriodAndConcentration <- function(){

    tolerance <- 10**-3

    splitName <- options$splitby
    wantsSplit <- splitName != ""
    variables <- unlist(options$variables)

    if(wantsSplit){
      split <- dataset[[.v(options$splitby)]]
      splitLevels <- levels(split)
      for(variable in variables){
        for(level in splitLevels){
          column <- dataset[[.v(variable)]][split == level]
          validData <- column[!is.na(column)]
          validDataNormalized <- .normalizeData(validData, options$period)
          validDataCirc <- circular::circular(validDataNormalized)
          meanResultantLength <- as.numeric(circular::rho.circular(validDataCirc))
          if (abs(meanResultantLength-1) < tolerance)    # The maximum mean resultant length is 1. So if it exceeds the tolerance, return an error.
            return(paste("The data of the variable", variable, "on level", level,"exceeds the tolerance for the concentration. The data shows almost zero variance. Did you maybe specify the wrong period?"))
        }
      }
    } else{
      for(variable in variables){
        column <- dataset[[.v(variable)]]
        validData <- column[!is.na(column)]
        validDataNormalized <- .normalizeData(validData, options$period)
        validDataCirc <- circular::circular(validDataNormalized)
        meanResultantLength <- as.numeric(circular::rho.circular(validDataCirc))
        if (abs(meanResultantLength-1) < tolerance)    # The maximum mean resultant length is 1. So if it exceeds the tolerance, return an error.
          return(paste("The data of the variable", variable, "exceeds the tolerance for the concentration. The data shows almost zero variance. Did you maybe specify the wrong period?"))
      }
    }
  }
  .hasErrors(      dataset              = dataset,
                   perform              = "run",
                   custom               = .oneSampleTestsCheckForReasonablePeriodAndConcentration,
                   exitAnalysisIfErrors = TRUE)
}
# Results functions ----
.circularTestsOneSampleComputeResults <- function(jaspResults, dataset, options) {
  
  splitName <- options$splitby
  wantsSplit <- splitName != ""
  variables <- unlist(options$variables)

  results <- list()

  if(wantsSplit){
    split <- dataset[[.v(options$splitby)]]
    splitLevels <- levels(split)
    for(variable in variables){
      for(level in splitLevels){
        column <- dataset[[.v(variable)]][split == level]
        validData <- column[!is.na(column)]
        validDataNormalized <- .normalizeData(validData, options$period)
        validDataCirc <- circular::circular(validDataNormalized)

        if (options$rao){
          testResults <- .circularTestsOneSampleComputeResultsRao(jaspResults, validDataCirc, options)
          results[[variable]][[level]][["rao"]] <- list(
            variable = variable,
            level = level,
            testName = "Rao's spacing",
            alpha = testResults$alpha,
            statistic = testResults$statistic,
            criticalValue = testResults$criticalValue,
            n = testResults$n
          )
        }
        if (options$rayleigh){
          testResults <- .circularTestsOneSampleComputeResultsRayleigh(jaspResults, validDataCirc, options)
          results[[variable]][[level]][["rayleigh"]] <- list(
            variable = variable,
            level = level,
            testName = "Rayleigh",
            p = testResults$p,
            statistic = testResults$statistic
          )
        }
        if (options$modifiedRayleigh){
          testResults <- .circularTestsOneSampleComputeResultsModifiedRayleigh(jaspResults, validDataCirc, options)
          results[[variable]][[level]][["modifiedRayleigh"]] <- list(
            variable = variable,
            level = level,
            testName = paste("V against", toString(options$testValue)),
            p = testResults$p,
            statistic = testResults$statistic
          )
        }
      }
    }

  }
  else {
    for (variable in variables){
      column <- dataset[[.v(variable)]]
      validData <- column[!is.na(column)]
      validDataNormalized <- .normalizeData(validData, options$period)
      validDataCirc <- circular::circular(validDataNormalized)

      if (options$rao){
        testResults <- .circularTestsOneSampleComputeResultsRao(jaspResults, validDataCirc, options)
        results[[variable]][["rao"]] <- list(
          variable = variable,
          testName = "Rao's Spacing",
          alpha = testResults$alpha,
          statistic = testResults$statistic,
          criticalValue = testResults$criticalValue,
          n = testResults$n
        )
      }
      if (options$rayleigh){
        testResults <- .circularTestsOneSampleComputeResultsRayleigh(jaspResults, validDataCirc, options)
        results[[variable]][["rayleigh"]] <- list(
          variable = variable,
          testName = "Rayleigh",
          p = testResults$p,
          statistic = testResults$statistic
        )
      }
      if (options$modifiedRayleigh){
        testResults <- .circularTestsOneSampleComputeResultsModifiedRayleigh(jaspResults, validDataCirc, options)
        results[[variable]][["modifiedRayleigh"]] <- list(
          variable = variable,
          testName = paste("V against", toString(options$testValue)),
          p = testResults$p,
          statistic = testResults$statistic
        )
      }
    }
  }

  return(results)
}
.circularTestsOneSampleComputeResultsRao <- function(jaspResults, data, options) {
  
  alpha <- as.numeric(options$alphaRao)
  testResult <- circular::rao.spacing.test(data,alpha=alpha)
  U <- testResult$statistic
  n <- testResult$n
  # The following is a routine that gets the critical value for the specified alpha value, depending in the dataset size.
  # It is neccesary, since the circular package just prints the test results without returning the critical values.
  data(rao.table, package = "circular", envir = sys.frame(which = sys.nframe()))    # table for critical values (Levitin, Rusell 1995)
  criticalTableColumn <- (1:4)[alpha == c(0.001, 0.01, 0.05, 0.1)]
  # get the table row where the data count is as closest to the one in the table
  countColumn <- c(4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,35,40,45,50,75,100,150,200,300,400,500,600,700,800,900,1000)
  criticalTableRow <- which(abs(countColumn-n)==min(abs(countColumn-n)))
  criticalValue <- rao.table[criticalTableRow, criticalTableColumn]
  results <- list(alpha = alpha, statistic = U, criticalValue = criticalValue, n = n)
}
.circularTestsOneSampleComputeResultsRayleigh <- function(jaspResults, data, options) {
  testResult <- circular::rayleigh.test(data)
  p <- testResult$p.value
  statistic <- testResult$statistic
  results <- list(p = p, statistic = statistic)
return(results)
}
.circularTestsOneSampleComputeResultsModifiedRayleigh <- function(jaspResults, data, options) {
  # We do not trust the implementation of the V test in the circular package. Therefore, we implement it on our own following Fisher 1993, p.69.
  # The test statistic is here based on the mean resultant length and not on "rayleighs R". Therefore the resulting statistic differs by a factor of n from other libraries, like CircStat for MATLAB

  # calculate the statistic r0Bar
  n <- length(data)
  mu0 <- .normalizeData(options$testValue, options$period)    # test direction must be normalized as well
  meanDirection <- as.numeric(circular::mean.circular(data))
  meanResultantLength <- as.numeric(circular::rho.circular(data))
  r0Bar <- meanResultantLength * cos(meanDirection - mu0)

  # determine p-value
  z0 <- sqrt(2 * n) * r0Bar
  pz <- pnorm(z0)
  fz <- dnorm(z0)
  p <- 1 - pz + fz * ((3 * z0 - z0^3)/(16 * n) + (15 * z0 + 305 * z0^3 - 125 * z0^5 + 9 * z0^7)/(4608 * n^2))

  results <- list(p = p, statistic = r0Bar)
  return(results)
}

.circularTestsOneSampleComputeResultsVonMises <- function(jaspResults, dataset, options){
  splitName <- options$splitby
  wantsSplit <- splitName != ""
  variables <- unlist(options$variables)

  circularTestsOneSampleVonMisesTestResults <- list()

  if (wantsSplit){
    split       <- dataset[[.v(options$splitby)]]
    splitLevels <- levels(split)
    for(variable in variables){
      for(level in splitLevels){
        column    <- dataset[[.v(variable)]][split == level]
        validData <- column[!is.na(column)]
        validDataNormalized <- .normalizeData(validData, options$period)
        validDataCirc <- circular::circular(validDataNormalized)
        testResults <- .circularTestsOneSampleComputeResultsVonMisesSub(validDataCirc, options, "vonmises")
        circularTestsOneSampleVonMisesTestResults[[variable]][[level]] <- list(
          variable = variable,
          level = level,
          statistic = testResults$statistic,
          critical = testResults$critical,
          alpha = testResults$alpha,
          kappa = testResults$kappa
        )
      }
    }
  } else {
    for (variable in variables){
      column <- dataset[[.v(variable)]]
      validData <- column[!is.na(column)]
      validDataNormalized <- .normalizeData(validData, options$period)
      validDataCirc <- circular::circular(validDataNormalized)
      testResults <- .circularTestsOneSampleComputeResultsVonMisesSub(validDataCirc, options, "vonmises")
      circularTestsOneSampleVonMisesTestResults[[variable]] <- list(
        variable = variable,
        statistic = testResults$statistic,
        critical = testResults$critical,
        alpha = testResults$alpha,
        kappa = testResults$kappa
      )
    }
  }
  return(circularTestsOneSampleVonMisesTestResults)
}
.circularTestsOneSampleComputeResultsVonMisesSub <- function(data, options, dist = "vonmises"){
  alpha <- as.numeric(options$alphaVonMises)

  # Get the estimated kappa for the footnote. If kappa is too small, the data might be rather uniform.
  kappa <- circular::mle.vonmises(data, bias = FALSE)$kappa

  testResult <- circular::watson.test(data, alpha = alpha, dist = dist)
  row <- testResult$row

  statistic <- testResult$statistic

  # This table comparison is copied from circular::print.watson.test() to get the values instead of just printing them to the console.
  u2.crits <- cbind(c(0, 0.5, 1, 1.5, 2, 4, 100),
                    c(0.052, 0.056, 0.066, 0.077, 0.084, 0.093, 0.096),
                    c(0.061,0.066, 0.079, 0.092, 0.101, 0.113, 0.117),
                    c(0.081, 0.09, 0.11, 0.128, 0.142, 0.158, 0.164))

  if (alpha == 0.1)
    col <- 2
  else if (alpha == 0.05)
    col <- 3
  else if (alpha == 0.01)
    col <- 4

  critical <- u2.crits[row, col]
  
  return(list(critical = critical, statistic = statistic, alpha = alpha, kappa = kappa))
}
# Output functions ----
.circularTestsOneSampleCreateTable <- function(jaspResults, dataset, options, circularTestsOneSampleResults, ready) {

  wantsSplit <- options$splitby != ""
  
  # Create table
  oneSampleTable <- createJaspTable(title = "Uniformity Tests")
  jaspResults[["oneSampleTable"]] <- oneSampleTable
  jaspResults[["oneSampleTable"]]$dependOn(options = c("variables", "splitby", "rao", "alphaRao", "rayleigh", "modifiedRayleigh", "period", "periodGroup"))

  oneSampleTable$showSpecifiedColumnsOnly <- TRUE

  # Add columns to table
  oneSampleTable$addColumnInfo(name = "variable",   title = "Variable",   type = "string", combine = TRUE)
  if (wantsSplit)
    oneSampleTable$addColumnInfo(name = "level",   title = "Level",   type = "string", combine = TRUE)

  oneSampleTable$addColumnInfo(name = "testName",   title = "Test",   type = "string")
  if(options$rayleigh || options$modifiedRayleigh){
    oneSampleTable$addColumnInfo(name = "p",   title = "p",   type = "number", format = "dp:3;p:.001")
  }
  if (options$rao){
    oneSampleTable$addColumnInfo(name = "alpha",   title = "\u03B1",   type = "number", format = "dp:3")
    oneSampleTable$addColumnInfo(name = "criticalValue",   title = "Critical",   type = "number", format = "dp:3")
  }
  oneSampleTable$addColumnInfo(name = "statistic",   title = "Statistic",   type = "number", format = "dp:3")
  
  oneSampleTable$addFootnote(symbol = "<em>Note.</em>", message = "All statistics are caclulated on a normalized period of 2\u03C0.")
  
  # add citations
  oneSampleTable$addCitation("Aaron Bahde and Philipp Berens (2019). University of Tuebingen.")
  oneSampleTable$addCitation("Ulric Lund and Claudio Agostinelli (2017). Circular (Version 0.4-93): Circular Statistics [R Package].")
  oneSampleTable$addCitation("Gerald Russell and Daniel Levitin (2007). An Extended Table of Probability Values for Raos Spacing Test.")
  
  if(ready){
    # If the calculations failed, do not fill the table but rather show the error.
    if(inherits(circularTestsOneSampleResults, "try-error")){
      errorMessage <- as.character(circularTestsOneSampleResults)
      oneSampleTable$setError(errorMessage)
      return()
    } else {
      .circularTestsOneSampleFillTable(oneSampleTable, circularTestsOneSampleResults, options, dataset)
    }
  }
}
.circularTestsOneSampleFillTable <- function(oneSampleTable, circularTestsOneSampleResults, options, dataset) {
  splitName <- options$splitby
  wantsSplit <- splitName != ""
  variables <- unlist(options$variables)
  if (wantsSplit){
    split <- dataset[[.v(options$splitby)]]
    splitLevels <- levels(split)
    
    rowNamesForRaoFootnote <- c()
    
    for(variable in variables){
      for(level in splitLevels){
        if (options$rao){
          row <- circularTestsOneSampleResults[[variable]][[level]][["rao"]]
          rowName <- paste(variable, level, "rao")
          oneSampleTable$addRows(row, rowNames = rowName)
          
          rowNamesForRaoFootnote <- c(rowNamesForRaoFootnote, rowName)
        }
        if (options$rayleigh){
          row <- circularTestsOneSampleResults[[variable]][[level]][["rayleigh"]]
          oneSampleTable$addRows(row, rowNames = paste(variable))
        }
        if (options$modifiedRayleigh){
          row <- circularTestsOneSampleResults[[variable]][[level]][["modifiedRayleigh"]]
          oneSampleTable$addRows(row, rowNames = paste(variable))
        }
      }
    }
  }
  else {
    
    rowNamesForRaoFootnote <- c()
    
    for (variable in variables){
      if (options$rao){
        row <- circularTestsOneSampleResults[[variable]][["rao"]]
        rowName <- paste(variable, "rao")
        oneSampleTable$addRows(row, rowNames = rowName)
        
        rowNamesForRaoFootnote <- c(rowNamesForRaoFootnote, rowName)
      }
      if (options$rayleigh){
        row <- circularTestsOneSampleResults[[variable]][["rayleigh"]]
        oneSampleTable$addRows(row, rowNames = paste(variable))
      }
      if (options$modifiedRayleigh){
        row <- circularTestsOneSampleResults[[variable]][["modifiedRayleigh"]]
        oneSampleTable$addRows(row, rowNames = paste(variable))
      }
    }
  }
  if (options$rao)
    oneSampleTable$addFootnote(message = paste("The Rao spacing test was run with \u03B1 = ", options$alphaRao, "so please compare the statistics to the critical value."), colNames = "testName", rowNames = rowNamesForRaoFootnote)
}

.circularTestsOneSampleCreateTableVonMises <-function (jaspResults, dataset, options, circularTestsOneSampleVonMisesTestResults, ready){
  wantsSplit <- options$splitby != ""
  
  # Create table
  vonMisesCheckTable <- createJaspTable(title = "Von Mises Assumption Check")
  jaspResults[["vonMisesCheckTable"]] <- vonMisesCheckTable
  jaspResults[["vonMisesCheckTable"]]$dependOn(options = c("variables", "splitby", "vonMisesCheck", "alphaVonMises", "period", "periodGroup"))

  vonMisesCheckTable$showSpecifiedColumnsOnly <- TRUE

  # Add columns to table
  vonMisesCheckTable$addColumnInfo(name = "variable",   title = "Variable",   type = "string", combine=TRUE)
  if (wantsSplit)
    vonMisesCheckTable$addColumnInfo(name = "level",   title = "Level",   type = "string", combine = TRUE)
  vonMisesCheckTable$addColumnInfo(name = "alpha",   title = "\u03B1",   type = "number", format = "dp:3")
  vonMisesCheckTable$addColumnInfo(name = "critical",   title = "Critical",   type = "number", format = "dp:3")
  vonMisesCheckTable$addColumnInfo(name = "statistic",   title = "U\u00B2",   type = "number", format = "dp:3")
  vonMisesCheckTable$addColumnInfo(name = "kappa",   title = "Est. \u03BA",   type = "number", format = "dp:2")
  
  # add citations
  vonMisesCheckTable$addCitation("Aaron Bahde and Philipp Berens (2019). University of Tuebingen.")
  vonMisesCheckTable$addCitation("Ulric Lund and Claudio Agostinelli (2017). Circular (Version 0.4-93): Circular Statistics [R Package].")
  vonMisesCheckTable$addCitation("R. A. Lockhart and M. A. Stephens (1985). Tests of Fit for the Von Mises Distribution.")
  
  if(ready){
    # If the calculations failed, do not fill the table but rather show the error.
    if(inherits(circularTestsOneSampleVonMisesTestResults, "try-error")){
      errorMessage <- as.character(circularTestsOneSampleVonMisesTestResults)
      vonMisesCheckTable$setError(errorMessage)
      return()
    }
    .circularTestsOneSampleFillTableVonMises(vonMisesCheckTable, circularTestsOneSampleVonMisesTestResults, options, dataset)
  }
}
.circularTestsOneSampleFillTableVonMises <- function(vonMisesCheckTable, circularTestsOneSampleVonMisesTestResults, options, dataset) {
  
  splitName <- options$splitby
  wantsSplit <- splitName != ""
  variables <- unlist(options$variables)
  
  if (wantsSplit){
    split       <- dataset[[.v(options$splitby)]]
    splitLevels <- levels(split)
    
    rowsForKappaFootnote <- c()
    
    for(variable in variables){
      for(level in splitLevels){
        row <- circularTestsOneSampleVonMisesTestResults[[variable]][[level]]
        vonMisesCheckTable$addRows(row, rowNames = paste(variable, level))
        
        if (row$kappa < 1)
          rowsForKappaFootnote <- c(rowsForKappaFootnote, paste(variable, level))
        
      }
    }
    vonMisesCheckTable$addFootnote(message = "Do not trust a significant result where \u03BA is small (< 1). The data could rather be uniform.", colNames = "kappa", rowNames = rowsForKappaFootnote)
  }
  else {
    rowsForKappaFootnote <- c()
    
    for(variable in variables){
      row <- circularTestsOneSampleVonMisesTestResults[[variable]]
      vonMisesCheckTable$addRows(row, rowNames = paste(variable))
      if (row$kappa < 1)
        rowsForKappaFootnote <- c(rowsForKappaFootnote, paste(variable))
    }
    vonMisesCheckTable$addFootnote(message = "Do not trust a significant result where \03BA is small (< 1). The data could rather be uniform.", colNames = "kappa", rowNames = rowsForKappaFootnote)
    vonMisesCheckTable$addFootnote(symbol = "<em>Note.</em>", message = paste("The test is run with \u03B1 = ",  options$alphaVonMises, "so please compare the statistics to the critical value."))
  }
}

# Helper functions for circular statistics ----
.normalizeData <- function(data, period){
  return(((data %% period) / period) * 2 * pi)
}
circstat/JASP-module documentation built on Nov. 4, 2019, 8:53 a.m.