R/unif_stat.R

Defines functions unif_stat

Documented in unif_stat

#' @title Circular and (hyper)spherical uniformity statistics
#'
#' @description Implementation of several statistics for assessing uniformity
#' on the (hyper)sphere
#' \eqn{S^{p-1} := \{{\bf x} \in R^p : ||{\bf x}|| = 1\}}{
#' S^{p-1} := \{x \in R^p : ||x|| = 1\}}, \eqn{p\ge 2}, for a sample
#' \eqn{{\bf X}_1,\ldots,{\bf X}_n\in S^{p-1}}{X_1,\ldots,X_n\in S^{p-1}}.
#'
#' \code{unif_stat} receives a (several) sample(s) of directions in
#' \emph{Cartesian coordinates}, except for the circular case (\eqn{p=2}) in
#' which the sample(s) can be \emph{angles}
#' \eqn{\Theta_1,\ldots,\Theta_n\in [0, 2\pi)}.
#'
#' \code{unif_stat} allows to compute several statistics to several samples
#' within a single call, facilitating thus Monte Carlo experiments.
#'
#' @param data sample to compute the test statistic. An \bold{array} of size
#' \code{c(n, p, M)} containing \code{M} samples of size \code{n} of directions
#' (in Cartesian coordinates) on \eqn{S^{p-1}}. Alternatively, a
#' \bold{matrix} of size \code{c(n, M)} with the angles on \eqn{[0, 2\pi)} of
#' the \code{M} circular samples of size \code{n} on \eqn{S^{1}}. Other objects
#' accepted are an array of size \code{c(n, 1, M)} or a vector of size
#' \code{n} with angular data. Must not contain \code{NA}'s.
#' @inheritParams unif_test
#' @param data_sorted is the circular data sorted? If \code{TRUE}, certain
#' statistics are faster to compute. Defaults to \code{FALSE}.
#' @param Rayleigh_m integer \eqn{m} for the \eqn{m}-modal Rayleigh test.
#' Defaults to \code{m = 1} (the standard Rayleigh test).
#' @param cov_a \eqn{a_n = a / n} parameter used in the length of the arcs
#' of the coverage-based tests. Must be positive. Defaults to \code{2 * pi}.
#' @param Rothman_t \eqn{t} parameter for the Rothman test, a real in
#' \eqn{(0, 1)}. Defaults to \code{1 / 3}.
#' @param Cressie_t \eqn{t} parameter for the Cressie test, a real in
#' \eqn{(0, 1)}. Defaults to \code{1 / 3}.
#' @param Pycke_q \eqn{q} parameter for the Pycke "\eqn{q}-test", a real in
#' \eqn{(0, 1)}. Defaults to \code{1 / 2}.
#' @param Riesz_s \eqn{s} parameter for the \eqn{s}-Riesz test, a real in
#' \eqn{(0, 2)}. Defaults to \code{1}.
#' @param CCF09_dirs a matrix of size \code{c(n_proj, p)} containing
#' \code{n_proj} random directions (in Cartesian coordinates) on \eqn{S^{p-1}}
#' to perform the CCF09 test. If \code{NULL} (default), a sample of size
#' \code{n_proj = 50} directions is computed internally.
#' @param K_CCF09 integer giving the truncation of the series present in the
#' asymptotic distribution of the Kolmogorov-Smirnov statistic. Defaults to
#' \code{25}.
#' @param CJ12_reg type of asymptotic regime for CJ12 test, either \code{1}
#' (sub-exponential regime), \code{2} (exponential), or \code{3}
#' (super-exponential; default).
#' @param Poisson_rho \eqn{\rho} parameter for the Poisson test, a real in
#' \eqn{[0, 1)}. Defaults to \code{0.5}.
#' @param Softmax_kappa \eqn{\kappa} parameter for the Softmax test, a
#' non-negative real. Defaults to \code{1}.
#' @param Stereo_a \eqn{a} parameter for the Stereo test, a real in
#' \eqn{[-1, 1]}. Defaults to \code{0}.
#' @param Sobolev_vk2 weights for the finite Sobolev test. A non-negative
#' vector or matrix. Defaults to \code{c(0, 0, 1)}.
#' @return A data frame of size \code{c(M, length(type))}, with column names
#' given by \code{type}, that contains the values of the test statistics.
#' @details
#' Except for \code{CCF09_dirs}, \code{K_CCF09}, and \code{CJ12_reg}, all the
#' test-specific parameters are vectorized.
#'
#' Descriptions and references for most of the statistics are available
#' in García-Portugués and Verdebout (2018).
#' @references
#' García-Portugués, E. and Verdebout, T. (2018) An overview of uniformity
#' tests on the hypersphere. \emph{arXiv:1804.00286}.
#' \doi{10.48550/arXiv.1804.00286}.
#' @examples
#' ## Circular data
#'
#' # Sample
#' n <- 10
#' M <- 2
#' Theta <- r_unif_cir(n = n, M = M)
#'
#' # Matrix
#' unif_stat(data = Theta, type = "all")
#'
#' # Array
#' unif_stat(data = array(Theta, dim = c(n, 1, M)), type = "all")
#'
#' # Vector
#' unif_stat(data = Theta[, 1], type = "all")
#'
#' ## Spherical data
#'
#' # Circular sample in Cartesian coordinates
#' n <- 10
#' M <- 2
#' X <- array(dim = c(n, 2, M))
#' for (i in 1:M) X[, , i] <- cbind(cos(Theta[, i]), sin(Theta[, i]))
#'
#' # Array
#' unif_stat(data = X, type = "all")
#'
#' # High-dimensional data
#' X <- r_unif_sph(n = n, p = 3, M = M)
#' unif_stat(data = X, type = "all")
#'
#' ## Specific arguments
#'
#' # Rothman
#' unif_stat(data = Theta, type = "Rothman", Rothman_t = 0.5)
#'
#' # CCF09
#' unif_stat(data = X, type = "CCF09", CCF09_dirs = X[, , 1])
#' unif_stat(data = X, type = "CCF09", CCF09_dirs = X[, , 1], K_CCF09 = 1)
#'
#' # CJ12
#' unif_stat(data = X, type = "CJ12", CJ12_reg = 3)
#' unif_stat(data = X, type = "CJ12", CJ12_reg = 1)
#' @export
unif_stat <- function(data, type = "all", data_sorted = FALSE,
                      CCF09_dirs = NULL, CJ12_reg = 3, cov_a = 2 * pi,
                      Cressie_t = 1 / 3, K_CCF09 = 25, Poisson_rho = 0.5,
                      Pycke_q = 0.5, Rayleigh_m = 1, Riesz_s = 1,
                      Rothman_t = 1 / 3, Sobolev_vk2 = c(0, 0, 1),
                      Softmax_kappa = 1, Stereo_a = 0) {

  # Stop if NA's
  if (anyNA(data)) {

    stop("NAs present in data, please remove them.")

  }

  # If data is a vector, transform it to matrix
  if (is.vector(data)) {

    data <- matrix(data, ncol = 1)

  }

  # Decode the type of data: vector, matrix, or array
  d <- dim(data)

  # Check sample size
  n <- d[1]
  if (n < 2) {

    stop("Sample size is one!")

  }

  # Array or matrix?
  if (length(d) == 3) {

    # Dimension and number of samples
    p <- d[2]
    M <- d[3]

    # Convert to polar coordinates?
    if (p == 2) {

      data <- X_to_Theta(X = data)

    } else if (p == 1) {

      data <- data[, 1, ]
      p <- 2

    }

  } else {

    # Dimension and number of samples
    p <- 2
    M <- d[2]

  }

  # Available statistics
  if (p == 2) {

    avail_stats <- avail_cir_tests

  } else {

    avail_stats <- avail_sph_tests

  }

  # Get the type of statistics
  if (is.character(type)) {

    type <- gsub("-", "_", type)
    type <- unique(type)
    if ("all" %in% type) {

      stats_type <- avail_stats

    } else {

      # Allow KS, CvM, and AD in unif_stat() (not in unif_test())
      if (p == 2) {

        avail_stats <- c(avail_stats, "KS", "CvM", "AD")

      }
      stats_type <- try(match.arg(arg = type, choices = avail_stats,
                                  several.ok = TRUE), silent = TRUE)
      if (inherits(stats_type, "try-error")) {

        stop(paste(strwrap(paste0(
          "Statistic with type = \"", type, "\" is unsupported. ",
          "Must be one of the following tests: \"",
          paste(avail_stats, collapse = "\", \""), "\"."),
          width = 80, indent = 0, exdent = 2), collapse = "\n"))

      }

    }

  } else if (is.numeric(type)) {

    type <- unique(type)
    if (type > length(avail_stats)) {

      stop("type must be a numeric vector with values between 1 and ",
           length(avail_stats), ".")

    } else {

      stats_type <- avail_stats[type]

    }

  } else {

    stop("type must be a character or a numeric vector.")

  }

  # Create a data frame for the statistics
  n_stats <- length(stats_type)
  stats <- matrix(NA, nrow = M, ncol = n_stats)
  colnames(stats) <- stats_type
  stats <- as.data.frame(stats)

  # Compute efficiently several statistics
  if (p == 2) { # p == 2

    # Statistics using gaps and sorted data
    stats_using_gaps <- c("Gini", "Gini_squared", "Greenwood", "Log_gaps",
                          "Max_uncover", "Num_uncover", "Range", "Rao",
                          "Vacancy")
    stats_using_sorted_data <- c("Cressie", "FG01", "Hodges_Ajne", "Kuiper",
                                 "Watson", "Watson_1976", "KS", "CvM", "AD",
                                 stats_using_gaps)

    # Statistics using the shortest angles matrix Psi
    stats_using_Psi <- c("Ajne", "Bakshaev", "Gine_Fn", "Gine_Gn",
                         "Hermans_Rasson", "PAD", "PCvM", "Poisson", "PRt",
                         "Pycke", "Pycke_q", "Rothman", "Riesz", "Sobolev",
                         "Softmax", "Stereo")

    # Statistics with vectorized parameters
    stats_vectorized <- c("Cressie", c("Max_uncover", "Num_uncover", "Vacancy"),
                          "Rayleigh", "Riesz", c("Rothman", "PRt"), "Poisson",
                          "Pycke_q", "Sobolev", "Softmax")
    param_vectorized <- c("Cressie_t", rep("cov_a", 3), "Rayleigh_m", "Riesz_s",
                          rep("Rothman_t", 2), "Poisson_rho", "Pycke_q",
                          "Sobolev_vk2", "Softmax_kappa")

    # Evaluate which statistics to apply
    run_test <- as.list(c(avail_cir_tests, "KS", "CvM", "AD") %in% stats_type)
    names(run_test) <- c(avail_cir_tests, "KS", "CvM", "AD")

    ## Sorted-data statistics

    # Is it worth to sort Theta? Only if there is more than one statistic
    # using sorted Theta and if it has not already been done
    # CAUTION: replacement of data with sorted_data!
    n_stats_type_sorted <- sum(stats_type %in% stats_using_sorted_data)
    if (!data_sorted && (n_stats_type_sorted > 1)) {

      data <- sort_each_col(data)
      data_sorted <- TRUE

    }

    # Statistics
    if (run_test$Cressie) {

      Cressie <- matrix(NA, nrow = M, ncol = length(Cressie_t))
      for (i in seq_along(Cressie_t)) {

        Cressie[, i] <- cir_stat_Cressie(Theta = data, t = Cressie_t[i],
                                         sorted = data_sorted)

      }
      stats$Cressie <- Cressie

    }
    if (run_test$FG01) {

      stats$FG01 <- cir_stat_FG01(Theta = data, sorted = data_sorted)

    }
    if (run_test$Hodges_Ajne) {

      stats$Hodges_Ajne <- cir_stat_Hodges_Ajne(Theta = data, asymp_std = FALSE,
                                                sorted = data_sorted,
                                                use_Cressie = TRUE)

    }
    if (run_test$Kuiper) {

      stats$Kuiper <- cir_stat_Kuiper(Theta = data, sorted = data_sorted,
                                      KS = FALSE, Stephens = FALSE)

    }
    if (run_test$Watson) {

      stats$Watson <- cir_stat_Watson(Theta = data, sorted = data_sorted,
                                      CvM = FALSE, Stephens = FALSE)

    }
    if (run_test$KS) {

      stats$KS <- cir_stat_Kuiper(Theta = data, sorted = data_sorted,
                                  KS = TRUE, Stephens = FALSE)

    }
    if (run_test$CvM) {

      stats$CvM <- cir_stat_Watson(Theta = data, sorted = data_sorted,
                                   CvM = TRUE, Stephens = FALSE)

    }
    if (run_test$AD) {

      stats$AD <- cir_stat_PAD(Theta = data, sorted = data_sorted, AD = TRUE)

    }
    if (run_test$Watson_1976) {

      stats$Watson_1976 <- cir_stat_Watson_1976(Theta = data,
                                                sorted = data_sorted,
                                                minus = FALSE)

    }

    ## Gaps-based statistics

    # Is it worth to compute the circular gaps? Only if there is more than
    # one statistic using them
    n_stats_type_gaps <- sum(stats_type %in% stats_using_gaps)
    if (n_stats_type_gaps > 1) {

      gaps <- cir_gaps(Theta = data, sorted = data_sorted)
      gaps_in_Theta <- TRUE

    } else {

      gaps <- data
      gaps_in_Theta <- FALSE

    }
    if (run_test$Range) {

      stats$Range <- cir_stat_Range(Theta = gaps, gaps_in_Theta = gaps_in_Theta,
                                    max_gap = TRUE, sorted = data_sorted)

    }
    if (run_test$Rao) {

      stats$Rao <- cir_stat_Rao(Theta = gaps, gaps_in_Theta = gaps_in_Theta,
                                sorted = data_sorted)

    }
    if (run_test$Gini) {

      stats$Gini <- cir_stat_Gini(Theta = gaps, gaps_in_Theta = gaps_in_Theta,
                                  sorted = data_sorted)

    }
    if (run_test$Gini_squared) {

      stats$Gini_squared <- cir_stat_Gini_squared(Theta = gaps,
                                                  gaps_in_Theta = gaps_in_Theta,
                                                  sorted = data_sorted)

    }
    if (run_test$Greenwood) {

      stats$Greenwood <- cir_stat_Greenwood(Theta = gaps,
                                            gaps_in_Theta = gaps_in_Theta,
                                            sorted = data_sorted)

    }
    if (run_test$Log_gaps) {

      stats$Log_gaps <- cir_stat_Log_gaps(Theta = gaps,
                                          gaps_in_Theta = gaps_in_Theta,
                                          sorted = data_sorted)

    }
    if (run_test$Max_uncover) {

      Max_uncover <- matrix(NA, nrow = M, ncol = length(cov_a))
      for (i in seq_along(cov_a)) {

        Max_uncover[, i] <- cir_stat_Max_uncover(Theta = gaps, a = cov_a[i],
                                                 gaps_in_Theta = gaps_in_Theta,
                                                 sorted = data_sorted)

      }
      stats$Max_uncover <- Max_uncover

    }
    if (run_test$Num_uncover) {

      Num_uncover <- matrix(NA, nrow = M, ncol = length(cov_a))
      for (i in seq_along(cov_a)) {

        Num_uncover[, i] <- cir_stat_Num_uncover(Theta = gaps, a = cov_a[i],
                                                 gaps_in_Theta = gaps_in_Theta,
                                                 sorted = data_sorted)

      }
      stats$Num_uncover <- Num_uncover

    }
    if (run_test$Vacancy) {

      Vacancy <- matrix(NA, nrow = M, ncol = length(cov_a))
      for (i in seq_along(cov_a)) {

        Vacancy[, i] <- cir_stat_Vacancy(Theta = gaps, a = cov_a[i],
                                         gaps_in_Theta = gaps_in_Theta,
                                         sorted = data_sorted)

      }
      stats$Vacancy <- Vacancy

    }

    ## Other statistics

    if (run_test$Rayleigh) {

      Rayleigh <- matrix(NA, nrow = M, ncol = length(Rayleigh_m))
      for (i in seq_along(Rayleigh_m)) {

        Rayleigh[, i] <- cir_stat_Rayleigh(Theta = data, m = Rayleigh_m[i])

      }
      stats$Rayleigh <- Rayleigh

    }
    if (run_test$Bingham) {

      stats$Bingham <- cir_stat_Bingham(Theta = data)

    }
    if (run_test$CCF09) {

      # Sample random directions
      if (is.null(CCF09_dirs)) {

        CCF09_dirs <- r_unif_sph(n = 50, p = 2, M = 1)[, , 1]

      }
      stats$CCF09 <- cir_stat_CCF09(Theta = data, dirs = CCF09_dirs,
                                    K_CCF09 = K_CCF09, original = FALSE)

    }

    ## Psi-based statistics

    # Is it worth to compute Psi? Only if there is more than one statistic
    # using it (after discounting equivalent tests if they are simultaneously
    # present) AND if 0.5 * n * (n - 1) * M is not too large.
    # CAUTION: replacement of data with Psi!

    # Number of statistics
    n_stats_type_Psi <- sum(stats_type %in% stats_using_Psi)

    # Add statistics with vectorized parameters
    n_stats_type_Psi <- n_stats_type_Psi + (
      run_test$Riesz * (length(Riesz_s) - 1) +
      (run_test$PRt || run_test$Rothman) * (length(Rothman_t) - 1) +
      run_test$Poisson * (length(Poisson_rho) - 1) +
      run_test$Softmax * (length(Softmax_kappa) - 1) +
      run_test$Sobolev * (nrow(rbind(Sobolev_vk2)) - 1))

    # Remove equivalent tests
    n_stats_type_Psi <- n_stats_type_Psi - (
      (run_test$Watson && run_test$PCvM) +
      (run_test$Rothman && run_test$PRt) +
      (any(Riesz_s == 2) && run_test$Riesz) +
      (any(Riesz_s == 0) && run_test$Pycke && run_test$Riesz) +
      (any(Riesz_s == 1) && run_test$Bakshaev && run_test$Riesz) +
      (any(Poisson_rho == 0) && run_test$Poisson) +
      (any(Softmax_kappa == 0) && run_test$Softmax) +
      (run_test$Gine_Fn && run_test$Gine_Gn && run_test$Ajne))

    # Compute Psi
    if (n_stats_type_Psi > 1 && 0.5 * n * (n - 1) * M <= 1e8) {

      dim(data) <- c(n, 1, M)
      data <- Psi_mat(data = data)
      Psi_in_Theta <- TRUE

    } else {

      Psi_in_Theta <- FALSE

    }

    # Statistics
    if (run_test$Ajne) {

      stats$Ajne <- cir_stat_Ajne(Theta = data, Psi_in_Theta = Psi_in_Theta)

    }
    if (run_test$Bakshaev) {

      stats$Bakshaev <- cir_stat_Riesz(Theta = data,
                                       Psi_in_Theta = Psi_in_Theta, s = 1)

    }
    if (run_test$Riesz) {

      Riesz <- matrix(NA, nrow = M, ncol = length(Riesz_s))
      for (i in seq_along(Riesz_s)) {

        if (Riesz_s[i] == 1 && run_test$Bakshaev) {

          Riesz[, i] <- stats$Bakshaev

        } else if (Riesz_s[i] == 2 &&
                   ((run_test$Rayleigh && any(Rayleigh_m == 1)) ||
                    !Psi_in_Theta)) {

          if (run_test$Rayleigh) {

            Riesz[, i] <- stats$Rayleigh[, which(Rayleigh_m == 1)]

          } else {

            Riesz[, i] <- cir_stat_Rayleigh(Theta = data, m = 1)

          }

        } else {

          Riesz[, i] <- (1 - 2 * (Riesz_s[i] < 0)) *
            cir_stat_Riesz(Theta = data, Psi_in_Theta = Psi_in_Theta,
                           s = Riesz_s[i])

        }

      }
      stats$Riesz <- Riesz

    }
    if (run_test$Gine_Gn) {

      stats$Gine_Gn <- cir_stat_Gine_Gn(Theta = data,
                                        Psi_in_Theta = Psi_in_Theta)

    }
    if (run_test$Gine_Fn) {

      if (run_test$Ajne && run_test$Gine_Gn) {

        stats$Gine_Fn <- 4 * stats$Ajne + stats$Gine_Gn

      } else {

        stats$Gine_Fn <- cir_stat_Gine_Fn(Theta = data,
                                          Psi_in_Theta = Psi_in_Theta)

      }

    }
    if (run_test$Hermans_Rasson) {

      stats$Hermans_Rasson <-
        cir_stat_Hermans_Rasson(Theta = data, Psi_in_Theta = Psi_in_Theta)

    }
    if (run_test$PAD) {

      stats$PAD <- cir_stat_PAD(Theta = data, Psi_in_Theta = Psi_in_Theta,
                                AD = FALSE, sorted = FALSE)

    }
    if (run_test$PCvM) {

      if (run_test$Watson) {

        stats$PCvM <- 2 * stats$Watson

      } else {

        stats$PCvM <- cir_stat_PCvM(Theta = data, Psi_in_Theta = Psi_in_Theta)

      }

    }
    if (run_test$Rothman) {

      Rothman <- matrix(NA, nrow = M, ncol = length(Rothman_t))
      for (i in seq_along(Rothman_t)) {

        Rothman[, i] <- cir_stat_Rothman(Theta = data,
                                         Psi_in_Theta = Psi_in_Theta,
                                         t = Rothman_t[i])

      }
      stats$Rothman <- Rothman

    }
    if (run_test$PRt) {

      if (run_test$Rothman) {

        stats$PRt <- stats$Rothman

      } else {

        PRt <- matrix(NA, nrow = M, ncol = length(Rothman_t))
        for (i in seq_along(Rothman_t)) {

          PRt[, i] <- cir_stat_PRt(Theta = data, Psi_in_Theta = Psi_in_Theta,
                                   t = Rothman_t[i])

        }
        stats$PRt <- PRt

      }

    }
    if (run_test$Pycke) {

      if (run_test$Riesz && any(Riesz_s == 0)) {

        stats$Pycke <- (2 * n) / (n - 1) * stats$Riesz[, which(Riesz_s == 0)]

      } else {

        stats$Pycke <- cir_stat_Pycke(Theta = data, Psi_in_Theta = Psi_in_Theta)

      }

    }
    if (run_test$Pycke_q) {

      Pycke_q_stat <- matrix(NA, nrow = M, ncol = length(Pycke_q))
      for (i in seq_along(Pycke_q)) {

        Pycke_q_stat[, i] <- cir_stat_Pycke_q(Theta = data,
                                              Psi_in_Theta = Psi_in_Theta,
                                              q = Pycke_q[i])

      }
      stats$Pycke_q <- Pycke_q_stat

    }
    if (run_test$Poisson) {

      Poisson <- matrix(NA, nrow = M, ncol = length(Poisson_rho))
      for (i in seq_along(Poisson_rho)) {

        if (Poisson_rho[i] == 0) {

          if (run_test$Rayleigh && any(Rayleigh_m == 1)) {

            Poisson[, i] <- stats$Rayleigh[, which(Rayleigh_m == 1)]

          } else {

            Poisson[, i] <- cir_stat_Rayleigh(Theta = data, m = 1)

          }

        } else {

          Poisson[, i] <- cir_stat_Poisson(Theta = data,
                                           Psi_in_Theta = Psi_in_Theta,
                                           rho = Poisson_rho[i])

        }

      }
      stats$Poisson <- Poisson

    }
    if (run_test$Softmax) {

      Softmax <- matrix(NA, nrow = M, ncol = length(Softmax_kappa))
      for (i in seq_along(Softmax_kappa)) {

        if (Softmax_kappa[i] == 0) {

          if (run_test$Rayleigh && any(Rayleigh_m == 1)) {

            Softmax[, i] <- stats$Rayleigh[, which(Rayleigh_m == 1)]

          } else {

            Softmax[, i] <- cir_stat_Rayleigh(Theta = data, m = 1)

          }

        } else {

            Softmax[, i] <- cir_stat_Softmax(Theta = data,
                                             Psi_in_Theta = Psi_in_Theta,
                                             kappa = Softmax_kappa[i])

        }

      }
      stats$Softmax <- Softmax

    }
    if (run_test$Sobolev) {

      stats$Sobolev <- cir_stat_Sobolev(Theta = data,
                                        Psi_in_Theta = Psi_in_Theta,
                                        vk2 = Sobolev_vk2)

    }

  } else { # p >= 3

    # Statistics using the shortest angles matrix Psi
    stats_using_Psi <- c("Ajne", "Bakshaev", "CJ12", "Gine_Fn", "Gine_Gn",
                         "PAD", "PCvM", "PRt", "Poisson", "Pycke", "Riesz",
                         "Sobolev", "Softmax", "Stereo")

    # Statistics with vectorized parameters
    stats_vectorized <- c("Poisson", "PRt", "Riesz", "Sobolev", "Softmax",
                          "Stereo")
    param_vectorized <- c("Poisson_rho", "Rothman_t", "Riesz_s",
                          "Sobolev_vk2", "Softmax_kappa", "Stereo_a")

    # Evaluate which statistics to apply
    run_test <- as.list(avail_sph_tests %in% stats_type)
    names(run_test) <- avail_sph_tests

    ## Other statistics

    if (run_test$Bingham) {

      stats$Bingham <- sph_stat_Bingham(X = data)

    }
    if (run_test$CCF09) {

      # Sample random directions
      if (is.null(CCF09_dirs)) {

        CCF09_dirs <- r_unif_sph(n = 50, p = p, M = 1)[, , 1]

      }
      stats$CCF09 <- sph_stat_CCF09(X = data, dirs = CCF09_dirs,
                                    K_CCF09 = K_CCF09, original = FALSE)

    }
    if (run_test$Rayleigh) {

      stats$Rayleigh <- sph_stat_Rayleigh(X = data)

    }
    if (run_test$Rayleigh_HD) {

      if (run_test$Rayleigh) {

        stats$Rayleigh_HD <- (stats$Rayleigh - p) / sqrt(2 * p)

      } else {

        stats$Rayleigh_HD <- sph_stat_Rayleigh_HD(X = data)

      }

    }

    ## Psi-based statistics

    # Is it worth to compute Psi? Only if there is more than one statistic
    # using it (after discounting equivalent tests if they are simultaneously
    # present) AND if 0.5 * n * (n - 1) * M is not too large.
    # CAUTION: replacement of data with Psi!

    # Number of statistics
    n_stats_type_Psi <- sum(stats_type %in% stats_using_Psi)

    # Add statistics with vectorized parameters
    n_stats_type_Psi <- n_stats_type_Psi + (
      run_test$Riesz * (length(Riesz_s) - 1) +
      run_test$PRt * (length(Rothman_t) - 1) +
      run_test$Poisson * (length(Poisson_rho) - 1) +
      run_test$Softmax * (length(Softmax_kappa) - 1) +
      run_test$Stereo * (length(Stereo_a) - 1) +
      run_test$Sobolev * (nrow(rbind(Sobolev_vk2)) - 1))

    # Remove equivalent tests
    n_stats_type_Psi <- n_stats_type_Psi - (
      (any(Riesz_s == 2) && run_test$Riesz) +
      (any(Riesz_s == 0) && run_test$Pycke && run_test$Riesz) +
      (any(Riesz_s == 1) && run_test$Bakshaev && run_test$Riesz) +
      (any(Poisson_rho == 0) && run_test$Poisson) +
      (any(Softmax_kappa == 0) && run_test$Softmax) +
      (p == 3 && run_test$PCvM && run_test$Bakshaev) +
      (run_test$Gine_Fn && run_test$Gine_Gn && run_test$Ajne))

    # Compute Psi
    if (n_stats_type_Psi > 1 && 0.5 * n * (n - 1) * M <= 1e8) {

      data <- Psi_mat(data = data)
      dim(data) <- c(dim(data), 1)
      Psi_in_X <- TRUE

    } else {

      Psi_in_X <- FALSE

    }

    # Statistics
    if (run_test$Ajne) {

      stats$Ajne <- sph_stat_Ajne(X = data, Psi_in_X = Psi_in_X)

    }
    if (run_test$Bakshaev) {

      stats$Bakshaev <- sph_stat_Riesz(X = data, Psi_in_X = Psi_in_X, p = p,
                                       s = 1)

    }
    if (run_test$Riesz) {

      Riesz <- matrix(NA, nrow = M, ncol = length(Riesz_s))
      for (i in seq_along(Riesz_s)) {

        if (Riesz_s[i] == 1 && run_test$Bakshaev) {

          Riesz[, i] <- stats$Bakshaev

        } else if (Riesz_s[i] == 2 && (run_test$Rayleigh || !Psi_in_X)) {

          if (run_test$Rayleigh) {

            Riesz[, i] <- (2 / p) * stats$Rayleigh

          } else {

            Riesz[, i] <- (2 / p) * sph_stat_Rayleigh(X = data)

          }

        } else {

          Riesz[, i] <- (1 - 2 * (Riesz_s[i] < 0)) *
            sph_stat_Riesz(X = data, Psi_in_X = Psi_in_X, p = p, s = Riesz_s[i])

        }

      }
      stats$Riesz <- Riesz

    }
    if (run_test$CJ12) {

      if (Psi_in_X) {

        stats$CJ12 <- sph_stat_CJ12(X = data, Psi_in_X = TRUE, p = p,
                                    regime = CJ12_reg)

      } else {

        stats$CJ12 <- sph_stat_CJ12(X = data, Psi_in_X = FALSE, p = p,
                                    regime = CJ12_reg)

      }

    }
    if (run_test$Gine_Gn) {

      stats$Gine_Gn <- sph_stat_Gine_Gn(X = data, Psi_in_X = Psi_in_X, p = p)

    }
    if (run_test$Gine_Fn) {

      if (run_test$Ajne && run_test$Gine_Gn) {

        stats$Gine_Fn <- 4 * stats$Ajne + stats$Gine_Gn

      } else {

        stats$Gine_Fn <- sph_stat_Gine_Fn(X = data, Psi_in_X = Psi_in_X, p = p)

      }

    }
    if (run_test$PAD) {

      stats$PAD <- sph_stat_PAD(X = data, Psi_in_X = Psi_in_X, p = p)

    }
    if (run_test$PCvM) {

      if (run_test$Bakshaev && p == 3) {

        stats$PCvM <- 0.125 * stats$Bakshaev

      } else {

        stats$PCvM <- sph_stat_PCvM(X = data, Psi_in_X = Psi_in_X, p = p)

      }

    }
    if (run_test$PRt) {

      PRt <- matrix(NA, nrow = M, ncol = length(Rothman_t))
      for (i in seq_along(Rothman_t)) {

        PRt[, i] <- sph_stat_PRt(X = data, Psi_in_X = Psi_in_X, p = p,
                                 t = Rothman_t[i])

      }
      stats$PRt <- PRt

    }
    if (run_test$Pycke) {

      if (run_test$Riesz && any(Riesz_s == 0)) {

        if (p == 3) {

          stats$Pycke <- n / (2 * pi * (n - 1)) *
            (stats$Riesz[, which(Riesz_s == 0)] - (log(4) - 1) / 2)

        } else {

          warning(paste("Pycke statistic is only defined for p = 2,3.",
                        "Using Riesz statistic with s = 0 instead,",
                        "which behaves consistently across dimensions."))
          stats$Pycke <- stats$Riesz

        }

      } else {

        if (Psi_in_X) {

          stats$Pycke <- sph_stat_Pycke(X = data, Psi_in_X = TRUE, p = p)

        } else {

          stats$Pycke <- sph_stat_Pycke(X = data, Psi_in_X = FALSE, p = p)

        }

      }

    }
    if (run_test$Stereo) {

      Stereo <- matrix(NA, nrow = M, ncol = length(Stereo_a))
      for (i in seq_along(Stereo_a)) {

        Stereo[, i] <- sph_stat_Stereo(X = data, Psi_in_X = Psi_in_X, p = p,
                                       a = Stereo_a[i])

      }
      stats$Stereo <- Stereo

    }
    if (run_test$Poisson) {

      Poisson <- matrix(NA, nrow = M, ncol = length(Poisson_rho))
      for (i in seq_along(Poisson_rho)) {

        if (Poisson_rho[i] == 0) {

          if (run_test$Rayleigh) {

            Poisson[, i] <- stats$Rayleigh

          } else {

            Poisson[, i] <- sph_stat_Rayleigh(X = data)

          }

        } else {

          Poisson[, i] <- sph_stat_Poisson(X = data, Psi_in_X = Psi_in_X, p = p,
                                           rho = Poisson_rho[i])

        }

      }
      stats$Poisson <- Poisson

    }
    if (run_test$Softmax) {

      Softmax <- matrix(NA, nrow = M, ncol = length(Softmax_kappa))
      for (i in seq_along(Softmax_kappa)) {

        if (Softmax_kappa[i] == 0) {

          if (run_test$Rayleigh) {

            Softmax[, i] <- stats$Rayleigh

          } else {

            Softmax[, i] <- sph_stat_Rayleigh(X = data)

          }

        } else {

          Softmax[, i] <- sph_stat_Softmax(X = data, Psi_in_X = Psi_in_X,
                                           p = p, kappa = Softmax_kappa[i])

        }

      }
      stats$Softmax <- Softmax

    }
    if (run_test$Sobolev) {

      stats$Sobolev <- sph_stat_Sobolev(X = data, Psi_in_X = Psi_in_X, p = p,
                                        vk2 = Sobolev_vk2)

    }

  }

  # Avoid returning matrices in variables if there are vectorized tests.
  # Instead, return a data frame with Sobolev.1, Sobolev.2, etc. variables
  n_param_vectorized <- sapply(param_vectorized, function(par) {

    obj <- get(x = par)
    return(ifelse(is.null(dim(obj)), length(obj), nrow(obj)))

  })
  if (any(stats_vectorized %in% type) && any(n_param_vectorized > 1)) {

    stats <- do.call(data.frame, stats)

  }

  return(stats)

}

Try the sphunif package in your browser

Any scripts or data that you put into this service are public.

sphunif documentation built on May 29, 2024, 4:19 a.m.