# tests/mean2.R

library("matrixStats")

# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Consistency checks
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Fix the RNG seed so that the randomized tests below are reproducible
set.seed(1)

# Plain-R reference for mean2(): optionally subset 'x' by 'idxs', then
# defer to base::mean().
#
# Arguments:
#   x      Vector to average.
#   na.rm  Passed on to mean(); if TRUE, missing values are dropped.
#   idxs   Optional subscript applied as x[idxs]; NULL means "use all of x".
#
# Returns whatever mean() returns for the selected elements.
mean2_R <- function(x, na.rm = FALSE, idxs = NULL) {
  values <- if (is.null(idxs)) x else x[idxs]
  mean(values, na.rm = na.rm)
} # mean2_R()


cat("Consistency checks:\n")
for (kk in seq_len(20L)) {
  cat("Random test #", kk, "\n", sep = "")

  # Simulate a numeric vector of random length (1 to 100 elements)
  n <- sample(100L, size = 1L)
  x <- rnorm(n, sd = 100)

  # The iteration number modulo 4 decides which variations are applied:
  #   1: plain doubles, 2: integers, 3: doubles with NAs, 0: integers with NAs
  phase <- kk %% 4

  # Inject a random number of NA/NaN values at random positions?
  if (phase == 3 || phase == 0) {
    cat("Adding NAs\n")
    nna <- sample(n, size = 1L)
    missing_values <- sample(c(NA_real_, NaN), size = nna, replace = TRUE)
    x[sample(length(x), size = nna)] <- missing_values
  }

  # Test on integers instead of doubles?
  if (phase == 2 || phase == 0) {
    cat("Coercing to integers\n")
    storage.mode(x) <- "integer"
  }

  na.rm <- sample(c(TRUE, FALSE), size = 1L)

  # Mean over all elements
  y0 <- mean2_R(x, na.rm = na.rm)
  y1 <- mean2(x, na.rm = na.rm)
  stopifnot(all.equal(y1, y0))

  # Mean over a random, non-empty subset of the elements
  nidxs <- sample(n, size = 1L)
  idxs <- sample(n, size = nidxs)
  y0 <- mean2_R(x, na.rm = na.rm, idxs = idxs)
  y1 <- mean2(x, na.rm = na.rm, idxs = idxs)
  stopifnot(all.equal(y1, y0))

  # Integer data is additionally re-tested as logical data, both over
  # all elements and over the same subset as above
  if (is.integer(x)) {
    storage.mode(x) <- "logical"

    y0 <- mean2_R(x, na.rm = na.rm)
    y1 <- mean2(x, na.rm = na.rm)
    stopifnot(all.equal(y1, y0))

    y0 <- mean2_R(x, na.rm = na.rm, idxs = idxs)
    y1 <- mean2(x, na.rm = na.rm, idxs = idxs)
    stopifnot(all.equal(y1, y0))
  }
} # for (kk ...)


# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Special cases
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Each case compares mean2() against base::mean() for both settings of
# 'na.rm': 's1' is the value from mean(), 's2' is the value from mean2().
for (na.rm in c(FALSE, TRUE)) {
  # Averaging over zero elements (integers)
  x <- integer(0)
  s1 <- mean(x, na.rm = na.rm)
  s2 <- mean2(x, na.rm = na.rm)
  stopifnot(identical(s1, s2))

  x <- 1:5
  idxs <- integer(0)
  s1 <- mean(x[idxs], na.rm = na.rm)
  s2 <- mean2(x, idxs = idxs, na.rm = na.rm)
  stopifnot(identical(s1, s2))

  # Averaging over NA_integer_:s
  x <- rep(NA_integer_, times = 5L)
  s1 <- mean(x, na.rm = na.rm)
  s2 <- mean2(x, na.rm = na.rm)
  stopifnot(identical(s1, s2))

  x <- rep(NA_integer_, times = 5L)
  idxs <- 1:3
  s1 <- mean(x[idxs], na.rm = na.rm)
  s2 <- mean2(x, idxs = idxs, na.rm = na.rm)
  stopifnot(identical(s1, s2))


  # Averaging over zero elements (doubles).  'na.rm' is passed explicitly,
  # as for the integer cases, so that both settings are exercised.
  x <- double(0)
  s1 <- mean(x, na.rm = na.rm)
  s2 <- mean2(x, na.rm = na.rm)
  stopifnot(identical(s1, s2))

  x <- as.double(1:10)
  idxs <- integer(0)
  s1 <- mean(x[idxs], na.rm = na.rm)
  s2 <- mean2(x, idxs = idxs, na.rm = na.rm)
  stopifnot(identical(s1, s2))

  # Averaging over NA_real_:s
  x <- rep(NA_real_, times = 5L)
  s1 <- mean(x, na.rm = na.rm)
  s2 <- mean2(x, na.rm = na.rm)
  stopifnot(identical(s1, s2))

  x <- rep(NA_real_, times = 5L)
  idxs <- 1:3
  s1 <- mean(x[idxs], na.rm = na.rm)
  s2 <- mean2(x, idxs = idxs, na.rm = na.rm)
  stopifnot(identical(s1, s2))

  # Averaging over -Inf:s
  x <- rep(-Inf, times = 3L)
  s1 <- mean(x, na.rm = na.rm)
  s2 <- mean2(x, na.rm = na.rm)
  stopifnot(identical(s1, s2))

  # Averaging over +Inf:s
  x <- rep(+Inf, times = 3L)
  s1 <- mean(x, na.rm = na.rm)
  s2 <- mean2(x, na.rm = na.rm)
  stopifnot(identical(s1, s2))

  # Averaging over mix of -Inf:s and +Inf:s
  x <- rep(c(-Inf, +Inf), times = 3L)
  s1 <- mean(x, na.rm = na.rm)
  s2 <- mean2(x, na.rm = na.rm)
  stopifnot(identical(s1, s2))

  # Averaging over mix of -Inf:s and +Inf:s and numerics
  x <- rep(c(-Inf, +Inf, 3.14), times = 2L)
  s1 <- mean(x, na.rm = na.rm)
  s2 <- mean2(x, na.rm = na.rm)
  stopifnot(identical(s1, s2))

  # Averaging over mixes of NaN, NA, +Inf, and numerics, with NaN and NA
  # appearing in either order
  for (x in list(c(NaN, NA, +Inf, 3.14), c(NA_real_, NaN, +Inf, 3.14))) {
    s1 <- mean(x, na.rm = na.rm)
    s2 <- mean2(x, na.rm = na.rm)
    if (na.rm) {
      stopifnot(identical(s2, s1))
    } else {
      stopifnot(is.na(s1), is.na(s2))
      ## NOTE, due to compiler optimization, it is not guaranteed that NA is
      ## returned here (as one would expect). NaN might very well be returned,
      ## when both NA and NaN are involved.  This is an accepted feature in R,
      ## which is documented in help("is.nan").  See also
      ## https://stat.ethz.ch/pipermail/r-devel/2017-April/074009.html.
      ## Thus, we cannot guarantee that s1 is identical to s2; we only
      ## require that both are missing.
    }
  }
}


# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Argument 'idxs'
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# The same three leading elements of 'x', selected via each supported
# subscript type; mean2() must agree exactly with mean() on x[idxs].
x <- seq_len(5L)
idxs_list <- list(
  integer = seq_len(3L),
  double  = as.numeric(seq_len(3L)),
  logical = !(x > 3)
)

for (ii in seq_along(idxs_list)) {
  idxs <- idxs_list[[ii]]
  cat("idxs:\n")
  str(idxs)
  s2 <- mean2(x, idxs = idxs, na.rm = TRUE)
  s1 <- mean(x[idxs], na.rm = TRUE)
  stopifnot(identical(s1, s2))
}
# HenrikBengtsson/matrixStats documentation built on Nov. 15, 2024, 3:20 p.m.