tests/rowRanks.R

library("matrixStats")

dense_rank <- function(x) match(x, table = sort(unique(x)))

rowRanks_R <- function(x, ties.method, ..., useNames = NA) {
  if (ties.method == "dense") {
    res <- t(apply(x, MARGIN = 1L, FUN = dense_rank))
  } else {
    res <- t(apply(x, MARGIN = 1L, FUN = rank, na.last = "keep", ties.method = ties.method))
  }
  
  # Preserve dimnames attribute?
  dim(res) <- dim(x)
  dimnames(res) <- if (isTRUE(useNames)) dimnames(x) else NULL
  
  res
}

colRanks_R <- function(x, ties.method, preserveShape = FALSE, ..., useNames = NA) {
  if (ties.method == "dense") {
    res <- t(apply(x, MARGIN = 2L, FUN = dense_rank))
  } else {
    res <- t(apply(x, MARGIN = 2L, FUN = rank, na.last = "keep", ties.method = ties.method))
  }
  
  # Preserve dimnames attribute?
  tx <- t(x)
  dim(res) <- dim(tx)
  dimnames(res) <- if (isTRUE(useNames)) dimnames(tx) else NULL
  
  if (preserveShape) res <- t(res)
  res
}

set.seed(1)

cat("Consistency checks:\n")
xs <- vector("list", length = 4L)
for (kk in 1:4) {
  
  # Simulate data in a matrix of any shape
  dim <- sample(40:80, size = 2L)
  n <- prod(dim)
  x <- rnorm(n, sd = 10)
  dim(x) <- dim
  
  # Add NAs?
  if ((kk %% 4) %in% c(3, 0)) {
    cat("Adding NAs\n")
    nna <- sample(n, size = 1L)
    x[sample(length(x), size = nna)] <- NA_real_
  }
  
  # Integer or double?
  if ((kk %% 4) %in% c(2, 0)) {
    cat("Coercing to integers\n")
    storage.mode(x) <- "integer"
  }

  xs[[kk]] <- x
} # for (kk ...)
str(xs)

for (kk in 1:4) {
  cat("Random test #", kk, "\n", sep = "")
  x <- xs[[kk]]
  tx <- t(x)
  
  for (ties in c("max", "min", "average", "first", "last", "dense")) {
    cat(sprintf("ties.method = %s\n", ties))
    # rowRanks():
    y1 <- matrixStats::rowRanks(x, ties.method = ties)
    if (ties != "last" || getRversion() >= "3.3.0") {
      y2 <- rowRanks_R(x, ties.method = ties)
      stopifnot(identical(y1, y2))
    }
    
    y3 <- matrixStats::colRanks(tx, ties.method = ties)
    stopifnot(identical(y1, y3))
    
    # colRanks():
    y1 <- matrixStats::colRanks(x, ties.method = ties)
    if (ties != "last" || getRversion() >= "3.3.0") {
      y2 <- colRanks_R(x, ties.method = ties)
      stopifnot(identical(y1, y2))
    }
    
    y3 <- matrixStats::rowRanks(tx, ties.method = ties)
    stopifnot(identical(y1, y3))
  }
} # for (kk ...)

# Note, below we know ties.method %in% c("min", "max", "average") is correct

cat("Consistency checks for random:\n")
tolerance <- 0.1
nsamples <- 10000
for (kk in 1:4) {
  cat("Random test #", kk, "\n", sep = "")
  x <- xs[[kk]]
  tx <- t(x)

  for (ties in c("random")) {
    cat(sprintf("ties.method = %s\n", ties))
    
    ## rowRanks():
    y0 <- rowRanks_R(x, ties.method = ties)
    y1 <- matrixStats::rowRanks(x, ties.method = ties)
    
    ## Assert symmetric rank differences
    d <- y1 - y0
    stopifnot(all(rowSums(d) == 0, na.rm = TRUE))
    
    ## Assert within [min, max]
    y2min <- matrixStats::rowRanks(x, ties.method = "min")
    y2max <- matrixStats::rowRanks(x, ties.method = "max")
    stopifnot(all(y1 >= y2min, na.rm = TRUE) && all(y1 <= y2max, na.rm = TRUE))
    ## Assert near average
    y1list <- replicate(nsamples, matrixStats::rowRanks(x, ties.method = ties), simplify = FALSE)
    y1mean <- Reduce(`+`, y1list) / nsamples
    y2avg <- matrixStats::rowRanks(x, ties.method = "average")
    stopifnot(all(abs(y1mean - y2avg) < tolerance, na.rm = TRUE))

    ## colRanks():
    y0 <- colRanks_R(x, ties.method = ties)
    y1 <- matrixStats::colRanks(x, ties.method = ties)

    ## Assert symmetric rank differences
    d <- y1 - y0
    stopifnot(all(rowSums(d) == 0, na.rm = TRUE))

    ## Assert within [min, max]
    y2min <- matrixStats::colRanks(x, ties.method = "min")
    y2max <- matrixStats::colRanks(x, ties.method = "max")
    stopifnot(all(y1 >= y2min, na.rm = TRUE) && all(y1 <= y2max, na.rm = TRUE))
    y1list <- replicate(nsamples, matrixStats::colRanks(x, ties.method = ties), simplify = FALSE)
    y1mean <- Reduce(`+`, y1list) / nsamples
    ## Assert near average
    y2avg <- matrixStats::colRanks(x, ties.method = "average")
    stopifnot(all(abs(y1mean - y2avg) < tolerance, na.rm = TRUE))
  }
} # for (kk ...)


## Exception handling
x <- matrix(1:12, nrow = 3L, ncol = 4L)
y <- try(rowRanks(x, ties.method = "unknown"), silent = TRUE)
stopifnot(inherits(y, "try-error"))

y <- try(colRanks(x, ties.method = "unknown"), silent = TRUE)
stopifnot(inherits(y, "try-error"))

dimnames <- list(letters[1:3], LETTERS[1:4])
for (mode in c("integer", "double")){
  storage.mode(x) <- mode
  # Test with and without dimnames on x
  for (setDimnames in c(TRUE, FALSE)) {
    if (setDimnames) dimnames(x) <- dimnames
    else dimnames(x) <- NULL
    # Check names attribute
    for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) {
      for (ties in c("max", "min", "average", "first", "last", "dense", "random")) {
        cat(sprintf("ties.method = %s\n", ties))
        # rowRanks():
        y1 <- matrixStats::rowRanks(x, ties.method = ties, useNames = useNames)
        if (ties != "last" || getRversion() >= "3.3.0") {
          y2 <- rowRanks_R(x, ties.method = ties, useNames = useNames)
          stopifnot(identical(y1, y2))
        }
        
        y3 <- matrixStats::colRanks(t(x), ties.method = ties, useNames = useNames)
        stopifnot(identical(y1, y3))
        
        # colRanks():
        y1 <- matrixStats::colRanks(x, ties.method = ties, useNames = useNames)
        if (ties != "last" || getRversion() >= "3.3.0") {
          y2 <- colRanks_R(x, ties.method = ties, useNames = useNames)
          stopifnot(identical(y1, y2))
        }
        
        y3 <- matrixStats::rowRanks(t(x), ties.method = ties, useNames = useNames)
        stopifnot(identical(y1, y3))
        
        # Check preserveShape
        y1 <- matrixStats::colRanks(x, ties.method = ties, preserveShape = TRUE, useNames = useNames)
        if (ties != "last" || getRversion() >= "3.3.0") {
          y2 <- colRanks_R(x, ties.method = ties, preserveShape = TRUE, useNames = useNames)
          stopifnot(identical(y1, y2))
        }
      }
    }
  }
}
HenrikBengtsson/matrixStats documentation built on April 12, 2024, 5:32 a.m.