tests/testthat/test_flintyR.R

#' This script performs unit tests on
#' all functions.
#'

## Set seed for reproducibility
set.seed(2021)

## Test Hamming distance
test_that("Hamming distances are computed and stored correctly" , {

  X <- matrix(nrow = 150, ncol = 100, rbinom(15000, 1, 0.5))
  h_dists <- getHammingDistance(X)
  h_dist_vec <- c()
  for (i in 1:149) {
    for (j in (i+1):150) {
      h_dist_vec <- c(h_dist_vec, sum((X[i,] - X[j,])^2))
    }
  }
  testthat::expect_equal(h_dists, h_dist_vec)
})

## Test Lp distance
test_that("l_p^p distances are computed and stored correctly", {

  X <- matrix(nrow = 150, ncol = 100, rnorm(15000))
  lp_dists <- getLpDistance(X, p = 2) %>% as.numeric()
  lp_dist_vec <- c()
  for (i in 1:149) {
    for (j in (i+1):150) {
      lp_dist_vec <- c(lp_dist_vec, sum((X[i,] - X[j,])^2))
    }
  }
  testthat::expect_equal(lp_dists, lp_dist_vec)
})

## Test V statistic computation for binary matrix
test_that("V statistic is computed correctly for binary matrix", {

  X <- matrix(nrow = 15, ncol = 10, rbinom(150, 1, 0.5))
  v_stat <- getBinVStat(X)
  h_dists <- getHammingDistance(X)
  calc <- var(h_dists) / 10
  testthat::expect_equal(v_stat, calc)
})

## Test V statistic computation for real matrix
test_that("V statistic is computed correctly for real matrix", {

  X <- matrix(nrow = 15, ncol = 10, rnorm(150))
  v_stat <- getRealVStat(X, 2)
  h_dists <- getLpDistance(X, 2)
  calc <- var(h_dists) / 10
  testthat::expect_equal(v_stat, calc)
})

## Test that weights are non-negative
# [!] This ensures our chi square convolution numerical integration
#     runs properly.
test_that("Weights generated by real matrices are non-negative", {
    weights_df <- data.frame(w1 = numeric(),
                             w2 = numeric())
    for (i in 1:100) {
      X <- matrix(nrow = 100, ncol = 100, rnorm(1e4))
      coeffs <- getCov(X, p = 2)
      weights <- getChi2Weights(coeffs[1], coeffs[2], coeffs[3], N = 100)
      weights_df <- rbind(weights_df,
                          data.frame(w1 = weights[1],
                                     w2 = weights[2]))
    }
    row.names(weights_df) <- 1:dim(weights_df)[1]
    testthat::expect_true(all(weights_df >= 0))
})

## Test block covariance weights are non-negative
# [!] This ensures our chi square convolution numerical integration
#     runs properly.
test_that("Weights generated by binary matrices under block dependencies are non-negative", {
  weights_df <- data.frame(w1 = numeric(),
                           w2 = numeric())
  for (i in 1:100) {
    X <- matrix(nrow = 100, ncol = 100, rbinom(1e4, 1, 0.5))
    coeffs <- getBlockCov(X,
                          block_boundaries = NULL,
                          block_labels = rep(c(1,2,3,4,5),20))
    weights <- getChi2Weights(coeffs[1], coeffs[2], coeffs[3], N = 100)
    weights_df <- rbind(weights_df,
                        data.frame(w1 = weights[1],
                                   w2 = weights[2]))
  }
  row.names(weights_df) <- 1:dim(weights_df)[1]
  testthat::expect_true(all(weights_df >= 0))
})

test_that("Weights generated by real matrices under block dependencies are non-negative", {
  weights_df <- data.frame(w1 = numeric(),
                           w2 = numeric())
  for (i in 1:100) {
    X <- matrix(nrow = 100, ncol = 100, rnorm(1e4))
    coeffs <- getBlockCov(X,
                          block_boundaries = NULL,
                          block_labels = rep(c(1,2,3,4,5),20), p = 2)
    weights <- getChi2Weights(coeffs[1], coeffs[2], coeffs[3], N = 100)
    weights_df <- rbind(weights_df,
                        data.frame(w1 = weights[1],
                                   w2 = weights[2]))
  }
  row.names(weights_df) <- 1:dim(weights_df)[1]
  testthat::expect_true(all(weights_df >= 0))
})

## Test chi square convolution
test_that("Convolution of chi square returns correct tail probabilities", {
  for (i in 1:10) {
    w1 <- abs(rnorm(1))
    w2 <- abs(rnorm(1))
    d1 <- sample(size = 1, x = 1:1000)
    d2 <- sample(size = 1, x = 1:1000)
    val <- w1 * rchisq(n = 1, df = d1) + w2 * rchisq(n = 1, df = d2)
    sims <- w1 * rchisq(n = 1e4, df = d1) + w2 * rchisq(n = 1e4, df = d2)
    p_val <- mean(sims > val)
    error <- 3 * sqrt(p_val * (1 - p_val) / 1e4)
    error <- max(error, 3/1e4)
    theory <- weightedChi2P(val, w1, w2, d1, d2)
    testthat::expect_true(abs(theory - p_val) < error)
  }
})

## Test large P asymptotics
test_that("Large P asymptotics with independent features approximates permutation test well for binary matrices", {
  for (i in 1:10) {
    X <- matrix(nrow = 50, ncol = 1000, rbinom(5e4, 1, 0.5))
    exact_p_val <- blockPermute(X, block_labels = 1:1000, nruns = 5000, type = 'unbiased')
    asymp_p_val <- indLargeP(X)
    error <- 3 * sqrt(exact_p_val * (1 - exact_p_val) / 1000)
    error <- max(error, 3/1000)
    testthat::expect_true(abs(exact_p_val - asymp_p_val) < error)
  }
})

test_that("Large P asymptotics with dependent features approximates permutation test well for binary matrices", {
  for (i in 1:10) {
    X <- matrix(nrow = 20, ncol = 1000, rbinom(2e4, 1, 0.5))
    exact_p_val <- blockPermute(X,
                                block_labels = rep(1:100, 10),
                                nruns = 5000,
                                type = 'unbiased')
    asymp_p_val <- blockLargeP(X,
                               block_boundaries = NULL,
                               block_labels = rep(1:100, 10))
    error <- 3 * sqrt(exact_p_val * (1 - exact_p_val) / 100)
    error <- max(error, 3/100)
    testthat::expect_true(abs(exact_p_val - asymp_p_val) < error)
  }
})

test_that("Large P asymptotics with independent features approximates permutation test well for real matrices", {
  for (i in 1:10) {
    X <- matrix(nrow = 20, ncol = 100, rnorm(2e3))
    exact_p_val <- blockPermute(X, block_labels = 1:100, nruns = 5000, p = 2, type = 'unbiased')
    asymp_p_val <- indLargeP(X, p = 2)
    error <- 3 * sqrt(exact_p_val * (1 - exact_p_val) / 100)
    error <- max(error, 3/100)
    testthat::expect_true(abs(exact_p_val - asymp_p_val) < error)
  }
})

test_that("Large P asymptotics with dependent features approximates permutation test well for real matrices", {
  for (i in 1:10) {
    X <- matrix(nrow = 20, ncol = 1000, rnorm(2e4))
    exact_p_val <- blockPermute(X, block_labels = rep(1:100, 10), nruns = 5000, p = 2, type = 'unbiased')
    asymp_p_val <- blockLargeP(X,
                               block_boundaries = NULL,
                               block_labels = rep(1:100, 10),
                               p = 2)
    error <- 3 * sqrt(exact_p_val * (1 - exact_p_val) / 100)
    error <- max(error, 3/100)
    testthat::expect_true(abs(exact_p_val - asymp_p_val) < error)
  }
})

## Test large P and N asymptotics
test_that("Large P and N asymptotics with independent features approximates permutation test well for binary matrices", {
  for (i in 1:10) {
    X <- matrix(nrow = 100, ncol = 100, rbinom(1e4, 1, 0.5))
    exact_p_val <- blockPermute(X, block_labels = 1:100, nruns = 5000, type = 'unbiased')
    asymp_p_val <- indGaussian(X)
    error <- 3 * sqrt(exact_p_val * (1 - exact_p_val) / 100)
    error <- max(error, 3/100)
    testthat::expect_true(abs(exact_p_val - asymp_p_val) < error)
  }
})

test_that("Large P and N asymptotics with dependent features approximates permutation test well for binary matrices", {
  for (i in 1:10) {
    X <- matrix(nrow = 100, ncol = 1000, rbinom(1e5, 1, 0.5))
    exact_p_val <- blockPermute(X,
                                block_labels = rep(1:100, 10),
                                nruns = 5000, 
                                type = 'unbiased')
    asymp_p_val <- blockGaussian(X,
                               block_boundaries = NULL,
                               block_labels = rep(1:100, 10))
    error <- 3 * sqrt(exact_p_val * (1 - exact_p_val) / 100)
    error <- max(error, 3/100)
    testthat::expect_true(abs(exact_p_val - asymp_p_val) < error)
  }
})

test_that("Large P and N asymptotics with independent features approximates permutation test well for real matrices", {
  for (i in 1:10) {
    X <- matrix(nrow = 100, ncol = 100, rnorm(1e4))
    exact_p_val <- blockPermute(X, block_labels = 1:100, nruns = 5000, p = 2, type = 'unbiased')
    asymp_p_val <- indGaussian(X, p = 2)
    error <- 3 * sqrt(exact_p_val * (1 - exact_p_val) / 100)
    error <- max(error, 3/100)
    testthat::expect_true(abs(exact_p_val - asymp_p_val) < error)
  }
})

test_that("Large P asymptotics with dependent features approximates permutation test well for real matrices", {
  for (i in 1:10) {
    X <- matrix(nrow = 100, ncol = 1000, rnorm(1e5))
    exact_p_val <- blockPermute(X, block_labels = rep(1:100, 10), nruns = 5000, p = 2, type = 'unbiased')
    asymp_p_val <- blockGaussian(X,
                               block_boundaries = NULL,
                               block_labels = rep(1:100, 10),
                               p = 2)
    error <- 3 * sqrt(exact_p_val * (1 - exact_p_val) / 100)
    error <- max(error, 3/100)
    testthat::expect_true(abs(exact_p_val - asymp_p_val) < error)
  }
})

## Test getPValue
test_that("Large P approximate p-values generated by getPValue for independent binary features are uniformly distributed", {
  p_val_vec <- c()
  for (i in 1:3000) {
    X <- matrix(nrow = 20, ncol = 100, rbinom(2e3, 1, 0.5))
    p_val_vec <- c(p_val_vec, suppressWarnings(getPValue(X, largeP = TRUE))) # ignore zeros or ones columns
  }
  testthat::expect_true(abs(mean(p_val_vec)-0.5) < 0.1 & abs(var(p_val_vec) - 1/12) < 0.1)
})

test_that("Large P approximate p-values generated by getPValue for independent real features are uniformly distributed", {
  p_val_vec <- c()
  for (i in 1:3000) {
    X <- matrix(nrow = 20, ncol = 100, rnorm(2e3, 1, 0.5))
    p_val_vec <- c(p_val_vec, getPValue(X, p = 2, largeP = TRUE))
  }
  testthat::expect_true(abs(mean(p_val_vec)-0.5) < 0.1 & abs(var(p_val_vec) - 1/12) < 0.1)
})

test_that("getPValue returns error when input dataset is not a matrix", {
  X <- rbinom(10, 1, 0.5)
  testthat::expect_error(getPValue(X))
})

Try the flintyR package in your browser

Any scripts or data that you put into this service are public.

flintyR documentation built on March 31, 2023, 8:19 p.m.