Nothing
#' This script performs unit tests on
#' all functions.
#'
## Set seed for reproducibility
set.seed(2021)
## Test Hamming distance
test_that("Hamming distances are computed and stored correctly" , {
X <- matrix(nrow = 150, ncol = 100, rbinom(15000, 1, 0.5))
h_dists <- getHammingDistance(X)
h_dist_vec <- c()
for (i in 1:149) {
for (j in (i+1):150) {
h_dist_vec <- c(h_dist_vec, sum((X[i,] - X[j,])^2))
}
}
testthat::expect_equal(h_dists, h_dist_vec)
})
## Test Lp distance
test_that("l_p^p distances are computed and stored correctly", {
X <- matrix(nrow = 150, ncol = 100, rnorm(15000))
lp_dists <- getLpDistance(X, p = 2) %>% as.numeric()
lp_dist_vec <- c()
for (i in 1:149) {
for (j in (i+1):150) {
lp_dist_vec <- c(lp_dist_vec, sum((X[i,] - X[j,])^2))
}
}
testthat::expect_equal(lp_dists, lp_dist_vec)
})
## Test V statistic computation for binary matrix
test_that("V statistic is computed correctly for binary matrix", {
X <- matrix(nrow = 15, ncol = 10, rbinom(150, 1, 0.5))
v_stat <- getBinVStat(X)
h_dists <- getHammingDistance(X)
calc <- var(h_dists) / 10
testthat::expect_equal(v_stat, calc)
})
## Test V statistic computation for real matrix
test_that("V statistic is computed correctly for real matrix", {
X <- matrix(nrow = 15, ncol = 10, rnorm(150))
v_stat <- getRealVStat(X, 2)
h_dists <- getLpDistance(X, 2)
calc <- var(h_dists) / 10
testthat::expect_equal(v_stat, calc)
})
## Test that weights are non-negative
# [!] This ensures our chi square convolution numerical integration
# runs properly.
test_that("Weights generated by real matrices are non-negative", {
weights_df <- data.frame(w1 = numeric(),
w2 = numeric())
for (i in 1:100) {
X <- matrix(nrow = 100, ncol = 100, rnorm(1e4))
coeffs <- getCov(X, p = 2)
weights <- getChi2Weights(coeffs[1], coeffs[2], coeffs[3], N = 100)
weights_df <- rbind(weights_df,
data.frame(w1 = weights[1],
w2 = weights[2]))
}
row.names(weights_df) <- 1:dim(weights_df)[1]
testthat::expect_true(all(weights_df >= 0))
})
## Test block covariance weights are non-negative
# [!] This ensures our chi square convolution numerical integration
# runs properly.
test_that("Weights generated by binary matrices under block dependencies are non-negative", {
weights_df <- data.frame(w1 = numeric(),
w2 = numeric())
for (i in 1:100) {
X <- matrix(nrow = 100, ncol = 100, rbinom(1e4, 1, 0.5))
coeffs <- getBlockCov(X,
block_boundaries = NULL,
block_labels = rep(c(1,2,3,4,5),20))
weights <- getChi2Weights(coeffs[1], coeffs[2], coeffs[3], N = 100)
weights_df <- rbind(weights_df,
data.frame(w1 = weights[1],
w2 = weights[2]))
}
row.names(weights_df) <- 1:dim(weights_df)[1]
testthat::expect_true(all(weights_df >= 0))
})
test_that("Weights generated by real matrices under block dependencies are non-negative", {
weights_df <- data.frame(w1 = numeric(),
w2 = numeric())
for (i in 1:100) {
X <- matrix(nrow = 100, ncol = 100, rnorm(1e4))
coeffs <- getBlockCov(X,
block_boundaries = NULL,
block_labels = rep(c(1,2,3,4,5),20), p = 2)
weights <- getChi2Weights(coeffs[1], coeffs[2], coeffs[3], N = 100)
weights_df <- rbind(weights_df,
data.frame(w1 = weights[1],
w2 = weights[2]))
}
row.names(weights_df) <- 1:dim(weights_df)[1]
testthat::expect_true(all(weights_df >= 0))
})
## Test chi square convolution
test_that("Convolution of chi square returns correct tail probabilities", {
for (i in 1:10) {
w1 <- abs(rnorm(1))
w2 <- abs(rnorm(1))
d1 <- sample(size = 1, x = 1:1000)
d2 <- sample(size = 1, x = 1:1000)
val <- w1 * rchisq(n = 1, df = d1) + w2 * rchisq(n = 1, df = d2)
sims <- w1 * rchisq(n = 1e4, df = d1) + w2 * rchisq(n = 1e4, df = d2)
p_val <- mean(sims > val)
error <- 3 * sqrt(p_val * (1 - p_val) / 1e4)
error <- max(error, 3/1e4)
theory <- weightedChi2P(val, w1, w2, d1, d2)
testthat::expect_true(abs(theory - p_val) < error)
}
})
## Test large P asymptotics
test_that("Large P asymptotics with independent features approximates permutation test well for binary matrices", {
for (i in 1:10) {
X <- matrix(nrow = 50, ncol = 1000, rbinom(5e4, 1, 0.5))
exact_p_val <- blockPermute(X, block_labels = 1:1000, nruns = 5000, type = 'unbiased')
asymp_p_val <- indLargeP(X)
error <- 3 * sqrt(exact_p_val * (1 - exact_p_val) / 1000)
error <- max(error, 3/1000)
testthat::expect_true(abs(exact_p_val - asymp_p_val) < error)
}
})
test_that("Large P asymptotics with dependent features approximates permutation test well for binary matrices", {
for (i in 1:10) {
X <- matrix(nrow = 20, ncol = 1000, rbinom(2e4, 1, 0.5))
exact_p_val <- blockPermute(X,
block_labels = rep(1:100, 10),
nruns = 5000,
type = 'unbiased')
asymp_p_val <- blockLargeP(X,
block_boundaries = NULL,
block_labels = rep(1:100, 10))
error <- 3 * sqrt(exact_p_val * (1 - exact_p_val) / 100)
error <- max(error, 3/100)
testthat::expect_true(abs(exact_p_val - asymp_p_val) < error)
}
})
test_that("Large P asymptotics with independent features approximates permutation test well for real matrices", {
for (i in 1:10) {
X <- matrix(nrow = 20, ncol = 100, rnorm(2e3))
exact_p_val <- blockPermute(X, block_labels = 1:100, nruns = 5000, p = 2, type = 'unbiased')
asymp_p_val <- indLargeP(X, p = 2)
error <- 3 * sqrt(exact_p_val * (1 - exact_p_val) / 100)
error <- max(error, 3/100)
testthat::expect_true(abs(exact_p_val - asymp_p_val) < error)
}
})
test_that("Large P asymptotics with dependent features approximates permutation test well for real matrices", {
for (i in 1:10) {
X <- matrix(nrow = 20, ncol = 1000, rnorm(2e4))
exact_p_val <- blockPermute(X, block_labels = rep(1:100, 10), nruns = 5000, p = 2, type = 'unbiased')
asymp_p_val <- blockLargeP(X,
block_boundaries = NULL,
block_labels = rep(1:100, 10),
p = 2)
error <- 3 * sqrt(exact_p_val * (1 - exact_p_val) / 100)
error <- max(error, 3/100)
testthat::expect_true(abs(exact_p_val - asymp_p_val) < error)
}
})
## Test large P and N asymptotics
test_that("Large P and N asymptotics with independent features approximates permutation test well for binary matrices", {
for (i in 1:10) {
X <- matrix(nrow = 100, ncol = 100, rbinom(1e4, 1, 0.5))
exact_p_val <- blockPermute(X, block_labels = 1:100, nruns = 5000, type = 'unbiased')
asymp_p_val <- indGaussian(X)
error <- 3 * sqrt(exact_p_val * (1 - exact_p_val) / 100)
error <- max(error, 3/100)
testthat::expect_true(abs(exact_p_val - asymp_p_val) < error)
}
})
test_that("Large P and N asymptotics with dependent features approximates permutation test well for binary matrices", {
for (i in 1:10) {
X <- matrix(nrow = 100, ncol = 1000, rbinom(1e5, 1, 0.5))
exact_p_val <- blockPermute(X,
block_labels = rep(1:100, 10),
nruns = 5000,
type = 'unbiased')
asymp_p_val <- blockGaussian(X,
block_boundaries = NULL,
block_labels = rep(1:100, 10))
error <- 3 * sqrt(exact_p_val * (1 - exact_p_val) / 100)
error <- max(error, 3/100)
testthat::expect_true(abs(exact_p_val - asymp_p_val) < error)
}
})
test_that("Large P and N asymptotics with independent features approximates permutation test well for real matrices", {
for (i in 1:10) {
X <- matrix(nrow = 100, ncol = 100, rnorm(1e4))
exact_p_val <- blockPermute(X, block_labels = 1:100, nruns = 5000, p = 2, type = 'unbiased')
asymp_p_val <- indGaussian(X, p = 2)
error <- 3 * sqrt(exact_p_val * (1 - exact_p_val) / 100)
error <- max(error, 3/100)
testthat::expect_true(abs(exact_p_val - asymp_p_val) < error)
}
})
test_that("Large P asymptotics with dependent features approximates permutation test well for real matrices", {
for (i in 1:10) {
X <- matrix(nrow = 100, ncol = 1000, rnorm(1e5))
exact_p_val <- blockPermute(X, block_labels = rep(1:100, 10), nruns = 5000, p = 2, type = 'unbiased')
asymp_p_val <- blockGaussian(X,
block_boundaries = NULL,
block_labels = rep(1:100, 10),
p = 2)
error <- 3 * sqrt(exact_p_val * (1 - exact_p_val) / 100)
error <- max(error, 3/100)
testthat::expect_true(abs(exact_p_val - asymp_p_val) < error)
}
})
## Test getPValue
test_that("Large P approximate p-values generated by getPValue for independent binary features are uniformly distributed", {
p_val_vec <- c()
for (i in 1:3000) {
X <- matrix(nrow = 20, ncol = 100, rbinom(2e3, 1, 0.5))
p_val_vec <- c(p_val_vec, suppressWarnings(getPValue(X, largeP = TRUE))) # ignore zeros or ones columns
}
testthat::expect_true(abs(mean(p_val_vec)-0.5) < 0.1 & abs(var(p_val_vec) - 1/12) < 0.1)
})
test_that("Large P approximate p-values generated by getPValue for independent real features are uniformly distributed", {
p_val_vec <- c()
for (i in 1:3000) {
X <- matrix(nrow = 20, ncol = 100, rnorm(2e3, 1, 0.5))
p_val_vec <- c(p_val_vec, getPValue(X, p = 2, largeP = TRUE))
}
testthat::expect_true(abs(mean(p_val_vec)-0.5) < 0.1 & abs(var(p_val_vec) - 1/12) < 0.1)
})
test_that("getPValue returns error when input dataset is not a matrix", {
X <- rbinom(10, 1, 0.5)
testthat::expect_error(getPValue(X))
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.