################################################################################
context("SPLIT_LD")
################################################################################
test_that("get_L() and get_C() work", {
corr <- as(outer(1:4 / 10, 1:4 / 10, '+'), "dgCMatrix")
diag(corr) <- 1
L <- matrix(0, 4, 4)
L[1, 2] <- sum(corr[1, 2:4]^2)
L[1, 3] <- sum(corr[1, 3:4]^2)
L[1, 4] <- sum(corr[1, 4:4]^2)
L[2, 3] <- sum(corr[2, 3:4]^2)
L[2, 4] <- sum(corr[2, 4:4]^2)
L[3, 4] <- sum(corr[3, 4:4]^2)
# Rcpp::sourceCpp('src/split-LD.cpp')
library(magrittr)
L2 <- corr %>%
Matrix::tril() %>%
{ bigsnpr:::get_L(.@p, .@i, .@x, thr_r2 = 0, max_r2 = 1) } %>%
# L now has an extra column with all 0s for convenience
{ Matrix::sparseMatrix(i = .[[1]], j = .[[2]], x = .[[3]], dims = c(4, 5),
triangular = FALSE, index1 = FALSE) }
expect_equal(L2[, 1:4], as(L, "dgCMatrix"))
# E <- matrix(0, 4, 4)
# E[1, 1] <- sum(corr[ 1, 2:4]^2)
# E[1, 2] <- sum(corr[1:2, 3:4]^2)
# E[1, 3] <- sum(corr[1:3, 4]^2)
# E[2, 2] <- sum(corr[ 2, 3:4]^2)
# E[2, 3] <- sum(corr[2:3, 4]^2)
# E[3, 3] <- sum(corr[ 3, 4]^2)
path_cost1 <- bigsnpr:::get_C(L2, min_size = 1, max_size = 4,
max_K = 5, max_cost = Inf,
pos_scaled = rep(0, ncol(corr)))
# one block remaining
expect_identical(path_cost1$best_ind[, 1], rep(4L, 4))
expect_identical(path_cost1$C[, 1], rep(0, 4))
# two blocks remaining -> last is impossible
expect_identical(path_cost1$best_ind[, 2], c(1L, 2L, 3L, NA))
expect_equal(path_cost1$C[, 2], c(0.5, 0.61, 0.49, Inf))
# three blocks remaining
expect_identical(path_cost1$best_ind[, 3], c(1L, 2L, NA, NA))
expect_equal(path_cost1$C[, 3], c(1.11, 1.1, Inf, Inf), tolerance = 1e-6)
# four blocks remaining
expect_identical(path_cost1$best_ind[, 4], c(1L, NA, NA, NA))
expect_equal(path_cost1$C[, 4], c(1.6, Inf, Inf, Inf))
# five blocks remaining -> all impossible
expect_identical(path_cost1$best_ind[, 5], rep(NA_integer_, 4))
expect_identical(path_cost1$C[, 5], rep(Inf, 4))
# must use two blocks of two
path_cost2 <- bigsnpr:::get_C(L2, min_size = 2, max_size = 2,
max_K = 3, max_cost = Inf,
pos_scaled = rep(1, ncol(corr)))
# one block remaining
expect_identical(path_cost2$best_ind[, 1], c(NA, NA, 4L, NA))
expect_identical(path_cost2$C[, 1], c(Inf, Inf, 0, Inf))
# two blocks remaining
expect_identical(path_cost2$best_ind[, 2], c(2L, NA, NA, NA))
expect_equal(path_cost2$C[, 2], c(1.02, Inf, Inf, Inf), tolerance = 1e-6)
# three blocks remaining -> always impossible
expect_identical(path_cost2$best_ind[, 3], rep(NA_integer_, 4))
expect_identical(path_cost2$C[, 3], rep(Inf, 4))
path_cost3 <- bigsnpr:::get_C(L2, min_size = 1, max_size = 3,
max_K = 3, max_cost = Inf,
pos_scaled = seq(0, 1, length.out = ncol(corr)))
# one block remaining
expect_identical(path_cost3$best_ind[, 1], c(NA, 4L, 4L, 4L))
expect_identical(path_cost3$C[, 1], c(Inf, 0, 0, 0))
# two blocks remaining
expect_identical(path_cost3$best_ind[, 2], c(1L, 2L, 3L, NA))
expect_equal(path_cost3$C[, 2], c(0.5, 0.61, 0.49, Inf))
# three blocks remaining
expect_identical(path_cost3$best_ind[, 3], c(1L, 2L, NA, NA))
expect_equal(path_cost3$C[, 3], c(1.11, 1.10, Inf, Inf), tolerance = 1e-6)
# pos_scaled works
expect_null(snp_ldsplit(corr, thr_r2 = 0, max_r2 = 1, min_size = 1,
max_size = 3, max_K = 3, max_cost = Inf,
pos_scaled = cols_along(corr) * 2))
test <- snp_ldsplit(corr, thr_r2 = 0, max_r2 = 1, min_size = 1,
max_size = 3, max_K = 4, max_cost = Inf,
pos_scaled = cols_along(corr) * 2)
expect_equal(nrow(test), 1)
path_cost4 <- bigsnpr:::get_C(L2, min_size = 1, max_size = 3,
max_K = 4, max_cost = Inf,
pos_scaled = cols_along(corr) * 2)
# one block remaining
expect_identical(path_cost4$best_ind[, 1], c(NA, NA, NA, 4L))
expect_identical(path_cost4$C[, 1], c(Inf, Inf, Inf, 0))
# two blocks remaining
err <- corr[3, 4]^2
expect_identical(path_cost4$best_ind[, 2], c(NA, NA, 3L, NA))
expect_equal(path_cost4$C[, 2], c(Inf, Inf, err, Inf))
# three blocks remaining
err <- err + sum(corr[2, 3:4]^2)
expect_identical(path_cost4$best_ind[, 3], c(NA, 2L, NA, NA))
expect_equal(path_cost4$C[, 3], c(Inf, err, Inf, Inf), tolerance = 1e-6)
# four blocks remaining
err <- err + sum(corr[1, 2:4]^2)
expect_identical(path_cost4$best_ind[, 4], c(1L, NA, NA, NA))
expect_equal(path_cost4$C[, 4], c(err, Inf, Inf, Inf), tolerance = 1e-6)
})
################################################################################
test_that("$perc_kept in snp_ldsplit() is exact", {
corr <- as(outer(1:4 / 10, 1:4 / 10, '+'), "dgCMatrix")
diag(corr) <- 1
test <- snp_ldsplit(corr, 0, 1, 2, 4, max_r2 = 1, max_cost = Inf)
expect_identical(test$n_block, 2:4)
expect_identical(test$perc_kept, c(8, 6, 4) / 16)
})
################################################################################
test_that("snp_ldsplit() gives consistent results", {
compute_cost <- function(block_num, corr, thr_r2) {
corr %>%
Matrix::tril() %>%
as("dgTMatrix") %>%
{ .@x[block_num[.@i + 1L] != block_num[.@j + 1L]]^2 } %>%
{ sum(.[. >= thr_r2]) }
}
compute_max_out <- function(block_num, corr) {
corr %>%
Matrix::tril() %>%
as("dgTMatrix") %>%
{ .@x[block_num[.@i + 1L] != block_num[.@j + 1L]]^2 } %>%
max()
}
corr <- readRDS(test_path("testdata/spMat.rds"))
res1 <- snp_ldsplit(corr, thr_r2 = 0.02, min_size = 10, max_size = 30,
max_K = 50, max_r2 = 1, max_cost = Inf)
# 30 * 13 = 390 < 401
# 10 * 40 = 400 < 401
expect_identical(res1$n_block, 14:40)
block_num <- lapply(res1$all_size, function(sizes) rep(seq_along(sizes), sizes))
costs <- sapply(block_num, function(nums) compute_cost(nums, corr, thr_r2 = 0.02))
expect_equal(res1$cost, costs)
prev_res1 <- readRDS(test_path("testdata/split_before.rds")) # < v1.10.1
expect_equal(res1$cost, prev_res1$cost) # only costs are the same
res2 <- snp_ldsplit(corr, thr_r2 = runif(1, 0, 0.2), min_size = 20, max_size = 40,
max_K = 50, max_r2 = 1, max_cost = Inf)
expect_identical(res2$n_block, 11:20)
res3 <- snp_ldsplit(corr, thr_r2 = runif(1, 0, 0.2), min_size = 20, max_size = 40,
max_K = 15, max_r2 = 1, max_cost = Inf)
expect_identical(res3$n_block, 11:15)
# parameter 'max_cost' works
max_cost <- runif(1, min(res1$cost), max(res1$cost))
res4 <- snp_ldsplit(corr, thr_r2 = 0.02, min_size = 10, max_size = 30,
max_K = 50, max_r2 = 1, max_cost = max_cost)
expect_true(all(res4$cost <= max_cost))
res1.bad <- subset(res1, !n_block %in% res4$n_block)
expect_true(all(res1.bad$cost > max_cost))
# parameter 'max_r2' works
max_r2 <- runif(1, 0.1, 0.4)
res5 <- snp_ldsplit(corr, thr_r2 = 0.02, min_size = 10, max_size = 50,
max_K = 100, max_r2 = max_r2, max_cost = Inf)
sapply(res5$all_size, function(sizes) {
block_num <- rep(seq_along(sizes), sizes)
max_r2_out <- compute_max_out(block_num, corr)
expect_lte(max_r2_out, max_r2)
max_r2_out / max_r2
})
# can provide multiple 'max_size'
max_r2 <- runif(1, 0.3, 1)
res6 <- snp_ldsplit(corr, thr_r2 = 0.02, min_size = 10, max_size = 30,
max_K = 50, max_r2 = max_r2, max_cost = Inf)
res7 <- snp_ldsplit(corr, thr_r2 = 0.02, min_size = 10, max_size = 40,
max_K = 50, max_r2 = max_r2, max_cost = Inf)
res67 <- snp_ldsplit(corr, thr_r2 = 0.02, min_size = 10,
max_size = sample(c(30, 40)),
max_K = 50, max_r2 = max_r2, max_cost = Inf)
expect_equal(res67[-1], unique(rbind(res6, res7)[-1]))
})
################################################################################
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.