Nothing
################################################################################
context("FAST_IMPUTE")
options(bigstatsr.check.parallel.blas = FALSE)
################################################################################
test_that("fast imputation (xgboost) works", {
skip_if_not_installed("xgboost")
skip_if(is_cran)
suppressMessages({
library(Matrix)
library(foreach)
library(magrittr)
library(dplyr)
})
bigsnp <- snp_attachExtdata()
G <- bigsnp$genotypes
expect_equal(G$code256, CODE_012)
corr <- snp_cor(G)
ind <- which(apply(corr, 2, function(x) max(x[x < 1])) > 0.6)
indNA <- foreach(j = ind, .combine = "rbind") %do% {
cbind(sample(nrow(G), 100), j)
}
# Create a copy of our bigSNP example
tmpfile <- tempfile()
bigsnp.copy <- snp_writeBed(bigsnp, bedfile = paste0(tmpfile, ".bed")) %>%
snp_readBed(backingfile = tmpfile) %>%
snp_attach()
# Fill some missing values
GNA <- bigsnp.copy$genotypes
GNA[indNA] <- as.raw(3)
counts <- big_counts(GNA)
expect_equal(sum(counts[4, ]), nrow(indNA))
################################################################################
# Fast imputation
time <- system.time(
infos <- snp_fastImpute(GNA, infos.chr = bigsnp$map$chromosome, p.train = 0.6)
)
# expect_lt(time[3], 10)
# Still NAs
counts <- big_counts(GNA)
expect_equal(sum(counts[4, ]), nrow(indNA))
GNA$code256 <- CODE_IMPUTE_PRED
infosNA <- tibble(
col = indNA[, 2],
error = (GNA[indNA] != G[indNA])
) %>%
group_by(col) %>%
summarise(nb_err = sum(error)) %>%
mutate(nb_err_est = (infos[1, ] * infos[2, ] * nrow(G))[col])
pval <- anova(lm(nb_err ~ nb_err_est - 1, data = infosNA))$`Pr(>F)`[[1]]
expect_lt(pval, 2e-16)
################################################################################
fake <- snp_fake(100, 200)
G <- fake$genotypes
G[] <- sample(as.raw(0:3), size = length(G), replace = TRUE)
CHR <- fake$map$chromosome
# 'infos.chr' needs to have the same length as the number of SNPs
expect_error(snp_fastImpute(G, 1), "Incompatibility between dimensions.",
fixed = TRUE)
# you can't impute randomness, right?
expect_gt(mean(snp_fastImpute(G, CHR)[2, ]), 0.5)
################################################################################
fake <- snp_fake(100, 200)
G <- fake$genotypes
G[] <- sample(as.raw(0:3), size = length(G), replace = TRUE)
CHR <- sort(sample(1:3, ncol(G), TRUE))
G2 <- bigsnpr:::FBM_infos(G)
ind <- 1:11 + 150
G2[1, ind] <- seq(0, 1, 0.1)
G2 <- snp_fastImpute(G, CHR)
G3 <- G$copy(CODE_IMPUTE_PRED)
nbNA <- colSums(is.na(G3[]))
expect_true(all(nbNA[ind] > 0))
expect_true(all(nbNA[-ind] == 0))
################################################################################
bigsnp <- snp_attachExtdata()
G <- bigsnp$genotypes
CHR <- bigsnp$map$chromosome
elemNA <- sample(length(G), size = 20)
G2 <- big_copy(G); G2[elemNA] <- as.raw(3) # NA
G3 <- big_copy(G); G3[elemNA] <- as.raw(3) # NA
expect_equal(snp_fastImpute(G2, CHR, seed = 2)[],
snp_fastImpute(G3, CHR, seed = 2, ncores = 2)[])
expect_equal(G2[], G3[])
})
################################################################################
test_that("fast imputation (simple) works", {
G <- snp_attachExtdata("example-missing.bed")$genotypes
# cbind(colMeans(G[]), colMeans(G[], na.rm = TRUE))
expect_error(snp_fastImputeSimple(G, "mean"), "should be one of")
expect_warning(G1 <- snp_fastImputeSimple(G, "zero"), "deprecated")
expect_equal(G[c(18, 72), 400], rep(NA_real_, 2))
expect_equal(G1[c(18, 72), 400], rep(0, 2))
G2 <- snp_fastImputeSimple(G, "mean0")
expect_equal(G[c(18, 72), 400], rep(NA_real_, 2))
expect_equal(G2[c(18, 72), 400], rep(1, 2))
G3 <- snp_fastImputeSimple(G, "mean2")
expect_equal(G3[c(18, 72), 400], rep(1.01, 2))
G4 <- snp_fastImputeSimple(G, "mode")
expect_equal(G4[c(4, 12), 1], rep(0, 2))
expect_equal(G4[c(18, 72), 400], rep(1, 2))
imp_val <- replicate(500, {
G5 <- snp_fastImputeSimple(G, "random")
G5[c(18, 72), 400]
})
p <- mean(G[, 400], na.rm = TRUE) / 2
prob <- c((1 - p)^2, 2 * p * (1 - p), p^2)
expect_gt(stats::chisq.test(table(imp_val), p = prob)$p.value, 1e-4)
expect_equal(snp_fastImputeSimple(G, method = "mean2")[],
snp_fastImputeSimple(G, method = "mean2", ncores = 2)[])
})
################################################################################
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.