Nothing
## Helper: compute MAF of a SNP over a subset of subject indices
snp_maf <- function(bdinfo, snp_idx, samp_idx = NULL) {
snp <- getsnp(bdinfo, snp_idx, dosageonly = TRUE)
ds <- if (is.null(samp_idx)) snp$dosage else snp$dosage[samp_idx]
aaf <- mean(ds, na.rm = TRUE) / 2.0
min(aaf, 1.0 - aaf)
}
test_that("subsetbd errors", {
bdfile <- system.file("extdata", "vcf1a.bdose", package = "BinaryDosage")
expect_error(subsetbd(bdose_file = "x.bdose"),
"No input binary dosage files specified")
expect_error(subsetbd(bdfiles = bdfile),
"No output .bdose file specified")
expect_error(subsetbd(bdfiles = bdfile, bdose_file = "x.bdose"),
"At least one of")
expect_error(subsetbd(bdfiles = bdfile, bdose_file = "x.bdose",
locations = c(10000, 11000),
startloc = 10000),
"locations and startloc/endloc cannot both be specified")
expect_error(subsetbd(bdfiles = bdfile, bdose_file = "x.bdose",
startloc = 10000),
"endloc must be specified")
expect_error(subsetbd(bdfiles = bdfile, bdose_file = "x.bdose",
endloc = 19000),
"startloc must be specified")
expect_error(subsetbd(bdfiles = bdfile, bdose_file = "x.bdose",
startloc = 15000, endloc = 10000),
"startloc must be less than or equal to endloc")
expect_error(subsetbd(bdfiles = bdfile, bdose_file = "x.bdose",
minmaf = "high"),
"minmaf must be a single numeric value")
expect_error(subsetbd(bdfiles = bdfile, bdose_file = "x.bdose",
minmaf = 0.6),
"minmaf must be between 0 and 0.5")
expect_error(subsetbd(bdfiles = bdfile, bdose_file = "x.bdose",
subjectids = c("X1", "X2")),
"No subjects match")
expect_error(subsetbd(bdfiles = bdfile, bdose_file = "x.bdose",
minmaf = 0.5),
"No SNPs pass")
})
test_that("subsetbd subject filter", {
bdfile <- system.file("extdata", "vcf1a.bdose", package = "BinaryDosage")
old_info <- getbdinfo(bdfile)
keep_ids <- old_info$samples$sid[1:30]
bdose <- tempfile(fileext = ".bdose")
expect_error(subsetbd(bdfiles = bdfile, bdose_file = bdose,
subjectids = keep_ids), NA)
new_info <- getbdinfo(bdose)
expect_equal(nrow(new_info$samples), 30L)
expect_equal(new_info$samples$sid, keep_ids)
expect_equal(nrow(new_info$snps), nrow(old_info$snps))
# Verify dosage values match for a sample of SNPs
for (i in c(1L, 5L, 10L)) {
old_snp <- getsnp(old_info, i, dosageonly = TRUE)
new_snp <- getsnp(new_info, i, dosageonly = TRUE)
expect_equal(new_snp$dosage, old_snp$dosage[1:30], tolerance = 5e-5,
label = paste0("subject filter dosage SNP ", i))
}
})
test_that("subsetbd location vector filter", {
bdfile <- system.file("extdata", "vcf1a.bdose", package = "BinaryDosage")
old_info <- getbdinfo(bdfile)
keep_locs <- c(12000, 15000, 17000) # SNPs 3, 6, 8
bdose <- tempfile(fileext = ".bdose")
expect_error(subsetbd(bdfiles = bdfile, bdose_file = bdose,
locations = keep_locs), NA)
new_info <- getbdinfo(bdose)
expect_equal(nrow(new_info$snps), length(keep_locs))
expect_equal(new_info$snps$location, keep_locs)
expect_equal(nrow(new_info$samples), nrow(old_info$samples))
# Verify data for each kept SNP
orig_idx <- match(keep_locs, old_info$snps$location)
for (j in seq_along(keep_locs)) {
old_snp <- getsnp(old_info, orig_idx[j], dosageonly = TRUE)
new_snp <- getsnp(new_info, j, dosageonly = TRUE)
expect_equal(new_snp$dosage, old_snp$dosage, tolerance = 5e-5,
label = paste0("location filter dosage SNP loc=", keep_locs[j]))
}
})
test_that("subsetbd location range filter", {
bdfile <- system.file("extdata", "vcf1a.bdose", package = "BinaryDosage")
old_info <- getbdinfo(bdfile)
# startloc=13000, endloc=16000 keeps SNPs 4,5,6,7 (locs 13000-16000)
exp_locs <- c(13000, 14000, 15000, 16000)
bdose <- tempfile(fileext = ".bdose")
expect_error(subsetbd(bdfiles = bdfile, bdose_file = bdose,
startloc = 13000, endloc = 16000), NA)
new_info <- getbdinfo(bdose)
expect_equal(new_info$snps$location, exp_locs)
orig_idx <- match(exp_locs, old_info$snps$location)
for (j in seq_along(exp_locs)) {
old_snp <- getsnp(old_info, orig_idx[j], dosageonly = TRUE)
new_snp <- getsnp(new_info, j, dosageonly = TRUE)
expect_equal(new_snp$dosage, old_snp$dosage, tolerance = 5e-5,
label = paste0("range filter dosage loc=", exp_locs[j]))
}
})
test_that("subsetbd MAF filter", {
bdfile <- system.file("extdata", "vcf1a.bdose", package = "BinaryDosage")
old_info <- getbdinfo(bdfile)
min_maf <- 0.25
# Determine expected kept SNPs
keep_snps <- which(vapply(seq_len(nrow(old_info$snps)),
function(i) snp_maf(old_info, i) >= min_maf,
logical(1L)))
bdose <- tempfile(fileext = ".bdose")
expect_error(subsetbd(bdfiles = bdfile, bdose_file = bdose,
minmaf = min_maf), NA)
new_info <- getbdinfo(bdose)
expect_equal(nrow(new_info$snps), length(keep_snps))
expect_equal(new_info$snps$location,
old_info$snps$location[keep_snps])
for (j in seq_along(keep_snps)) {
old_snp <- getsnp(old_info, keep_snps[j], dosageonly = TRUE)
new_snp <- getsnp(new_info, j, dosageonly = TRUE)
expect_equal(new_snp$dosage, old_snp$dosage, tolerance = 5e-5,
label = paste0("maf filter dosage SNP ", keep_snps[j]))
}
})
test_that("subsetbd combined filters", {
bdfile <- system.file("extdata", "vcf1a.bdose", package = "BinaryDosage")
old_info <- getbdinfo(bdfile)
keep_ids <- old_info$samples$sid[1:30]
samp_idx <- 1:30
min_maf <- 0.25
# Expected: range 13000-18000, maf >= 0.25 over first 30 subjects
exp_locs <- c(13000, 14000, 15000, 16000, 17000, 18000)
loc_snps <- match(exp_locs, old_info$snps$location)
keep_snps <- loc_snps[vapply(loc_snps,
function(i) snp_maf(old_info, i, samp_idx) >= min_maf,
logical(1L))]
bdose <- tempfile(fileext = ".bdose")
expect_error(subsetbd(bdfiles = bdfile,
bdose_file = bdose,
minmaf = min_maf,
startloc = 13000,
endloc = 18000,
subjectids = keep_ids), NA)
new_info <- getbdinfo(bdose)
expect_equal(nrow(new_info$samples), 30L)
expect_equal(new_info$samples$sid, keep_ids)
expect_equal(new_info$snps$location, old_info$snps$location[keep_snps])
for (j in seq_along(keep_snps)) {
old_snp <- getsnp(old_info, keep_snps[j], dosageonly = TRUE)
new_snp <- getsnp(new_info, j, dosageonly = TRUE)
expect_equal(new_snp$dosage, old_snp$dosage[samp_idx], tolerance = 5e-5,
label = paste0("combined filter dosage SNP ", keep_snps[j]))
}
})
test_that("subsetbd format 5 input", {
# Verify subsetbd also accepts format 5 as input
bdfile4 <- system.file("extdata", "vcf1a.bdose", package = "BinaryDosage")
bdose5_in <- tempfile(fileext = ".bdose")
updatebd(bdfiles = bdfile4, bdose_file = bdose5_in)
bdose_out <- tempfile(fileext = ".bdose")
expect_error(subsetbd(bdfiles = bdose5_in,
bdose_file = bdose_out,
startloc = 12000,
endloc = 14000), NA)
new_info <- getbdinfo(bdose_out)
expect_equal(new_info$snps$location, c(12000, 13000, 14000))
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.