Nothing
context("test-write_bed")
# write_bed is complicated, deserves its own section
# some simple tests don't depend on BEDMatrix being around...
# simulate X (to share across tests)
# create a simple matrix with random valid data
# have n != m so we can check dimensions within write_plink
n <- 5
m <- 10
miss <- 0.1 # add missingness for good measure
fo <- tempfile('delete-me-random-test') # output name without extensions!
fo_bed <- add_ext(fo, 'bed') # output with extension
# create ancestral allele frequencies
p_anc <- runif(m)
# create genotypes
X <- rbinom(n*m, 2, p_anc)
# add missing values
X[sample(n*m, n*m*miss)] <- NA
# turn into matrix
X <- matrix(X, nrow = m, ncol = n)
# genotypes with names (trivial in these cases, to match autogenerated values)
X_named <- X
rownames(X_named) <- 1:m
colnames(X_named) <- 1:n
# simulate phenotype (to share across tests)
pheno <- rnorm(n)
test_that("write_bed and read_bed work", {
# test that there are errors when crucial data is missing
expect_error(write_bed()) # all is missing
expect_error(write_bed('file')) # X is missing
expect_error(write_bed(X = matrix(NA, 1, 1))) # file is missing
# die if X is not a matrix!
expect_error(write_bed('file', 'not-matrix'))
# this should work
expect_silent(
write_bed(fo, X, verbose = FALSE)
)
# read tests
# parse data back, verify agreement!
expect_silent(
X2 <- read_bed(fo, m_loci = m, n_ind = n, verbose = FALSE)
)
expect_equal(X, X2)
# test that we can read same file when extension is not BED
fo_not_bed <- add_ext(fo, 'not') # output with wrong extension
file.copy( fo_bed, fo_not_bed )
expect_silent(
X3 <- read_bed(fo_not_bed, m_loci = m, n_ind = n, verbose = FALSE)
)
expect_equal(X, X3)
# cleanup
rm( X3 )
invisible( file.remove( fo_not_bed ) )
if ( file.exists( fo_not_bed ) )
stop( 'Could not delete: ', fo_not_bed )
# errors for missing params
expect_error( read_bed() ) # missing all
expect_error( read_bed(m_loci = m, n_ind = n) ) # missing file
expect_error( read_bed(fo, n_ind = n) ) # missing m_loci
expect_error( read_bed(fo, m_loci = m) ) # missing n_ind
# error tests for bad dimensions
expect_error( read_bed(fo, m_loci = n, n_ind = m) ) # reversed dimensions: padding checks and padding mismatches catch these!
expect_error( read_bed(fo, m_loci = m+1, n_ind = n) )
expect_error( read_bed(fo, m_loci = m-1, n_ind = n) )
expect_error( read_bed(fo, m_loci = m, n_ind = n-1) )
expect_error( read_bed(fo, m_loci = m, n_ind = n+4) ) # have to be off by a whole byte to notice some of these errors
# file path that doesn't exist
expect_error( read_bed( 'doesnt-exist', m_loci = m, n_ind = n ) )
# very long non-existent file path
expect_error( read_bed( '/made/up/long/path/doesnt-exist_doesnt-exist_doesnt-exist_doesnt-exist_doesnt-exist', m_loci = m, n_ind = n ) )
# delete output when done
invisible(file.remove(fo_bed))
# make sure deletion worked
if ( file.exists( fo_bed ) )
stop('Could not delete: ', fo_bed)
# now add invalid values to X, make sure it dies!
X2 <- X
# add a negative value in a random location
X2[sample(n*m, 1)] <- -1
# actually die!
expect_error( write_bed(fo, X2) )
# make sure output doesn't exist anymore (code should automatically clean it up)
expect_false( file.exists(fo_bed) )
# another test
X2 <- X
# add a 3 in a random location
X2[sample(n*m, 1)] <- 3
# actually die!
expect_error( write_bed(fo, X2) )
# make sure output doesn't exist anymore (code should automatically clean it up)
expect_false( file.exists(fo_bed) )
# NOTE: if X contains values that truncate to the correct range (say, 1.5, which becomes 1 upon truncation), then that's what Rcpp does internally and no errors are raised!
})
test_that("write_bed with `append = TRUE` works", {
# here's the awkward part, where we write it back in parts
# write every two lines
for ( i in 1 : ( nrow( X ) / 2 ) ) {
# try writing it back elsewhere
expect_silent(
write_bed(
fo,
X[ (2*i-1):(2*i), ],
append = TRUE,
verbose = FALSE
)
)
}
# read tests
# parse data back, verify agreement!
expect_silent(
X2 <- read_bed(fo, m_loci = m, n_ind = n, verbose = FALSE)
)
expect_equal(X, X2)
# delete output when done
invisible(file.remove(fo_bed))
# repeat writing one line at the time
for ( i in 1 : nrow( X ) ) {
# try writing it back elsewhere
expect_silent(
write_bed(
fo,
X[ i, , drop = FALSE ],
append = TRUE,
verbose = FALSE
)
)
}
# read tests
# parse data back, verify agreement!
expect_silent(
X2 <- read_bed(fo, m_loci = m, n_ind = n, verbose = FALSE)
)
expect_equal(X, X2)
# delete output when done
invisible(file.remove(fo_bed))
})
# let's include BEDMatrix in tests, if it's available
test_BEDMatrix <- FALSE
# this test requires BEDMatrix to read file and print it back out
if (suppressMessages(suppressWarnings(require(BEDMatrix)))) {
# wrapper to get BEDMatrix to read a matrix in the format we expect it to be
# NOTE: since file handles don't appear to close properly, don't use this on files that have to be deleted as part of the test!
read_bed_hack <- function(file) {
# run this way since BEDMatrix is rather verbose
# when there's no paired FAM/BIM, here provide dimensions and we're good (NULL requires FAM/BIM)
X <- suppressMessages(suppressWarnings(BEDMatrix(file)))
# this turns it into a regular matrix just like the one we expect
X <- as.matrix(X)
# transpose as needed
X <- t(X)
# remove dimnames for later comparison (adds unnecessary complexity)
dimnames(X) <- NULL
# finally done!
return(X)
}
# switch this to know we can test with BEDMatrix
test_BEDMatrix <- TRUE
}
# generic testing function
testOneInput <- function(nameIn, m_loci, n_ind) {
# load using my code
expect_silent(
X <- read_bed(nameIn, m_loci = m_loci, n_ind = n_ind, verbose = FALSE) # hack use dimensions from the X read by BEDMatrix
)
if (test_BEDMatrix) {
# load dummy file
X2 <- read_bed_hack(nameIn)
# compare now
expect_equal(X, X2)
}
# ensure expected failures do fail
# mess with dimensions on purpose
expect_error( read_bed(nameIn, m_loci = n_ind, n_ind = m_loci) ) # reverse dimensions, get caught because of padding checks (non-commutative unless both are factors of 4)
expect_error( read_bed(nameIn, m_loci = m_loci+1, n_ind = n_ind) )
expect_error( read_bed(nameIn, m_loci = m_loci-1, n_ind = n_ind) )
# NOTE: this used to cause an error when we required that paddings be zero, but now, given some real files (in the wild) with non-zero pads, and seeing that other software just ignores the pads, decided to ignore them too (now these don't trigger errors)
#expect_error( read_bed(nameIn, m_loci = m_loci, n_ind = n_ind-1) )
# sadly many +1 individual cases don't cause error because they just look like zeroes (in all loci) if there is enough padding.
# do expect an error if we're off by a whole byte (4 individuals)
expect_error( read_bed(nameIn, m_loci = m_loci, n_ind = n_ind+4) )
# write second version (BED only)
nameOut <- tempfile( paste0(nameIn, '_rewrite') )
expect_silent(
write_bed(nameOut, X, verbose = FALSE)
)
# compare outputs, they should be identical!
# this is less than ideal, but at least it's a pure R solution (not depending on linux 'cmp' or 'diff')
f1 <- add_ext(nameIn, 'bed')
f2 <- add_ext(nameOut, 'bed')
# load all data brute force
data1 <- readLines(f1, warn = FALSE)
data2 <- readLines(f2, warn = FALSE)
# compare now
expect_equal(data1, data2)
# remove temporary output file
file.remove(f2)
# make sure deletion worked
if ( file.exists(f2) )
stop('Could not delete: ', f2)
}
test_that("write_bed and read_bed agree with each other and BEDMatrix", {
# repeat on several files
testOneInput('dummy-33-101-0.1', 101, 33)
testOneInput('dummy-4-10-0.1', 10, 4)
testOneInput('dummy-5-10-0.1', 10, 5)
testOneInput('dummy-6-10-0.1', 10, 6)
testOneInput('dummy-7-10-0.1', 10, 7)
})
test_that("write_plink works", {
# test that there are errors when crucial data is missing
expect_error(write_plink()) # all is missing
expect_error(write_plink('file')) # tibble is missing
expect_error(write_plink(X = matrix(NA, 1, 1))) # file is missing
# for tests, change fo (file out) here
fo <- tempfile('test-write-plink-1')
# this autocompletes bim and fam!
expect_silent(
write_plink(fo, X, verbose = FALSE)
)
# make sure we can read outputs!
# read with my new function
expect_silent(
data <- read_plink(fo, verbose = FALSE)
)
# compare again (must compare named version for success)
expect_equal(X_named, data$X)
# compare row and column names internally
expect_equal( rownames(data$X), data$bim$id )
expect_equal( colnames(data$X), data$fam$id )
# delete all three outputs when done
# this also tests that all three files existed!
expect_silent( delete_files_plink(fo) )
# change again
fo <- tempfile('test-write-plink-2')
# this autocompletes bim and fam except for pheno
expect_silent(
write_plink(fo, X, pheno = pheno, verbose = FALSE)
)
# in this case parse fam and make sure we recover pheno!
expect_silent(
fam <- read_fam(fo, verbose = FALSE)
)
# compare!
expect_equal(fam$pheno, pheno)
# read with my new function
expect_silent(
data <- read_plink(fo, verbose = FALSE)
)
# compare again (must compare named version for success)
expect_equal(X_named, data$X)
# compare row and column names internally
expect_equal( rownames(data$X), data$bim$id )
expect_equal( colnames(data$X), data$fam$id )
# delete all three outputs when done
# this also tests that all three files existed!
expect_silent( delete_files_plink(fo) )
# test with `write_phen = TRUE`
expect_silent(
write_plink(fo, X, pheno = pheno, verbose = FALSE, write_phen = TRUE)
)
# in this case parse phen and make sure we recover pheno!
expect_silent(
phen <- read_phen(fo, verbose = FALSE)
)
# compare!
expect_equal(phen$pheno, pheno)
# (do not re-test genotype parts)
# delete all outputs when done
# this also tests that all files existed!
expect_silent( delete_files_plink(fo) )
expect_silent( delete_files_phen(fo) )
# change again
fo <- tempfile('test-write-plink-3')
# create a case in which fam is also provided, make sure we get warning
fam <- make_fam(n = n)
expect_warning( write_plink(fo, X, fam = fam, pheno = pheno) )
# parse fam and make sure pheno was missing
expect_silent(
fam <- read_fam(fo, verbose = FALSE)
)
expect_equal(fam$pheno, rep.int(0, n))
# delete all three outputs when done
# this also tests that all three files existed!
expect_silent( delete_files_plink(fo) )
})
test_that("write_plink with `append = TRUE` works", {
# for this test, we need an original BIM table to compare to
bim <- make_bim( n = m )
# need to change some modes for the purpose of the test only
bim$chr <- as.character( bim$chr )
bim$id <- as.character( bim$id )
bim$ref <- as.character( bim$ref )
bim$alt <- as.character( bim$alt )
# here's the awkward part, where we write it back in parts
# write every two lines
for ( i in 1 : ( nrow( X ) / 2 ) ) {
indexes <- (2*i-1):(2*i)
# try writing it back elsewhere
expect_silent(
write_plink(
fo,
X = X[ indexes, ],
bim = bim[ indexes, ],
append = TRUE,
verbose = FALSE
)
)
}
# read tests
# parse data back, verify agreement!
expect_silent(
obj2 <- read_plink(fo, verbose = FALSE)
)
expect_equal( X_named, obj2$X ) # need named version for this test
expect_equal( bim, obj2$bim[] ) # need [] to change stupid readr class, for testing only
# delete all three outputs when done
# this also tests that all three files existed!
expect_silent( delete_files_plink(fo) )
# repeat writing one line at the time
for ( i in 1 : nrow( X ) ) {
# try writing it back elsewhere
expect_silent(
write_plink(
fo,
X = X[ i, , drop = FALSE ],
bim = bim[ i, ],
append = TRUE,
verbose = FALSE
)
)
}
# read tests
# parse data back, verify agreement!
expect_silent(
obj2 <- read_plink(fo, verbose = FALSE)
)
expect_equal( X_named, obj2$X ) # need named version for this test
expect_equal( bim, obj2$bim[] ) # need [] to change stupid readr class, for testing only
# delete all three outputs when done
# this also tests that all three files existed!
expect_silent( delete_files_plink(fo) )
})
# this is a case reported by richelbilderbeek
test_that( "read_plink works with file without zero pads", {
name <- 'HumanOrigins249_tiny'
expect_silent(
data <- read_plink( name, verbose = FALSE )
)
if (test_BEDMatrix) {
# load using BEDMatrix
X2 <- read_bed_hack( name )
# for comparison, clean up genio version (read_plink returns row/col names)
X <- data$X
dimnames(X) <- NULL
# compare now
expect_equal( X, X2 )
}
})
test_that( "geno_to_char works", {
# use data where plink1 told us what the answer was
name <- 'dummy-4-10-0.1'
expect_silent(
data <- read_plink( name, verbose = FALSE )
)
X <- data$X
bim <- data$bim
m <- nrow( X )
n <- ncol( X )
# need to primitively parse corresponding ped output
# (sorry this is tedious)
name_ped <- paste0( name, '.ped' )
Y_exp <- readr::read_table( name_ped, col_names = FALSE )
# remove FAM columns (first 6)
# also transpose
Y_exp <- t( Y_exp[ , -(1:6) ] )
# and collapse the way we have it
for ( i in 1 : m ) {
# indexes on Y matrix
# first row
j <- 2 * i - 1
# get two rows at the time
# store back in final row
Y_exp[ i, ] <- paste0( Y_exp[ j, ], '/', Y_exp[ j + 1, ] )
}
# shrink matrix now
Y_exp <- Y_exp[ 1:m , ]
# cases are coded as 0/0 are NA, replace now:
Y_exp[ Y_exp == '0/0' ] <- NA
# copy correct names (missing in this data)
dimnames( Y_exp ) <- dimnames( X )
# for simplicity, flip heterozygotes so they're all A/B instead of some being B/A
Y_exp[ Y_exp == 'B/A' ] <- 'A/B'
# START TESTING!
# errors due to missing required arguments
expect_error( geno_to_char( X = X ) )
expect_error( geno_to_char( bim = bim ) )
# disagreeing dimensions
expect_error( geno_to_char( X, bim[ -1, ] ) )
# slashes in allele names
bim_bad <- bim
i <- sample( m, 1 )
bim_bad$ref[ i ] <- 'a/b'
expect_error( geno_to_char( X, bim_bad ) )
bim_bad <- bim
i <- sample( m, 1 )
bim_bad$alt[ i ] <- 'a/b'
expect_error( geno_to_char( X, bim_bad ) )
# same ref/alt at a locus
bim_bad <- bim
i <- sample( m, 1 )
bim_bad$alt[ i ] <- bim_bad$ref[ i ]
expect_error( geno_to_char( X, bim_bad ) )
# invalid values in X
X_bad <- X
i <- sample( m*n, 1 )
X_bad[ i ] <- -1
expect_error( geno_to_char( X_bad, bim ) )
X_bad <- X
i <- sample( m*n, 1 )
X_bad[ i ] <- 0.5
expect_error( geno_to_char( X_bad, bim ) )
X_bad <- X
i <- sample( m*n, 1 )
X_bad[ i ] <- 3
expect_error( geno_to_char( X_bad, bim ) )
# now a succesful run
expect_silent(
Y_obs <- geno_to_char(X, bim)
)
# allele order doesn't matter, so normalize it here as we did with the expected matrix
Y_obs[ Y_obs == 'B/A' ] <- 'A/B'
# make sure we recovered the desired data!
expect_equal( Y_obs, Y_exp )
})
# this is a case reported by richelbilderbeek
# sadly, on my machine the "ruthless crash" was treated as a normal error (same as it is after the fix), but might as well include that here too
test_that( 'write_bed does not crash ruthlessly when output directory does not exist', {
expect_error(
write_bed( 'dir-does-not-exist/test', X, verbose = FALSE )
)
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.