context("test-write_bed")
# write_bed is complicated, deserves its own section
# some simple tests don't depend on BEDMatrix being around...
# simulate X (to share across tests)
# create a simple matrix with random valid data
# have n != m so we can check dimensions within write_plink
n <- 5
m <- 10
miss <- 0.1 # add missingness for good measure
fo <- tempfile('delete-me-random-test') # output name without extensions!
fo_bed <- add_ext(fo, 'bed') # output with extension
# create ancestral allele frequencies
p_anc <- runif(m)
# create genotypes
X <- rbinom(n*m, 2, p_anc)
# add missing values
X[sample(n*m, n*m*miss)] <- NA
# turn into matrix
X <- matrix(X, nrow = m, ncol = n)
# genotypes with names (trivial in these cases, to match autogenerated values)
X_named <- X
rownames(X_named) <- 1:m
colnames(X_named) <- 1:n
# simulate phenotype (to share across tests)
pheno <- rnorm(n)
test_that("write_bed and read_bed work", {
# test that there are errors when crucial data is missing
expect_error(write_bed()) # all is missing
expect_error(write_bed('file')) # X is missing
expect_error(write_bed(X = matrix(NA, 1, 1))) # file is missing
# die if X is not a matrix!
expect_error(write_bed('file', 'not-matrix'))
# this should work
expect_silent(
write_bed(fo, X, verbose = FALSE)
)
# read tests
# parse data back, verify agreement!
expect_silent(
X2 <- read_bed(fo, m_loci = m, n_ind = n, verbose = FALSE)
)
expect_equal(X, X2)
# test that we can read same file when extension is not BED
fo_not_bed <- add_ext(fo, 'not') # output with wrong extension
file.copy( fo_bed, fo_not_bed )
expect_silent(
X3 <- read_bed(fo_not_bed, m_loci = m, n_ind = n, verbose = FALSE)
)
expect_equal(X, X3)
# cleanup
rm( X3 )
invisible( file.remove( fo_not_bed ) )
if ( file.exists( fo_not_bed ) )
stop( 'Could not delete: ', fo_not_bed )
# errors for missing params
expect_error( read_bed() ) # missing all
expect_error( read_bed(m_loci = m, n_ind = n) ) # missing file
expect_error( read_bed(fo, n_ind = n) ) # missing m_loci
expect_error( read_bed(fo, m_loci = m) ) # missing n_ind
# error tests for bad dimensions
expect_error( read_bed(fo, m_loci = n, n_ind = m) ) # reversed dimensions: padding checks and padding mismatches catch these!
expect_error( read_bed(fo, m_loci = m+1, n_ind = n) )
expect_error( read_bed(fo, m_loci = m-1, n_ind = n) )
expect_error( read_bed(fo, m_loci = m, n_ind = n-1) )
expect_error( read_bed(fo, m_loci = m, n_ind = n+4) ) # have to be off by a whole byte to notice some of these errors
# file path that doesn't exist
expect_error( read_bed( 'doesnt-exist', m_loci = m, n_ind = n ) )
# very long non-existent file path
expect_error( read_bed( '/made/up/long/path/doesnt-exist_doesnt-exist_doesnt-exist_doesnt-exist_doesnt-exist', m_loci = m, n_ind = n ) )
# delete output when done
invisible(file.remove(fo_bed))
# make sure deletion worked
if ( file.exists( fo_bed ) )
stop('Could not delete: ', fo_bed)
# now add invalid values to X, make sure it dies!
X2 <- X
# add a negative value in a random location
X2[sample(n*m, 1)] <- -1
# actually die!
expect_error( write_bed(fo, X2) )
# make sure output doesn't exist anymore (code should automatically clean it up)
expect_false( file.exists(fo_bed) )
# another test
X2 <- X
# add a 3 in a random location
X2[sample(n*m, 1)] <- 3
# actually die!
expect_error( write_bed(fo, X2) )
# make sure output doesn't exist anymore (code should automatically clean it up)
expect_false( file.exists(fo_bed) )
# NOTE: if X contains values that truncate to the correct range (say, 1.5, which becomes 1 upon truncation), then that's what Rcpp does internally and no errors are raised!
})
test_that("write_bed with `append = TRUE` works", {
# here's the awkward part, where we write it back in parts
# write every two lines
for ( i in 1 : ( nrow( X ) / 2 ) ) {
# try writing it back elsewhere
expect_silent(
write_bed(
fo,
X[ (2*i-1):(2*i), ],
append = TRUE,
verbose = FALSE
)
)
}
# read tests
# parse data back, verify agreement!
expect_silent(
X2 <- read_bed(fo, m_loci = m, n_ind = n, verbose = FALSE)
)
expect_equal(X, X2)
# delete output when done
invisible(file.remove(fo_bed))
# repeat writing one line at the time
for ( i in 1 : nrow( X ) ) {
# try writing it back elsewhere
expect_silent(
write_bed(
fo,
X[ i, , drop = FALSE ],
append = TRUE,
verbose = FALSE
)
)
}
# read tests
# parse data back, verify agreement!
expect_silent(
X2 <- read_bed(fo, m_loci = m, n_ind = n, verbose = FALSE)
)
expect_equal(X, X2)
# delete output when done
invisible(file.remove(fo_bed))
})
# let's include BEDMatrix in tests, if it's available
test_BEDMatrix <- FALSE
# this test requires BEDMatrix to read file and print it back out
if (suppressMessages(suppressWarnings(require(BEDMatrix)))) {
# wrapper to get BEDMatrix to read a matrix in the format we expect it to be
# NOTE: since file handles don't appear to close properly, don't use this on files that have to be deleted as part of the test!
read_bed_hack <- function(file) {
# run this way since BEDMatrix is rather verbose
# when there's no paired FAM/BIM, here provide dimensions and we're good (NULL requires FAM/BIM)
X <- suppressMessages(suppressWarnings(BEDMatrix(file)))
# this turns it into a regular matrix just like the one we expect
X <- as.matrix(X)
# transpose as needed
X <- t(X)
# remove dimnames for later comparison (adds unnecessary complexity)
dimnames(X) <- NULL
# finally done!
return(X)
}
# switch this to know we can test with BEDMatrix
test_BEDMatrix <- TRUE
}
# generic testing function
testOneInput <- function(nameIn, m_loci, n_ind) {
# load using my code
expect_silent(
X <- read_bed(nameIn, m_loci = m_loci, n_ind = n_ind, verbose = FALSE) # hack use dimensions from the X read by BEDMatrix
)
if (test_BEDMatrix) {
# load dummy file
X2 <- read_bed_hack(nameIn)
# compare now
expect_equal(X, X2)
}
# ensure expected failures do fail
# mess with dimensions on purpose
expect_error( read_bed(nameIn, m_loci = n_ind, n_ind = m_loci) ) # reverse dimensions, get caught because of padding checks (non-commutative unless both are factors of 4)
expect_error( read_bed(nameIn, m_loci = m_loci+1, n_ind = n_ind) )
expect_error( read_bed(nameIn, m_loci = m_loci-1, n_ind = n_ind) )
# NOTE: this used to cause an error when we required that paddings be zero, but now, given some real files (in the wild) with non-zero pads, and seeing that other software just ignores the pads, decided to ignore them too (now these don't trigger errors)
#expect_error( read_bed(nameIn, m_loci = m_loci, n_ind = n_ind-1) )
# sadly many +1 individual cases don't cause error because they just look like zeroes (in all loci) if there is enough padding.
# do expect an error if we're off by a whole byte (4 individuals)
expect_error( read_bed(nameIn, m_loci = m_loci, n_ind = n_ind+4) )
# write second version (BED only)
nameOut <- tempfile( paste0(nameIn, '_rewrite') )
expect_silent(
write_bed(nameOut, X, verbose = FALSE)
)
# compare outputs, they should be identical!
# this is less than ideal, but at least it's a pure R solution (not depending on linux 'cmp' or 'diff')
f1 <- add_ext(nameIn, 'bed')
f2 <- add_ext(nameOut, 'bed')
# load all data brute force
data1 <- readLines(f1, warn = FALSE)
data2 <- readLines(f2, warn = FALSE)
# compare now
expect_equal(data1, data2)
# remove temporary output file
file.remove(f2)
# make sure deletion worked
if ( file.exists(f2) )
stop('Could not delete: ', f2)
}
test_that("write_bed and read_bed agree with each other and BEDMatrix", {
# repeat on several files
testOneInput('dummy-33-101-0.1', 101, 33)
testOneInput('dummy-33-2-0.1', 2, 33)
testOneInput('dummy-4-10-0.1', 10, 4)
testOneInput('dummy-5-10-0.1', 10, 5)
testOneInput('dummy-6-10-0.1', 10, 6)
testOneInput('dummy-7-10-0.1', 10, 7)
})
test_that("write_plink works", {
# test that there are errors when crucial data is missing
expect_error(write_plink()) # all is missing
expect_error(write_plink('file')) # tibble is missing
expect_error(write_plink(X = matrix(NA, 1, 1))) # file is missing
# for tests, change fo (file out) here
fo <- tempfile('test-write-plink-1')
# this autocompletes bim and fam!
expect_silent(
write_plink(fo, X, verbose = FALSE)
)
# make sure we can read outputs!
# read with my new function
expect_silent(
data <- read_plink(fo, verbose = FALSE)
)
# compare again (must compare named version for success)
expect_equal(X_named, data$X)
# compare row and column names internally
expect_equal( rownames(data$X), data$bim$id )
expect_equal( colnames(data$X), data$fam$id )
# delete all three outputs when done
# this also tests that all three files existed!
expect_silent( delete_files_plink(fo) )
# change again
fo <- tempfile('test-write-plink-2')
# this autocompletes bim and fam except for pheno
expect_silent(
write_plink(fo, X, pheno = pheno, verbose = FALSE)
)
# in this case parse fam and make sure we recover pheno!
expect_silent(
fam <- read_fam(fo, verbose = FALSE)
)
# compare!
expect_equal(fam$pheno, pheno)
# read with my new function
expect_silent(
data <- read_plink(fo, verbose = FALSE)
)
# compare again (must compare named version for success)
expect_equal(X_named, data$X)
# compare row and column names internally
expect_equal( rownames(data$X), data$bim$id )
expect_equal( colnames(data$X), data$fam$id )
# delete all three outputs when done
# this also tests that all three files existed!
expect_silent( delete_files_plink(fo) )
# test with `write_phen = TRUE`
expect_silent(
write_plink(fo, X, pheno = pheno, verbose = FALSE, write_phen = TRUE)
)
# in this case parse phen and make sure we recover pheno!
expect_silent(
phen <- read_phen(fo, verbose = FALSE)
)
# compare!
expect_equal(phen$pheno, pheno)
# (do not re-test genotype parts)
# delete all outputs when done
# this also tests that all files existed!
expect_silent( delete_files_plink(fo) )
expect_silent( delete_files_phen(fo) )
# change again
fo <- tempfile('test-write-plink-3')
# create a case in which fam is also provided, make sure we get warning
fam <- make_fam(n = n)
expect_warning( write_plink(fo, X, fam = fam, pheno = pheno) )
# parse fam and make sure pheno was missing
expect_silent(
fam <- read_fam(fo, verbose = FALSE)
)
expect_equal(fam$pheno, rep.int(0, n))
# delete all three outputs when done
# this also tests that all three files existed!
expect_silent( delete_files_plink(fo) )
})
test_that("write_plink with `append = TRUE` works", {
# for this test, we need an original BIM table to compare to
bim <- make_bim( n = m )
# need to change some modes for the purpose of the test only
bim$chr <- as.character( bim$chr )
bim$id <- as.character( bim$id )
bim$ref <- as.character( bim$ref )
bim$alt <- as.character( bim$alt )
# here's the awkward part, where we write it back in parts
# write every two lines
for ( i in 1 : ( nrow( X ) / 2 ) ) {
indexes <- (2*i-1):(2*i)
# try writing it back elsewhere
expect_silent(
write_plink(
fo,
X = X[ indexes, ],
bim = bim[ indexes, ],
append = TRUE,
verbose = FALSE
)
)
}
# read tests
# parse data back, verify agreement!
expect_silent(
obj2 <- read_plink(fo, verbose = FALSE)
)
expect_equal( X_named, obj2$X ) # need named version for this test
expect_equal( bim, obj2$bim[] ) # need [] to change stupid readr class, for testing only
# delete all three outputs when done
# this also tests that all three files existed!
expect_silent( delete_files_plink(fo) )
# repeat writing one line at the time
for ( i in 1 : nrow( X ) ) {
# try writing it back elsewhere
expect_silent(
write_plink(
fo,
X = X[ i, , drop = FALSE ],
bim = bim[ i, ],
append = TRUE,
verbose = FALSE
)
)
}
# read tests
# parse data back, verify agreement!
expect_silent(
obj2 <- read_plink(fo, verbose = FALSE)
)
expect_equal( X_named, obj2$X ) # need named version for this test
expect_equal( bim, obj2$bim[] ) # need [] to change stupid readr class, for testing only
# delete all three outputs when done
# this also tests that all three files existed!
expect_silent( delete_files_plink(fo) )
})
# this is a case reported by richelbilderbeek
test_that( "read_plink works with file without zero pads", {
name <- 'HumanOrigins249_tiny'
expect_silent(
data <- read_plink( name, verbose = FALSE )
)
if (test_BEDMatrix) {
# load using BEDMatrix
X2 <- read_bed_hack( name )
# for comparison, clean up genio version (read_plink returns row/col names)
X <- data$X
dimnames(X) <- NULL
# compare now
expect_equal( X, X2 )
}
})
test_that( "geno_to_char works", {
# use data where plink1 told us what the answer was
name <- 'dummy-4-10-0.1'
expect_silent(
data <- read_plink( name, verbose = FALSE )
)
X <- data$X
bim <- data$bim
m <- nrow( X )
n <- ncol( X )
# need to primitively parse corresponding ped output
# (sorry this is tedious)
name_ped <- paste0( name, '.ped' )
Y_exp <- readr::read_table( name_ped, col_names = FALSE )
# remove FAM columns (first 6)
# also transpose
Y_exp <- t( Y_exp[ , -(1:6) ] )
# and collapse the way we have it
for ( i in 1 : m ) {
# indexes on Y matrix
# first row
j <- 2 * i - 1
# get two rows at the time
# store back in final row
Y_exp[ i, ] <- paste0( Y_exp[ j, ], '/', Y_exp[ j + 1, ] )
}
# shrink matrix now
Y_exp <- Y_exp[ 1:m , ]
# cases are coded as 0/0 are NA, replace now:
Y_exp[ Y_exp == '0/0' ] <- NA
# copy correct names (missing in this data)
dimnames( Y_exp ) <- dimnames( X )
# for simplicity, flip heterozygotes so they're all A/B instead of some being B/A
Y_exp[ Y_exp == 'B/A' ] <- 'A/B'
# START TESTING!
# errors due to missing required arguments
expect_error( geno_to_char( X = X ) )
expect_error( geno_to_char( bim = bim ) )
# disagreeing dimensions
expect_error( geno_to_char( X, bim[ -1, ] ) )
# slashes in allele names
bim_bad <- bim
i <- sample( m, 1 )
bim_bad$ref[ i ] <- 'a/b'
expect_error( geno_to_char( X, bim_bad ) )
bim_bad <- bim
i <- sample( m, 1 )
bim_bad$alt[ i ] <- 'a/b'
expect_error( geno_to_char( X, bim_bad ) )
# same ref/alt at a locus
bim_bad <- bim
i <- sample( m, 1 )
bim_bad$alt[ i ] <- bim_bad$ref[ i ]
expect_error( geno_to_char( X, bim_bad ) )
# invalid values in X
X_bad <- X
i <- sample( m*n, 1 )
X_bad[ i ] <- -1
expect_error( geno_to_char( X_bad, bim ) )
X_bad <- X
i <- sample( m*n, 1 )
X_bad[ i ] <- 0.5
expect_error( geno_to_char( X_bad, bim ) )
X_bad <- X
i <- sample( m*n, 1 )
X_bad[ i ] <- 3
expect_error( geno_to_char( X_bad, bim ) )
# now a succesful run
expect_silent(
Y_obs <- geno_to_char(X, bim)
)
# allele order doesn't matter, so normalize it here as we did with the expected matrix
Y_obs[ Y_obs == 'B/A' ] <- 'A/B'
# make sure we recovered the desired data!
expect_equal( Y_obs, Y_exp )
})
# this is a case reported by richelbilderbeek
# sadly, on my machine the "ruthless crash" was treated as a normal error (same as it is after the fix), but might as well include that here too
test_that( 'write_bed does not crash ruthlessly when output directory does not exist', {
expect_error(
write_bed( 'dir-does-not-exist/test', X, verbose = FALSE )
)
})
test_that( "sim_and_write_plink works", {
# start by loading an existing file with known values
# this first test won't have p_anc
out <- read_plink( 'dummy-33-101-0.1', verbose = FALSE )
X <- out$X
bim <- out$bim
fam <- out$fam
m_loci <- nrow( bim )
# this ought to omit it from the return value?
p_anc <- NULL
# a global variable updated as we go
m_last <- 0
# define a trivial but complete genotype simulator function
sim_chunk <- function( m_chunk ) {
# get subset of interest
indexes <- m_last + ( 1 : m_chunk )
# update global value (use <<-) for next round
m_last <<- m_last + m_chunk
# return all of these elements in a named list!
obj <- list(
X = X[ indexes, , drop = FALSE ],
bim = bim[ indexes, ]
)
if ( !is.null( p_anc ) )
obj$p_anc <- p_anc[ indexes ]
# add p_anc if not NULL
return( obj )
}
# write to the temp output
expect_silent(
sim_and_write_plink( sim_chunk, m_loci, fam, fo )
)
# read it back, make sure it matches
out2 <- read_plink( fo, verbose = FALSE )
expect_equal( out2, out )
# cleanup
expect_silent( delete_files_plink(fo) )
# repeat with a small chunk size, to make sure we write in several parts
m_last <- 0 # always reset!
expect_silent(
sim_and_write_plink( sim_chunk, m_loci, fam, fo, n_data_cut = 197 )
)
out2 <- read_plink( fo, verbose = FALSE )
expect_equal( out2, out )
expect_silent( delete_files_plink(fo) )
# test singleton edge case
m_last <- 0 # always reset!
expect_silent(
sim_and_write_plink( sim_chunk, m_loci, fam, fo, n_data_cut = 1 )
)
out2 <- read_plink( fo, verbose = FALSE )
expect_equal( out2, out )
expect_silent( delete_files_plink(fo) )
# version with mid-size chunk and p_anc
fp <- tempfile('delete-me-random-test-p-anc.txt.gz') # this one has an extension
p_anc <- runif( m_loci )
m_last <- 0 # always reset!
expect_silent(
sim_and_write_plink( sim_chunk, m_loci, fam, fo, fp, n_data_cut = 197 )
)
out2 <- read_plink( fo, verbose = FALSE )
expect_equal( out2, out )
# load and check p-anc too
p_anc2 <- as.numeric( readr::read_lines( fp ) )
expect_equal( p_anc2, p_anc )
expect_silent( delete_files_plink(fo) )
file.remove( fp )
})
test_that( 'symlink works', {
# a local file that exists
file <- 'dummy-33-101-0.1.fam'
# a file that doesn't exist already, setup to be in a temporary path
# NOTE: out of necessity, this is a path not in current directory, so that's already testing that complicated case!
link <- fo
# this ought to succeed!
expect_silent(
symlink( file, link, verbose = FALSE )
)
# confirm that file contents are the same?
fam1 <- read_fam( file, verbose = FALSE )
fam2 <- read_fam( link, verbose = FALSE )
expect_equal( fam2, fam1 )
# delete when done!
expect_true( file.remove( link ) )
# repeat when input file has absolute path
file <- R.utils::getAbsolutePath( file )
expect_silent(
symlink( file, link, verbose = FALSE )
)
fam1 <- read_fam( file, verbose = FALSE )
fam2 <- read_fam( link, verbose = FALSE )
expect_equal( fam2, fam1 )
expect_true( file.remove( link ) )
# so only case not directly tested is two files in same local path, but that's sort of the easiest case and tested in my own runs, so I'm not worried that would somehow fail!
})
test_that( "het_reencode_bed works", {
# apply reencoding to this existing file
fi <- 'dummy-33-101-0.1'
expect_silent(
het_reencode_bed( fi, fo, verbose = FALSE )
)
# to validate, read both and apply the explicit mapping in R, which is simpler code
data <- read_plink( fi, verbose = FALSE )
X <- data$X
# this is desired transformation
X[ X == 2 ] <- 0
X[ X == 1 ] <- 2
data2 <- read_plink( fo, verbose = FALSE )
# confirm transformation matched
expect_equal( data2$X, X )
# these should be symlinks, so they should be identical!
expect_equal( data2$bim, data$bim )
expect_equal( data2$fam, data$fam )
# all bed/bim/fam files were created (the other two are symlinks), delete all now
expect_silent( delete_files_plink( fo ) )
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.