tests/testthat/test-fasta_IO.R

# library(jackalope)
# library(testthat)

context("Testing FASTA file input/output")

dir <- tempdir(check = TRUE)



chroms <- jackalope:::rando_chroms(10, 100)

ref <- ref_genome$new(jackalope:::make_ref_genome(chroms))





# ----------*
# Errors ----
# ----------*

test_that("Read/writing FASTA files produces errors when nonsense is input", {

    fa_fn <- sprintf("%s/%s.fa", dir, "test")

    expect_error(write_fasta("ref", fa_fn),
                 regexp = "argument `obj` must be a \"ref_genome\" or \"haplotypes\"")

    expect_error(write_fasta(ref, 3),
                 regexp = "argument `out_prefix` must be a single string")

    expect_error(write_fasta(ref, fa_fn, compress = "3"),
                 regexp = paste("argument `compress` must be a single logical",
                                "or integer from 1 to 9"))

    expect_error(write_fasta(ref, fa_fn, text_width = "y"),
                 regexp = "argument `text_width` must be a single integer >= 1")


    expect_error(read_fasta(c()),
                 regexp = "argument `fasta_files` must be a character vector")

    expect_error(read_fasta(fa_fn, NA),
                 regexp = paste("argument `fai_files` must be NULL or a",
                                "character vector of the same length as the",
                                "`fasta_files` argument"))

    expect_error(read_fasta(rep(fa_fn, 2), "NA"),
                 regexp = paste("argument `fai_files` must be NULL or a",
                                "character vector of the same length as the",
                                "`fasta_files` argument"))

    expect_error(read_fasta(fa_fn, cut_names = "yeah"),
                 regexp = "argument `cut_names` must be a single logical")

})




# ================================================================================`
# ================================================================================`

# >>> Non-indexed ----

# ================================================================================`
# ================================================================================`


# ----------*
# Single ----
# ----------*


test_that("Read/writing single non-indexed FASTA files works with uncompressed output", {

    fa_fn <- sprintf("%s/%s", dir, "test")

    write_fasta(ref, fa_fn, compress = FALSE, overwrite = TRUE)

    fa_fn <- sprintf("%s/%s.fa", dir, "test")
    new_ref <- read_fasta(fa_fn)

    expect_identical(ref$n_chroms(), new_ref$n_chroms())

    for (i in 1:ref$n_chroms()) {
        expect_identical(ref$chrom(i), ref$chrom(i))
    }

})


test_that("Read/writing single non-indexed FASTA files works with gzipped output", {

    fa_fn <- sprintf("%s/%s", dir, "test")

    write_fasta(ref, fa_fn, compress = TRUE, comp_method = "gzip", overwrite = TRUE)

    fa_fn <- sprintf("%s/%s.fa.gz", dir, "test")
    new_ref <- read_fasta(fa_fn)

    expect_identical(ref$n_chroms(), new_ref$n_chroms())

    for (i in 1:ref$n_chroms()) {
        expect_identical(ref$chrom(i), ref$chrom(i))
    }

})


test_that("Read/writing single non-indexed FASTA files works with bgzipped output", {

    fa_fn <- sprintf("%s/%s", dir, "test")

    write_fasta(ref, fa_fn, compress = TRUE, comp_method = "bgzip", overwrite = TRUE)

    fa_fn <- sprintf("%s/%s.fa.gz", dir, "test")
    new_ref <- read_fasta(fa_fn)

    expect_identical(ref$n_chroms(), new_ref$n_chroms())

    for (i in 1:ref$n_chroms()) {
        expect_identical(ref$chrom(i), ref$chrom(i))
    }

})




# ----------*
# Multiple ----
# ----------*


ref1 <- ref_genome$new(jackalope:::make_ref_genome(chroms[1:5]))
ref2 <- ref_genome$new(jackalope:::make_ref_genome(chroms[6:10]))


test_that("Read/writing multiple non-indexed FASTA files works with uncompressed output", {

    fa_fns <- sprintf("%s/%s%i", dir, "test", 1:2)

    write_fasta(ref1, fa_fns[1], compress = FALSE, overwrite = TRUE)
    write_fasta(ref2, fa_fns[2], compress = FALSE, overwrite = TRUE)

    fa_fns <- sprintf("%s/%s%i.fa", dir, "test", 1:2)
    new_ref <- read_fasta(fa_fns)

    expect_identical(ref$n_chroms(), new_ref$n_chroms())

    for (i in 1:ref$n_chroms()) {
        expect_identical(ref$chrom(i), ref$chrom(i))
    }

})


test_that("Read/writing multiple non-indexed FASTA files works with gzipped output", {

    fa_fns <- sprintf("%s/%s%i", dir, "test", 1:2)

    write_fasta(ref1, fa_fns[1], compress = TRUE, comp_method = "gzip", overwrite = TRUE)
    write_fasta(ref2, fa_fns[2], compress = TRUE, comp_method = "gzip", overwrite = TRUE)

    fa_fns <- sprintf("%s/%s%i.fa.gz", dir, "test", 1:2)
    new_ref <- read_fasta(fa_fns)

    expect_identical(ref$n_chroms(), new_ref$n_chroms())

    for (i in 1:ref$n_chroms()) {
        expect_identical(ref$chrom(i), ref$chrom(i))
    }

})

test_that("Read/writing multiple non-indexed FASTA files works with bgzipped output", {

    fa_fns <- sprintf("%s/%s%i", dir, "test", 1:2)

    write_fasta(ref1, fa_fns[1], compress = TRUE, comp_method = "bgzip", overwrite = TRUE)
    write_fasta(ref2, fa_fns[2], compress = TRUE, comp_method = "bgzip", overwrite = TRUE)

    fa_fns <- sprintf("%s/%s%i.fa.gz", dir, "test", 1:2)
    new_ref <- read_fasta(fa_fns)

    expect_identical(ref$n_chroms(), new_ref$n_chroms())

    for (i in 1:ref$n_chroms()) {
        expect_identical(ref$chrom(i), ref$chrom(i))
    }

})








# ================================================================================`
# ================================================================================`

# >>> Indexed ----

# ================================================================================`
# ================================================================================`




# ----------*
# Single ----
# ----------*


# Making my own fasta index file:
fa_index_df <-
    data.frame(name = ref$chrom_names(),
               nbases = ref$sizes(),
               byte_index = cumsum(c(nchar(ref$chrom_names()[1]) + 2,
                                     nchar(ref$chrom_names()[-1]) + 2 +
                                         utils::head(ref$sizes(), -1) +
                                         ceiling(utils::head(ref$sizes(), -1) /
                                                     formals(write_fasta)$text_width))),
               bases_perline = formals(write_fasta)[["text_width"]],
               bytes_perline = formals(write_fasta)[["text_width"]] + 1)


utils::write.table(fa_index_df, sprintf("%s/%s.fa.fai", dir, "test"), quote = FALSE,
                   sep = "\t", row.names = FALSE, col.names = FALSE)


test_that("Read/writing single indexed FASTA files works with uncompressed output", {

    fa_fn <- sprintf("%s/%s", dir, "test")
    fai_fn <- sprintf("%s/%s.fa.fai", dir, "test")

    write_fasta(ref, fa_fn, compress = FALSE, overwrite = TRUE)

    fa_fn <- sprintf("%s/%s.fa", dir, "test")
    new_ref <- read_fasta(fa_fn, fai_fn)

    expect_identical(ref$n_chroms(), new_ref$n_chroms())

    for (i in 1:ref$n_chroms()) {
        expect_identical(ref$chrom(i), ref$chrom(i))
    }

})


test_that("Read/writing single indexed FASTA files works with gzipped output", {

    fa_fn <- sprintf("%s/%s", dir, "test")
    fai_fn <- sprintf("%s/%s.fa.fai", dir, "test")

    write_fasta(ref, fa_fn, compress = TRUE, comp_method = "gzip", overwrite = TRUE)

    fa_fn <- sprintf("%s/%s.fa.gz", dir, "test")
    new_ref <- read_fasta(fa_fn, fai_fn)

    expect_identical(ref$n_chroms(), new_ref$n_chroms())

    for (i in 1:ref$n_chroms()) {
        expect_identical(ref$chrom(i), ref$chrom(i))
    }

})


test_that("Read/writing single indexed FASTA files works with bgzipped output", {

    fa_fn <- sprintf("%s/%s", dir, "test")
    fai_fn <- sprintf("%s/%s.fa.fai", dir, "test")

    write_fasta(ref, fa_fn, compress = TRUE, comp_method = "bgzip", overwrite = TRUE)

    fa_fn <- sprintf("%s/%s.fa.gz", dir, "test")
    new_ref <- read_fasta(fa_fn, fai_fn)

    expect_identical(ref$n_chroms(), new_ref$n_chroms())

    for (i in 1:ref$n_chroms()) {
        expect_identical(ref$chrom(i), ref$chrom(i))
    }

})



# ----------*
# Multiple ----
# ----------*


# Making my own fasta index file:
fa_index_df1 <- fa_index_df[1:5,]
fa_index_df2 <- fa_index_df[6:10,]
fa_index_df2$byte_index <- fa_index_df2$byte_index + ((nchar(ref$chrom_names()[1]) + 2) -
                                                          fa_index_df2$byte_index[1])


ref1 <- ref_genome$new(jackalope:::make_ref_genome(chroms[1:5]))
ref2 <- ref_genome$new(jackalope:::make_ref_genome(chroms[6:10]))

utils::write.table(fa_index_df1, sprintf("%s/%s1.fa.fai", dir, "test"),
                   quote = FALSE, sep = "\t", row.names = FALSE, col.names = FALSE)
utils::write.table(fa_index_df2, sprintf("%s/%s2.fa.fai", dir, "test"),
                   quote = FALSE, sep = "\t", row.names = FALSE, col.names = FALSE)


test_that("Read/writing multiple indexed FASTA files works with uncompressed output", {

    fa_fns <- sprintf("%s/%s%i", dir, "test", 1:2)
    fai_fns <- sprintf("%s/%s%i.fa.fai", dir, "test", 1:2)

    write_fasta(ref1, fa_fns[1], compress = FALSE, overwrite = TRUE)
    write_fasta(ref2, fa_fns[2], compress = FALSE, overwrite = TRUE)

    fa_fns <- sprintf("%s/%s%i.fa", dir, "test", 1:2)
    new_ref <- read_fasta(fa_fns, fai_fns)

    expect_identical(ref$n_chroms(), new_ref$n_chroms())

    for (i in 1:ref$n_chroms()) {
        expect_identical(ref$chrom(i), ref$chrom(i))
    }

})


test_that("Read/writing multiple indexed FASTA files works with gzipped output", {

    fa_fns <- sprintf("%s/%s%i", dir, "test", 1:2)
    fai_fns <- sprintf("%s/%s%i.fa.fai", dir, "test", 1:2)

    write_fasta(ref1, fa_fns[1], compress = TRUE, comp_method = "gzip", overwrite = TRUE)
    write_fasta(ref2, fa_fns[2], compress = TRUE, comp_method = "gzip", overwrite = TRUE)

    fa_fns <- sprintf("%s/%s%i.fa.gz", dir, "test", 1:2)
    new_ref <- read_fasta(fa_fns, fai_fns)

    expect_identical(ref$n_chroms(), new_ref$n_chroms())

    for (i in 1:ref$n_chroms()) {
        expect_identical(ref$chrom(i), ref$chrom(i))
    }

})



test_that("Read/writing multiple indexed FASTA files works with bgzipped output", {

    fa_fns <- sprintf("%s/%s%i", dir, "test", 1:2)
    fai_fns <- sprintf("%s/%s%i.fa.fai", dir, "test", 1:2)

    write_fasta(ref1, fa_fns[1], compress = TRUE, comp_method = "bgzip", overwrite = TRUE)
    write_fasta(ref2, fa_fns[2], compress = TRUE, comp_method = "bgzip", overwrite = TRUE)

    fa_fns <- sprintf("%s/%s%i.fa.gz", dir, "test", 1:2)
    new_ref <- read_fasta(fa_fns, fai_fns)

    expect_identical(ref$n_chroms(), new_ref$n_chroms())

    for (i in 1:ref$n_chroms()) {
        expect_identical(ref$chrom(i), ref$chrom(i))
    }

})






# ___ Writing haplotypes -----

haps <- create_haplotypes(ref, haps_theta(0.1, 2), sub_JC69(0.001))

test_that("Writing FASTA files with haplotypes", {

    fa_fn <- sprintf("%s/%s", dir, "test")

    write_fasta(haps, fa_fn, compress = FALSE, overwrite = TRUE)

    fa_fn <- sprintf("%s/%s__%s.fa", dir, "test", haps$hap_names())
    new_ref1 <- read_fasta(fa_fn[1])
    new_ref2 <- read_fasta(fa_fn[2])

    expect_identical(haps$n_chroms(), new_ref1$n_chroms())
    expect_identical(haps$n_chroms(), new_ref2$n_chroms())

    expect_identical(sapply(1:haps$n_chroms(), function(i) haps$chrom(1, i)),
                     sapply(1:haps$n_chroms(), function(i) new_ref1$chrom(i)))
    expect_identical(sapply(1:haps$n_chroms(), function(i) haps$chrom(2, i)),
                     sapply(1:haps$n_chroms(), function(i) new_ref2$chrom(i)))

})

Try the jackalope package in your browser

Any scripts or data that you put into this service are public.

jackalope documentation built on May 29, 2024, 3:58 a.m.