tests/testthat/test-twobit_roundtrip.R

### I don't know how to retrieve the warnings suppressed by suppressWarnings().
### Is it even possible? So here is a slightly modified version of
### suppressWarnings() that stores the warnings in '.last_suppressed_warnings'
### if any.
.last_suppressed_warnings <- NULL

.suppress_warnings <- function(expr, classes="warning")
{
    .last_suppressed_warnings <<- NULL
    withCallingHandlers(expr,
        warning=function(w) {
            if (inherits(w, classes)) {
                .last_suppressed_warnings <<- c(.last_suppressed_warnings,
                                                w$message)
                tryInvokeRestart("muffleWarning")
            }
        }
    )
}

### An easier way would be to just do:
###   tools::md5sum(file1) == tools::md5sum(file2)
.files_are_identical <- function(file1, file2, n=50000L)
{
    con1 <- file(file1, "rb")
    on.exit(close(con1))
    con2 <- file(file2, "rb")
    on.exit(close(con2), add=TRUE)
    while (TRUE) {
        bytes1 <- readBin(con1, what=raw(), n=n)
        bytes2 <- readBin(con2, what=raw(), n=n)
        if (!identical(bytes1, bytes2))
            return(FALSE)
        if (length(bytes1) == 0L)
            break
    }
    TRUE
}

test_that("twobit_read/twobit_write roundtrips are lossless",
{
    ## on eboVir3.2bit (1 sequence)
    inpath <- system.file(package="Rtwobitlib", "extdata", "eboVir3.2bit")
    dna <- twobit_read(inpath)
    outpath <- twobit_write(dna, tempfile())
    expect_true(.files_are_identical(inpath, outpath))

    ## on sacCer2.2bit (18 sequences)
    inpath <- system.file(package="Rtwobitlib", "extdata", "sacCer2.2bit")
    dna <- twobit_read(inpath)
    outpath <- twobit_write(dna, tempfile())
    expect_true(.files_are_identical(inpath, outpath))
})

test_that("twobit_write/twobit_read roundtrips are lossless",
{
    dna <- c(seq1="A", seq2="TnT")
    filepath <- twobit_write(dna, tempfile())
    expect_identical(twobit_read(filepath), dna)
    filepath <- twobit_write(dna, tempfile(), use.long=TRUE)
    expect_identical(twobit_read(filepath), dna)

    dna <- c(chr1="AAAAAATTcccgcgccgccgTTTTAATCGaataataataatGGNNNNN",
             chr2="TTTNNNNNNATTATTTTACCACCAAACCCCACACT",
             chrM="GGGCAAATGGCG")
    filepath <- twobit_write(dna, tempfile())
    expect_identical(twobit_read(filepath), dna)
    filepath <- twobit_write(dna, tempfile(), use.long=TRUE)
    expect_identical(twobit_read(filepath), dna)
})

test_that("twobit_write error handling",
{
    ## --- with empty sequences ---

    dna <- c(chr1="cccATT", chr2="", chr3="NNN", chr4="")
    filepath <- .suppress_warnings(twobit_write(dna, tempfile()))
    expected_warnings <-
        sprintf("sequence chr%d has length 0 ==> skipping it", c(2L, 4L))
    expect_identical(.last_suppressed_warnings, expected_warnings)
    expect_identical(twobit_read(filepath), dna[c(1L, 3L)])

    ## --- with duplicated sequence names ---

    dna <- c(seq1="A", seq2="CC", seq3="TnT", seq2="TTnnnG",
             seq2="NNNN", seq4="CGCGcgacgTA", seq1="ACGT")
    expect_error(twobit_write(dna, tempfile()),
                 regexp="duplicate sequence name seq2")
    filepath <-
        .suppress_warnings(twobit_write(dna, tempfile(), skip.dups=TRUE))
    expected_warnings <-
        sprintf("duplicate sequence name seq%d ==> skipping it", c(2L, 2L, 1L))
    expect_identical(.last_suppressed_warnings, expected_warnings)
    expect_identical(twobit_read(filepath), dna[c(1:3, 6L)])

    ## --- with a mix of empty sequences and duplicated sequence names ---

    dna <- c(chrX="cccATT", chrX="")
    filepath <- .suppress_warnings(twobit_write(dna, tempfile()))
    expected_warning <- "sequence chrX has length 0 ==> skipping it"
    expect_identical(.last_suppressed_warnings, expected_warning)
    expect_identical(twobit_read(filepath), dna[1L])

    dna <- c(chrX="", chrX="cccATT")
    filepath <- .suppress_warnings(twobit_write(dna, tempfile()))
    expected_warning <- "sequence chrX has length 0 ==> skipping it"
    expect_identical(.last_suppressed_warnings, expected_warning)
    expect_identical(twobit_read(filepath), dna[2L])

    dna <- c(chr1="AAAAAATTcccgcgccgccgTTTTAATCGaataataataatGGNNNNN",
             chr2="TTTNNNNNNATTATTTTACCACCAAACCCCACACT",
             chr3="",
             chr2="TT",
             chr4="NNNNGGACAGGACattcattcattcattcTTCGNNNnnnnnnNNNNTAGGAGTCNN",
             chr1="",
             chrX="TnT",
             chrX="GGGCAAATGGCG",
             chr1="a")
    expect_error(suppressWarnings(twobit_write(dna, tempfile())),
                 regexp="duplicate sequence name chr2")
    filepath <-
        .suppress_warnings(twobit_write(dna, tempfile(), skip.dups=TRUE))
    expected_warnings <- c("sequence chr3 has length 0 ==> skipping it",
                           "duplicate sequence name chr2 ==> skipping it",
                           "sequence chr1 has length 0 ==> skipping it",
                           "duplicate sequence name chrX ==> skipping it",
                           "duplicate sequence name chr1 ==> skipping it")
    expect_identical(.last_suppressed_warnings, expected_warnings)
    expect_identical(twobit_read(filepath), dna[c(1:2, 5L, 7L)])

    ## --- produce empty 2bit file ---

    dna0 <- setNames(character(0), character(0))
    filepath <- twobit_write(dna0, tempfile())
    expect_identical(twobit_read(filepath), dna0)

    dna <- c(chr1="", chr2="", chr3="")
    expect_warning(filepath <- twobit_write(dna, tempfile()))
    expect_identical(twobit_read(filepath), dna0)

    ## --- with unknown DNA letters ---

    ## Unknown DNA letters are silently converted to T or t (lowercase
    ## letters -> t, anything else -> T).

    dna <- c(chr1="AAx78g*p-CC", chr2="Gn-nNyN", chr3="?", chr4="z")
    filepath <- twobit_write(dna, tempfile())
    expected <- c(chr1="AAtTTgTtTCC", chr2="GnTnNtN", chr3="T", chr4="t")
    expect_identical(twobit_read(filepath), expected)

    ## --- with sequence names longer than 255 chars ---

    dna <- c("TnT", "A", "ACGTNacgtnNTGCA")
    names(dna) <- c("chr1", sprintf("A%0254d", 1), sprintf("B%0254d", 1))
    filepath <- twobit_write(dna, tempfile())
    expect_identical(twobit_read(filepath), dna)

    names(dna) <- c("chr1", sprintf("A%0254d", 1), sprintf("B%0255d", 1))
    expect_error(twobit_write(dna, tempfile()),
                 regexp="sequence name too long: B0+1")

    names(dna) <- c("chr1", sprintf("A%0255d", 1), sprintf("B%0255d", 1))
    expect_error(twobit_write(dna, tempfile()),
                 regexp="sequence name too long: A0+1")

    ## --- invalid 'x' ---

    expect_error(twobit_write(factor(), tempfile()),
                 regexp="must be a character vector")
    expect_error(twobit_write(letters, tempfile()),
                 regexp="must have names")
    expect_error(twobit_write(c("AA", NA), tempfile()),
                 regexp="must have names")
    expect_error(twobit_write(c(seq1="AA", seq2=NA), tempfile()),
                 regexp="cannot contain NAs")
    dna <- c("A", "TnT")
    expect_error(twobit_write(setNames(dna, c("seq1", NA)), tempfile()),
                 regexp="names on .* cannot contain NAs or empty strings")
    expect_error(twobit_write(setNames(dna, c("seq1", "")), tempfile()),
                 regexp="names on .* cannot contain NAs or empty strings")

    ## --- invalid 'filepath' ---

    expect_error(twobit_write(c(seq1="A", seq2="TnT"), factor()),
                 regexp="'filepath' must be a single string")
    expect_error(twobit_write(c(seq1="A", seq2="TnT"), letters),
                 regexp="'filepath' must be a single string")
    expect_error(twobit_write(c(seq1="A", seq2="TnT"), NA_character_),
                 regexp="'filepath' must be a single string")
    expect_error(twobit_write(c(seq1="A", seq2="TnT"), ""),
                 regexp="'filepath' must be a non-empty string")
    expect_error(twobit_write(c(seq1="A", seq2="TnT"), "https://foo/bar.2bit"),
                 regexp="directory .* does not exist")

    ## --- invalid 'skip.dups' ---

    expect_error(twobit_write(c(seq1="A", seq2="TnT"), tempfile(),
                              skip.dups=NA),
                 regexp="'skip.dups' must be TRUE or FALSE")
    expect_error(twobit_write(c(seq1="A", seq2="TnT"), tempfile(),
                              skip.dups=logical(0)),
                 regexp="'skip.dups' must be TRUE or FALSE")
})

Try the Rtwobitlib package in your browser

Any scripts or data that you put into this service are public.

Rtwobitlib documentation built on June 8, 2025, 12:16 p.m.