tests/testthat/test-variants_coal.R

#'
#' The files `files/ms_out*.txt` are used for these tests, and all but
#' `ms_out.txt` have errors intentionally added:
#'
#' - `ms_out_err1.txt`: no sites and no trees
#' - `ms_out_err2.txt`: a position is removed in the seg. sites
#'   (`"error bad site pos"`),
#'   a haplotype is removed in a tree (`"error missing tree tip"`).
#' - `ms_out_err3.txt`: a position is removed in a tree (`"error missing tree pos"`),
#'   a site is removed in seg. sites
#'
#'
#'




# library(jackalope)
# library(testthat)

context("Testing creating haplotypes from coalescent simulations")

# necessary objects -----

arg_list <- list(reference = create_genome(3, 100), sub = sub_JC69(0.1),
                 ins = indels(rate = 0.1, max_length = 10),
                 del = indels(rate = 0.1, max_length = 10))






# ==============================================================================`
# ==============================================================================`

# MS OBJECTS -----

# ==============================================================================`
# ==============================================================================`



#' Check for proper output and errors when creating haplotypes from coalescent objects
#'
#' coal_obj must be a coalescent object from scrm or coala.
#'

coal_obj_run_trees <- function(coal_obj, pkg) {

    arg_list_ <- c(list(haps_info = haps_gtrees(obj = coal_obj)), arg_list)
    haps <- do.call(create_haplotypes, arg_list_)
    expect_equal(haps$n_haps(), 5, info = paste("works with gene trees -", pkg))

    invisible(NULL)
}

coal_obj_run_sites <- function(coal_obj, pkg) {
    arg_list_ <- c(list(haps_info = haps_ssites(obj = coal_obj)), arg_list)
    haps <- do.call(create_haplotypes, arg_list_)
    expect_equal(haps$n_haps(), 5, info = paste("works with seg. sites -", pkg))
    invisible(NULL)
}
coal_obj_err_no_trees <- function(coal_obj, pkg) {
    coal_obj$trees <- NULL
    expect_error(
        haps_gtrees(obj = coal_obj),
        regexp = paste("`obj` must be \\(1\\) a list with a",
                       "`trees` field present or \\(2\\) a list of",
                       "lists, each sub-list containing a `trees`",
                       "field of length 1."),
        info = paste("returns error when no gene trees provided -", pkg))
    invisible(NULL)
}
coal_obj_err_no_sites <- function(coal_obj, pkg) {
    coal_obj$seg_sites <- NULL
    expect_error(
        haps_ssites(obj = coal_obj),
        regexp = paste("argument `obj` must be NULL or a list with a `seg_sites`",
                       "field present."),
        info = paste("returns error when no seg. sites provided -", pkg))
    invisible(NULL)
}
coal_obj_err_missing_tree_pos <- function(coal_obj, pkg) {
    i <- which(sapply(coal_obj$trees, length) > 1)[1]
    coal_obj$trees[[i]][1] <- substr(coal_obj$trees[[i]][1],
                                     regexpr("\\]", coal_obj$trees[[i]][1])[[1]]+1,
                                     9999)

    arg_list_ <- c(list(haps_info = haps_gtrees(obj = coal_obj)), arg_list)

    expect_error(
        do.call(create_haplotypes, arg_list_),
        regexp = paste("A coalescent string appears to include",
                       "recombination but does not include sizes for each region."),
        info = paste("returns error when improper gene trees provided -", pkg))
    invisible(NULL)
}
coal_obj_err_miss_tree_tip <- function(coal_obj, pkg) {
    i <- which(sapply(coal_obj$trees, length) > 1)[1]
    strs <- coal_obj$trees[[i]]
    pos <- as.integer(sapply(strs, function(x) substr(x, 2, regexpr("\\]", x)[[1]]-1)))
    tr <- ape::read.tree(text = strs)
    tr <- lapply(tr, ape::drop.tip, tip = "1")
    class(tr) <- "multiPhylo"
    coal_obj$trees[[i]] <- sprintf("[%i]%s", pos, ape::write.tree(tr))

    arg_list_ <- c(list(haps_info = haps_gtrees(obj = coal_obj)), arg_list)

    expect_error(
        do.call(create_haplotypes, arg_list_),
        regexp = "One or more trees have differing tip labels.",
        info = paste("returns error when improper gene trees provided -", pkg))
    invisible(NULL)
}
coal_obj_err_bad_site_pos <- function(coal_obj, pkg) {
    i <- which(sapply(coal_obj$seg_sites, ncol) > 1)[1]
    mat <- coal_obj$seg_sites[[i]]
    mat <- as.matrix(mat)
    colnames(mat) <- c("XX", colnames(mat)[-1])
    coal_obj$seg_sites[[i]] <- mat
    expect_error(
        suppressWarnings({
            haps_ssites(obj = coal_obj)
        }),
        regexp = paste("Positions in one or more segregating-sites matrices",
                       "are producing NAs"),
        info = paste("returns error with improper seg. sites col names -", pkg))
    invisible(NULL)
}






# scrm checks -----

test_that("haplotype creation works with scrm coalescent object", {

    skip_if_not_installed("scrm")
    library(scrm)
    coal_obj <- scrm("5 3 -r 3.1 100 -t 10 -T -L")

    coal_obj_run_trees(coal_obj, "scrm")
    coal_obj_run_sites(coal_obj, "scrm")
    coal_obj_err_no_trees(coal_obj, "scrm")
    coal_obj_err_no_sites(coal_obj, "scrm")
    coal_obj_err_missing_tree_pos(coal_obj, "scrm")
    coal_obj_err_miss_tree_tip(coal_obj, "scrm")
    coal_obj_err_bad_site_pos(coal_obj, "scrm")


})


# scrm checks - exact -----

test_that("haplotype creation works with scrm coalescent object", {

    .arg_list <- arg_list

    arg_list <- c(list(epsilon = 0), arg_list)

    skip_if_not_installed("scrm")
    library(scrm)
    coal_obj <- scrm("5 3 -r 3.1 100 -t 10 -T -L")

    coal_obj_run_trees(coal_obj, "scrm")
    coal_obj_run_sites(coal_obj, "scrm")
    coal_obj_err_no_trees(coal_obj, "scrm")
    coal_obj_err_no_sites(coal_obj, "scrm")
    coal_obj_err_missing_tree_pos(coal_obj, "scrm")
    coal_obj_err_miss_tree_tip(coal_obj, "scrm")
    coal_obj_err_bad_site_pos(coal_obj, "scrm")

    arg_list <- .arg_list

})


# coala checks -----

test_that("haplotype creation works with coala coalescent object", {

    skip_if_not_installed("coala")
    library(coala)
    model <- coal_model(sample_size = 5, loci_number = 3, loci_length = 100) +
        feat_recombination(2) +
        feat_mutation(5) +
        sumstat_trees() +
        sumstat_seg_sites()
    coal_obj <- simulate(model)

    coal_obj_run_trees(coal_obj, "coala")
    coal_obj_run_sites(coal_obj, "coala")
    coal_obj_err_no_trees(coal_obj, "coala")
    coal_obj_err_no_sites(coal_obj, "coala")
    coal_obj_err_missing_tree_pos(coal_obj, "coala")
    coal_obj_err_miss_tree_tip(coal_obj, "coala")
    coal_obj_err_bad_site_pos(coal_obj, "coala")

})








# ==============================================================================`
# ==============================================================================`

# MS FILES -----

# ==============================================================================`
# ==============================================================================`



test_that("haplotype creation works with ms-style file output", {


    .p <- function(x) test_path(sprintf("files/%s.txt", x))

    expect_equal({
        arg_list_ <- c(list(haps_info = haps_gtrees(fn = .p("ms_out"))), arg_list)
        haps <- do.call(create_haplotypes, arg_list_)
        haps$n_haps()
    }, 5, info = paste("works with gene trees - ms-file"))

    expect_equal({
        arg_list_ <- c(list(haps_info = haps_gtrees(fn = .p("ms_out"))), arg_list)
        haps <- do.call(create_haplotypes, arg_list_)
        haps$n_haps()
    }, 5, info = "works with seg. sites - ms-file")

    expect_error(
        haps_gtrees(fn = .p("ms_out_err1")),
        regexp = "one or more chromosomes have no trees",
        info = "returns error when no gene trees provided - ms-file")

    expect_error(
        haps_ssites(fn = .p("ms_out_err1")),
        regexp = paste("One or more seg. sites matrices from a ms-style",
                       "file output have no haplotype information specified."),
        info = "returns error when no seg. sites provided - ms-file")


    expect_error({
        arg_list_ <- c(list(haps_info = haps_gtrees(fn = .p("ms_out_err3"))), arg_list)
        do.call(create_haplotypes, arg_list_)
    },
    regexp = paste("A coalescent string appears to include",
                   "recombination but does not include sizes for each region."),
    info = "returns error when improper gene trees provided - ms-file")


    expect_error({
        arg_list_ <- c(list(haps_info = haps_gtrees(fn = .p("ms_out_err3"))), arg_list)
        do.call(create_haplotypes, arg_list_)
    },
    regexp = paste("A coalescent string appears to include",
                   "recombination but does not include sizes for each region."),
    info = "returns error when improper gene trees provided - ms-file")


    expect_error({
        arg_list_ <- c(list(haps_info = haps_gtrees(fn = .p("ms_out_err2"))), arg_list)
        do.call(create_haplotypes, arg_list_)
    },
    regexp = "One or more trees have differing tip labels.",
    info = "returns error when improper gene trees provided - ms-file")


    expect_error(
        haps_ssites(fn = test_path(paste0("files/", "ms_out_err2.txt"))),
        regexp = paste("the listed positions for each site \\(line starting",
                       "with 'positions:'\\) does not have a length that's",
                       "the same as the \\# sites as given by the line",
                       "starting with 'segsites:'"),
        info = paste("returns error with improper seg. sites positions - ms-file"))


    expect_error(
        haps_ssites(fn = test_path("files/ms_out_err3.txt")),
        regexp = paste("the listed number of sites \\(line starting with",
                       "'segsites:'\\) does not agree with the number of",
                       "items in the 3th line of segregating sites",
                       "info \\(ones filled with 0s and 1s\\)"),
        info = paste("returns error with improper seg. sites matrices - ms file"))


    reference2 <- create_genome(3, 1000)

    expect_error({
        arg_list_ <- c(list(haps_info = haps_gtrees(fn = .p("ms_out"))), arg_list)
        arg_list_$reference <- reference2
        do.call(create_haplotypes, arg_list_)
    },
    regexp = paste("A coalescent string appears to include recombination",
                   "but the combined sizes of all regions don't match the",
                   "size of the chromosome"))

})



test_that("haplotype creation returns error with improper ref_genome input", {
    .p <- function(x) test_path(sprintf("files/%s.txt", x))
    expect_error({
        arg_list_ <- c(list(haps_info = haps_gtrees(fn = .p("ms_out"))), arg_list)
        arg_list_$reference <- list(1, 2)
        do.call(create_haplotypes, arg_list_)
    },
    regexp = paste("For the `create_haplotypes` function in jackalope,",
                   "argument `reference` must be a \"ref_genome\" object."))
})

test_that("seg. sites produces the correct number of mutations", {
    ms_file <- test_path("files/ms_out.txt")
    # Have to make bigger ref. genome to make sure site positions don't overlap.
    reference2 <- create_genome(3, 100e3)
    arg_list_ <- c(list(haps_info = haps_ssites(fn = ms_file)), arg_list)
    arg_list_$reference <- reference2
    haps <- do.call(create_haplotypes, arg_list_)

    msf <- readLines(ms_file)[-1:-2]
    msf <- msf[grepl("^0|^1", msf)]
    msf <- do.call(c, strsplit(msf, ""))
    n_muts_by_hap <-
        sapply(0:4, function(i) nrow(jackalope:::view_mutations(haps$ptr(), i)))
    expect_equal(sum(as.integer(msf)), sum(n_muts_by_hap))
})





# ==============================================================================`
# ==============================================================================`

# GENERAL NONSENSE -----

# ==============================================================================`
# ==============================================================================`


test_that("errors occur when nonsense is input to haps_gtrees", {

    expect_error(haps_gtrees(NULL, NULL),
                 regexp = "either argument `obj` or `fn` must be provided")
    expect_error(haps_gtrees(1, 1),
                 regexp = "only one argument \\(`obj` or `fn`\\) should be provided")
    expect_error(haps_gtrees(1, NULL),
                 regexp = paste("argument `obj` must be \\(1\\) a list with a",
                                "`trees` field present or \\(2\\) a list of lists,",
                                "each sub-list containing a `trees` field of length 1"))
    expect_error(haps_gtrees(NULL, 1),
                 regexp = "argument `fn` must be NULL or a single string")

})
test_that("errors occur when nonsense is input to haps_ssites", {

    expect_error(haps_ssites(NULL, NULL),
                 regexp = "either argument `obj` or `fn` must be provided")
    expect_error(haps_ssites(1, 1),
                 regexp = "only one argument \\(`obj` or `fn`\\) should be provided")
    expect_error(haps_ssites(1, NULL),
                 regexp = paste("argument `obj` must be NULL or a list with a",
                                "`seg_sites` field present"))
    expect_error(haps_ssites(NULL, 1),
                 regexp = "argument `fn` must be NULL or a single string")

})






# ==============================================================================`
# ==============================================================================`

# Writing gene trees -----

# ==============================================================================`
# ==============================================================================`


test_that("gene trees written properly by write_gtrees", {

    out_prefix <- paste0(tempdir(TRUE), "/trees")

    # Write gene trees to file based on known ms-style file:
    write_gtrees(haps_gtrees(fn = test_path("files/ms_out.txt")), out_prefix)

    # Newly written gene tree strings:
    wr_str <- strsplit(paste(readLines(paste0(out_prefix, ".trees")),
                             collapse = ""), "//")[[1]]
    wr_str <- wr_str[grepl("^\\[", wr_str)]

    # original file, split by chromosome:
    og_str <- strsplit(strsplit(paste(readLines(test_path("files/ms_out.txt"))[-1:-2],
                                      collapse = "\n"), "//\n")[[1]], "\n")[-1]
    # Now get just the gene trees:
    og_str <- sapply(og_str, function(x) paste(x[grepl("^\\[", x)], collapse = ""))

    expect_identical(wr_str, og_str)

})

Try the jackalope package in your browser

Any scripts or data that you put into this service are public.

jackalope documentation built on Oct. 15, 2023, 9:06 a.m.