tests/testthat/test-simulator-seqgen.R

context("Simulator seqgen")

test_that("it parses seqgen output", {
  output <- c(" 11 10",
              "s11       AATTTTGCCT",
              "s2        TTCCCAAGTT",
              "s4        TTCACAAGTG",
              "s1        TTCCCAAGTG",
              "s3        TTCCTAAGTG",
              "s5        TCGGAAGCAG",
              "s7        TCGGAAGCAG",
              "s6        CCGGAAGCCT",
              "s8        GCGGAAGCCT",
              "s9        CCGGCTGCAG",
              "s10       CCTCAGGGCC",
              " 11 10",
              "11        ATTGAACCGC",
              "5         GTATATTTAC",
              "9         GAATATGAAG",
              "6         CTATATTTAG",
              "8         CTAAATGAGG",
              "7         CTATATGAAC",
              "10        CTATATGAAC",
              "1         CCATACGATA",
              "2         CTTGACGGTA",
              "3         GCAGACGGTA",
              "4         GCTGATAATA")

  sequence <- parse_seqgen_output(output, individuals = 11, locus_length = 10,
                                  locus_number = 2, outgroup_size = 1,
                                  calc_segsites = FALSE)
  expect_equivalent(sequence, list(matrix(c(4, 4, 2, 2, 2, 1, 1, 3, 4, 3,
                                            4, 4, 2, 2, 2, 1, 1, 3, 4, 4,
                                            4, 4, 2, 2, 4, 1, 1, 3, 4, 3,
                                            4, 4, 2, 1, 2, 1, 1, 3, 4, 3,
                                            4, 2, 3, 3, 1, 1, 3, 2, 1, 3,
                                            2, 2, 3, 3, 1, 1, 3, 2, 2, 4,
                                            4, 2, 3, 3, 1, 1, 3, 2, 1, 3,
                                            3, 2, 3, 3, 1, 1, 3, 2, 2, 4,
                                            2, 2, 3, 3, 2, 4, 3, 2, 1, 3,
                                            2, 2, 4, 2, 1, 3, 3, 3, 2, 2,
                                            1, 1, 4, 4, 4, 4, 3, 2, 2, 4),
                                          11, 10, byrow = TRUE),
                                   matrix(c(2, 2, 1, 4, 1, 2, 3, 1, 4, 1,
                                            2, 4, 4, 3, 1, 2, 3, 3, 4, 1,
                                            3, 2, 1, 3, 1, 2, 3, 3, 4, 1,
                                            3, 2, 4, 3, 1, 4, 1, 1, 4, 1,
                                            3, 4, 1, 4, 1, 4, 4, 4, 1, 2,
                                            2, 4, 1, 4, 1, 4, 4, 4, 1, 3,
                                            2, 4, 1, 4, 1, 4, 3, 1, 1, 2,
                                            2, 4, 1, 1, 1, 4, 3, 1, 3, 3,
                                            3, 1, 1, 4, 1, 4, 3, 1, 1, 3,
                                            2, 4, 1, 4, 1, 4, 3, 1, 1, 2,
                                            1, 4, 4, 3, 1, 1, 2, 2, 3, 2),
                                          11, 10, byrow = TRUE)))


  seg_sites <- parse_seqgen_output(output, individuals = 11, locus_length = 10,
                                   locus_number = 2, outgroup_size = 1,
                                   calc_segsites = TRUE)

  seg_sites_1 <- create_segsites(matrix(c(1, 1, 1, 1, 1, 1, 1,
                                          1, 1, 1, 1, 1, 1, 0,
                                          1, 0, 1, 1, 1, 1, 1,
                                          1, 1, 1, 1, 1, 1, 1,
                                          1, 1, 1, 0, 0, 1, 1,
                                          1, 1, 1, 0, 0, 0, 0,
                                          1, 1, 1, 0, 0, 1, 1,
                                          1, 1, 1, 0, 0, 0, 0,
                                          1, 1, 0, 0, 0, 1, 1,
                                          0, 1, 1, 0, 1, 0, 1),
                                        10, 7, byrow = TRUE),
                                 c(2, 4:9) / 9)
  expect_equal(seg_sites[[1]], seg_sites_1)

  seg_sites_2 <- create_segsites(matrix(c(1, 1, 1, 1, 1,
                                          0, 0, 0, 1, 1,
                                          1, 1, 0, 1, 1,
                                          1, 0, 0, 1, 1,
                                          0, 1, 1, 1, 0,
                                          0, 1, 1, 1, 1,
                                          0, 1, 1, 1, 0,
                                          0, 1, 1, 0, 1,
                                          1, 1, 1, 1, 1,
                                          0, 1, 1, 1, 0),
                                        10, 5, byrow = TRUE),
                                 c(1, 2, 3, 8, 9) / 9)
  expect_equal(seg_sites[[2]], seg_sites_2)


  # With outgroup of multiple individuals
  seg_sites <- parse_seqgen_output(output, individuals = 11, locus_length = 10,
                                   locus_number = 2, outgroup_size = 3,
                                   calc_segsites = TRUE)

  seg_sites_o1 <- seg_sites_1[1:8, 4, drop = FALSE]
  expect_equivalent(seg_sites[[1]], seg_sites_o1)

  seg_sites_o2 <- seg_sites_1[1:8, c(), drop = FALSE]
  expect_equivalent(seg_sites[[2]], seg_sites_o2)

  # Unexpected sequence character
  expect_error(parse_seqgen_output(c(" 1 2", "1         AX"),
                                   1, 2, 1, 0, FALSE))

  # False sequence length
  #expect_error(parse_seqgen_output(c(" 1 3", "1         AAA"),
  #                                 1, 2, 1, 0, FALSE))
  #expect_error(parse_seqgen_output(c(" 1 1", "1         A"),
  #                                 1, 2, 1, 0, FALSE))

  # False number of individuals
  expect_error(parse_seqgen_output(c(" 1 3", "1         AAA"),
                                   2, 3, 1, 0, FALSE))
  expect_error(parse_seqgen_output(c(" 2 1", "1         A", "2         G"),
                                   1, 1, 1, 0, FALSE))

  # False locus number
  expect_error(parse_seqgen_output(c(" 1 3", "1         AAA"),
                                   1, 3, 2, 0, FALSE))
  expect_error(parse_seqgen_output(c(" 1 3", "1         AAA",
                                     " 1 3", "1         AAA"),
                                   1, 3, 1, 0, FALSE))

  # No outgroup
  expect_error(parse_seqgen_output(c(" 2 1", "1         A", "2         G"),
                                   2, 1, 1, 0, TRUE))


})


test_that("it parses seqgen output with split sequences", {
  output <- c(" 2 10",
              "s1        AAAAA", "AAAAA",
              "s2        GGGGG", "GGGGG",
              " 2 10",
              "s1        AAAAAAAA", "AA",
              "s2        GGGGGGGG", "GG")
  sequence <- parse_seqgen_output(output, individuals = 2, locus_length = 10,
                                  locus_number = 2, outgroup_size = 0,
                                  calc_segsites = FALSE)
  expect_equivalent(sequence, list(matrix(c(1, 3), 2, 10),
                                   matrix(c(1, 3), 2, 10)))
})


test_that("test.sg_generate_opts", {
  if (!has_seqgen()) skip("seqgen not installed")
  model.hky <- model_hky()
  opts <- sg_generate_opts(model.hky, c(1, 10), 1, c(0, 0, 10, 0, 0), 1)
  opts <- strsplit(opts, " ")[[1]]
  expect_true("-l" %in% opts)
  expect_true("-p" %in% opts)
  expect_true("-q" %in% opts)
  expect_true("-mHKY" %in% opts)
  expect_true("-t" %in% opts)
  expect_true("-s" %in% opts)
})


test_that("generation of tree models works", {
  if (!has_seqgen()) skip("seqgen not installed")
  for (model in list(model_hky(), model_gtr())) {
    tree_model <- generate_tree_model(model)
    stats <- simulate(tree_model, pars = c(1, 5))
    expect_false(is.null(stats$trees))
    unlink(unlist(stats$trees))
  }
})


test_that("seqgen creates tasks", {
  if (!has_seqgen()) skip("seqgen not installed")
  sg <- get_simulator("seqgen")

  sim_task <- sg$create_task(model_hky(), c(tau = 1, theta = 5), 2)
  expect_true(is_simulation_task(sim_task))
  expect_equal(sim_task$locus_number, 2)
  expect_true(is_simulation_task(sim_task$get_arg("tree_task")))
  expect_true(grepl("-mHKY", sim_task$get_arg("cmd")))
  expect_equivalent(sim_task$get_arg("trio_dists"),
                    c(0, 0, get_locus_length(model_hky()), 0, 0))
})


test_that("simulation with seq-gen works", {
  if (!has_seqgen()) skip("seqgen not installed")

  set.seed(100)
  sum_stats <- simulate(model_hky(), pars = c(tau = 1, theta = 10))
  expect_true(is.list(sum_stats))
  expect_true(is.array(sum_stats$jsfs))
  expect_true(sum(sum_stats$jsfs) > 0)

  set.seed(100)
  sum_stats2 <- simulate(model_hky(), pars = c(tau = 1, theta = 10))
  expect_equal(sum_stats2$jsfs, sum_stats$jsfs)
})


test_that("seqgen simulates long sequences", {
  if (!has_seqgen()) skip("seqgen not installed")
  stat <- simulate(model_hky() + locus_single(10000), pars = c(1, 5))
  expect_true(sum(stat$jsfs) > 1)
})


test_that("seqgen can simulate all example models", {
  if (!has_seqgen()) skip("seqgen not installed")
  set.seed(12)
  for (model in list(model_hky(), model_gtr())) {
    sum_stats <- simulate(model, pars = c(1, 5))
    expect_true(sum(sum_stats$jsfs) > 0)
  }
})


test_that("seqgen can simulate files", {
  if (!has_seqgen()) skip("seqgen not installed")
  tmp_dir <- tempfile("seqgen_tmp_files_test")
  stats <- simulate(model_hky() + sumstat_file(tmp_dir), pars = c(1, 5))
  expect_true(dir.exists(tmp_dir))
  expect_true(file.exists(stats$file))
  unlink(tmp_dir, recursive = TRUE)
})


test_that("seq-gen can simulate trios", {
  if (!has_seqgen()) skip("seqgen not installed")
  model <- model_gtr() +
    feat_recombination(5) +
    locus_trio(locus_length = c(10, 20, 10), distance = c(5, 5), number = 2) +
    locus_trio(locus_length = c(20, 10, 15), distance = c(7, 5)) +
    sumstat_seg_sites()

  sum.stats <- simulate(model, pars = c(1, 10))
  expect_true(sum(sum.stats$jsfs) <= sum(sapply(sum.stats$seg_sites, ncol)))
})


test_that("Error is thrown without an outgroup", {
  if (!has_seqgen()) skip("seqgen not installed")
  temp_files_before <- list.files(tempdir(), pattern = "^coala-[0-9]+-")

  model <- coal_model(c(3, 3), 10) +
    feat_mutation(par_range("theta", 5, 10), model = "HKY",
                  tstv_ratio = .5, base_frequencies = rep(.25, 4)) +
    feat_pop_merge(par_range("tau", .5, 1), 2, 1) +
    sumstat_jsfs()
  expect_error(simulate(model, pars = c(7.5, .75)))

  # Remove tempfiles that may remain because of error exit
  temp_files_after <- list.files(tempdir(), pattern = "^coala-[0-9]+-")
  temp_files_diff <- temp_files_after[!temp_files_after %in% temp_files_before]
  unlink(file.path(tempdir(), temp_files_diff))
})


test_that("a more complicated model works", {
  if (!has_seqgen()) skip("seqgen not installed")
  model <- coal_model(c(5, 5, 2), 1, 100) +
    feat_mutation(par_range("theta", .1, 40), model = "HKY",
                  base_frequencies = c(0.26, 0.20, 0.22, 0.32),
                  tstv_ratio = 1.26) +
    feat_migration(par_range("m12", 0.001, 5), 1, 2) +
    feat_migration(par_range("m21", 0.001, 5), 2, 1) +
    feat_size_change(par_range("q", 0.05, 40), population = 2, time = 0) +
    par_range("s1", 0.01, 2) + par_range("s2", 0.01, 2) +
    feat_growth(par_expr(log(1 / s1) / tau), population = 1, time = 0) +
    feat_growth(par_expr(log(q / s2) / tau), population = 2, time = 0) +
    feat_size_change(par_expr(s1 + s2), population = 1,
                     time = par_expr(tau)) +
    feat_pop_merge(par_range("tau", 0.001, 5), 2, 1) +
    feat_pop_merge(par_expr(2 * tau), 3, 1) +
    feat_recombination(par_const(10)) +
    feat_outgroup(3) +
    sumstat_jsfs()

  stat <- simulate(model, pars = c(10, 0.5, 0.6, 4.5, 0.2, 0.1, 0.4))
  expect_true(sum(stat$jsfs) > 0)
})


test_that("seqgen works with inter-locus variation", {
  if (!has_seqgen()) skip("seq-gen not installed")

  model_tmp <- coal_model(c(3, 3, 1), 2) +
    feat_pop_merge(2.0, 2, 1) +
    feat_pop_merge(3.0, 3, 1) +
    feat_recombination(1) +
    feat_outgroup(3) +
    feat_mutation(par_variation(5, 10), model = "GTR", gtr_rates = 1:6) +
    sumstat_jsfs()
  expect_true(has_variation(model_tmp))

  set.seed(1100)
  sum_stats <- simulate(model_tmp, pars = numeric(0))
  expect_is(sum_stats$jsfs, "matrix")
  expect_that(sum(sum_stats$jsfs), is_more_than(0))
  expect_equal(length(sum_stats$cmds), 2)
})


test_that("seq-gen works without recombination", {
  if (!has_seqgen()) skip("seq-gen not installed")
  model <- coal_model(c(3, 3, 1), 2) +
    feat_pop_merge(par_range("tau", 0.01, 5), 2, 1) +
    feat_pop_merge(par_expr("2*tau"), 3, 1) +
    feat_outgroup(3) +
    feat_mutation(par_range("theta", 1, 10), model = "GTR", gtr_rates = 1:6) +
    sumstat_jsfs()

  stats <- simulate(model, pars = c(1, 5))
  expect_is(stats, "list")
})


test_that("Printing the command works", {
  if (!has_seqgen()) skip("seq-gen not installed")
  cmd <- get_cmd(model_gtr())
  expect_that(cmd, is_a("character"))
  expect_equal(length(cmd), 2)
})


test_that("seqgen can added manually", {
  if (!has_seqgen()) skip("seqgen not installed")
  sg_bin <- get_simulator("seqgen")$get_info()["binary"]
  activate_seqgen(sg_bin, 99)
  expect_equal(get_simulator("seqgen")$get_priority(), 99)
  expect_error(use_seqgen(tempfile("not-existant")))
})



test_that("seq-gen work with msms", {
  if (!has_seqgen()) skip("seqgen not installed")
  if (!has_msms()) skip("msms not installed")

  model <- model_gtr() +
    feat_recombination(5) +
    locus_trio(locus_length = c(10, 20, 10), distance = c(5, 5), number = 2) +
    locus_trio(locus_length = c(20, 10, 15), distance = c(7, 5)) +
    sumstat_seg_sites() +
    feat_selection(strength_Aa = 1000, time = 0.05)

  sum.stats <- simulate(model, pars = c(1, 10))
  expect_true(sum(sum.stats$jsfs) <= sum(sapply(sum.stats$seg_sites, ncol)))
})


test_that("seq-gen works with zero-inflation", {
  if (!has_seqgen()) skip("seqgen not installed")

  model <- model_gtr() +
    feat_recombination(par_zero_inflation(5, .5)) +
    sumstat_seg_sites()

  sum.stats <- simulate(model, pars = c(1, 10))
  expect_true(sum(sum.stats$jsfs) <= sum(sapply(sum.stats$seg_sites, ncol)))
})

Try the coala package in your browser

Any scripts or data that you put into this service are public.

coala documentation built on Jan. 5, 2023, 5:11 p.m.