skip_if(!is_slendr_env_present())
init_env(quiet = TRUE)
set.seed(42)
simulate_slim_ts <- function(N) {
script_file <- normalizePath(tempfile(), winslash = "/", mustWork = FALSE)
ts_file <- normalizePath(tempfile(), winslash = "/", mustWork = FALSE)
writeLines(sprintf('initialize() {
setSeed(42);
initializeTreeSeq();
initializeMutationRate(0);
initializeMutationType("m1", 0.5, "f", 0.0);
initializeGenomicElementType("g1", m1, 1.0);
initializeGenomicElement(g1, 0, 1e5);
initializeRecombinationRate(1e-8);
}
1 early() {
sim.addSubpop("p0", %d);
}
100 late() {
sim.treeSeqRememberIndividuals(sim.subpopulations.individuals);
sim.treeSeqOutput("%s");
}
', N, ts_file), script_file)
binary <- get_binary("batch")
system2(binary, script_file, stdout = FALSE)
ts_file
}
msprime_ts_sim_ancestry <- function(N) {
msprime_file <- normalizePath(tempfile(), winslash = "/", mustWork = FALSE)
slendr:::msp$sim_ancestry(N)$dump(msprime_file)
msprime_file
}
msprime_ts_simulate <- function(N) {
msprime_file <- normalizePath(tempfile(), winslash = "/", mustWork = FALSE)
slendr:::msp$simulate(as.integer(N))$dump(msprime_file)
msprime_file
}
# Make sure the unsimplified tree sequence has the same set of nodes as
# what we extracted with ts_nodes (ssampled and all)
compare_ts_nodes <- function(ts, N) {
data <- ts_nodes(ts)
expect_true(nrow(data) == ts$num_nodes)
expect_true(nrow(data[data$sampled, ]) == ts$num_samples)
if (any(!is.na(data$ind_id))) expect_true(nrow(data[data$sampled, ]) == 2 * N)
ts2 <- ts_simplify(ts)
data2 <- ts_nodes(ts2)
# make sure the same holds also for a simplified tree sequence
expect_true(nrow(data2) == ts2$num_nodes)
expect_true(sum(data2$sampled) == ts2$num_samples)
if (any(!is.na(data$ind_id))) expect_true(sum(data2$sampled) == 2 * N)
}
# Make sure the extracted phylo tree structure is the same as what is encoded
# in the edge table
compare_ts_phylo <- function(ts, N) {
ts2 <- ts_simplify(ts)
tskit_tree <- ts2$at(0)
tree <- ts_phylo(ts2, i = 0, quiet = TRUE)
tree_data <- ts_nodes(tree)
# make sure that the converted phylo tree has the same number of edges and
# nodes as the original tskit Tree object
expect_true(nrow(tree$edge) == tskit_tree$num_edges)
expect_true((tree$Nnode + length(tree$tip.label)) == length(reticulate::iterate(tskit_tree$nodes(tskit_tree$root))))
# the root must be the same node as well
phylo_root <- length(tree$tip.label) + 1
tskit_root <- tskit_tree$root
expect_true(which(tree_data$node_id == tskit_root) == which(tree_data$phylo_id == phylo_root))
expect_true(length(tree$tip.label) == nrow(tree_data[tree_data$sampled, ]))
}
# SLiM --------------------------------------------------------------------
test_that("non-slendr SLiM ts_nodes corresponds to the expected outcome", {
N <- 5
ts_file <- simulate_slim_ts(N)
suppressMessages(ts <- ts_read(ts_file))
compare_ts_nodes(ts, N)
})
test_that("non-slendr SLiM simplified ts_nodes corresponds to the expected outcome", {
N <- 500
ts_file <- simulate_slim_ts(N)
suppressMessages(ts <- ts_read(ts_file))
simplify_to <- ts_nodes(ts) %>% dplyr::filter(sampled) %>% dplyr::pull(node_id) %>% sample(3)
expect_warning(ts2 <- ts_simplify(ts, simplify_to = simplify_to),
"Simplifying a non-recapitated tree sequence")
expect_silent(ts2 <- ts_recapitate(ts, Ne = 100, recombination_rate = 1e-8) %>%
ts_simplify(simplify_to = simplify_to))
# make sure that the simplified nodes match the pedigree_id values in the
# original tree-sequence data
orig_ids <- ts_nodes(ts) %>% dplyr::filter(node_id %in% simplify_to) %>% dplyr::pull(pedigree_id)
simplified_ids <- ts_nodes(ts2) %>% dplyr::filter(sampled) %>% dplyr::pull(pedigree_id)
expect_true(all(orig_ids == simplified_ids))
})
test_that("non-slendr SLiM ts_phylo corresponds to the expected outcome", {
N <- 500
ts_file <- simulate_slim_ts(N)
suppressMessages(ts <- ts_read(ts_file) %>% ts_recapitate(recombination_rate = 1e-8, Ne = 100))
compare_ts_phylo(ts, N)
})
test_that("non-slendr SLiM ts_nodes can be recapitated", {
N <- 10000
ts_file <- simulate_slim_ts(N)
suppressMessages(ts <- ts_read(ts_file) %>% ts_recapitate(Ne = 100, recombination_rate = 1e-8))
expect_silent(compare_ts_nodes(ts, N))
})
test_that("non-slendr SLiM ts_nodes carries correct population names", {
ts_file <- simulate_slim_ts(50)
suppressMessages(ts <- ts_read(ts_file))
expect_true(unique(ts_nodes(ts)$pop) == "p0")
})
# msprime tree sequences (sim_ancestry) -------------------------------------
test_that("non-slendr msprime simplification on its own gives warning", {
N <- 5
ts_file <- msprime_ts_sim_ancestry(N)
suppressMessages(ts <- ts_read(ts_file))
compare_ts_nodes(ts, N)
})
test_that("non-slendr SLiM simplified ts_nodes corresponds to the expected outcome", {
N <- 5
ts_file <- msprime_ts_sim_ancestry(N)
suppressMessages(ts <- ts_read(ts_file))
simplify_to <- ts_nodes(ts) %>% dplyr::filter(sampled) %>% dplyr::pull(node_id) %>% sample(3)
ts2 <- ts_simplify(ts, simplify_to = simplify_to)
# there are no fixed pedigree_id values, so let's check that the number of
# "sampled" nodes corresponds to the number of nodes used for simplification
expect_true(sum(ts_nodes(ts2)$sampled) == 3)
})
test_that("non-slendr msprime ts_phylo corresponds to the expected outcome", {
N <- 5
ts_file <- msprime_ts_sim_ancestry(N)
suppressMessages(ts <- ts_read(ts_file))
compare_ts_phylo(ts, N)
})
test_that("non-slendr msprime ts_phylo (simplified) corresponds to the expected outcome", {
N <- 5
ts_file <- msprime_ts_sim_ancestry(N)
suppressMessages(ts <- ts_read(ts_file))
simplify_to <- ts_nodes(ts) %>% dplyr::filter(sampled) %>% dplyr::pull(node_id) %>% sample(3)
ts2 <- ts_simplify(ts, simplify_to = simplify_to)
compare_ts_phylo(ts2, N)
})
test_that("non-slendr msprime ts_nodes carries correct population names", {
ts_file <- msprime_ts_sim_ancestry(50)
suppressMessages(ts <- ts_read(ts_file))
expect_true(unique(ts_nodes(ts)$pop) == "pop_0")
})
# msprime tree sequences (simulate) -------------------------------------
test_that("non-slendr msprime simplification on its own gives warning (simulate)", {
N <- 5
ts_file <- msprime_ts_simulate(N)
suppressMessages(ts <- ts_read(ts_file))
compare_ts_nodes(ts, N)
})
test_that("non-slendr SLiM simplified ts_nodes corresponds to the expected outcome (simulate)", {
N <- 5
ts_file <- msprime_ts_simulate(N)
suppressMessages(ts <- ts_read(ts_file))
simplify_to <- ts_nodes(ts) %>% dplyr::filter(sampled) %>% dplyr::pull(node_id) %>% sample(3)
ts2 <- ts_simplify(ts, simplify_to = simplify_to)
# there are no fixed pedigree_id values, so let's check that the number of
# "sampled" nodes corresponds to the number of nodes used for simplification
expect_true(sum(ts_nodes(ts2)$sampled) == 3)
})
test_that("non-slendr msprime ts_phylo corresponds to the expected outcome (simulate)", {
N <- 5
ts_file <- msprime_ts_simulate(N)
suppressMessages(ts <- ts_read(ts_file))
compare_ts_phylo(ts, N)
})
test_that("non-slendr msprime ts_phylo (simplified) corresponds to the expected outcome (simulate)", {
N <- 5
ts_file <- msprime_ts_simulate(N)
suppressMessages(ts <- ts_read(ts_file))
simplify_to <- ts_nodes(ts) %>% dplyr::filter(sampled) %>% dplyr::pull(node_id) %>% sample(3)
ts2 <- ts_simplify(ts, simplify_to = simplify_to)
compare_ts_phylo(ts2, N)
})
test_that("non-slendr msprime ts_nodes carries correct population names (simulate)", {
ts_file <- msprime_ts_simulate(50)
suppressMessages(ts <- ts_read(ts_file))
expect_true(unique(ts_nodes(ts)$pop) == "0")
})
# SLiM tskit statistics interface -----------------------------------------
test_that("tskit statistics interface works on non-slendr SLiM outputs", {
script_file <- normalizePath(tempfile(), winslash = "/", mustWork = FALSE)
ts_file <- normalizePath(tempfile(), winslash = "/", mustWork = FALSE)
writeLines(sprintf('initialize() {
setSeed(123);
initializeTreeSeq();
initializeMutationRate(0);
initializeMutationType("m1", 0.5, "f", 0.0);
initializeGenomicElementType("g1", m1, 1.0);
initializeGenomicElement(g1, 0, 1e6);
initializeRecombinationRate(1e-8);
}
1 early() {
sim.addSubpop("p1", 10);
}
1000 early() {
sim.addSubpopSplit("p2", 500, p1);
}
3000 early() {
sim.addSubpopSplit("p3", 2500, p1);
}
5000 early() {
sim.addSubpopSplit("p4", 10000, p1);
}
20000 late() {
sim.treeSeqOutput("%s");
}
', ts_file), script_file)
binary <- get_binary("batch")
system2(binary, script_file, stdout = FALSE)
suppressMessages(
ts <- ts_read(ts_file) %>%
ts_recapitate(Ne = 10, recombination_rate = 1e-8) %>%
ts_simplify() %>%
ts_mutate(mutation_rate = 1e-7)
)
data <- ts_nodes(ts) %>% dplyr::filter(sampled)
groups <- split(data$node_id, data$pop)
# diversity must increase p1 -> p2 -> p3 -> p4
diversity <- ts_diversity(ts, sample_sets = groups)
expect_true(all(diff(diversity$diversity) > 0))
# divergence from p1 must decrease p2 -> p3 -> p4
divergence <- ts_divergence(ts, sample_sets = groups)
expect_true(all((divergence %>% dplyr::filter(x == 1) %>% .$divergence %>% diff) < 0))
expect_true(all(sort(unique(ts_nodes(ts)$pop) == c("p1", "p2", "p3", "p4"))))
})
test_that("ts_ibd() on nonspatial SLiM tree sequences works with coordinates = (T|F)", {
ts_file <- simulate_slim_ts(1000)
suppressMessages(ts <- ts_read(ts_file))
suppressWarnings(ibd_totals <- ts_ibd(ts, coordinates = FALSE))
suppressWarnings(ibd_fragments <- ts_ibd(ts, coordinates = TRUE))
# compute IBD totals from individual fragments manually
ibd_totals2 <-
dplyr::group_by(ibd_fragments, node1, node2, node1_time, node2_time) %>%
dplyr::summarise(count = dplyr::n(), total = sum(length), .groups = "keep") %>%
dplyr::select(node1, node2, count, total, node1_time, node2_time) %>%
dplyr::ungroup()
expect_equal(ibd_totals, ibd_totals2)
})
test_that("ts_ibd() on nonspatial msprime tree sequences works with coordinates = (T|F)", {
ts_file <- msprime_ts_sim_ancestry(100)
suppressMessages(ts <- ts_read(ts_file))
suppressWarnings(ibd_totals <- ts_ibd(ts, coordinates = FALSE))
suppressWarnings(ibd_fragments <- ts_ibd(ts, coordinates = TRUE))
# compute IBD totals from individual fragments manually
ibd_totals2 <-
dplyr::group_by(ibd_fragments, node1, node2, node1_time, node2_time) %>%
dplyr::summarise(count = dplyr::n(), total = sum(length), .groups = "keep") %>%
dplyr::select(node1, node2, count, total, node1_time, node2_time) %>%
dplyr::ungroup()
expect_equal(ibd_totals, ibd_totals2)
})
test_that("ts_ibd() on non-spatial msprime tree sequences gives a correct object", {
ts_file <- msprime_ts_sim_ancestry(100)
suppressMessages(ts <- ts_read(ts_file))
ibd_sf <- ts_ibd(ts, coordinates = FALSE, minimum_length = 0)
ibd_nosf <- ts_ibd(ts, coordinates = FALSE, minimum_length = 0, sf = FALSE)
# returned object is never of a sf class as it's not from a spatial tree sequence
expect_true(!inherits(ibd_sf, "sf"))
expect_true(!inherits(ibd_nosf, "sf"))
# except for the spatial columns, the IBD results are the same
expect_equal(ibd_sf, ibd_nosf)
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.