tests/testthat/test-sampling.R

msg <- "Cannot schedule sampling"

skip_if(!is_slendr_env_present())

init_env(quiet = TRUE)

test_that("sampling from a population which is not present is prevented (forward)", {
  map <- world(xrange = c(0, 100), yrange = c(0, 100), landscape = "blank")
  p1 <- population(name = "p1", time = 10, N = 1, center = c(1, 1), radius = 10, map = map, remove = 100)
  p2 <- population(name = "p2", parent = p1, time = 20, N = 1, center = c(1, 1), radius = 10, remove = 100)
  model <- compile_model(populations = list(p1, p2), path = tempdir(), generation_time = 1, resolution = 1, simulation_length = 1000, overwrite = TRUE, force = TRUE, competition = 1, mating = 1, dispersal = 1)
  expect_error(schedule_sampling(model, time = 15, list(p2, 3), strict = TRUE), msg) # pre-split
  expect_error(schedule_sampling(model, time = 1000, list(p2, 3), strict = TRUE), msg) # post-removal
})

test_that("sampling from a population which is not present is prevented (backward)", {
  map <- world(xrange = c(0, 100), yrange = c(0, 100), landscape = "blank")
  p1 <- population(name = "p1", time = 1000, N = 1, center = c(1, 1), radius = 10, map = map, remove = 100)
  p2 <- population(name = "p2", parent = p1, time = 900, N = 1, center = c(1, 1), radius = 10, remove = 100)
  model <- compile_model(populations = list(p1, p2), path = tempdir(), generation_time = 1, resolution = 1, simulation_length = 1000, overwrite = TRUE, force = TRUE, competition = 1, mating = 1, dispersal = 1)
  expect_error(schedule_sampling(model, time = 950, list(p2, 3), strict = TRUE), msg) # pre-split
  expect_error(schedule_sampling(model, time = 5, list(p2, 3), strict = TRUE), msg) # post-removal
})

test_that("invalid sampling results in a warning", {
  map <- world(xrange = c(0, 100), yrange = c(0, 100), landscape = "blank")
  p1 <- population(name = "p1", time = 10, N = 1, center = c(1, 1), radius = 10, map = map, remove = 100)
  p2 <- population(name = "p2", parent = p1, time = 20, N = 1, center = c(1, 1), radius = 10, remove = 100)
  model <- compile_model(populations = list(p1, p2), path = tempdir(), generation_time = 1, resolution = 1, simulation_length = 1000, overwrite = TRUE, force = TRUE, competition = 1, mating = 1, dispersal = 1)
  expect_warning(schedule_sampling(model, times = 10000, list(p1, 1), list(p2, 1)), "No valid sampling events were retained")
  suppressWarnings({res <- schedule_sampling(model, times = 10000, list(p1, 1), list(p2, 1))})
  expect_null(res)
})

msg <- "A sampling event was scheduled outside of the simulation time window"

test_that("sampling before a simulation start (forward)", {
  p1 <- population(name = "p1", time = 10, N = 1, remove = 100)
  model <- compile_model(populations = p1, path = tempdir(), generation_time = 1, simulation_length = 1000, overwrite = TRUE, force = TRUE)
  expect_error(schedule_sampling(model, time = 5, list(p1, 3), strict = TRUE), msg) # pre-split
  expect_error(schedule_sampling(model, time = 2000, list(p1, 3), strict = TRUE), msg) # post-removal
})

test_that("sampling before a simulation start (backward)", {
  p1 <- population(name = "p1", time = 1000, N = 1, remove = 100)
  model <- compile_model(populations = p1, path = tempdir(), generation_time = 1, simulation_length = 100, overwrite = TRUE, force = TRUE)
  expect_error(schedule_sampling(model, time = 1005, list(p1, 3), strict = TRUE), msg) # pre-split
  expect_error(schedule_sampling(model, time = 10, list(p1, 3), strict = TRUE), msg) # post-removal
})

test_that("sampling in the same generation of the split is prevented (forward)", {
  p1 <- population(name = "p1", time = 1, N = 1, remove = 50)
  model <- compile_model(populations = p1, path = tempdir(), generation_time = 25, simulation_length = 100, overwrite = TRUE, force = TRUE)
  expect_warning(schedule_sampling(model, time = 3, list(p1, 3)), "No valid sampling")
})

test_that("sampling in the same generation of the split is prevented (backward)", {
  p1 <- population(name = "p1", time = 100, N = 1, remove = 10)
  model <- compile_model(populations = p1, path = tempdir(), generation_time = 25, overwrite = TRUE, force = TRUE)
  expect_warning(schedule_sampling(model, time = 97, list(p1, 3)), "No valid sampling")
})

# spatial sampling --------------------------------------------------------

test_that("only locations within world bounds are valid", {
  map <- world(xrange = c(0, 100), yrange = c(0, 100), landscape = "blank")
  valid <- list(c(100, 100), c(0, 0), c(30, 5))
  invalid <- list(c(1000, 100), c(0, 0), c(30, 5))
  expect_error(check_location_bounds(invalid, map), "locations fall outside")
  expect_silent(check_location_bounds(valid, map))
})

test_that("sampling is as close to the a single specified position as possible", {
  skip_if(!is_slendr_env_present())

  n_samples <- 5
  times <- c(10, 100)
  locations <- list(c(50, 50))
  simulation_length <- 100
  locations_file <- normalizePath(tempfile(fileext = ".gz"), winslash = "/", mustWork = FALSE)

  map <- world(xrange = c(0, 100), yrange = c(0, 100), landscape = "blank")
  pop <- population("pop", 1, 100, map = map, center = c(50, 50), radius = 50)
  model <- compile_model(pop, path = tempdir(), generation_time = 1,
                   competition = 10, mating = 10, dispersal = 10,
                   simulation_length = simulation_length, resolution = 1, overwrite = TRUE, force = TRUE)

  samples <- schedule_sampling(model, times = times, locations = locations, list(pop, n_samples))
  ts <- slim(model, sequence_length = 1, recombination_rate = 0, samples = samples,
       method = "batch", locations = locations_file, verbose = FALSE)

  # load the locations of all individuals throughout the simulation, and filter
  # down to `n_sample` of the ones closest to the sampling location [50, 50] in
  # each time point (essentially replicating what we're doing on the SLiM
  # backend during the simulation run)
  locations <- locations_file %>%
    readr::read_tsv(show_col_types = FALSE) %>%
    dplyr::mutate(time = simulation_length - gen + 1) %>%
    dplyr::filter(time %in% times) %>%
    dplyr::group_by(time) %>%
    dplyr::mutate(distance = sqrt((50 - x)^2 + (50 - y)^2)) %>%
    dplyr::arrange(time, distance) %>%
    dplyr::mutate(i = 1:dplyr::n()) %>%
    dplyr::filter(i %in% 1:n_samples) %>%
    dplyr::ungroup() %>%
    dplyr::select(ind, time, x, y, distance) %>%
    dplyr::arrange(time, distance) %>%
    as.data.frame()

  # load locations of individuals remembered in the tree sequence
  individuals <- ts_nodes(ts) %>%
    dplyr::filter(sampled) %>%
    dplyr::select(-node_id) %>%
    dplyr::distinct() %>%
    dplyr::mutate(
      x = sf::st_coordinates(.)[, "X"],
      y = sf::st_coordinates(.)[, "Y"]
    ) %>%
    dplyr::as_tibble() %>%
    dplyr::select(ind_id, time, x, y) %>%
    dplyr::mutate(distance = sqrt((50 - x)^2 + (50 - y)^2)) %>%
    dplyr::arrange(time, distance) %>%
    as.data.frame()

  expect_true(all.equal(individuals$x, locations$x, tolerance = 0.001))
  expect_true(all.equal(individuals$y, locations$y, tolerance = 0.001))
  expect_true(all.equal(individuals$distance, locations$distance, tolerance = 0.001))
})

test_that("sampling is as close to the multiple specified positions as possible", {
  skip_if(!is_slendr_env_present())

  n_samples <- 5
  times <- c(10, 100)
  locations <- list(c(0, 0), c(100, 100))
  simulation_length <- 100
  locations_file <- normalizePath(tempfile(fileext = ".gz"), winslash = "/", mustWork = FALSE)

  map <- world(xrange = c(0, 100), yrange = c(0, 100), landscape = "blank")
  pop <- population("pop", 1, 100, map = map, center = c(50, 50), radius = 50)
  model <- compile_model(pop, path = tempdir(), generation_time = 1,
                   competition = 10, mating = 10, dispersal = 10,
                   simulation_length = simulation_length, resolution = 1, overwrite = TRUE, force = TRUE)

  samples <- rbind(
    schedule_sampling(model, times = times[1], locations = locations[1], list(pop, n_samples)),
    schedule_sampling(model, times = times[2], locations = locations[2], list(pop, n_samples))
  )

  ts <- slim(model, sequence_length = 1, recombination_rate = 0, samples = samples,
       method = "batch", locations = locations_file, verbose = FALSE)

  # load the locations of all individuals throughout the simulation, and filter
  # down to `n_sample` of the ones closest to the sampling location [50, 50] in
  # each time point (essentially replicating what we're doing on the SLiM
  # backend during the simulation run)
  all_locations <- locations_file %>%
    readr::read_tsv(show_col_types = FALSE) %>%
    dplyr::mutate(time = simulation_length - gen + 1)

  first_loc <- all_locations %>%
    dplyr::filter(time == times[1]) %>%
    dplyr::group_by(time) %>%
    dplyr::mutate(distance = sqrt((locations[[1]][1] - x)^2 + (locations[[1]][2] - y)^2)) %>%
    dplyr::arrange(time, distance) %>%
    dplyr::mutate(i = 1:dplyr::n()) %>%
    dplyr::filter(i %in% 1:n_samples) %>%
    dplyr::ungroup() %>%
    dplyr::select(ind, time, x, y, distance) %>%
    dplyr::arrange(time, distance) %>%
    as.data.frame()

  second_loc <- all_locations %>%
    dplyr::filter(time == times[2]) %>%
    dplyr::group_by(time) %>%
    dplyr::mutate(distance = sqrt((locations[[2]][1] - x)^2 + (locations[[2]][2] - y)^2)) %>%
    dplyr::arrange(time, distance) %>%
    dplyr::mutate(i = 1:dplyr::n()) %>%
    dplyr::filter(i %in% 1:n_samples) %>%
    dplyr::ungroup() %>%
    dplyr::select(ind, time, x, y, distance) %>%
    dplyr::arrange(time, distance) %>%
    as.data.frame()

  locs <- rbind(first_loc, second_loc)

  # load locations of individuals remembered in the tree sequence
  all_individuals <- ts_nodes(ts) %>%
    dplyr::filter(sampled) %>%
    dplyr::select(-node_id) %>%
    dplyr::distinct() %>%
    dplyr::mutate(
      x = sf::st_coordinates(.)[, "X"],
      y = sf::st_coordinates(.)[, "Y"]
    ) %>%
    dplyr::as_tibble() %>%
    dplyr::select(ind_id, time, x, y)

  first_individuals <- all_individuals %>%
    dplyr::filter(time == times[1]) %>%
    dplyr::mutate(distance = sqrt((locations[[1]][1] - x)^2 + (locations[[1]][2] - y)^2)) %>%
    dplyr::arrange(time, distance) %>%
    as.data.frame()

  second_individuals <- all_individuals %>%
    dplyr::filter(time == times[2]) %>%
    dplyr::mutate(distance = sqrt((locations[[2]][1] - x)^2 + (locations[[2]][2] - y)^2)) %>%
    dplyr::arrange(time, distance) %>%
    as.data.frame()

  individuals <- rbind(first_individuals, second_individuals)

  expect_true(all.equal(individuals$x, locs$x, tolerance = 0.001))
  expect_true(all.equal(individuals$y, locs$y, tolerance = 0.001))
  expect_true(all.equal(individuals$distance, locs$distance, tolerance = 0.001))
})

test_that("sampling locations may only be given for spatial models", {
  map <- world(xrange = c(0, 100), yrange = c(0, 100), landscape = "blank")
  p1 <- population(name = "p1", time = 10, N = 100)
  p2 <- population(name = "p2", parent = p1, time = 20, N = 100, center = c(1, 1), radius = 10)
  model <- compile_model(populations = list(p1, p2), generation_time = 1,
                         resolution = 1, simulation_length = 1000,
                         competition = 0, mating = 1, dispersal = 10)
  expect_error(
    schedule_sampling(model, time = 35, list(p2, 4), locations = list(c(75, 25)), strict = TRUE),
    "Sampling locations may only be specified for a spatial model"
  )
})

test_that("a mix of spatial and non-spatial samplings is not allowed for a single population", {
  skip_if(!is_slendr_env_present())

  map <- world(xrange = c(0, 100), yrange = c(0, 100), landscape = "blank")
  p1 <- population(name = "p1", time = 10, N = 100)
  p2 <- population(name = "p2", map = map, time = 20, N = 100, center = c(1, 1), radius = 10)
  suppressWarnings(
    model <- compile_model(populations = list(p1, p2), generation_time = 1,
                         resolution = 1, simulation_length = 1000,
                         competition = 0, mating = 1, dispersal = 10)
  )
  s1 <- schedule_sampling(model, time = 35, list(p1, 3), strict = TRUE)
  s2 <- schedule_sampling(model, time = 35, list(p2, 4), locations = list(c(75, 25)), strict = TRUE)
  # this gives error
  s3 <- schedule_sampling(model, time = 35, list(p2, 5), strict = TRUE)
  s <- rbind(s1, s2, s3)
  expect_error(
    slim(model, samples = s, sequence_length = 1000, recombination_rate = 0),
    "For each population, samples must be all spatial or all non-spatial.\nThis is not true for the following populations: p2"
  )
  # this passes
  s3 <- schedule_sampling(model, time = 35, list(p2, 5), locations = list(c(10, 15)), strict = TRUE)
  s <- rbind(s1, s2, s3)
  expect_s3_class(slim(model, samples = s, sequence_length = 1000, recombination_rate = 0), "slendr_ts")
})

test_that("sampling table is correctly adjusted after simplification (msprime)", {
  skip_if(!is_slendr_env_present())

  pop1 <- population("pop1", time = 1, N = 100)
  pop2 <- population("pop2", time = 10, N = 42, parent = pop1)
  model <- compile_model(list(pop1, pop2), generation_time = 1, simulation_length = 1000, direction = "forward")

  # original tree sequence can be saved and loaded with or without the model
  tmp_big <- normalizePath(tempfile(), winslash = "/", mustWork = FALSE)
  ts_big <- msprime(model, sequence_length = 1000, recombination_rate = 0)
  ts_write(ts_big, tmp_big)
  expect_s3_class(ts_big_model <- ts_read(tmp_big, model), "slendr_ts")
  expect_s3_class(ts_big_nomodel <- ts_read(tmp_big), "slendr_ts")
  expect_true(nrow(ts_samples(ts_big_model)) == 142)
  expect_error(ts_samples(ts_big_nomodel), "Sampling schedule can only be extracted")

  # 'simple' simplified tree sequence can be saved and loaded with or without the model
  tmp_small1 <- normalizePath(tempfile(), winslash = "/", mustWork = FALSE)
  suppressWarnings(ts_simplify(ts_big) %>% ts_write(tmp_small1))
  expect_s3_class(ts_small1_model <- ts_read(tmp_small1, model), "slendr_ts")
  expect_s3_class(ts_small1_nomodel <- ts_read(tmp_small1), "slendr_ts")
  expect_true(nrow(ts_samples(ts_small1_model)) == 142)
  expect_error(ts_samples(ts_small1_nomodel), "Sampling schedule can only be extracted")

  # tree sequence simplified to a subset can be saved and loaded with or without the model
  tmp_small2 <- normalizePath(tempfile(), winslash = "/", mustWork = FALSE)
  ts_simplify(ts_big, simplify_to = c("pop1_1", "pop1_2", "pop1_3", "pop2_5")) %>% ts_write(tmp_small2)
  expect_s3_class(ts_small2_model <- ts_read(tmp_small2, model), "slendr_ts")
  expect_s3_class(ts_small2_nomodel <- ts_read(tmp_small2), "slendr_ts")
  expect_true(nrow(ts_samples(ts_small2_model)) == 4)
  expect_error(ts_samples(ts_small2_nomodel), "Sampling schedule can only be extracted")
})

test_that("sampling table is correctly adjusted after simplification (SLiM)", {
  skip_if(!is_slendr_env_present())

  pop1 <- population("pop1", time = 1, N = 100)
  pop2 <- population("pop2", time = 10, N = 42, parent = pop1)
  model <- compile_model(list(pop1, pop2), generation_time = 1, simulation_length = 1000, direction = "forward")

  # original tree sequence can be saved and loaded with or without the model
  tmp_big <- normalizePath(tempfile(), winslash = "/", mustWork = FALSE)
  ts_big <- slim(model, sequence_length = 1000, recombination_rate = 0) %>% ts_recapitate(recombination_rate = 0, Ne = 100)
  ts_write(ts_big, tmp_big)
  expect_s3_class(ts_big_model <- ts_read(tmp_big, model), "slendr_ts")
  expect_s3_class(ts_big_nomodel <- ts_read(tmp_big), "slendr_ts")
  expect_true(nrow(ts_samples(ts_big_model)) == 142)
  expect_error(ts_samples(ts_big_nomodel), "Sampling schedule can only be extracted")

  # 'simple' simplified tree sequence can be saved and loaded with or without the model
  tmp_small1 <- normalizePath(tempfile(), winslash = "/", mustWork = FALSE)
  suppressWarnings(ts_simplify(ts_big) %>% ts_write(tmp_small1))
  expect_s3_class(ts_small1_model <- ts_read(tmp_small1, model), "slendr_ts")
  expect_s3_class(ts_small1_nomodel <- ts_read(tmp_small1), "slendr_ts")
  expect_true(nrow(ts_samples(ts_small1_model)) == 142)
  expect_error(ts_samples(ts_small1_nomodel), "Sampling schedule can only be extracted")

  # tree sequence simplified to a subset can be saved and loaded with or without the model
  tmp_small2 <- normalizePath(tempfile(), winslash = "/", mustWork = FALSE)
  ts_simplify(ts_big, simplify_to = c("pop1_1", "pop1_2", "pop1_3", "pop2_5")) %>% ts_write(tmp_small2)
  expect_s3_class(ts_small2_model <- ts_read(tmp_small2, model), "slendr_ts")
  expect_s3_class(ts_small2_nomodel <- ts_read(tmp_small2), "slendr_ts")
  expect_true(nrow(ts_samples(ts_small2_model)) == 4)
  expect_error(ts_samples(ts_small2_nomodel), "Sampling schedule can only be extracted")
})

test_that("all sampled populations must be present in the compiled model", {
  pA <- population("pA", time = 1, N = 1000)
  pB <- population("pB", time = 300, N = 1000, parent = pA)
  pC <- population("pC", time = 600, N = 1000, parent = pB)
  pD <- population("pD", time = 800, N = 1000, parent = pA)
  pE <- population("pE", time = 1300, N = 1000, parent = pC)

  model <- compile_model(list(pA, pB, pC, pD, pE), generation_time = 1, simulation_length = 2000)

  p1 <- population("p1", time = 1, N = 1000)
  p2 <- population("p2", time = 300, N = 1000, parent = p1)
  p3 <- population("p3", time = 600, N = 1000, parent = p2)
  p4 <- population("p4", time = 800, N = 1000, parent = p1)
  p5 <- population("p5", time = 1300, N = 1000, parent = p4)

  expect_error(
    schedule_sampling(model, times = 2000, list(p1, 10), list(p2, 10), list(p3, 10), list(p4, 10), list(p5, 10)),
    "The following sampled populations are not part of the model: p1, p2, p3, p4, p5"
  )

  expect_error(
    schedule_sampling(model, times = 2000, list(pA, 10), list(p2, 10), list(pC, 10), list(p4, 10), list(pD, 10)),
    "The following sampled populations are not part of the model: p2, p4"
  )

  expect_error(
    schedule_sampling(model, times = 2000, list(pA, 10), list(pB, 10), list(pC, 10), list(p4, 10), list(pD, 10)),
    "The following sampled populations are not part of the model: p4"
  )

  expect_s3_class(
    schedule_sampling(model, times = 2000, list(pA, 10), list(pB, 10), list(pC, 10), list(pD, 10), list(pD, 10)),
    "data.frame"
  )
})
bodkan/slendr documentation built on Dec. 19, 2024, 11:41 p.m.