tests/testthat/test-create-population.R

context("Pedigrees and haplotypes")

test_pop <- test_create_population()

test_that("test_create_population works", {
  expect_failure(expect_null(test_pop))
  expect_output(print(test_pop), regexp = "^Population with 12 individuals$")
  expect_equal(pop_size(test_pop), 12L)
})

indvs <- get_individuals(test_pop)
test_that("get_individuals works", {
  expect_failure(expect_null(indvs))
  expect_equal(length(indvs), 12L)
})

peds <- build_pedigrees(test_pop, progress = FALSE)
test_that("build_pedigrees works", {
  expect_output(print(peds), regexp = "^List of 2 pedigrees \\(of size 11, 1\\)$")
  expect_equal(pedigrees_count(peds), 2L)
})
ped <- peds[[1L]]
pids <- sort(get_pids_in_pedigree(ped))

test_that("pedigree pids works", {
  expect_equal(length(pids), 11L)
  expect_true(all(pids == 1L:11L))
  
  expect_equal(length(get_pids_in_pedigree(peds[[2L]])), 1L)
})

test_that("meiotic_dist works", {
  expect_equal(0L, meiotic_dist(get_individual(test_pop, pid = 1L), 
                                get_individual(test_pop, pid = 1L)))
  
  expect_equal(1L, meiotic_dist(get_individual(test_pop, pid = 1L), 
                                get_individual(test_pop, pid = 6L)))
  
  expect_equal(4L, meiotic_dist(get_individual(test_pop, pid = 1L), 
                                get_individual(test_pop, pid = 10L)))
  
  expect_equal(-1L, meiotic_dist(get_individual(test_pop, pid = 1L), 
                                 get_individual(test_pop, pid = 12L)))
})



LOCI <- 5L

pedigrees_all_populate_haplotypes(peds, loci = LOCI, mutation_rates = rep(0L, LOCI), progress = FALSE)
test_that("pedigrees_all_populate_haplotypes works", {
  expect_output(print(peds), regexp = "^List of 2 pedigrees \\(of size 11, 1\\)$")
})

test_that("haplotype_matches_individuals works", {
  expect_equal(length(indvs), length(haplotype_matches_individuals(indvs, rep(0L, LOCI))))
  expect_equal(lapply(indvs, get_pid), lapply(haplotype_matches_individuals(indvs, rep(0L, LOCI)), get_pid))
})

test_that("count_haplotype_occurrences_individuals works", {
  expect_equal(12L, count_haplotype_occurrences_individuals(indvs, rep(0L, LOCI)))
  expect_equal(0L, count_haplotype_occurrences_individuals(indvs, rep(1L, LOCI)))
  
  expect_equal(11L, count_haplotype_occurrences_pedigree(ped, rep(0L, LOCI)))
  expect_equal(5L, count_haplotype_occurrences_pedigree(ped, 
                                                        rep(0L, LOCI), 
                                                        generation_upper_bound_in_result = 0L))
  expect_equal(5L+3L, count_haplotype_occurrences_pedigree(ped, 
                                                           rep(0L, LOCI), 
                                                           generation_upper_bound_in_result = 1L))
  expect_equal(5L+3L+2L, count_haplotype_occurrences_pedigree(ped, 
                                                              rep(0L, LOCI), 
                                                              generation_upper_bound_in_result = 2L))
})

mei_res <- pedigree_haplotype_matches_in_pedigree_meiosis_L1_dists(
  suspect = get_individual(test_pop, pid = 1L), 
  generation_upper_bound_in_result = -1L)
mei_res <- mei_res[order(mei_res[, 3L]), ] # order by pid
meioses <- mei_res[, 1L]
test_that("pedigree_haplotype_matches_in_pedigree_meiosis_L1_dists works", {
  expect_equal(mei_res[, 3L], 1L:11L) # pids ordered
  expect_true(all(mei_res[, 2L] == 0L)) # max L1 == 0
  
  # meioses in meioses[pid]
  expect_equal(length(meioses), 11L)
  expect_equal(meioses[1L], 0L) # no. meioses between pid = 1 and pid = 1 is 0...
  expect_equal(meioses[2L], 4L) # no. meioses between pid = 1 and pid = 2 is 4...
  expect_equal(meioses[3L], 4L)
  expect_equal(meioses[4L], 6L)
  expect_equal(meioses[5L], 6L)
  expect_equal(meioses[6L], 1L)
  expect_equal(meioses[7L], 3L)
  expect_equal(meioses[8L], 5L)
  expect_equal(meioses[9L], 2L)
  expect_equal(meioses[10L], 4L)
  expect_equal(meioses[11L], 3L)
})












haps_from_ped <- get_haplotypes_in_pedigree(ped)
haps_from_pids <- get_haplotypes_pids(test_pop, pids)
haps_from_indvs <- get_haplotypes_individuals(indvs)
hap_from_indv <- lapply(pids, function(pid) get_haplotype(get_individual(test_pop, pid)))

test_that("pedigrees_all_populate_haplotypes haplotypes works", {
  #haps_from_ped
  expect_true(is.list(haps_from_ped))
  expect_equal(length(haps_from_ped), 11L)
  expect_equal(length(haps_from_ped[[1L]]), LOCI)
  expect_true(all(unlist(haps_from_ped) == 0L))
  
  #haps_from_pids
  expect_true(is.matrix(haps_from_pids))
  expect_equal(nrow(haps_from_pids), 11L)
  expect_equal(ncol(haps_from_pids), LOCI)
  
  #haps_from_indvs
  expect_equal(nrow(haps_from_indvs), 12L)
  expect_equal(unique(c(haps_from_indvs)), 0L)
  
  #hap_from_indv
  expect_equal(haps_from_ped, hap_from_indv)
})




test_that("pedigrees_all_populate_haplotypes_ladder_bounded error works", {
  #haps_from_ped
  expect_error(
    pedigrees_all_populate_haplotypes_ladder_bounded(peds, 
                                                     mutation_rates = rep(1, LOCI), 
                                                     ladder_min = rep(10L, LOCI), 
                                                     ladder_max = rep(10L, LOCI), 
                                                     get_founder_haplotype = function() rep(10L, LOCI),
                                                     progress = FALSE)
  )
})


pedigrees_all_populate_haplotypes_ladder_bounded(peds, 
                                                 mutation_rates = rep(0, LOCI), 
                                                 ladder_min = rep(10L, LOCI), 
                                                 ladder_max = rep(11L, LOCI), 
                                                 get_founder_haplotype = function() rep(10L, LOCI),
                                                 progress = FALSE)

haps_from_ped <- get_haplotypes_in_pedigree(ped)
haps_from_pids <- get_haplotypes_pids(test_pop, pids)
haps_from_indvs <- get_haplotypes_individuals(indvs)
hap_from_indv <- lapply(pids, function(pid) get_haplotype(get_individual(test_pop, pid)))

test_that("pedigrees_all_populate_haplotypes_ladder_bounded haplotypes works", {
  #haps_from_ped
  expect_true(is.list(haps_from_ped))
  expect_equal(length(haps_from_ped), 11L)
  expect_equal(length(haps_from_ped[[1L]]), LOCI)
  expect_true(all(unlist(haps_from_ped) == 10L))
  
  #haps_from_pids
  expect_true(is.matrix(haps_from_pids))
  expect_equal(nrow(haps_from_pids), 11L)
  expect_equal(ncol(haps_from_pids), LOCI)
  
  #haps_from_indvs
  expect_equal(nrow(haps_from_indvs), 12L)
  expect_equal(unique(c(haps_from_indvs)), 10L)
  
  #hap_from_indv
  expect_equal(haps_from_ped, hap_from_indv)
})



#####################################

set.seed(1)
pedigrees_all_populate_haplotypes_ladder_bounded(peds, 
                                                 mutation_rates = rep(1, LOCI), 
                                                 ladder_min = rep(10L, LOCI), 
                                                 ladder_max = rep(20L, LOCI), 
                                                 get_founder_haplotype = function() rep(10L, LOCI),
                                                 progress = FALSE)

haps_from_pids <- get_haplotypes_pids(test_pop, pids)
hap_fac <- factor(apply(haps_from_pids, 1, paste0, collapse = ";"))
hap_hash <- as.integer(hap_fac)
hashes <- haplotypes_to_hashes(test_pop, pids)

test_that("hashes works", {
  expect_equal(length(hap_fac), length(hashes))
  for (j in seq_along(hap_hash)) {
    #j <- 1
    #j <- 2
    is <- which(hap_hash == hap_hash[j])
    ks <- which(hashes == hashes[j])
    expect_equal(is, ks)
  }
})

pids_split_hap <- lapply(split_by_haplotypes(test_pop, pids), sort)
x <- lapply(split(pids, hap_fac), sort)
names(x) <- NULL

# Order list in some way that make them comparable:
#  Here, first by size and then by their max element; ensures unique ordering 
#  because all elements are pids and are hence unique integers.
ord_1 <- order(sapply(pids_split_hap, length), sapply(pids_split_hap, max))
ord_2 <- order(sapply(x, length), sapply(x, max))
pids_split_hap <- pids_split_hap[ord_1]
x <- x[ord_2]

test_that("split pid by haplotype works", {
  expect_equal(length(hap_fac), length(hashes))
})






###################################################
# Mutate distance 1
###################################################


LOCI <- 5L

set.seed(20180903)
pedigrees_all_populate_haplotypes(peds, loci = LOCI, mutation_rates = rep(0.1, LOCI), prob_two_step = 0, progress = FALSE)
if (FALSE) {
  plot(peds[[1]], haplotypes = TRUE)
}
test_that("pedigrees_all_populate_haplotypes mut step 1 works", {
  expect_equal(sort(unique(unlist(get_haplotypes_in_pedigree(peds[[1]])))), c(-1L, 0L, 1L))
})

###################

get_founder_h <- function() rep(-1L, LOCI)

set.seed(20180903)
pedigrees_all_populate_haplotypes_custom_founders(peds, 
                                                  mutation_rates = rep(0.5, LOCI), 
                                                  get_founder_haplotype = get_founder_h,
                                                  prob_two_step = 0, 
                                                  progress = FALSE)
if (FALSE) {
  plot(peds[[1]], haplotypes = TRUE)
}
as <- sort(unique(unlist(get_haplotypes_in_pedigree(peds[[1]]))))
test_that("pedigrees_all_populate_haplotypes_custom_founders mut step 1 works", {
  expect_equal(as, min(as):max(as))
})

#-------------

set.seed(20180903)
pedigrees_all_populate_haplotypes_ladder_bounded(peds, 
                                                 mutation_rates = rep(0.5, LOCI), 
                                                 ladder_min = rep(-4L, LOCI),
                                                 ladder_max = rep(4L, LOCI),
                                                 get_founder_haplotype = get_founder_h,
                                                 prob_two_step = 0, 
                                                 progress = FALSE)
if (FALSE) {
  plot(peds[[1]], haplotypes = TRUE)
  sort(unique(unlist(get_haplotypes_in_pedigree(peds[[1]]))))
}
as <- sort(unique(unlist(get_haplotypes_in_pedigree(peds[[1]]))))
test_that("pedigrees_all_populate_haplotypes_ladder_bounded mut step 1 works", {
  expect_equal(as, min(as):max(as))
})

###################################################
# Mutate distance 2
###################################################

LOCI <- 5L

set.seed(20180903)
pedigrees_all_populate_haplotypes(peds, loci = LOCI, mutation_rates = rep(0.1, LOCI), prob_two_step = 1, progress = FALSE)
if (FALSE) {
  plot(peds[[1]], haplotypes = TRUE)
}
test_that("pedigrees_all_populate_haplotypes mut step 2 works", {
  expect_equal(sort(unique(unlist(get_haplotypes_in_pedigree(peds[[1]])))), c(-2L, 0L, 2L))
})

###################

get_founder_h <- function() rep(-1L, LOCI)

set.seed(20180903)
pedigrees_all_populate_haplotypes_custom_founders(peds, 
                                                 mutation_rates = rep(0.5, LOCI), 
                                                 get_founder_haplotype = get_founder_h,
                                                 prob_two_step = 1, 
                                                 progress = FALSE)
if (FALSE) {
  plot(peds[[1]], haplotypes = TRUE)
}
# Start at -1, mutate 2, always odd (or == 1 mod 2)
test_that("pedigrees_all_populate_haplotypes_custom_founders mut step 2 works", {
  expect_true(all(sort(unique(unlist(get_haplotypes_in_pedigree(peds[[1]])))) %% 2 == 1L))
})

#-------------

set.seed(20180903)
pedigrees_all_populate_haplotypes_ladder_bounded(peds, 
                                                 mutation_rates = rep(0.5, LOCI), 
                                                 ladder_min = rep(-4L, LOCI),
                                                 ladder_max = rep(4L, LOCI),
                                                 get_founder_haplotype = get_founder_h,
                                                 prob_two_step = 1, 
                                                 progress = FALSE)
if (FALSE) {
  plot(peds[[1]], haplotypes = TRUE)
  sort(unique(unlist(get_haplotypes_in_pedigree(peds[[1]]))))
}
# Start at -1, mutate 2, ladder -4 to 4, only -3, -1, 1, 3 possible
test_that("pedigrees_all_populate_haplotypes_ladder_bounded mut step 2 works", {
  expect_equal(sort(unique(unlist(get_haplotypes_in_pedigree(peds[[1]])))), c(-3L, -1L, 1L, 3L))
})




###################################################
# Genealogical error
###################################################

LOCI <- 5L

if (FALSE) {
  set.seed(20190320)
  pedigrees_all_populate_haplotypes(peds, loci = LOCI, mutation_rates = rep(.2, LOCI), prob_genealogical_error = 0.5, progress = FALSE)
  plot(peds[[1]], haplotypes = TRUE)
}

#-------------

set.seed(20190320)
pedigrees_all_populate_haplotypes(peds, loci = LOCI, mutation_rates = rep(1, LOCI), prob_genealogical_error = 1, progress = FALSE)
if (FALSE) {
  plot(peds[[1]], haplotypes = TRUE)
}
test_that("pedigrees_all_populate_haplotypes genealogical error", {
  # Max one mutation at each locus because prob_genealogical_error = 1 so each get 0 haplotype with mutations.
  expect_equal(sort(unique(unlist(get_haplotypes_in_pedigree(peds[[1]])))), c(-1L, 0L, 1L))
})

#-------------

get_founder_h <- function() rep(-10L, LOCI)

set.seed(20190320)
pedigrees_all_populate_haplotypes_custom_founders(peds, 
                                                  mutation_rates = rep(0.5, LOCI), 
                                                  get_founder_haplotype = get_founder_h,
                                                  prob_genealogical_error = 1,
                                                  progress = FALSE)
test_that("pedigrees_all_populate_haplotypes_custom_founders genealogical error", {
  # Max one mutation at each locus because prob_genealogical_error = 1 so each get founder haplotype with mutations.
  expect_equal(sort(unique(unlist(get_haplotypes_in_pedigree(peds[[1]])))), c(-11L, -10L, -9L))
})


#-------------

set.seed(20190320)
pedigrees_all_populate_haplotypes_ladder_bounded(peds, 
                                                 mutation_rates = rep(0.5, LOCI), 
                                                 ladder_min = rep(-40L, LOCI),
                                                 ladder_max = rep(40L, LOCI),
                                                 get_founder_haplotype = get_founder_h,
                                                 prob_genealogical_error = 1, 
                                                 progress = FALSE)

test_that("pedigrees_all_populate_haplotypes_ladder_bounded genealogical error", {
  # Max one mutation at each locus because prob_genealogical_error = 1 so each get founder haplotype with mutations.
  expect_equal(sort(unique(unlist(get_haplotypes_in_pedigree(peds[[1]])))), c(-11L, -10L, -9L))
})


#-------------

set.seed(20190320)
pedigrees_all_populate_haplotypes_ladder_bounded(peds, 
                                                 mutation_rates = rep(0.5, LOCI), 
                                                 ladder_min = rep(-40L, LOCI),
                                                 ladder_max = rep(40L, LOCI),
                                                 get_founder_haplotype = get_founder_h,
                                                 prob_two_step = 1,
                                                 prob_genealogical_error = 1, 
                                                 progress = FALSE)

test_that("pedigrees_all_populate_haplotypes_ladder_bounded genealogical error", {
  # 0 or 2 mutations at each locus because prob_two_step = prob_genealogical_error = 1 so each get founder haplotype with mutations.
  expect_equal(sort(unique(unlist(get_haplotypes_in_pedigree(peds[[1]])))), c(-12L, -10L, -8L))
})


















############### 
# Near matches

set.seed(1)
pedigrees_all_populate_haplotypes(peds, loci = LOCI, mutation_rates = rep(0.1, LOCI), progress = FALSE)

indv_pid1 <- get_individual(test_pop, pid = 1L)
indv_pid1_h <- get_haplotype(indv_pid1)

no0exact <- count_haplotype_occurrences_individuals(
  individuals = indvs, 
  haplotype = indv_pid1_h)

no0 <- count_haplotype_near_matches_individuals(
  individuals = indvs, 
  haplotype = indv_pid1_h,
  max_dist = 0)
no1 <- count_haplotype_near_matches_individuals(
  individuals = indvs, 
  haplotype = indv_pid1_h,
  max_dist = 1)
no2 <- count_haplotype_near_matches_individuals(
  individuals = indvs, 
  haplotype = indv_pid1_h,
  max_dist = 2)

test_that("count_haplotype_near_matches_individuals works", {
  expect_equal(no0exact, no0)
  expect_true(no1 >= no0)
  expect_true(no2 >= no1)
})

mei_res_near <- pedigree_haplotype_near_matches_meiosis(
  suspect = indv_pid1, 
  max_dist = 2,
  generation_upper_bound_in_result = -1L)

test_that("count_haplotype_near_matches_individuals works", {
  expect_true(sum(mei_res_near[, "hap_dist"] == 0) <= no0)
  expect_true(sum(mei_res_near[, "hap_dist"] <= 1) <= no1)
  expect_true(sum(mei_res_near[, "hap_dist"] <= 2) <= no2)
})


############################################################
# haplotype_partially_matches_individuals
############################################################

test_that("haplotype_partially_matches_individuals input check", {
  expect_error(haplotype_partially_matches_individuals(indvs, indv_pid1_h, -1))
  expect_error(haplotype_partially_matches_individuals(indvs, indv_pid1_h, LOCI + 1))
  expect_error(haplotype_partially_matches_individuals(indvs, indv_pid1_h, c(1, LOCI + 1)))
  expect_error(haplotype_partially_matches_individuals(indvs, indv_pid1_h, 1:LOCI))
})

test_that("haplotype_partially_matches_individuals", {
  part_matches_all_loci <- haplotype_partially_matches_individuals(indvs, indv_pid1_h)
  expect_equal(length(part_matches_all_loci), count_haplotype_occurrences_individuals(indvs, indv_pid1_h))
  
  subsets <- lapply(1:LOCI, function(i) setdiff(1:LOCI, 1:i))
  subset_matches <- unlist(lapply(seq_along(subsets), function(i) {
    length(haplotype_partially_matches_individuals(indvs, indv_pid1_h, subsets[[i]]))
  }))
  
  expect_equal(subset_matches, sort(subset_matches, decreasing = TRUE))
})

Try the malan package in your browser

Any scripts or data that you put into this service are public.

malan documentation built on July 4, 2024, 9:09 a.m.