Nothing
context("Autosomal")
ESTIMATION_TOL_DUE_TO_SAMPLING <- 0.01
set.seed(1)
sim_res_fixed <- sample_geneology(population_size = 1e3,
generations = 100,
generations_full = 3,
generations_return = 3, # default value
progress = FALSE)
peds <- build_pedigrees(sim_res_fixed$population, progress = FALSE)
# alleles A B C
allele_prob <- c(0.7, 0.2, 0.1)
theta <- 0.1
L <- length(allele_prob)
x <- replicate(10000, sample_autosomal_genotype(allele_prob, theta))
#prop.table(table(apply(x, 2, paste0, collapse = ";")))
x_p_tab <- prop.table(table(c(x)))
x_p <- as.numeric(x_p_tab)
test_that("sample_autosomal_genotype works", {
expect_equal(x_p, allele_prob, tol = ESTIMATION_TOL_DUE_TO_SAMPLING)
})
geno_probs_R_mat <- matrix(0, nrow = L, ncol = L)
for (i in 1L:L) {
for (j in i:L) {
#P_AA = F*p_A + (1 - F) * p_A^2
#X != A: P_AX = (1 - F) * 2 * p_A*p_X
prob <- if (i == j) # homzyg:
theta*allele_prob[i] + (1-theta)*allele_prob[i]^2
else
(1-theta)*2*allele_prob[i]*allele_prob[j]
# only assign (i, j);
# if (j, i) should also be assigned, the factor 2 above in
# the else case must be removed
geno_probs_R_mat[i, j] <- prob
}
}
geno_probs_rcpp <- calc_autosomal_genotype_probs(allele_prob, theta)
geno_probs_R <- geno_probs_R_mat[upper.tri(geno_probs_R_mat, diag = TRUE)]
test_that("calc_autosomal_genotype_probs works", {
expect_equal(geno_probs_R, geno_probs_rcpp)
})
pedigrees_all_populate_autosomal(peds, allele_prob, theta, mutation_rate = 0, FALSE)
geno_mat <- do.call(rbind, lapply(seq_len(pedigrees_count(peds)), function(i) {
hs <- do.call(rbind, get_haplotypes_in_pedigree(peds[[i]]))
hs
}))
geno_vec <- c(geno_mat)
geno_vec_p <- as.numeric(prop.table(table(geno_vec)))
test_that("pedigrees_all_populate_autosomal works", {
expect_equal(geno_vec_p, allele_prob, tol = ESTIMATION_TOL_DUE_TO_SAMPLING)
})
get_ols_quantities <- function(y) {
x_unique_geno <- y[!duplicated(y), ]
ols_quantities <- apply(x_unique_geno, 1,
function(as) {
geno_prob <- mean(apply(y, 1, function(as2) all(as == as2)))
if (length(unique(as)) == 1L) {
p <- x_p_tab[as.character(as[1])]
x <- p - p^2
y <- geno_prob - p^2
return(c(x, y))
} else {
p <- x_p_tab[as.character(as[1])]
q <- x_p_tab[as.character(as[2])]
x <- -2*p*q
y <- geno_prob - 2*p*q
return(c(x, y))
}
})
ols_x <- as.matrix(ols_quantities[1L, ])
ols_y <- ols_quantities[2L, ]
return(list(x = ols_x, y = ols_y))
}
y <- t(x)
ols_res_genotypes <- get_ols_quantities(y)
theta_res <- estimate_autotheta_1subpop_genotypes(y)
test_that("estimate_autotheta_1subpop_genotypes works", {
expect_true(theta_res$error == FALSE)
expect_equal(qr.solve(ols_res_genotypes$x, ols_res_genotypes$y),
theta_res$estimate)
})
theta_res_info <- estimate_autotheta_1subpop_genotypes(y, return_estimation_info = TRUE)
test_that("estimate_autotheta_1subpop_genotypes return_estimation_info works", {
expect_true(theta_res_info$error == FALSE)
expect_equal(qr.solve(ols_res_genotypes$x, ols_res_genotypes$y),
theta_res_info$estimate)
expect_equal(sort(ols_res_genotypes$x), sort(theta_res_info$estimation_info$X))
})
# bootstrap:
theta_boot <- replicate(100, {
yboot <- y[sample(nrow(y), replace = TRUE), ]
estimate_autotheta_1subpop_genotypes(yboot)$estimate
})
# Expanding a bit...
theta_boot_rng <- c(0.9, 1.1) * range(theta_boot)
#theta_boot_rng
test_that("estimate_autotheta_1subpop_genotypes boot contains true", {
expect_true(all(theta_boot >= 0 & theta_boot <= 1))
expect_true(theta >= theta_boot_rng[1L] & theta <= theta_boot_rng[2L])
})
p_cumdists <- calc_autosomal_genotype_conditional_cumdist(allele_dist = allele_prob,
theta = theta)
cumdist_r <- geno_probs_R_mat
cumdist_r[lower.tri(cumdist_r)] <- cumdist_r[upper.tri(cumdist_r)] / 2
cumdist_r[upper.tri(cumdist_r)] <- cumdist_r[upper.tri(cumdist_r)] / 2
cumdist_r <- t(apply(cumdist_r / rowSums(cumdist_r), 1, cumsum))
#cumdist_r
test_that("calc_autosomal_genotype_conditional_cumdist works", {
expect_equal(p_cumdists, cumdist_r)
})
livepop <- sim_res_fixed$individuals_generations
y_livepop <- get_haplotypes_individuals(livepop)
ols_res_livepop <- get_ols_quantities(y_livepop)
test_that("estimate_autotheta_1subpop_individuals works", {
expect_equal(qr.solve(ols_res_livepop$x, ols_res_livepop$y),
estimate_autotheta_1subpop_individuals(livepop)$estimate,
tol = 10*ESTIMATION_TOL_DUE_TO_SAMPLING
)
})
test_that("livepop: haplotypes/individual same theta", {
expect_equal(estimate_autotheta_1subpop_genotypes(y_livepop)$estimate,
estimate_autotheta_1subpop_individuals(livepop)$estimate)
})
test_that("geneaology independent sample and population same theta", {
expect_equal(estimate_autotheta_1subpop_genotypes(y)$estimate,
estimate_autotheta_1subpop_genotypes(y_livepop)$estimate,
tol = ESTIMATION_TOL_DUE_TO_SAMPLING)
expect_equal(estimate_autotheta_1subpop_genotypes(y)$estimate,
estimate_autotheta_1subpop_individuals(livepop)$estimate,
tol = ESTIMATION_TOL_DUE_TO_SAMPLING)
})
if (FALSE) {
prop.table(table(c(x)))
prop.table(table(c(y_livepop)))
prop.table(table(apply(x, 2, paste0, collapse = ";")))
prop.table(table(apply(y_livepop, 1, paste0, collapse = ";")))
estimate_autotheta_1subpop_genotypes(y)
qr.solve(ols_res_genotypes$x, ols_res_genotypes$y)
estimate_autotheta_1subpop_genotypes(y_livepop)
estimate_autotheta_1subpop_individuals(livepop)
qr.solve(ols_res_livepop$x, ols_res_livepop$y)
}
#######################################################################
##########################################
# estimate_autotheta_subpops_individuals
##########################################
l_nonindv <- list(1:10, 1:20)
l_nonindv_sizes <- sapply(l_nonindv, length)
test_that("estimate_autotheta_subpops_individuals not list of list of individuals", {
expect_error(estimate_autotheta_subpops_individuals(l_nonindv, l_nonindv_sizes))
})
set.seed(2)
grps <- sample(c(1, 2), length(livepop), replace = TRUE)
subpops <- split(livepop, grps)
subpops_sizes <- sapply(subpops, length)
res_indv <- estimate_autotheta_subpops_individuals(subpops, subpops_sizes)
subpops_haps <- lapply(split(seq_len(nrow(y_livepop)), grps), function(is) y_livepop[is, ])
subpops_haps_sizes <- sapply(subpops_haps, nrow)
res_geno <- estimate_autotheta_subpops_genotypes(subpops_haps, subpops_haps_sizes)
test_that("estimate_autotheta_subpops_individuals = estimate_autotheta_subpops_genotypes", {
expect_equal(res_indv, res_geno)
})
# By pid:
pids_livepop <- sapply(livepop, get_pid)
subpops_pids <- split(pids_livepop, grps)
subpops_pids_sizes <- sapply(subpops_pids, length)
res_pids <- estimate_autotheta_subpops_pids(sim_res_fixed$population, subpops_pids, subpops_pids_sizes)
test_that("estimate_autotheta_subpops_pids", {
expect_equal(res_indv, res_pids)
expect_equal(res_geno, res_pids)
})
#####################################################################################
# theta
#####################################################################################
# Known answers, GDA2
two2cols <- function(x) {
do.call(rbind, lapply(x, function(as) as.integer(strsplit(as.character(as), "")[[1]])))
}
# GDA2, Exercise 5.4, p. 200:
loc4_g <- c(rep(11, 24), rep(12, 16))
loc4_n <- c(rep(11, 44), rep(12, 5), rep(22, 1))
loc4 <- list(two2cols(loc4_g), two2cols(loc4_n))
loc4_ni <- sapply(loc4, nrow)
# GDA2, Exercise 5.4 solution, p. 401
# loc4_sol_F <- -0.0082
# loc4_sol_theta <- 0.0633
# loc4_sol_f <- -0.0764
# From GDA1.1 program, too
loc4_sol_F <- -0.008234
loc4_sol_theta <- 0.063292
loc4_sol_f <- -0.076359
loc4_allele1_sigmasqP <- 0.007324
loc4_allele1_sigmasqI <- -0.008277
loc4_allele1_sigmasqG <- 0.116667
loc4_allele2_sigmasqP <- loc4_allele1_sigmasqP
loc4_allele2_sigmasqI <- loc4_allele1_sigmasqI
loc4_allele2_sigmasqG <- loc4_allele1_sigmasqG
res_loc4 <- estimate_autotheta_subpops_genotypes(loc4, loc4_ni)
test_that("estimate_autotheta_subpops_genotypes GDA2, Exercise 5.4, locus 4", {
expect_equal(res_loc4$F, loc4_sol_F, tol = 1e-5)
expect_equal(res_loc4$theta, loc4_sol_theta, tol = 1e-6)
expect_equal(res_loc4$f, loc4_sol_f, tol = 1e-5)
expect_equal(res_loc4$extra$allele_values[["1"]]$sigmasq_P,
res_loc4$extra$allele_values[["2"]]$sigmasq_P)
expect_equal(res_loc4$extra$allele_values[["1"]]$sigmasq_I,
res_loc4$extra$allele_values[["2"]]$sigmasq_I)
expect_equal(res_loc4$extra$allele_values[["1"]]$sigmasq_G,
res_loc4$extra$allele_values[["2"]]$sigmasq_G)
expect_equal(res_loc4$extra$allele_values[["1"]]$sigmasq_P,
res_loc4$extra$allele_values[["2"]]$sigmasq_P)
expect_equal(loc4_allele1_sigmasqP,
res_loc4$extra$allele_values[["1"]]$sigmasq_P, tol = 1e-6)
expect_equal(loc4_allele1_sigmasqI,
res_loc4$extra$allele_values[["1"]]$sigmasq_I, tol = 1e-6)
expect_equal(loc4_allele1_sigmasqG,
res_loc4$extra$allele_values[["1"]]$sigmasq_G, tol = 1e-6)
# Allele wise theta/F should be the same, GDA2, p. 178 mid.
# theta
for (allele_info in res_loc4$extra$allele_values) {
# allele_info <- res_loc4$extra$allele_values[["1"]]
expect_equal(with(allele_info, sigmasq_P / (sigmasq_P + sigmasq_I + sigmasq_G)),
with(allele_info, S1 / S2), tol = 1e-6)
}
# F
for (allele_info in res_loc4$extra$allele_values) {
# allele_info <- res_loc4$extra$allele_values[["1"]]
expect_equal(with(allele_info, (sigmasq_P + sigmasq_I) / (sigmasq_P + sigmasq_I + sigmasq_G)),
with(allele_info, 1 - (S3 / S2)), tol = 1e-5)
}
})
# Same as above, now locus 6
loc6_g <- c(rep(11, 9), rep(12, 1), rep(22, 5), rep(23, 7), rep(24, 8), rep(34, 10))
loc6_n <- c(rep(11, 22), rep(12, 14), rep(13, 2), rep(14, 6), rep(23, 1), rep(24, 3), rep(33, 1), rep(34, 1))
test_that("loc4/loc6 length", {
expect_equal(length(loc4_g), length(loc6_g))
expect_equal(length(loc4_n), length(loc6_n))
})
loc6 <- list(two2cols(loc6_g), two2cols(loc6_n))
loc6_ni <- sapply(loc6, nrow)
res_loc6 <- estimate_autotheta_subpops_genotypes(loc6, loc6_ni)
# loc6_sol_F <- 0.2009
# loc6_sol_theta <- 0.1517
# loc6_sol_f <- 0.0581
# From GDA1.1 program, too
loc6_sol_F <- 0.200938
loc6_sol_theta <- 0.151652
loc6_sol_f <- 0.058097
loc6_allele1_sigmasqP <- 0.086002
loc6_allele1_sigmasqI <- 0.080586
loc6_allele1_sigmasqG <- 0.127778
loc6_allele2_sigmasqP <- 0.008555
loc6_allele2_sigmasqI <- -0.007456
loc6_allele2_sigmasqG <- 0.188889
loc6_allele3_sigmasqP <- 0.010538
loc6_allele3_sigmasqI <- -0.009882
loc6_allele3_sigmasqG <- 0.116667
loc6_allele4_sigmasqP <- 0.006668
loc6_allele4_sigmasqI <- -0.026926
loc6_allele4_sigmasqG <- 0.155556
test_that("estimate_autotheta_subpops_genotypes GDA2, Exercise 5.4, locus 6", {
expect_equal(res_loc6$F, loc6_sol_F, tol = 1e-5)
expect_equal(res_loc6$theta, loc6_sol_theta, tol = 1e-6)
expect_equal(res_loc6$f, loc6_sol_f, tol = 1e-5)
# Allele 1
expect_equal(loc6_allele1_sigmasqP,
res_loc6$extra$allele_values[["1"]]$sigmasq_P, tol = 1e-6)
expect_equal(loc6_allele1_sigmasqI,
res_loc6$extra$allele_values[["1"]]$sigmasq_I, tol = 1e-6)
expect_equal(loc6_allele1_sigmasqG,
res_loc6$extra$allele_values[["1"]]$sigmasq_G, tol = 1e-6)
# Allele 2
expect_equal(loc6_allele2_sigmasqP,
res_loc6$extra$allele_values[["2"]]$sigmasq_P, tol = 1e-6)
expect_equal(loc6_allele2_sigmasqI,
res_loc6$extra$allele_values[["2"]]$sigmasq_I, tol = 1e-6)
expect_equal(loc6_allele2_sigmasqG,
res_loc6$extra$allele_values[["2"]]$sigmasq_G, tol = 1e-6)
# Allele 3
expect_equal(loc6_allele3_sigmasqP,
res_loc6$extra$allele_values[["3"]]$sigmasq_P, tol = 1e-6)
expect_equal(loc6_allele3_sigmasqI,
res_loc6$extra$allele_values[["3"]]$sigmasq_I, tol = 1e-6)
expect_equal(loc6_allele3_sigmasqG,
res_loc6$extra$allele_values[["3"]]$sigmasq_G, tol = 1e-6)
# Allele 4
expect_equal(loc6_allele4_sigmasqP,
res_loc6$extra$allele_values[["4"]]$sigmasq_P, tol = 1e-6)
expect_equal(loc6_allele4_sigmasqI,
res_loc6$extra$allele_values[["4"]]$sigmasq_I, tol = 1e-6)
expect_equal(loc6_allele4_sigmasqG,
res_loc6$extra$allele_values[["4"]]$sigmasq_G, tol = 1e-6)
# Allele wise theta/F should be the same, GDA2, p. 178 mid.
# theta
for (allele_info in res_loc6$extra$allele_values) {
# allele_info <- res_loc4$extra$allele_values[["1"]]
expect_equal(with(allele_info, sigmasq_P / (sigmasq_P + sigmasq_I + sigmasq_G)),
with(allele_info, S1 / S2), tol = 1e-6)
}
# F
for (allele_info in res_loc6$extra$allele_values) {
# allele_info <- res_loc4$extra$allele_values[["1"]]
expect_equal(with(allele_info, (sigmasq_P + sigmasq_I) / (sigmasq_P + sigmasq_I + sigmasq_G)),
with(allele_info, 1 - (S3 / S2)), tol = 1e-5)
}
})
#########################################################################
#####################################################################################
# GDA program
# https://phylogeny.uconn.edu/software/
# v. 1.1
# diploid.nex output
#####################################################################################
if (FALSE) {
diploid_nex_str <- "Pop_1:
_1_ 4/4 4/3 4/3 3/3 4/4
_2_ 4/4 4/4 4/3 3/3 4/4
_3_ 4/4 4/4 4/3 4/3 4/4
_4_ 4/4 4/4 ?/? 3/3 4/4
_5_ 4/4 4/4 2/4 3/4 4/4
_6_ 4/4 4/4 ?/? 4/3 4/4
_7_ 4/4 4/4 4/3 4/3 4/4
_8_ 4/4 4/4 ?/? 4/3 4/4,
Pop_2:
_1_ 4/4 4/4 3/3 3/2 4/4
_2_ 4/4 3/3 4/4 4/3 4/4
_3_ 4/4 4/3 4/4 4/3 4/4
_4_ 4/4 4/4 3/3 3/3 4/4
_5_ 4/4 4/3 4/4 4/4 4/4
_6_ 4/4 4/4 4/4 2/2 4/4
_7_ 4/4 4/4 4/3 4/3 4/4
_8_ 4/4 4/4 4/4 4/4 4/4,
Pop_3:
_1_ 4/4 4/4 4/4 4/3 4/4
_2_ 4/4 4/4 4/4 4/4 4/4
_3_ 4/4 4/4 4/3 2/1 4/4
_4_ 4/4 4/4 3/3 4/3 4/4
_5_ 4/4 4/4 4/3 2/1 4/4,
Pop_4:
_1_ 4/4 4/4 4/3 4/4 4/4
_2_ 4/4 4/4 4/3 4/3 4/4
_3_ 4/4 4/4 4/3 4/3 4/4
_4_ 4/4 4/4 4/3 4/4 4/4
_5_ 4/4 4/4 4/3 4/4 4/4
_6_ 4/4 4/4 4/4 3/3 4/4
_7_ 4/4 4/4 4/4 4/4 4/4,
Pop_5:
_1_ 4/4 4/4 4/4 2/1 4/4
_2_ 4/4 4/4 4/4 3/3 4/4
_3_ 4/4 4/4 4/3 4/3 4/4
_4_ 4/4 4/4 4/3 4/3 4/4
_5_ 4/4 4/4 4/4 4/4 4/4
_6_ 4/4 4/4 4/4 4/3 4/4
_7_ 4/4 4/4 4/3 4/3 4/4
_8_ 4/4 4/4 4/4 ?/? 4/4
_9_ 4/4 4/3 4/4 4/3 4/4,
Pop_6:
_1_ 4/4 4/4 4/4 4/3 4/4
_2_ 4/4 4/4 4/3 3/3 4/4
_3_ 4/4 4/4 4/4 3/2 4/4
_4_ 4/4 4/4 4/3 4/1 4/4
_5_ 4/4 4/4 4/4 4/4 4/4
_6_ 4/4 4/4 4/4 4/2 4/4
_7_ 4/4 4/4 4/4 4/3 4/4"
diploid_nex_con <- textConnection(diploid_nex_str)
lns <- readLines(diploid_nex_con)
is <- c(which(grepl("Pop", lns)), length(lns)+1)
grp <- unlist(lapply(1L:(length(is)-1), function(i) rep(i, is[i+1]-is[i])))
length(lns)
length(grp)
pops <- split(lns, grp)
prof <- lapply(pops, function(x) {
x <- x[-1]
x <- gsub("^[ ]*_[1-9]+_[ ]*(.*)$", "\\1", x)
x <- gsub(",", "", x, fixed = TRUE)
x <- strsplit(x, " ")
#x <- lapply(x, function(y) do.call(rbind, strsplit(y, "/")))
x <- lapply(x, function(y) strsplit(y, "/"))
#x <- do.call(rbind, lapply(x, function(y) strsplit(y, "/")))
x
})
prof
loci <- 2:4
prof_loc <- lapply(loci, function(i) {
lapply(prof, function(x) {
d <- do.call(rbind, lapply(x, function(y) y[[i]]))
d[d == "?"] <- NA
d <- na.omit(d)
d <- apply(d, 2, as.integer)
d
})
})
names(prof_loc) <- loci
prof_loc
diploid_nex_data <- prof_loc
dput(prof_loc)
}
diploid_nex_data <- structure(list(`2` = structure(list(`1` = structure(c(4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L), .Dim = c(8L,
2L)),
`2` = structure(c(4L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L,
3L, 4L, 3L, 4L, 4L, 4L), .Dim = c(8L, 2L)),
`3` = structure(c(4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L), .Dim = c(5L, 2L)),
`4` = structure(c(4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L), .Dim = c(7L,
2L)),
`5` = structure(c(4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L), .Dim = c(9L, 2L)),
`6` = structure(c(4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L), .Dim = c(7L,
2L))), .Names = c("1", "2", "3", "4", "5", "6")),
`3` = structure(list(
`1` = structure(c(4L, 4L, 4L, 2L, 4L, 3L, 3L, 3L, 4L, 3L), .Dim = c(5L,
2L)),
`2` = structure(c(3L, 4L, 4L, 3L, 4L, 4L, 4L, 4L, 3L,
4L, 4L, 3L, 4L, 4L, 3L, 4L), .Dim = c(8L, 2L)),
`3` = structure(c(4L,
4L, 4L, 3L, 4L, 4L, 4L, 3L, 3L, 3L), .Dim = c(5L, 2L)),
`4` = structure(c(4L,
4L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 3L, 4L, 4L), .Dim = c(7L,
2L)),
`5` = structure(c(4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 3L, 3L, 4L, 4L, 3L, 4L, 4L), .Dim = c(9L, 2L)),
`6` = structure(c(4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 4L, 3L, 4L, 4L, 4L), .Dim = c(7L,
2L))), .Names = c("1", "2", "3", "4", "5", "6")),
`4` = structure(list(
`1` = structure(c(3L, 3L, 4L, 3L, 3L, 4L, 4L, 4L, 3L, 3L,
3L, 3L, 4L, 3L, 3L, 3L), .Dim = c(8L, 2L)),
`2` = structure(c(3L,
4L, 4L, 3L, 4L, 2L, 4L, 4L, 2L, 3L, 3L, 3L, 4L, 2L, 3L, 4L
), .Dim = c(8L, 2L)),
`3` = structure(c(4L, 4L, 2L, 4L, 2L,
3L, 4L, 1L, 3L, 1L), .Dim = c(5L, 2L)),
`4` = structure(c(4L,
4L, 4L, 4L, 4L, 3L, 4L, 4L, 3L, 3L, 4L, 4L, 3L, 4L), .Dim = c(7L,
2L)),
`5` = structure(c(2L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 1L,
3L, 3L, 3L, 4L, 3L, 3L, 3L), .Dim = c(8L, 2L)),
`6` = structure(c(4L,
3L, 3L, 4L, 4L, 4L, 4L, 3L, 3L, 2L, 1L, 4L, 2L, 3L), .Dim = c(7L,
2L))), .Names = c("1", "2", "3", "4", "5", "6"))), .Names = c("2",
"3", "4"))
# Excerpt:
# Locus Allele f F Theta-P
# ---------- ---------- ---------- ---------- ----------
# locus-2 All 0.250514 0.302529 0.069401
# locus-3 All -0.034807 -0.030052 0.004595
# locus-4 All 0.012751 0.036738 0.024297
# Locus Allele Sigma-G Sigma-I Sigma-P
# ---------- ---------- ---------- ---------- ----------
# locus-2 All 0.090909 0.030386 0.009046
# 3 0.045455 0.015193 0.004523
# 4 0.045455 0.015193 0.004523
#
# locus-3 All 0.439024 -0.014767 0.001958
# 2 0.012195 -0.000383 0.000453
# 3 0.207317 -0.001286 -0.002083
# 4 0.219512 -0.013098 0.003588
#
# locus-4 All 0.604651 0.007810 0.015252
# 1 0.046512 -0.003444 0.002099
# 2 0.069767 0.014971 0.000953
# 4 0.244186 0.006261 0.000451
# 3 0.244186 -0.009979 0.011749
gda_sigma_lns <- readLines(textConnection("Locus Allele Sigma-G Sigma-I Sigma-P
locus-2 All 0.090909 0.030386 0.009046
3 0.045455 0.015193 0.004523
4 0.045455 0.015193 0.004523
locus-3 All 0.439024 -0.014767 0.001958
2 0.012195 -0.000383 0.000453
3 0.207317 -0.001286 -0.002083
4 0.219512 -0.013098 0.003588
locus-4 All 0.604651 0.007810 0.015252
1 0.046512 -0.003444 0.002099
2 0.069767 0.014971 0.000953
4 0.244186 0.006261 0.000451
3 0.244186 -0.009979 0.011749"))
gda_sigma_lns <- gda_sigma_lns[!grepl("locus", gda_sigma_lns)]
gda_sigma_lns <- gsub("[ ]+", " ", gda_sigma_lns)
gda_sigma_lns <- gsub("^ ", "", gda_sigma_lns)
gda_sigma_lns <- gda_sigma_lns[gda_sigma_lns != " "]
gda_sigma_lns <- gda_sigma_lns[gda_sigma_lns != ""]
gda_sigma_lns <- gda_sigma_lns[-1]
gda_sigma_d <- read.table(text = gda_sigma_lns, stringsAsFactors = FALSE,
sep = " ", row.names = NULL,
col.names = c("Allele", "sigmasq_G", "sigmasq_I", "sigmasq_P"))
gda_sigma <- list(
"2" = gda_sigma_d[1:2, ],
"3" = gda_sigma_d[3:5, ],
"4" = gda_sigma_d[6:9, ]
)
gda_thetaFf <- list(
"2" = list(f = 0.250514, F = 0.302529, theta = 0.069401),
"3" = list(f = -0.034807, F = -0.030052, theta = 0.004595),
"4" = list(f = 0.012751, F = 0.036738, theta = 0.024297)
)
test_that("GDA 1.1 diploid.nex ", {
for (loc in seq_along(diploid_nex_data)) {
#loc <- 1
expect_equal(names(gda_thetaFf)[loc], names(diploid_nex_data)[loc])
gda_loc <- gda_thetaFf[[loc]]
loc_subpops <- diploid_nex_data[[loc]]
loc_subpops_ni <- sapply(loc_subpops, nrow)
res_loc <- estimate_autotheta_subpops_genotypes(loc_subpops, loc_subpops_ni)
expect_equal(res_loc$theta, gda_loc$theta, tol = 1e-5)
expect_equal(res_loc$F, gda_loc$F, tol = 1e-5)
expect_equal(res_loc$f, gda_loc$f, tol = 1e-5)
# Sigma
sigma_tmp <- gda_sigma[[ names(gda_thetaFf)[loc] ]]
for (a_i in seq_along(sigma_tmp$Allele)) {
#a_i <- 1
a <- as.character(sigma_tmp$Allele[a_i])
expect_equal(sigma_tmp$sigmasq_P[a_i],
res_loc$extra$allele_values[[a]]$sigmasq_P, tol = 1e-6)
expect_equal(sigma_tmp$sigmasq_I[a_i],
res_loc$extra$allele_values[[a]]$sigmasq_I, tol = 1e-6)
expect_equal(sigma_tmp$sigmasq_G[a_i],
res_loc$extra$allele_values[[a]]$sigmasq_G, tol = 1e-6)
}
# Allele wise theta/F should be the same, GDA2, p. 178 mid.
# theta
for (allele_info in res_loc$extra$allele_values) {
# allele_info <- res_loc4$extra$allele_values[["1"]]
expect_equal(with(allele_info, sigmasq_P / (sigmasq_P + sigmasq_I + sigmasq_G)),
with(allele_info, S1 / S2), tol = 1e-6)
}
# F
for (allele_info in res_loc$extra$allele_values) {
# allele_info <- res_loc4$extra$allele_values[["1"]]
expect_equal(with(allele_info, (sigmasq_P + sigmasq_I) / (sigmasq_P + sigmasq_I + sigmasq_G)),
with(allele_info, 1 - (S3 / S2)), tol = 1e-5)
}
}
})
######################################
# Unweighted
######################################
set.seed(2000)
grps <- sample(c(1, 2), length(livepop), replace = TRUE)
subpops <- split(livepop, grps)
subpops_haps <- lapply(split(seq_len(nrow(y_livepop)), grps), function(is) y_livepop[is, ])
pids_livepop <- sapply(livepop, get_pid)
subpops_pids <- split(pids_livepop, grps)
test_that("get_allele_counts_pids/get_allele_counts_genotypes", {
expect_equal(get_allele_counts_pids(sim_res_fixed$population, subpops_pids)[, c("0", "1", "2")],
get_allele_counts_genotypes(subpops_haps)[, c("0", "1", "2")])
})
test_that("Theta unweighted", {
expect_equal(estimate_autotheta_subpops_unweighted_pids(sim_res_fixed$population, subpops_pids, assume_HWE = TRUE),
estimate_autotheta_subpops_unweighted_genotypes(subpops_haps, assume_HWE = TRUE))
# Not yet implemented
# expect_equal(estimate_autotheta_subpops_unweighted_pids(sim_res_fixed$population, subpops_pids, assume_HWE = FALSE),
# estimate_autotheta_subpops_unweighted_genotypes(subpops_haps, assume_HWE = FALSE))
})
##########################
if (requireNamespace("dirmult", quietly = TRUE)) {
library(dirmult)
dirmult_weir1 <- dirmult::weirMoM(get_allele_counts_pids(sim_res_fixed$population, subpops_pids))
dirmult_weir2 <- dirmult::weirMoM(get_allele_counts_genotypes(subpops_haps))
test_that("Theta unweighted with dirmult::weirMoM", {
expect_equal(dirmult_weir1, dirmult_weir2)
expect_equal(estimate_autotheta_subpops_unweighted_pids(sim_res_fixed$population, subpops_pids, assume_HWE = TRUE),
estimate_autotheta_subpops_unweighted_genotypes(subpops_haps, assume_HWE = TRUE))
})
ralleleprob <- function(n, ps, theta) {
#n <- 2; ps <- ALLELE_PROB; theta <- 0.1
#n <- 2; ps <- ALLELE_PROB; theta <- 0.001
#
stopifnot(length(ps) > 1)
stopifnot(length(theta) == 1L)
stopifnot(theta >= 0)
stopifnot(theta <= 1)
# FIXME: theta == 0 problematic...
alpha <- (1-theta)*ps/theta
x <- rdirichlet(n, alpha)
return(x)
}
set.seed(100)
for (theta in c(0.01, 0.02)) {
#theta <- 0.02
subpop_ps <- ralleleprob(10, ps = allele_prob, theta = theta)
subpop_geno <- lapply(seq_len(nrow(subpop_ps)), function(i) {
geno <- t(replicate(1000, sample_autosomal_genotype(subpop_ps[i, ], 0))) # 0 for HWE
return(geno)
})
# Missing alleles a technical issue, but not here because all are observed in all subpops
allele_counts <- do.call(rbind, lapply(subpop_geno, function(x) table(x)))
#get_allele_counts_genotypes()
if (FALSE) {
theta
fit <- dirmult(allele_counts, trace = FALSE)
fit$theta
dirmult::weirMoM(allele_counts)
estimate_autotheta_subpops_genotypes(subpop_geno, sapply(subpop_geno, function(x) 1000))$theta
estimate_autotheta_subpops_unweighted_genotypes(subpop_geno, assume_HWE = TRUE)
}
test_that(paste0("Theta unweighted with dirmult: theta = ", theta), {
expect_equal(estimate_autotheta_subpops_unweighted_genotypes(subpop_geno, assume_HWE = TRUE),
dirmult::weirMoM(allele_counts))
})
test_that(paste0("get_allele_counts_genotypes with dirmult: theta = ", theta), {
expect_equal(allele_counts,
get_allele_counts_genotypes(subpop_geno)[, colnames(allele_counts)])
})
}
}
####################################
# Weighted vs unweighted
####################################
if (requireNamespace("dirmult", quietly = TRUE)) {
library(dirmult)
ralleleprob <- function(n, ps, theta) {
#n <- 2; ps <- ALLELE_PROB; theta <- 0.1
#n <- 2; ps <- ALLELE_PROB; theta <- 0.001
#
stopifnot(length(ps) > 1)
stopifnot(length(theta) == 1L)
stopifnot(theta >= 0)
stopifnot(theta <= 1)
# FIXME: theta == 0 problematic...
alpha <- (1-theta)*ps/theta
x <- rdirichlet(n, alpha)
return(x)
}
set.seed(100)
for (theta in c(0.01, 0.02)) {
#theta <- 0.02
subpop_ps <- ralleleprob(10, ps = allele_prob, theta = theta)
subpop_geno_equal <- lapply(seq_len(nrow(subpop_ps)), function(i) {
geno <- t(replicate(1000, sample_autosomal_genotype(subpop_ps[i, ], 0))) # 0 for HWE
return(geno)
})
test_that(paste0("Theta unweighted vs weighted for equal sizes: theta = ", theta), {
expect_equal(
estimate_autotheta_subpops_unweighted_genotypes(
subpops = subpop_geno_equal,
assume_HWE = TRUE),
estimate_autotheta_subpops_genotypes(
subpops = subpop_geno_equal,
subpops_sizes = sapply(subpop_geno_equal, nrow))$theta,
tol = 1e-4
)
})
subpop_geno_unequal <- lapply(seq_len(nrow(subpop_ps)), function(i) {
geno <- t(replicate(i*100, sample_autosomal_genotype(subpop_ps[i, ], 0))) # 0 for HWE
return(geno)
})
theta_unw <- estimate_autotheta_subpops_unweighted_genotypes(
subpops = subpop_geno_unequal,
assume_HWE = TRUE)
theta_w <- estimate_autotheta_subpops_genotypes(
subpops = subpop_geno_unequal,
subpops_sizes = sapply(subpop_geno_unequal, nrow))$theta
test_that(paste0("Theta unweighted vs weighted for equal sizes: theta = ", theta), {
expect_true(!isTRUE(all.equal(theta_unw, theta_w)))
})
}
}
########################################
############ Others
test_that("hash_colisions", {
expect_true(all(unique(as.integer(hash_colisions(20))) == 1L))
})
############################################
# Infinite alleles
set.seed(1)
sim_res_fixed <- sample_geneology(population_size = 1e3,
generations = 100,
generations_full = 3,
generations_return = 3, # default value
progress = FALSE)
pop <- sim_res_fixed$population
livepop <- sim_res_fixed$individuals_generations
#peds <- build_pedigrees(sim_res_fixed$population, progress = FALSE)
get_generation(get_individual(pop, get_pid(livepop[[1]])))
set.seed(1)
population_populate_autosomal_infinite_alleles(pop, mutation_rate = 0, FALSE)
y_livepop <- get_haplotypes_individuals(livepop)
test_that("infinite alleles -- no mutations", {
expect_true(all.equal(unique(c(y_livepop)), 0L))
})
set.seed(1)
population_populate_autosomal_infinite_alleles(pop, mutation_rate = 1, FALSE)
y_livepop <- get_haplotypes_individuals(livepop)
test_that("infinite alleles -- always mutations", {
expect_equal(length(unique(c(y_livepop))), 2L*length(livepop))
})
set.seed(1)
population_populate_autosomal_infinite_alleles(pop, mutation_rate = 0.003, FALSE)
y_livepop <- get_haplotypes_individuals(livepop)
c(y_livepop) %>% table() %>% sort(decreasing = TRUE) %>% head()
test_that("infinite alleles -- sometimes mutations", {
expect_true(length(unique(c(y_livepop))) > 1)
expect_true(length(unique(c(y_livepop))) < 2L*length(livepop))
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.