Nothing
## ----include = FALSE----------------------------------------------------------
env_present <- slendr:::is_slendr_env_present()
eval_chunk <- (Sys.which("slim") != "" || Sys.which("slim.exe") != "" ) && env_present
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>",
dpi = 60,
eval = eval_chunk
)
record_snapshot <- FALSE # only save PDFs on a local machine
# reporting knitr chunk run times based on:
# https://bookdown.org/yihui/rmarkdown-cookbook/time-chunk.html#time-chunk
all_times <- list()
knitr::knit_hooks$set(time_it = local({
now <- NULL
function(before, options) {
if (before) {
now <<- Sys.time()
} else {
res <- difftime(Sys.time(), now)
all_times[[options$label]] <<- res
}
}
}))
## -----------------------------------------------------------------------------
# workaround for GitHub actions getting "Killed" due to out of memory issues
# -- when running unit tests on GitHub, smaller amount of sequence will be
# simulated
if (Sys.getenv("RUNNER_OS") != "") {
scaling <- 10 # use 10X less sequence on GitHub actions
} else {
scaling <- 1 # but simulate normal amount of data otherwise
}
library(slendr)
library(dplyr)
library(ggplot2)
library(purrr)
library(tidyr)
library(cowplot)
library(forcats)
init_env()
SEED <- 42
set.seed(SEED)
## -----------------------------------------------------------------------------
# placeholder "figure" where a code chunk will be pasted
p_code <- ggplot() +
geom_text(aes(x = 1, y = 1), label = "code will be here") +
theme_void()
## ----ex1, time_it = TRUE, eval = eval_chunk, fig.keep='none'------------------
o <- population("o", time = 1, N = 100)
c <- population("c", time = 2500, N = 100, parent = o)
a <- population("a", time = 2800, N = 100, parent = c)
b <- population("b", time = 3700, N = 100, parent = a)
x1 <- population("x1", time = 4000, N = 15000, parent = c)
x2 <- population("x2", time = 4300, N = 15000, parent = x1)
gf <- gene_flow(from = b, to = x1, start = 5400, end = 5800, 0.1)
model <- compile_model(
populations = list(o, a, b, c, x1, x2), gene_flow = gf,
generation_time = 1, simulation_length = 6000
)
plot_model(model, sizes = FALSE, proportions = TRUE) # panel B
ts <- msprime(model, sequence_length = 100e6 / scaling, recombination_rate = 1e-8, random_seed = SEED) %>%
ts_mutate(mutation_rate = 1e-8, random_seed = SEED)
samples <- ts_samples(ts) %>% group_by(pop) %>% sample_n(100)
# panel C
divergence <- ts_divergence(ts, split(samples$name, samples$pop))
# panel D
f4ratio <- ts_f4ratio(
ts, X = filter(samples, pop %in% c("x1", "x2"))$name,
A = "a_1", B = "b_1", C = "c_1", O = "o_1"
)
## ----echo = FALSE, eval = eval_chunk && record_snapshot-----------------------
# system(sprintf("cp -r %s %s", model$path, "/Users/mp/Documents/postdoc/slendr-paper/preprint_models_v0.5/ex1"))
# ts_save(ts, "/Users/mp/Documents/postdoc/slendr-paper/preprint_models_v0.5/ex1.trees")
## ----figure_ex1, fig.height=6, fig.width=9------------------------------------
divergence <- divergence %>% mutate(pair = paste(x, "-", y))
f4ratio <- f4ratio %>% mutate(population = gsub("^(.*)_.*$", "\\1", X), alpha = alpha * 100)
p_ex1_divergence <- divergence %>%
ggplot(aes(fct_reorder(pair, divergence), divergence)) +
geom_point(size = 2.5) +
xlab("population pair") + ylab("pairwise divergence") +
theme_minimal() +
theme(legend.position = "bottom",
legend.text = element_text(size = 10),
axis.text.x = element_text(hjust = 1, angle = 45, size = 8),
axis.title.x = element_blank())
p_ex1_f4ratio <- f4ratio %>%
ggplot(aes(population, alpha)) +
geom_hline(yintercept = 0, linetype = 2) +
geom_jitter(alpha = 0.5) +
geom_boxplot(outlier.shape = NA, alpha = 0.7) +
ylab(base::expression(italic("f")[4]~"-ratio ancestry proportion [%]")) +
theme_minimal() +
coord_cartesian(ylim = c(0, 20)) +
theme(legend.position = "none",
axis.text.x = element_text(size = 11),
axis.title.x = element_blank(),
panel.grid.major.x = element_blank())#; p_ex3_f4ratio
# let's avoid ggpubr as another dependency:
# https://github.com/kassambara/ggpubr/blob/master/R/as_ggplot.R#L27
p_ex1_legend <- ggdraw() + draw_grob(grid::grobTree(get_legend(p_ex1_divergence)))
p_ex1_model <- plot_model(model, sizes = FALSE, proportions = TRUE)
p_ex1 <- plot_grid(
p_code,
plot_grid(
p_ex1_model,
plot_grid(
p_ex1_divergence + theme(legend.position = "none"),
p_ex1_f4ratio,
ncol = 2, rel_widths = c(1, 0.8), labels = c("C", "D")
),
p_ex1_legend, nrow = 3, rel_heights = c(1, 1, 0.1),
labels = "B"
),
nrow = 1, labels = c("A", "")
)
p_ex1
## ----include = FALSE, eval = eval_chunk && record_snapshot--------------------
# ggsave("/Users/mp/Documents/postdoc/slendr-paper/preprint_models_v0.5/ex1.pdf", p_ex1, width = 9, height = 6, units = "in")
## ----ex2, time_it = TRUE, eval = eval_chunk-----------------------------------
map <- world(xrange = c(0, 10), yrange = c(0, 10),
landscape = region(center = c(5, 5), radius = 5))
p1 <- population("pop1", time = 1, N = 2000, map = map, competition = 0)
p2 <- population("pop2", time = 1, N = 2000, map = map, competition = 9)
p3 <- population("pop3", time = 1, N = 2000, map = map, competition = 6)
p4 <- population("pop4", time = 1, N = 2000, map = map, competition = 5)
p5 <- population("pop5", time = 1, N = 2000, map = map, competition = 4)
p6 <- population("pop6", time = 1, N = 2000, map = map, competition = 3)
p7 <- population("pop7", time = 1, N = 2000, map = map, competition = 2)
p8 <- population("pop8", time = 1, N = 2000, map = map, competition = 1)
model <- compile_model(
populations = list(p1, p2, p3, p4, p5, p6, p7, p8),
generation_time = 1, simulation_length = 5000, resolution = 0.1,
mating = 0.1, dispersal = 0.05
)
ts <-
slim(model, sequence_length = 10e6 / scaling, recombination_rate = 1e-8, random_seed = SEED) %>%
ts_simplify() %>%
ts_mutate(mutation_rate = 1e-7, random_seed = SEED)
locations <- ts_nodes(ts) %>% filter(time == max(time))
heterozygosity <- ts_samples(ts) %>%
group_by(pop) %>%
sample_n(100) %>%
mutate(pi = ts_diversity(ts, name)$diversity)
## ----echo = FALSE, eval = eval_chunk && record_snapshot-----------------------
# system(sprintf("cp -r %s %s", model$path, "/Users/mp/Documents/postdoc/slendr-paper/preprint_models_v0.5/ex2"))
# ts_save(ts, "/Users/mp/Documents/postdoc/slendr-paper/preprint_models_v0.5/ex2.trees")
## ----figure_ex2, fig.height=9, fig.width=6------------------------------------
p_ex2_clustering <- ggplot() +
geom_sf(data = map) +
geom_sf(data = locations, aes(color = pop), size = 0.05, alpha = 0.25) +
facet_grid(. ~ pop, switch = "x") +
xlab("spatial distributions emerged in the simulation") +
theme(
strip.background = element_blank(),
strip.text = element_text(size = 11),
panel.grid = element_blank(),
axis.ticks = element_blank(),
axis.text = element_blank(),
panel.background = element_blank()
) +
guides(color = "none")
p_ex2_diversity <- ggplot(heterozygosity, aes(pop, pi, color = pop)) +
geom_violin(color = "black") +
geom_jitter(alpha = 0.5) +
labs(y = "individual heterozygosity") +
guides(color = "none") +
theme_minimal() +
theme(axis.title.x = element_blank(),
axis.text.x = element_blank(), panel.grid.major.x = element_blank(),
plot.margin = margin(t = 0.2, r = 0.2, b = -0.1, l = 0.2, "cm"))
p_ex2 <- plot_grid(
p_code,
plot_grid(
p_ex2_diversity,
p_ex2_clustering +
theme(plot.margin = margin(t = 0, r = 0.4, b = 0, l = 1.8, "cm")),
nrow = 2,
rel_heights = c(1, 0.5),
labels = c("B", "C")
),
nrow = 2, labels = c("A", ""), rel_heights = c(1.5, 1)
)
p_ex2
## ----include = FALSE, eval = eval_chunk && record_snapshot--------------------
# ggsave("/Users/mp/Documents/postdoc/slendr-paper/preprint_models_v0.5/ex2.pdf", p_ex2, width = 6, height = 9, units = "in")
## ----fig.keep='none'----------------------------------------------------------
map <- world(xrange = c(-13, 70), yrange = c(18, 65), crs = 3035)
R1 <- region(
"EHG range", map,
polygon = list(c(26, 55), c(38, 53), c(48, 53), c(60, 53),
c(60, 60), c(48, 63), c(38, 63), c(26, 60))
)
R2 <- region(
"Europe", map,
polygon = list(
c(-8, 35), c(-5, 36), c(10, 38), c(20, 35), c(25, 35),
c(33, 45), c(20, 58), c(-5, 60), c(-15, 50)
)
)
R3 <- region(
"Anatolia", map,
polygon = list(c(28, 35), c(40, 35), c(42, 40),
c(30, 43), c(27, 40), c(25, 38))
)
R4 <- join(R2, R3)
R5 <- region(
"YAM range", map,
polygon = list(c(26, 50), c(38, 49), c(48, 50),
c(48, 56), c(38, 59), c(26, 56))
)
ooa_trajectory <- list(c(40, 30), c(50, 30), c(60, 40), c(45, 55))
## ----ex3, time_it = TRUE, eval = eval_chunk, fig.keep='none'------------------
map <- world(xrange = c(-13, 70), yrange = c(18, 65), crs = 3035)
ooa <- population(
"OOA", time = 50000, N = 500, remove = 23000,
map = map, center = c(33, 30), radius = 400e3
) %>%
move(trajectory = ooa_trajectory, start = 50000, end = 40000, snapshots = 30)
ehg <- population(
"EHG", time = 28000, N = 1000, parent = ooa, remove = 6000,
map = map, polygon = R1
)
eur <- population(
"EUR", time = 30000, N = 2000, parent = ooa,
map = map, polygon = R2
) %>%
resize(N = 10000, time = 5000, end = 0, how = "exponential")
ana <- population(
"ANA", time = 25000, N = 4000, parent = ooa, remove = 3000,
map = map, polygon = R3
) %>%
expand_range(by = 3e6, start = 10000, end = 7000, polygon = R4, snapshots = 15)
yam <- population(
"YAM", time = 7000, N = 600, parent = ehg, remove = 2500,
map = map, polygon = R5
) %>%
move(trajectory = list(c(15, 50)), start = 5000, end = 3000, snapshots = 10)
gf <- list(
gene_flow(ana, to = yam, rate = 0.5, start = 6500, end = 5000),
gene_flow(ana, to = eur, rate = 0.6, start = 8000, end = 6000),
gene_flow(yam, to = eur, rate = 0.7, start = 3500, end = 3000)
)
model <- compile_model(
populations = list(ooa, ehg, eur, ana, yam), gene_flow = gf,
generation_time = 30, resolution = 10e3,
competition = 150e3, mating = 120e3, dispersal = 90e3
)
samples <- schedule_sampling(
model, times = seq(0, 50000, by = 1000),
list(ehg, 20), list(ana, 20), list(yam, 20), list(eur, 20)
)
plot_model(model, sizes = FALSE)
plot_map(model)
ts <- slim(
model, burnin = 200000, samples = samples, random_seed = SEED,
sequence_length = 200000, recombination_rate = 1e-8
)
## ----echo = FALSE, eval = eval_chunk && record_snapshot-----------------------
# system(sprintf("cp -r %s %s", model$path, "/Users/mp/Documents/postdoc/slendr-paper/preprint_models_v0.5/ex3"))
# ts_save(ts, "/Users/mp/Documents/postdoc/slendr-paper/preprint_models_v0.5/ex3.trees")
## ----include=FALSE, eval=FALSE------------------------------------------------
# devtools::load_all()
# init_env()
# model <- read_model("/Users/mp/Documents/postdoc/slendr-paper/preprint_models_v0.5/ex3")
# ts <- ts_load("/Users/mp/Documents/postdoc/slendr-paper/preprint_models_v0.5/ex3.trees", model = model)
#
# library(dplyr)
# library(ggplot2)
# library(purrr)
# library(tidyr)
# library(cowplot)
# library(forcats)
#
# init_env()
#
# SEED <- 42
# set.seed(SEED)
#
# # placeholder "figure" where a code chunk will be pasted
# p_code <- ggplot() +
# geom_text(aes(x = 1, y = 1), label = "code will be here") +
# theme_void()
#
# map <- world(xrange = c(-13, 70), yrange = c(18, 65), crs = 3035)
#
# e <- "EUR_578"
## ----figure_ex3, fig.height=5, fig.width=7, eval = eval_chunk-----------------
p_map <- plot_map(model) +
theme(legend.position = "bottom") +
guides(alpha = "none")
p_ex3 <- plot_grid(
p_code,
plot_grid(
plot_model(model, sizes = FALSE),
p_map,
labels = c("B", "C"), nrow = 2, rel_heights = c(1, 1)
),
ncol = 2, labels = c("A", ""), rel_widths = c(1, 1.2)
)
p_ex3
## ----include = FALSE, eval = eval_chunk && record_snapshot--------------------
# ggsave("/Users/mp/Documents/postdoc/slendr-paper/preprint_models_v0.5/ex3.pdf", p_ex3, width = 9, height = 6, units = "in")
## ----include = FALSE, eval = FALSE--------------------------------------------
# # eurs <- ts_samples(ts) %>% filter(pop == "EUR") %>% pull(name)
# # # eurs <- c("EUR_542", "EUR_549", "EUR_567", "EUR_581", "EUR_582")
# # eurs <- e <- c("EUR_578")
#
# # #############
# # ###### FOUND THE IND_578 (see anc.pdf)
# # ###### actually ,keep the anc.pdf in the archive
#
# # # finding a EUR individual with illustratively rich spatial ancestry
# # pdf("~/Desktop/anc.pdf")
# # for (e in eurs) {
# # ancestors <- ts_ancestors(ts, e, verbose=T)
# # if (length(unique(as.character(ancestors$parent_pop))) < 5) next
# # chrom_names <- stats::setNames(
# # c(paste(e, "(chromosome 1, node 10)"), paste(e, "(chromosome 2, node 11)")),
# # unique(ancestors$node_id)
# # )
# # p_ancestors <- ggplot() +
# # geom_sf(data = map) +
# # geom_sf(data = ancestors, size = 0.5, alpha = 0.2) +
# # geom_sf(data = sf::st_set_geometry(ancestors, "parent_location"),
# # aes(shape = parent_pop, color = parent_pop)) +
# # geom_sf(data = filter(ts_nodes(ts), name == e), size = 3) +
# # coord_sf(expand = 0) +
# # labs(x = "longitude", y = "latitude") +
# # theme_bw() +
# # facet_grid(. ~ node_id, labeller = labeller(node_id = chrom_names)) +
# # theme(legend.position = "bottom"); print(p_ancestors)
# # }
# # dev.off()
#
#
# # # finding appropriate ancestors for plotting an illustrative tree ("YAM" ancestor)
# # ggplot() +
# # geom_sf(data = map) +
# # geom_sf(data = ancestors, size = 0.5, alpha = 0.2) +
# # geom_sf(data = sf::st_set_geometry(ancestors, "parent_location"),
# # aes(shape = parent_pop, color = parent_pop)) +
# # geom_sf(data = filter(ts_nodes(ts), name == e), size = 1) +
# # geom_sf_label(data = filter(ancestors, parent_pop == "YAM"), aes(label = parent_id), alpha = 0.5) +
# # coord_sf(expand = 0) +
# # labs(x = "longitude", y = "latitude") +
# # theme_bw() +
# # # facet_grid(. ~ node_id, labeller = labeller(node_id = chrom_names)) +
# # theme(legend.position = "bottom")
#
# # # finding appropriate ancestors for plotting an illustrative tree ("ANA" ancestor)
# # ggplot() +
# # geom_sf(data = map) +
# # geom_sf(data = ancestors, size = 0.5, alpha = 0.2) +
# # geom_sf(data = sf::st_set_geometry(ancestors, "parent_location"),
# # aes(shape = parent_pop, color = parent_pop)) +
# # geom_sf(data = filter(ts_nodes(ts), name == e), size = 1) +
# # geom_sf_label(data = filter(ancestors, parent_pop == "ANA"), aes(label = parent_id), alpha = 0.5) +
# # coord_sf(expand = 0, xlim = c(20, 40), ylim = c(35, 45), crs = 4326) +
# # labs(x = "longitude", y = "latitude") +
# # theme_bw() +
# # # facet_grid(. ~ node_id, labeller = labeller(node_id = chrom_names)) +
# # theme(legend.position = "bottom")
#
# # # finding appropriate leaves for plotting an illustrative tree ("EHG" ancestor)
# # ggplot() +
# # geom_sf(data = map) +
# # geom_sf(data = ancestors, size = 0.5, alpha = 0.2) +
# # geom_sf(data = sf::st_set_geometry(ancestors, "parent_location"),
# # aes(shape = parent_pop, color = parent_pop)) +
# # geom_sf(data = filter(ts_nodes(ts), name == e), size = 1) +
# # geom_sf_label(data = filter(ancestors, parent_pop == "EHG"), aes(label = parent_id), alpha = 0.5) +
# # coord_sf(expand = 0) +
# # labs(x = "longitude", y = "latitude") +
# # theme_bw() +
# # # facet_grid(. ~ node_id, labeller = labeller(node_id = chrom_names)) +
# # theme(legend.position = "bottom")
#
# # ts_yam_anc <- ts_descendants(ts, x = 39806) %>% filter(!is.na(name)) %>% as.data.frame() %>% arrange(parent_time)
# # ts_ana_anc <- ts_descendants(ts, x = 41734) %>% filter(!is.na(name)) %>% as.data.frame() %>% filter(grepl("ANA_", name))
# # ts_ehg_anc <- ts_descendants(ts, x = 42843) %>% filter(!is.na(name)) %>% as.data.frame( %>% arrange(parent_time))
#
# # names <- c("EUR_578", ts_yam_anc$name, ts_ana_anc$name, ts_ehg_anc$name)
## ----ex4, time_it = TRUE, eval = eval_chunk, fig.keep='none'------------------
ts_small <- ts_simplify(ts, simplify_to = c("EUR_578", "YAM_75", "ANA_163", "EHG_208"))
# tskit uses zero-based indexing
tree <- ts_phylo(ts_small, i = 15 / scaling)
nodes <- ts_nodes(tree)
edges <- ts_edges(tree)
ancestors <- ts_ancestors(ts, "EUR_578")
## ----echo = FALSE, eval = eval_chunk && record_snapshot-----------------------
# ts_save(ts, "/Users/mp/Documents/postdoc/slendr-paper/preprint_models_v0.5/ex3_small.trees")
## ----figure_ex4, eval = Sys.getenv("R_HAS_GGTREE") == TRUE && eval_chunk, fig.height=9, fig.width=8.5, class.source = "fold-hide"----
# library(ggtree)
#
# # prepare annotation table for ggtree linking R phylo node ID (not tskit integer
# # ID!) of each node with its population name
# df <- as_tibble(nodes) %>% select(node = phylo_id, pop)
#
# abs_comma <- function (x, ...) {
# format(abs(x) / 1000, ..., scientific = FALSE, trim = TRUE)
# }
#
# highlight_nodes <- as_tibble(nodes) %>% dplyr::filter(name == "EUR_578") %>% .$phylo_id
#
# p_tree <- ggtree(tree, aes(color = pop, fill = pop)) %<+% df +
# geom_tiplab(align = TRUE, geom = "label", offset = 2000,
# color = "white", fontface = "bold", size = 2.7) +
# geom_tiplab(align = TRUE, geom = NULL, linetype = "dotted", size = 0) +
# geom_point2(aes(subset = (node %in% highlight_nodes)), color = "black", size = 2.7) +
# geom_label2(aes(label = label, subset = !isTip),
# color = "black", size = 2.7) +
# theme_tree2() +
# theme(legend.position = "none") +
# xlab("time before present [thousand years ago]") +
# scale_x_continuous(limits = c(-80000, 31000), labels = abs_comma,
# breaks = -c(100, 80, 60, 40, 20, 0) * 1000)
# p_tree <- revts(p_tree)
#
# # nodes$label <- ifelse(is.na(nodes$name), nodes$node_id, nodes$name)
# nodes$label <- sapply(1:nrow(nodes), function(i) {
# if (is.na(nodes[i, ]$name))
# nodes[i, ]$node_id
# else {
# ind <- nodes[i, ]$name
# paste(nodes[!is.na(nodes$name) & nodes$name == ind, ]$node_id, collapse = "&")
# }
# })
#
# p_map <- ggplot() +
# geom_sf(data = map) +
# geom_sf(data = edges, aes(color = parent_pop), size = 0.5) +
# geom_sf_label(data = nodes[!nodes$sampled, ],
# aes(label = node_id, fill = pop), size = 3) +
# geom_sf_label(data = nodes[nodes$sampled, ],
# aes(label = label, fill = pop), size = 3,
# fontface = "bold", color = "white") +
# coord_sf(xlim = c(3177066.1, 7188656.9),
# ylim = c(757021.7, 5202983.3), expand = 0) +
# guides(fill = guide_legend("", override.aes = aes(label = ""))) +
# guides(color = "none") +
# scale_colour_discrete(drop = FALSE) +
# scale_fill_discrete(drop = FALSE) +
# theme_bw() +
# theme(legend.position = "bottom",
# axis.title.x = element_blank(),
# axis.title.y = element_blank())
#
# chrom_names <- stats::setNames(
# c("EUR_578 (node 6)", "EUR_578 (node 7)"),
# unique(ancestors$node_id)
# )
#
# p_ancestors <- ggplot() +
# geom_sf(data = map) +
# geom_sf(data = ancestors, size = 0.5, alpha = 0.25) +
# geom_sf(data = sf::st_set_geometry(ancestors, "parent_location"),
# aes(shape = parent_pop, color = parent_pop)) +
# geom_sf(data = filter(ts_nodes(ts), name == "EUR_578"), size = 3) +
# coord_sf(expand = 0) +
# labs(x = "longitude", y = "latitude") +
# theme_bw() +
# facet_grid(. ~ node_id, labeller = labeller(node_id = chrom_names)) +
# theme(legend.position = "none")
#
# p_legend <- ggdraw() + draw_grob(grid::grobTree(get_legend(p_map)))
#
# p_ex4 <- plot_grid(
# p_code,
# plot_grid(p_tree + theme(legend.position = "none"),
# p_map + theme(legend.position = "none"),
# labels = c("B", "C"), rel_widths = c(1, 0.9)),
# p_ancestors,
# p_legend,
# labels = c("A", "", "D", ""),
# nrow = 4, rel_heights = c(0.5, 1, 1, 0.1)
# )
#
# p_ex4
## ----include = FALSE, eval = eval_chunk && record_snapshot--------------------
# ggsave("/Users/mp/Documents/postdoc/slendr-paper/preprint_models_v0.5/ex4.pdf", p_ex4, width = 8.5, height = 9, units = "in")
## ----echo=FALSE---------------------------------------------------------------
data.frame(
example = as.vector(names(all_times)),
time = as.numeric(all_times),
units = sapply(all_times, units),
row.names = NULL
) %>% knitr::kable()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.