Nothing
env_present <- slendr:::is_slendr_env_present() eval_chunk <- Sys.which("slim") != "" && env_present && Sys.getenv("RUNNER_OS") == "" knitr::opts_chunk$set( collapse = TRUE, comment = "#>", dpi = 80, eval = eval_chunk ) record_snapshot <- FALSE # only save PDFs on a local machine # reporting knitr chunk run times based on: # https://bookdown.org/yihui/rmarkdown-cookbook/time-chunk.html#time-chunk all_times <- list() knitr::knit_hooks$set(time_it = local({ now <- NULL function(before, options) { if (before) { now <<- Sys.time() } else { res <- difftime(Sys.time(), now) all_times[[options$label]] <<- res } } }))
# workaround for GitHub actions getting "Killed" due to out of memory issues # -- when running unit tests on GitHub, smaller amount of sequence will be # simulated if (Sys.getenv("RUNNER_OS") != "") { scaling <- 10 # use 10X less sequence on GitHub actions } else { scaling <- 1 # but simulate normal amount of data otherwise } library(slendr) library(dplyr) library(ggplot2) library(purrr) library(tidyr) library(cowplot) library(forcats) init_env() SEED <- 42 set.seed(SEED)
# placeholder "figure" where a code chunk will be pasted p_code <- ggplot() + geom_text(aes(x = 1, y = 1), label = "code will be here") + theme_void()
o <- population("o", time = 1, N = 100) c <- population("c", time = 2500, N = 100, parent = o) a <- population("a", time = 2800, N = 100, parent = c) b <- population("b", time = 3700, N = 100, parent = a) x1 <- population("x1", time = 4000, N = 15000, parent = c) x2 <- population("x2", time = 4300, N = 15000, parent = x1) gf <- gene_flow(from = b, to = x1, start = 5400, end = 5800, 0.1) model <- compile_model( populations = list(o, a, b, c, x1, x2), gene_flow = gf, generation_time = 1, simulation_length = 6000 ) plot_model(model, sizes = FALSE, proportions = TRUE) # panel B ts <- msprime(model, sequence_length = 100e6 / scaling, recombination_rate = 1e-8, random_seed = SEED) %>% ts_mutate(mutation_rate = 1e-8, random_seed = SEED) samples <- ts_samples(ts) %>% group_by(pop) %>% sample_n(100) # panel C divergence <- ts_divergence(ts, split(samples$name, samples$pop)) # panel D f4ratio <- ts_f4ratio( ts, X = filter(samples, pop %in% c("x1", "x2"))$name, A = "a_1", B = "b_1", C = "c_1", O = "o_1" )
system(sprintf("cp -r %s %s", model$path, "/Users/mp/Documents/postdoc/slendr-paper/preprint_models_v0.5/ex1")) ts_save(ts, "/Users/mp/Documents/postdoc/slendr-paper/preprint_models_v0.5/ex1.trees")
divergence <- divergence %>% mutate(pair = paste(x, "-", y)) f4ratio <- f4ratio %>% mutate(population = gsub("^(.*)_.*$", "\\1", X), alpha = alpha * 100) p_ex1_divergence <- divergence %>% ggplot(aes(fct_reorder(pair, divergence), divergence)) + geom_point(size = 2.5) + xlab("population pair") + ylab("pairwise divergence") + theme_minimal() + theme(legend.position = "bottom", legend.text = element_text(size = 10), axis.text.x = element_text(hjust = 1, angle = 45, size = 8), axis.title.x = element_blank()) p_ex1_f4ratio <- f4ratio %>% ggplot(aes(population, alpha)) + geom_hline(yintercept = 0, linetype = 2) + geom_jitter(alpha = 0.5) + geom_boxplot(outlier.shape = NA, alpha = 0.7) + ylab(base::expression(italic("f")[4]~"-ratio ancestry proportion [%]")) + theme_minimal() + coord_cartesian(ylim = c(0, 20)) + theme(legend.position = "none", axis.text.x = element_text(size = 11), axis.title.x = element_blank(), panel.grid.major.x = element_blank())#; p_ex3_f4ratio # let's avoid ggpubr as another dependency: # https://github.com/kassambara/ggpubr/blob/master/R/as_ggplot.R#L27 p_ex1_legend <- ggdraw() + draw_grob(grid::grobTree(get_legend(p_ex1_divergence))) p_ex1_model <- plot_model(model, sizes = FALSE, proportions = TRUE) p_ex1 <- plot_grid( p_code, plot_grid( p_ex1_model, plot_grid( p_ex1_divergence + theme(legend.position = "none"), p_ex1_f4ratio, ncol = 2, rel_widths = c(1, 0.8), labels = c("C", "D") ), p_ex1_legend, nrow = 3, rel_heights = c(1, 1, 0.1), labels = "B" ), nrow = 1, labels = c("A", "") ) p_ex1
ggsave("/Users/mp/Documents/postdoc/slendr-paper/preprint_models_v0.5/ex1.pdf", p_ex1, width = 9, height = 6, units = "in")
map <- world(xrange = c(0, 10), yrange = c(0, 10), landscape = region(center = c(5, 5), radius = 5)) p1 <- population("pop1", time = 1, N = 2000, map = map, competition = 0) p2 <- population("pop2", time = 1, N = 2000, map = map, competition = 9) p3 <- population("pop3", time = 1, N = 2000, map = map, competition = 6) p4 <- population("pop4", time = 1, N = 2000, map = map, competition = 5) p5 <- population("pop5", time = 1, N = 2000, map = map, competition = 4) p6 <- population("pop6", time = 1, N = 2000, map = map, competition = 3) p7 <- population("pop7", time = 1, N = 2000, map = map, competition = 2) p8 <- population("pop8", time = 1, N = 2000, map = map, competition = 1) model <- compile_model( populations = list(p1, p2, p3, p4, p5, p6, p7, p8), generation_time = 1, simulation_length = 5000, resolution = 0.1, mating = 0.1, dispersal = 0.05 ) ts <- slim(model, sequence_length = 10e6 / scaling, recombination_rate = 1e-8, random_seed = SEED) %>% ts_simplify() %>% ts_mutate(mutation_rate = 1e-7, random_seed = SEED) locations <- ts_nodes(ts) %>% filter(time == max(time)) heterozygosity <- ts_samples(ts) %>% group_by(pop) %>% sample_n(100) %>% mutate(pi = ts_diversity(ts, name)$diversity)
system(sprintf("cp -r %s %s", model$path, "/Users/mp/Documents/postdoc/slendr-paper/preprint_models_v0.5/ex2")) ts_save(ts, "/Users/mp/Documents/postdoc/slendr-paper/preprint_models_v0.5/ex2.trees")
p_ex2_clustering <- ggplot() + geom_sf(data = map) + geom_sf(data = locations, aes(color = pop), size = 0.05, alpha = 0.25) + facet_grid(. ~ pop, switch = "x") + xlab("spatial distributions emerged in the simulation") + theme( strip.background = element_blank(), strip.text = element_text(size = 11), panel.grid = element_blank(), axis.ticks = element_blank(), axis.text = element_blank(), panel.background = element_blank() ) + guides(color = "none") p_ex2_diversity <- ggplot(heterozygosity, aes(pop, pi, color = pop)) + geom_violin(color = "black") + geom_jitter(alpha = 0.5) + labs(y = "individual heterozygosity") + guides(color = "none") + theme_minimal() + theme(axis.title.x = element_blank(), axis.text.x = element_blank(), panel.grid.major.x = element_blank(), plot.margin = margin(t = 0.2, r = 0.2, b = -0.1, l = 0.2, "cm")) p_ex2 <- plot_grid( p_code, plot_grid( p_ex2_diversity, p_ex2_clustering + theme(plot.margin = margin(t = 0, r = 0.4, b = 0, l = 1.8, "cm")), nrow = 2, rel_heights = c(1, 0.5), labels = c("B", "C") ), nrow = 2, labels = c("A", ""), rel_heights = c(1.5, 1) ) p_ex2
ggsave("/Users/mp/Documents/postdoc/slendr-paper/preprint_models_v0.5/ex2.pdf", p_ex2, width = 6, height = 9, units = "in")
map <- world(xrange = c(-13, 70), yrange = c(18, 65), crs = 3035) R1 <- region( "EHG range", map, polygon = list(c(26, 55), c(38, 53), c(48, 53), c(60, 53), c(60, 60), c(48, 63), c(38, 63), c(26, 60)) ) R2 <- region( "Europe", map, polygon = list( c(-8, 35), c(-5, 36), c(10, 38), c(20, 35), c(25, 35), c(33, 45), c(20, 58), c(-5, 60), c(-15, 50) ) ) R3 <- region( "Anatolia", map, polygon = list(c(28, 35), c(40, 35), c(42, 40), c(30, 43), c(27, 40), c(25, 38)) ) R4 <- join(R2, R3) R5 <- region( "YAM range", map, polygon = list(c(26, 50), c(38, 49), c(48, 50), c(48, 56), c(38, 59), c(26, 56)) ) ooa_trajectory <- list(c(40, 30), c(50, 30), c(60, 40), c(45, 55))
map <- world(xrange = c(-13, 70), yrange = c(18, 65), crs = 3035) ooa <- population( "OOA", time = 50000, N = 500, remove = 23000, map = map, center = c(33, 30), radius = 400e3 ) %>% move(trajectory = ooa_trajectory, start = 50000, end = 40000, snapshots = 30) ehg <- population( "EHG", time = 28000, N = 1000, parent = ooa, remove = 6000, map = map, polygon = R1 ) eur <- population( "EUR", time = 30000, N = 2000, parent = ooa, map = map, polygon = R2 ) %>% resize(N = 10000, time = 5000, end = 0, how = "exponential") ana <- population( "ANA", time = 25000, N = 4000, parent = ooa, remove = 3000, map = map, polygon = R3 ) %>% expand_range(by = 3e6, start = 10000, end = 7000, polygon = R4, snapshots = 15) yam <- population( "YAM", time = 7000, N = 600, parent = ehg, remove = 2500, map = map, polygon = R5 ) %>% move(trajectory = list(c(15, 50)), start = 5000, end = 3000, snapshots = 10) gf <- list( gene_flow(ana, to = yam, rate = 0.5, start = 6500, end = 5000), gene_flow(ana, to = eur, rate = 0.6, start = 8000, end = 6000), gene_flow(yam, to = eur, rate = 0.7, start = 3500, end = 3000) ) model <- compile_model( populations = list(ooa, ehg, eur, ana, yam), gene_flow = gf, generation_time = 30, resolution = 10e3, competition = 150e3, mating = 120e3, dispersal = 90e3 ) samples <- schedule_sampling( model, times = seq(0, 50000, by = 1000), list(ehg, 20), list(ana, 20), list(yam, 20), list(eur, 20) ) plot_model(model, sizes = FALSE) plot_map(model) ts <- slim( model, burnin = 200000, samples = samples, random_seed = SEED, sequence_length = 200000, recombination_rate = 1e-8 )
system(sprintf("cp -r %s %s", model$path, "/Users/mp/Documents/postdoc/slendr-paper/preprint_models_v0.5/ex3")) ts_save(ts, "/Users/mp/Documents/postdoc/slendr-paper/preprint_models_v0.5/ex3.trees")
devtools::load_all() init_env() model <- read_model("/Users/mp/Documents/postdoc/slendr-paper/preprint_models_v0.5/ex3") ts <- ts_load("/Users/mp/Documents/postdoc/slendr-paper/preprint_models_v0.5/ex3.trees", model = model) library(dplyr) library(ggplot2) library(purrr) library(tidyr) library(cowplot) library(forcats) init_env() SEED <- 42 set.seed(SEED) # placeholder "figure" where a code chunk will be pasted p_code <- ggplot() + geom_text(aes(x = 1, y = 1), label = "code will be here") + theme_void() map <- world(xrange = c(-13, 70), yrange = c(18, 65), crs = 3035) e <- "EUR_578"
p_map <- plot_map(model) + theme(legend.position = "bottom") + guides(alpha = "none") p_ex3 <- plot_grid( p_code, plot_grid( plot_model(model, sizes = FALSE), p_map, labels = c("B", "C"), nrow = 2, rel_heights = c(1, 1) ), ncol = 2, labels = c("A", ""), rel_widths = c(1, 1.2) ) p_ex3
ggsave("/Users/mp/Documents/postdoc/slendr-paper/preprint_models_v0.5/ex3.pdf", p_ex3, width = 9, height = 6, units = "in")
# eurs <- ts_samples(ts) %>% filter(pop == "EUR") %>% pull(name) # # eurs <- c("EUR_542", "EUR_549", "EUR_567", "EUR_581", "EUR_582") # eurs <- e <- c("EUR_578") # ############# # ###### FOUND THE IND_578 (see anc.pdf) # ###### actually ,keep the anc.pdf in the archive # # finding a EUR individual with illustratively rich spatial ancestry # pdf("~/Desktop/anc.pdf") # for (e in eurs) { # ancestors <- ts_ancestors(ts, e, verbose=T) # if (length(unique(as.character(ancestors$parent_pop))) < 5) next # chrom_names <- stats::setNames( # c(paste(e, "(chromosome 1, node 10)"), paste(e, "(chromosome 2, node 11)")), # unique(ancestors$node_id) # ) # p_ancestors <- ggplot() + # geom_sf(data = map) + # geom_sf(data = ancestors, size = 0.5, alpha = 0.2) + # geom_sf(data = sf::st_set_geometry(ancestors, "parent_location"), # aes(shape = parent_pop, color = parent_pop)) + # geom_sf(data = filter(ts_nodes(ts), name == e), size = 3) + # coord_sf(expand = 0) + # labs(x = "longitude", y = "latitude") + # theme_bw() + # facet_grid(. ~ node_id, labeller = labeller(node_id = chrom_names)) + # theme(legend.position = "bottom"); print(p_ancestors) # } # dev.off() # # finding appropriate ancestors for plotting an illustrative tree ("YAM" ancestor) # ggplot() + # geom_sf(data = map) + # geom_sf(data = ancestors, size = 0.5, alpha = 0.2) + # geom_sf(data = sf::st_set_geometry(ancestors, "parent_location"), # aes(shape = parent_pop, color = parent_pop)) + # geom_sf(data = filter(ts_nodes(ts), name == e), size = 1) + # geom_sf_label(data = filter(ancestors, parent_pop == "YAM"), aes(label = parent_id), alpha = 0.5) + # coord_sf(expand = 0) + # labs(x = "longitude", y = "latitude") + # theme_bw() + # # facet_grid(. ~ node_id, labeller = labeller(node_id = chrom_names)) + # theme(legend.position = "bottom") # # finding appropriate ancestors for plotting an illustrative tree ("ANA" ancestor) # ggplot() + # geom_sf(data = map) + # geom_sf(data = ancestors, size = 0.5, alpha = 0.2) + # geom_sf(data = sf::st_set_geometry(ancestors, "parent_location"), # aes(shape = parent_pop, color = parent_pop)) + # geom_sf(data = filter(ts_nodes(ts), name == e), size = 1) + # geom_sf_label(data = filter(ancestors, parent_pop == "ANA"), aes(label = parent_id), alpha = 0.5) + # coord_sf(expand = 0, xlim = c(20, 40), ylim = c(35, 45), crs = 4326) + # labs(x = "longitude", y = "latitude") + # theme_bw() + # # facet_grid(. ~ node_id, labeller = labeller(node_id = chrom_names)) + # theme(legend.position = "bottom") # # finding appropriate leaves for plotting an illustrative tree ("EHG" ancestor) # ggplot() + # geom_sf(data = map) + # geom_sf(data = ancestors, size = 0.5, alpha = 0.2) + # geom_sf(data = sf::st_set_geometry(ancestors, "parent_location"), # aes(shape = parent_pop, color = parent_pop)) + # geom_sf(data = filter(ts_nodes(ts), name == e), size = 1) + # geom_sf_label(data = filter(ancestors, parent_pop == "EHG"), aes(label = parent_id), alpha = 0.5) + # coord_sf(expand = 0) + # labs(x = "longitude", y = "latitude") + # theme_bw() + # # facet_grid(. ~ node_id, labeller = labeller(node_id = chrom_names)) + # theme(legend.position = "bottom") # ts_yam_anc <- ts_descendants(ts, x = 39806) %>% filter(!is.na(name)) %>% as.data.frame() %>% arrange(parent_time) # ts_ana_anc <- ts_descendants(ts, x = 41734) %>% filter(!is.na(name)) %>% as.data.frame() %>% filter(grepl("ANA_", name)) # ts_ehg_anc <- ts_descendants(ts, x = 42843) %>% filter(!is.na(name)) %>% as.data.frame( %>% arrange(parent_time)) # names <- c("EUR_578", ts_yam_anc$name, ts_ana_anc$name, ts_ehg_anc$name)
ts_small <- ts_simplify(ts, simplify_to = c("EUR_578", "YAM_75", "ANA_163", "EHG_208")) # tskit uses zero-based indexing tree <- ts_phylo(ts_small, i = (20 - 1) / scaling) nodes <- ts_nodes(tree) edges <- ts_edges(tree) ancestors <- ts_ancestors(ts, "EUR_578")
ts_save(ts, "/Users/mp/Documents/postdoc/slendr-paper/preprint_models_v0.5/ex3_small.trees")
library(ggtree) # prepare annotation table for ggtree linking R phylo node ID (not tskit integer # ID!) of each node with its population name df <- as_tibble(nodes) %>% select(node = phylo_id, pop) abs_comma <- function (x, ...) { format(abs(x) / 1000, ..., scientific = FALSE, trim = TRUE) } highlight_nodes <- as_tibble(nodes) %>% dplyr::filter(name == "EUR_578") %>% .$phylo_id p_tree <- ggtree(tree, aes(color = pop, fill = pop)) %<+% df + geom_tiplab(align = TRUE, geom = "label", offset = 2000, color = "white", fontface = "bold", size = 2.7) + geom_tiplab(align = TRUE, geom = NULL, linetype = "dotted", size = 0) + geom_point2(aes(subset = (node %in% highlight_nodes)), color = "black", size = 2.7) + geom_label2(aes(label = label, subset = !isTip), color = "black", size = 2.7) + theme_tree2() + theme(legend.position = "none") + xlab("time before present [thousand years ago]") + scale_x_continuous(limits = c(-80000, 31000), labels = abs_comma, breaks = -c(100, 80, 60, 40, 20, 0) * 1000) p_tree <- revts(p_tree) # nodes$label <- ifelse(is.na(nodes$name), nodes$node_id, nodes$name) nodes$label <- sapply(1:nrow(nodes), function(i) { if (is.na(nodes[i, ]$name)) nodes[i, ]$node_id else { ind <- nodes[i, ]$name paste(nodes[!is.na(nodes$name) & nodes$name == ind, ]$node_id, collapse = "&") } }) p_map <- ggplot() + geom_sf(data = map) + geom_sf(data = edges, aes(color = parent_pop), size = 0.5) + geom_sf_label(data = nodes[!nodes$sampled, ], aes(label = node_id, fill = pop), size = 3) + geom_sf_label(data = nodes[nodes$sampled, ], aes(label = label, fill = pop), size = 3, fontface = "bold", color = "white") + coord_sf(xlim = c(3177066.1, 7188656.9), ylim = c(757021.7, 5202983.3), expand = 0) + guides(fill = guide_legend("", override.aes = aes(label = ""))) + guides(color = "none") + scale_colour_discrete(drop = FALSE) + scale_fill_discrete(drop = FALSE) + theme_bw() + theme(legend.position = "bottom", axis.title.x = element_blank(), axis.title.y = element_blank()) chrom_names <- stats::setNames( c("EUR_578 (node 6)", "EUR_578 (node 7)"), unique(ancestors$node_id) ) p_ancestors <- ggplot() + geom_sf(data = map) + geom_sf(data = ancestors, size = 0.5, alpha = 0.25) + geom_sf(data = sf::st_set_geometry(ancestors, "parent_location"), aes(shape = parent_pop, color = parent_pop)) + geom_sf(data = filter(ts_nodes(ts), name == "EUR_578"), size = 3) + coord_sf(expand = 0) + labs(x = "longitude", y = "latitude") + theme_bw() + facet_grid(. ~ node_id, labeller = labeller(node_id = chrom_names)) + theme(legend.position = "none") p_legend <- ggdraw() + draw_grob(grid::grobTree(get_legend(p_map))) p_ex4 <- plot_grid( p_code, plot_grid(p_tree + theme(legend.position = "none"), p_map + theme(legend.position = "none"), labels = c("B", "C"), rel_widths = c(1, 0.9)), p_ancestors, p_legend, labels = c("A", "", "D", ""), nrow = 4, rel_heights = c(0.5, 1, 1, 0.1) ) p_ex4
ggsave("/Users/mp/Documents/postdoc/slendr-paper/preprint_models_v0.5/ex4.pdf", p_ex4, width = 8.5, height = 9, units = "in")
The following times were measured on a 16'' MacBook Pro with the Apple M1 Pro chip (2021 model), 32 GB RAM, running macOS Ventura Version 13.1.
data.frame( example = as.vector(names(all_times)), time = as.numeric(all_times), units = sapply(all_times, units), row.names = NULL ) %>% knitr::kable()
In the table above, ex1
, ex2
, ex3
, and ex4
correspond to runtimes of code shown in one of the four example figures in the slendr paper.
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.