Nothing
## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
set.seed(123)
## -----------------------------------------------------------------------------
library(tidypopgen)
vcf_path <-
system.file("/extdata/anolis/punctatus_t70_s10_n46_filtered.recode.vcf.gz",
package = "tidypopgen"
)
anole_gt <-
gen_tibble(vcf_path, quiet = TRUE, backingfile = tempfile("anolis_"))
## -----------------------------------------------------------------------------
anole_gt
## -----------------------------------------------------------------------------
pops_path <- system.file("/extdata/anolis/punctatus_n46_meta.csv",
package = "tidypopgen"
)
pops <- read.csv(pops_path)
pops
## ----error=TRUE---------------------------------------------------------------
try({
anole_gt <- anole_gt %>% left_join(pops, by = "id")
})
## -----------------------------------------------------------------------------
anole_gt %>% glimpse()
## -----------------------------------------------------------------------------
anole_gt <- gt_add_sf(anole_gt, c("longitude", "latitude"))
anole_gt
## -----------------------------------------------------------------------------
library(rnaturalearth)
library(ggplot2)
map <- ne_countries(
continent = "South America",
type = "map_units", scale = "medium"
)
ggplot() +
geom_sf(data = map) +
geom_sf(data = anole_gt$geometry) +
coord_sf(
xlim = c(-85, -30),
ylim = c(-30, 15)
) +
theme_minimal()
## ----error=TRUE---------------------------------------------------------------
try({
anole_pca <- anole_gt %>% gt_pca_partialSVD(k = 30)
})
## -----------------------------------------------------------------------------
anole_gt <- gt_impute_simple(anole_gt, method = "mode")
## -----------------------------------------------------------------------------
anole_pca <- anole_gt %>% gt_pca_partialSVD(k = 30)
## -----------------------------------------------------------------------------
anole_pca
## -----------------------------------------------------------------------------
tidy(anole_pca, matrix = "eigenvalues")
## ----fig.alt = "Scree Plot of eigenvalues for each Principal Component"-------
autoplot(anole_pca, type = "screeplot")
## ----fig.alt = "Score plot of individuals across the first and second Principal Components"----
autoplot(anole_pca, type = "scores")
## ----fig.alt = "Score plot of individuals across the first and second Principal Components, with individual samples coloured by population"----
library(ggplot2)
autoplot(anole_pca, type = "scores") +
aes(color = anole_gt$population) +
labs(color = "population")
## -----------------------------------------------------------------------------
anole_gt <- augment(anole_pca, data = anole_gt)
## ----fig.alt = "Another score plot of individuals across the first and second Principal Components, with individual samples coloured by population, using ggplot2"----
anole_gt %>% ggplot(aes(.fittedPC1, .fittedPC2, color = population)) +
geom_point() +
labs(x = "PC1", y = "PC2", color = "Population")
## ----fig.alt = "Plot of the loadings of Principal Component 1 for each loci"----
autoplot(anole_pca, type = "loadings")
## -----------------------------------------------------------------------------
anole_gt_load <- augment_loci(anole_pca, data = anole_gt)
## ----fig.alt = "Plot of the cumulative loadings for each Principal Component"----
library(ggplot2)
tidy(anole_pca, matrix = "eigenvalues") %>%
ggplot(mapping = aes(x = PC, y = cumulative)) +
geom_point()
## -----------------------------------------------------------------------------
anole_clusters <- gt_cluster_pca(anole_pca, n_pca = 10)
## ----fig.alt = "Plot of BIC (Bayesian Information Criteria) for each value of k"----
autoplot(anole_clusters)
## -----------------------------------------------------------------------------
anole_clusters <- gt_cluster_pca_best_k(anole_clusters)
## -----------------------------------------------------------------------------
anole_clusters$best_k
## -----------------------------------------------------------------------------
anole_dapc <- gt_dapc(anole_clusters)
## -----------------------------------------------------------------------------
anole_dapc
## -----------------------------------------------------------------------------
tidy(anole_dapc, matrix = "eigenvalues")
## ----fig.alt = "Scree plot of the eigenvalues on the two discriminant axes (defined by `ld`)"----
autoplot(anole_dapc, type = "screeplot")
## ----fig.alt = "Bar plot of eigenvalues against the two discriminant axes"----
tidy(anole_dapc, matrix = "eigenvalues") %>%
ggplot(aes(x = LD, y = eigenvalue)) +
geom_col()
## ----fig.alt = "Scatterplot of the scores of each individual on the two discriminant axes (defined by `ld`)"----
autoplot(anole_dapc, type = "scores")
## ----fig.alt = "Bar plot showing the probability of assignment to each cluster"----
autoplot(anole_dapc, type = "components", group = anole_gt$population)
## ----fig.alt = "Plot of the loadings of Principal Component 1 for each loci"----
autoplot(anole_dapc, "loadings")
## ----fig.alt = "Scatterplot of individuals with ellipses surrounding each cluster of individuals identified by the DAPC analysis. Eigenvalues are displayed in an inset in the bottom right corner of the plot."----
library(adegenet)
scatter(anole_dapc, posi.da = "bottomright")
## -----------------------------------------------------------------------------
anole_gt <- anole_gt %>% mutate(dapc = anole_dapc$grp)
ggplot() +
geom_sf(data = map) +
geom_sf(data = anole_gt$geometry, aes(color = anole_gt$dapc)) +
coord_sf(
xlim = c(-85, -30),
ylim = c(-30, 15)
) +
labs(color = "DAPC cluster") +
theme_minimal()
## ----results='hide', echo=TRUE, eval=FALSE------------------------------------
# anole_gt <- anole_gt %>% group_by(population)
#
# anole_adm_original <- gt_admixture(
# x = anole_gt,
# k = 2:6,
# n_runs = 1,
# crossval = TRUE,
# seed = 1
# )
## ----results="hide", echo=FALSE, eval=TRUE, warning=FALSE---------------------
anole_gt <- anole_gt %>% group_by(population)
anole_adm <- readRDS("a03_anole_adm.rds")
## ----fig.alt = "Plot of cross-entropy for each value of K, showing K = 3 has the lowest cross-entropy"----
autoplot(anole_adm, type = "cv")
## ----fig.alt = "Barplot of individuals coloured by predicted ancestry proportion (Q) from each of K ancestral sources"----
autoplot(anole_adm,
k = 3, run = 1, data = anole_gt, annotate_group = FALSE,
type = "barplot"
)
## ----fig.alt = "Barplot of individuals coloured by predicted ancestry proportion (Q) from each of K ancestral sources"----
autoplot(anole_adm,
type = "barplot", k = 3, run = 1, data = anole_gt,
annotate_group = TRUE, arrange_by_group = TRUE
)
## ----fig.alt = "Barplot of individuals coloured by predicted ancestry proportion (Q) from each of K ancestral sources, with individuals arranged according to their dominant ancestry propotion"----
autoplot(anole_adm,
type = "barplot", k = 3, run = 1,
data = anole_gt, annotate_group = TRUE, arrange_by_group = TRUE,
arrange_by_indiv = TRUE, reorder_within_groups = TRUE
)
## -----------------------------------------------------------------------------
q_mat <- get_q_matrix(anole_adm, k = 3, run = 1)
## -----------------------------------------------------------------------------
anole_gt_adm <- augment(q_mat, data = anole_gt)
head(anole_gt_adm)
## -----------------------------------------------------------------------------
tidy_q <- tidy(q_mat, anole_gt)
head(tidy_q)
## ----fig.alt = "Barplot of individuals coloured by predicted ancestry proportion (Q) from each of K ancestral sources"----
tidy_q <- tidy_q %>%
dplyr::group_by(id) %>%
dplyr::mutate(dominant_q = max(percentage)) %>%
dplyr::ungroup() %>%
dplyr::arrange(group, dplyr::desc(dominant_q)) %>%
dplyr::mutate(
plot_order = dplyr::row_number(),
id = factor(id, levels = unique(id))
)
plt <- ggplot2::ggplot(tidy_q, ggplot2::aes(x = id, y = percentage, fill = q)) +
ggplot2::geom_col(
width = 1,
position = ggplot2::position_stack(reverse = TRUE)
) +
ggplot2::labs(
y = "Population Structure for K = 3",
title = "ADMIXTURE algorithm on A. punctatus"
) +
theme_distruct() +
scale_fill_distruct()
plt
## -----------------------------------------------------------------------------
anole_adm <- gt_admix_reorder_q(anole_adm, group = anole_gt$pop)
## ----fig.alt = "Barplot of individuals coloured by predicted ancestry proportion (Q) from each of K ancestral sources, labelled Atlantic forest or Amazonian forest"----
autoplot(anole_adm,
type = "barplot", k = 3, run = 1,
annotate_group = TRUE, arrange_by_group = TRUE,
arrange_by_indiv = TRUE, reorder_within_groups = TRUE
)
## ----echo=FALSE, results='hide'-----------------------------------------------
# reload the admxiture results (they were reordered above)
anole_adm <- readRDS("a03_anole_adm.rds")
runs <- c(1)
k_values <- c(2:6)
q_matrices <- list()
adm_dir <- file.path(tempdir(), "anolis_adm")
if (!dir.exists(adm_dir)) {
dir.create(adm_dir)
}
for (x in k_values) {
q_matrices[[x]] <- list()
for (i in runs) {
q_matrices[[x]][[i]] <- get_q_matrix(anole_adm, k = x, run = i)
write.table(q_matrices[[x]][[i]],
file = paste0(adm_dir, "/K", x, "run", i, ".Q"),
col.names = FALSE, row.names = FALSE, quote = FALSE
)
}
}
## -----------------------------------------------------------------------------
adm_dir <- file.path(tempdir(), "anolis_adm")
list.files(adm_dir)
## -----------------------------------------------------------------------------
q_list <- read_q_files(adm_dir)
summary(q_list)
## -----------------------------------------------------------------------------
head(get_q_matrix(q_list, k = 3, run = 1))
## ----fig.alt = "Barplot of individuals coloured by predicted ancestry proportion (Q) from each of K ancestral sources"----
autoplot(get_q_matrix(q_list, k = 3, run = 1),
data = anole_gt,
annotate_group = TRUE, arrange_by_group = TRUE,
arrange_by_indiv = TRUE, reorder_within_groups = TRUE
)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.