#' Call `ggtree()` and add metadata
#'
#' This function calls the `ggtree` function of the ggtree package on the tree
#' component of a tidygenomes object and adds all available genome and node
#' metadata.
#'
#' @param tg A tidygenomes object
#' @param ... Extra arguments to pass on to `ggtree()`
#'
#' @return A ggplot object
#'
#' @export
ggtree_augmented <- function(tg, ...) {
# verify whether ggtree package is attached
if (! "ggtree" %in% .packages()) {
stop(paste("The ggtree package must be loaded to use this function"))
}
if (! is.null(tg$phylogroups)) {
tg <- tg %>% modify_at("nodes", left_join, tg$phylogroups, by = "phylogroup")
}
if (! is.null(tg$patterns)) {
tg <- tg %>% modify_at("nodes", left_join, tg$patterns, by = "node")
}
nodes <-
tg$nodes %>%
left_join(tg$genomes, by = "node") %>%
select(label = node, everything())
ggtree::ggtree(tg$tree, ...) %<+%
nodes
}
#' Construct an "upset plot" to explore gene presence/absence
#'
#' This function constructs an "upset plot" showing patterns of gene familiy
#' presence/absence in order of pattern frequency (number of gene families
#' following the pattern). The genomes are ordered along their phylogeny.
#'
#' @param tg A tidygenomes object
#' @param genome_name Name of a variable (unquoted) from the genome or
#' phylogroup table that should be used to name the genomes
#' @param genome_col Name of a variable (unquoted) from the genome or phylogroup
#' table that should be used to color the genome names
#' @param genome_bold Name of a logical variable (unquoted) from the genome or
#' phylogroup table that indicates which genome names should be in bold
#' @param color_scale Ggplot color scale generated by a scale_color_... type
#' function
#' @param n The number of presence/absence patterns to plot
#' @param freq_min The minimum frequency of patterns to appear on the plot
#' @param barchart_height The relative height of the frequency barchart,
#' expressed as a number of rows from the main plot
#' @param frequency_labels Logical variable indicating whether the frequency
#' numbers should be plotted as labels on top of the barchart
#' @param phylogroups Should horizontal lines be plotted between phylogroups?
#'
#' @return A ggplot object
#'
#' @export
upset_plot <- function(
tg, genome_name = genome, genome_col = NULL, genome_bold = NULL,
color_scale = scale_color_brewer(palette = "Paired", guide = "none"),
n = 50,
freq_min = NULL,
barchart_height = 50,
frequency_labels = F,
phylogroups = F
) {
genome_name <- rlang::enexpr(genome_name)
genome_col <- rlang::enexpr(genome_col)
genome_bold <- rlang::enexpr(genome_bold)
if (is.null(tg$patterns)) stop("no patterns found")
tg$patterns <-
tg$patterns %>%
arrange(desc(frequency)) %>%
slice(1:UQ(n)) %>%
{if (! is.null(freq_min)) filter(., frequency >= !! freq_min) else .} %>%
mutate(pattern_fct = factor(pattern, levels = pattern))
genomes <-
genomes_extended(tg) %>%
mutate(node_fct = factor(
node, levels = !! tipnodes_ladderized(tg$tree)
)) %>%
arrange(desc(node_fct)) %>%
mutate(genome_name_fct = factor(
!! genome_name, levels = !! genome_name
)) %>%
mutate(genome_bold = !! genome_bold)
genomes$is_phylogroup_ancestor <- NULL
genomes$node <- NULL
if (phylogroups) {
yintercepts <-
genomes %>%
mutate(rank = as.numeric(genome_name_fct)) %>%
group_by(phylogroup) %>%
slice(1) %>%
ungroup() %>%
mutate(yintercept = rank - 0.5) %>%
filter(yintercept > 1) %>%
pull(yintercept)
}
theme_upset <-
theme(
axis.text.x = element_blank(),
axis.title = element_blank(),
axis.ticks = element_blank(),
panel.background = element_rect(fill = "transparent"),
plot.background = element_rect(fill = "transparent")
)
plot_main <-
tg$components %>%
right_join(tg$patterns, by = "pattern") %>%
left_join(genomes, by = "genome") %>%
{if (! is.null(tg$patterns$node)) left_join(., tg$nodes, by = "node") else .} %>%
ggplot(aes(x = pattern_fct, y = genome_name_fct, group = pattern_fct)) +
geom_line(col = "grey") +
geom_point(size = 3, aes(col = !! genome_col)) +
{if (phylogroups) geom_hline(yintercept = yintercepts) else NULL} +
color_scale +
theme_bw() +
theme_upset
if (! is.null(genome_bold)) {
plot_main <- plot_main +
theme(axis.text.y = element_text(
face = if_else(genomes$genome_bold, "bold", "plain")
))
}
plot_marg <-
tg$patterns %>%
ggplot(aes(x = pattern_fct, y = frequency)) +
geom_histogram(stat = "identity") +
ylim(c(0, 1.1 * max(tg$patterns$frequency))) +
theme_bw() +
theme_upset +
theme(
panel.border = element_blank(),
panel.grid = element_blank()
)
if (frequency_labels) {
plot_marg <-
plot_marg +
geom_text(aes(label = frequency), vjust= - 0.25, size = 2)
}
egg::ggarrange(
plot_marg, plot_main,
nrow = 2, ncol = 1, heights = c(barchart_height, nrow(genomes))
)
}
#' Construct an heatmap to explore a pair variable
#'
#' This function constructs a heatmap of a given variable from the pair table.
#' Genomes are ordered along the phylogeny.
#'
#' @param tg A tidygenomes object
#' @param genome_label Variable (unquoted) from the genome table to use as
#' genome label
#' @param distance Variable (unquoted) from the pair table to plot on the
#' heatmap
#'
#' @return A ggplot object
#'
#' @export
heatmap <- function(tg, genome_label, distance, complete_pairs = T) {
genome_label <- rlang::enexpr(genome_label)
distance <- rlang::enexpr(distance)
tg$genomes <-
tg$genomes %>%
mutate(genome_label = !! genome_label) %>%
mutate(node_fct = factor(node, levels = tipnodes_ladderized(tg$tree))) %>%
arrange(node_fct) %>%
mutate(genome_label_fct = factor(genome_label, levels = genome_label))
tg$pairs %>%
{if (complete_pairs) complete_pairs(., genome_1, genome_2) else .} %>%
left_join(
tg$genomes %>% rename(genome_1 = genome), by = "genome_1"
) %>%
left_join(
tg$genomes %>% rename(genome_2 = genome), by = "genome_2",
suffix = c("_1", "_2")
) %>%
mutate(distance = !! distance) %>%
ggplot(aes(x = genome_label_fct_1, y = genome_label_fct_2, fill = distance)) +
geom_tile() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.