R/plotters.R

Defines functions upset_plot ggtree_augmented

Documented in ggtree_augmented upset_plot

#' Call `ggtree()` and add metadata
#'
#' This function calls [ggtree::ggtree()] on the tree component of a tidygenomes object and
#' adds all available genome and node metadata.
#'
#' @param tg A tidygenomes object
#' @param ... Extra arguments to pass on to `ggtree()`
#' 
#' @return A ggplot object
#' 
#' @importFrom ggtree %<+%
#' @export
ggtree_augmented <- function(tg, ...) {
  
  if (! is.null(tg$phylogroups)) {
    
    tg <- tg %>% modify_at("nodes", left_join, tg$phylogroups, by = "phylogroup")
    
  }
  
  if (! is.null(tg$patterns)) {
    
    tg <- tg %>% modify_at("nodes", left_join, tg$patterns, by = "node")
    
  }
  
  nodes <- 
    tg$nodes %>%
    left_join(tg$genomes, by = "node") %>%
    select(label = node, everything())
  
  ggtree::ggtree(tg$tree, ...) %<+%
    nodes 
  
}

#' Construct an "upset plot" to explore gene presence/absence
#'
#' This function constructs an "upset plot" showing patterns of gene familiy
#' presence/absence in order of pattern frequency (number of gene families
#' following the pattern). The genomes are ordered along their phylogeny.
#'
#' @param tg A tidygenomes object
#' @param genome_name Name of a variable (unquoted) from the genome or
#'   phylogroup table that should be used to name the genomes
#' @param genome_col Name of a variable (unquoted) from the genome or phylogroup
#'   table that should be used to color the genome names
#' @param genome_bold Name of a logical variable (unquoted) from the genome or
#'   phylogroup table that indicates which genome names should be in bold
#' @param color_scale Ggplot color scale generated by a scale_color_... type
#'   function
#' @param n The number of presence/absence patterns to plot
#' @param freq_min The minimum frequency of patterns to appear on the plot
#' @param barchart_height The relative height of the frequency barchart,
#'   expressed as a number of rows from the main plot
#' @param frequency_labels Logical variable indicating whether the frequency
#'   numbers should be plotted as labels on top of the barchart
#' @param phylogroups Should horizontal lines be plotted between phylogroups? 
#' 
#' @return A ggplot object
#' 
#' @export
upset_plot <- function(
  tg, genome_name = genome, genome_col = NULL, genome_bold = NULL, 
  color_scale = scale_color_brewer(palette = "Paired", guide = "none"), 
  n = 50,
  freq_min = NULL, 
  barchart_height = 50,
  frequency_labels = F,
  phylogroups = F
) {
  
  genome_name <- rlang::enexpr(genome_name) 
  genome_col <- rlang::enexpr(genome_col)
  genome_bold <- rlang::enexpr(genome_bold)
  
  if (is.null(tg$patterns)) stop("no patterns found")
  
  tg$patterns <-
    tg$patterns %>%
    arrange(desc(frequency)) %>%
    slice(1:UQ(n)) %>%
    {if (! is.null(freq_min)) filter(., frequency >= !! freq_min) else .} %>%
    mutate(pattern_fct = factor(pattern, levels = pattern)) 
  
  genomes <- 
    genomes_extended(tg) %>%
    mutate(node_fct = factor(
      node, levels = !! tipnodes_ladderized(tg$tree)
    )) %>%
    arrange(desc(node_fct)) %>%
    mutate(genome_name_fct = factor(
      !! genome_name, levels = !! genome_name
    )) %>%
    mutate(genome_bold = !! genome_bold)
  
  genomes$is_phylogroup_ancestor <- NULL
  genomes$node <- NULL
  
  if (phylogroups) {
    
    yintercepts <- 
      genomes %>%
      mutate(rank = as.numeric(genome_name_fct)) %>%
      group_by(phylogroup) %>%
      slice(1) %>%
      ungroup() %>%
      mutate(yintercept = rank - 0.5) %>%
      filter(yintercept > 1) %>%
      pull(yintercept)
    
  }
  
  theme_upset <- 
    theme(
      axis.text.x = element_blank(), 
      axis.title = element_blank(),
      axis.ticks = element_blank(), 
      panel.background = element_rect(fill = "transparent"), 
      plot.background = element_rect(fill = "transparent")
    )
  
  plot_main <- 
    tg$components %>%
    right_join(tg$patterns, by = "pattern") %>%
    left_join(genomes, by = "genome") %>%
    {if (! is.null(tg$patterns$node)) left_join(., tg$nodes, by = "node") else .} %>%
    ggplot(aes(x = pattern_fct, y = genome_name_fct, group = pattern_fct)) +
    geom_line(col = "grey") +
    geom_point(size = 3, aes(col = !! genome_col)) +
    {if (phylogroups) geom_hline(yintercept = yintercepts) else NULL} + 
    color_scale +
    theme_bw() +
    theme_upset
  
  if (! is.null(genome_bold)) {
    plot_main <- plot_main +
      theme(axis.text.y = element_text(
        face = if_else(genomes$genome_bold, "bold", "plain")
      ))
  }
  
  plot_marg <- 
    tg$patterns %>% 
    ggplot(aes(x = pattern_fct, y = frequency)) +
    geom_histogram(stat = "identity") + 
    ylim(c(0, 1.1 * max(tg$patterns$frequency))) +
    theme_bw() +
    theme_upset +
    theme(
      panel.border = element_blank(),
      panel.grid = element_blank()
    )
  
  if (frequency_labels) {
    plot_marg <-
      plot_marg +
      geom_text(aes(label = frequency), vjust= - 0.25, size = 2)
  }
  
  egg::ggarrange(
    plot_marg, plot_main,
    nrow = 2, ncol = 1, heights = c(barchart_height, nrow(genomes))
  )
  
}

#' Construct an heatmap to explore a pair variable
#'
#' This function constructs a heatmap of a given variable from the pair table.
#' Genomes are ordered along the phylogeny.
#'
#' @param tg A tidygenomes object
#' @param genome_label Variable (unquoted) from the genome table to use as
#'   genome label
#' @param distance Variable (unquoted) from the pair table to plot on the
#'   heatmap
#' 
#' @return A ggplot object
#' 
#' @export
heatmap <- function(tg, genome_label, distance, complete_pairs = T) {
  
  genome_label <- rlang::enexpr(genome_label)
  distance <- rlang::enexpr(distance)
  
  tg$genomes <-
    tg$genomes %>%
    mutate(genome_label = !! genome_label) %>%
    mutate(node_fct = factor(node, levels = tipnodes_ladderized(tg$tree))) %>%
    arrange(node_fct) %>%
    mutate(genome_label_fct = factor(genome_label, levels = genome_label))
  
  tg$pairs %>%
    {if (complete_pairs) complete_pairs(., genome_1, genome_2) else .} %>%
    left_join(
        tg$genomes %>% rename(genome_1 = genome), by = "genome_1"
      ) %>%
    left_join(
      tg$genomes %>% rename(genome_2 = genome), by = "genome_2", 
      suffix = c("_1", "_2")
    ) %>%
    mutate(distance = !! distance) %>%
    ggplot(aes(x = genome_label_fct_1, y = genome_label_fct_2, fill = distance)) +
    geom_tile() +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

}
SWittouck/tidyorthogroups documentation built on Feb. 2, 2023, 12:45 a.m.