metannoviz: Process and visualize output from the MetAnnotate pipeline

Documented in generate_ggplot visualize

# visualize.R
# Visualize metannotate data
# Copyright Jackson M. Tsuji, 2020 (Neufeld Lab)

#' Generate a ggplot of the MetAnnotate data
#'
#' @aliases metannotate_ggplot
#' @description Generates the ggplot of MetAnnotate data
#' @param metannotate_data Tibble of normalized metannotate data - see \code{\link{normalize}} and \code{\link{combine_replicates}}
#' @param hit_totals Tibble of total normalized hits per HMM/sample; this is generated in \code{\link{normalize}} and \code{\link{combine_replicates}}
#' @param plotting_colour_data Tibble of plot colours generated by \code{\link{process_plotting_colours}}
#' @param plotting_taxon The taxonomy rank to summarize bars/bubbles to; MUST MATCH the collapse taxon of the MetAnnotate table
#' @param normalizing_HMM the name of the normalizing HMM (e.g., 'rpoB')
#' @param plot_type Either 'bar' or 'bubble'
#' @param space ggplot setting; 'fixed' or 'free'
#' @param bubble_size_range numeric vector of length two with the small and large bubble sizes
#' @param alpha ggplot value; transparency (for bubble plots)
#' @param bubble_labels logical; show percent labels on the bubbles for bubble plots?
#' @return A ggplot of MetAnnotate data
#' @export
generate_ggplot <- function(metannotate_data, hit_totals, plotting_colour_data,
                            plotting_taxon, normalizing_HMM, plot_type = "bar",
                            space = "free", bubble_size_range = c(1,20), alpha = 0.8,
                            bubble_labels = TRUE) {

  # Check metannotate data has been normalized
  # TODO - consider making a more exhaustive data check
  if ("percent_abundance" %in% colnames(metannotate_data) == FALSE) {
    stop(paste0("Provided metannotate_data table does not contain the expected 'percent_abundance' column. "),
         "Are you sure that you have already run normalize()?")
  }

  # Check taxon rank
  if ((tolower(plotting_taxon) %in% dplyr::pull(TAXONOMY_NAMING, taxonomy)) == FALSE) {
    stop(paste0("Taxon must be a standard seven-rank taxonomy entry; you provided '", taxon, "'."))
  }
  plotting_taxon_colname <- TAXONOMY_NAMING$metannotate_colnames[match(tolower(plotting_taxon),
                                                                       TAXONOMY_NAMING$taxonomy)]
  plotting_taxon_label <- paste0(substring(plotting_taxon, 1, 1) %>% toupper(),
                                 substring(plotting_taxon, 2, nchar(plotting_taxon)))

  # TODO - can this be moved down to the 'bubble' area without breaking anything?
  if (plot_type == "bubble") {
    metannotate_data$label <- round(metannotate_data$percent_abundance, digits = 1)
  }
  
  # Make the base plot
  metannotate_plot <- ggplot(metannotate_data) +
    theme_bw() +
    # TODO - expose some theme elements to user or make fonts scalable automatically
    theme(axis.title = element_text(size = 12),
          strip.text = element_text(size = 11, face = "italic"),
          strip.background = element_rect(fill = "#e6e6e6"),
          axis.text = element_text(size = 10, colour = "black"),
          axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
          axis.ticks = element_line(size = 0.5, colour = "black"),
          axis.line = element_line(size = 0.5),
          legend.text = element_text(size = 10, face = "italic"),
          legend.title = element_text(size = 8),
          legend.key = element_rect(colour = "transparent"),
          legend.key.size = unit(6, "mm"),
          legend.spacing = unit(1, "mm"),
          legend.box.just = "left") +
    xlab("Sample")
  
  if (space == "fixed") {
    metannotate_plot <- metannotate_plot +
      facet_grid(HMM.Family ~ ., scales = "free", space = "fixed")
  } else if (space == "free") {
    metannotate_plot <- metannotate_plot +
      facet_grid(HMM.Family ~ ., scales = "free", space = "free")
  } else {
    stop(paste0("'space' must be either 'free' or 'fixed'. You provided '", space, "'."))
  }
  
  if (plot_type == "bar") {
    futile.logger::flog.debug("Generating barplot")

    metannotate_plot <- metannotate_plot +
      geom_bar(data = hit_totals, aes(x = Dataset, weight = percent_abundance), fill = "#808080") +
      geom_bar(aes_string(x = "Dataset", weight = "percent_abundance",
                          fill = plotting_taxon_colname)) +
      scale_fill_manual(values = plotting_colour_data$colour) +
      theme(panel.grid = element_blank(),
            panel.border = element_rect(colour = "transparent"),
            panel.spacing.y = unit(3, "mm")) +
      guides(fill = guide_legend(ncol = 1, title = element_blank())) +
      ylab(paste0("Gene hits relative to ", normalizing_HMM, " (%; normalized)"))
    
  } else if (plot_type == "bubble") {
    futile.logger::flog.debug("Generating bubble plot")

    # TODO - allow user to toggle which taxon rank to use
    legend_taxon_colname <- "Closest.Homolog.Phylum"
    legend_taxon <- "Phylum"
    fill_colours <- dplyr::pull(metannotate_data, legend_taxon_colname) %>%
        unique() %>%
        length() %>%
        choose_discrete_colour_scale()

    metannotate_plot <- metannotate_plot +
      geom_point(aes_string(x = "Dataset", y = plotting_taxon_colname,
                            size = "percent_abundance", fill = legend_taxon_colname),
                 shape = 21, alpha = alpha) +
      scale_size_continuous(range = bubble_size_range) +
      scale_fill_manual(values = fill_colours) +
      theme(axis.text.y = element_text(size = 5, face = "italic")) +
      guides(fill = guide_legend(title = legend_taxon)) +
      ylab(paste0(plotting_taxon_label, " of closest homologue"))
    
    if (bubble_labels == TRUE) {
      metannotate_plot <- metannotate_plot +
        geom_text(aes_string(x = "Dataset", y = plotting_taxon_colname, label = "label"),
                  size = 2) +
        guides(size = FALSE)
    } else if (bubble_labels == FALSE) {
      metannotate_plot <- metannotate_plot +
        guides(size = guide_legend(title = paste0("Gene hits relative to \n", normalizing_HMM, " (%; normalized)"),
                                   override.aes = list(fill = "#4d4d4d")))
    }
    
  } else {

    stop(paste0("'plot_type' must be either 'bar' or 'bubble'; ",
                "you provided '", plot_type, "'."))

  }
  
  return(metannotate_plot)
}

#' Wrapper for convenient generation of MetAnnotate ggplot data
#'
#' @aliases plot metannotate_plotter
#' @description Wrapper to generate a ggplot of MetAnnotate data with subset, colours, labels, and so on
#' @param metannotate_data_normalized_list List output of \code{\link{combine_replicates}} - replicates must be collapsed
#' @param colouring_template_filename Filename of the colouring template you want to load
#' If the file does not exist, then this function will write a template to that file
#' If 'NA' is entered, then the function will auto-generate colours and continue on
#' @param top_x Numeric vector (length 1) giving the subsetting amount you desire.
#' If top_x >=1, the script will return the "top_x most abundant taxa" for each Dataset/HMM.Family
#' If top_x <1, the script will return "all taxa of (top_x * 100%) abundance or greater for each Dataset/HMM.Family -
#' but see below.
#' @param percent_mode If top_x <1, there are two different methods for keeping the most abundant organisms:
#' - "within_sample" -- the normalized % abundance relative to rpoB is used
#' - "within_HMM" -- the percent abundance of that taxon within the specific HMM gene hits is used.
#' You won't notice much of a different between these modes unless one of your HMMs has very few hits and you want to
#' show some of the taxa that were hit. This would be a good time to use 'within_HMM'.
#' @param normalizing_HMM Name of the normalizing HMM (e.g., "rpoB")]; specify 'auto' to attempt auto-detection
#' @param plot_normalizing_HMM Retain the normalizing_HMM in the final ggplot?
#' @param dump_raw_data Return the normalized and subsetted table in lieu of a ggplot
#' @param ... Other fine-tuned plotting options controlled by \code{\link{generate_ggplot}}
#' @return A ggplot of MetAnnotate data (or raw data; see above)
#' @export
visualize <- function(metannotate_data_normalized_list, colouring_template_filename = NA,
                      top_x = NA, percent_mode = "within_sample", normalizing_HMM = "auto",
                      plot_normalizing_HMM = TRUE, dump_raw_data = FALSE, ...) {
  # # Example column names of the plotting table, if collapsed to family
  # [1] "Dataset"                      "HMM.Family"                   "Closest.Homolog.Superkingdom"
  # [4] "Closest.Homolog.Phylum"       "Closest.Homolog.Class"        "Closest.Homolog.Order"       
  # [7] "Closest.Homolog.Family"       "percent_abundance" 

  # Check that the first input is actually a list
  if (class(metannotate_data_normalized_list)[1] != "list") {
    stop("Input object is not a list as expected. Are you sure that you have already run normalize()?")
  }

  # Extract list components
  metannotate_data <- metannotate_data_normalized_list$metannotate_data
  hit_totals <- tidyr::pivot_longer(metannotate_data_normalized_list$total_normalized_hits, -Dataset,
                                    names_to = "HMM.Family", values_to = "percent_abundance")
  hit_totals$HMM.Family <- factor(hit_totals$HMM.Family, levels = unique(hit_totals$HMM.Family), ordered = TRUE)

  # Check metannotate data has been normalized
  # TODO - consider making a more exhaustive data check
  if ("percent_abundance" %in% colnames(metannotate_data) == FALSE) {
    stop(paste0("Provided metannotate_data table does not contain the expected 'percent_abundance' column. "),
         "Are you sure that you have already run normalize()?")
  }

  # Detect the taxonomy that the data has been collapsed to
  plotting_taxon_colname <- TAXONOMY_NAMING$metannotate_colnames[
    TAXONOMY_NAMING$metannotate_colnames %in% colnames(metannotate_data)] %>%
    tail(n = 1)
  plotting_taxon <- TAXONOMY_NAMING$taxonomy[match(plotting_taxon_colname,
                                                   TAXONOMY_NAMING$metannotate_colnames)]
  futile.logger::flog.debug(paste0("Plotting input dataframe has been collapsed to the '", plotting_taxon, "' level."))
  
  # Filter to the desired top_x cutoff
  # TODO - longer-term move abundance filtration out of this script for clarity
  metannotate_data <- filter_by_abundance(metannotate_data, top_x, percent_mode = percent_mode)

  # Determine normalizing HMM for labelling on the plot
  if (normalizing_HMM == "auto") {
    normalizing_HMM  <- dplyr::group_by(hit_totals, HMM.Family) %>%
      dplyr::summarise(mean_abund = mean(percent_abundance)) %>%
      dplyr::filter(mean_abund == 100) %>%
      dplyr::pull(HMM.Family)

    if (length(normalizing_HMM) != 1) {
      stop("Auto-detection of the normalizing_HMM failed; you'll have to specify it manually.")
    }
  }

  # Remove normalizing_HMM from the hit totals to avoid plotting an extraneous bar at 100%
  hit_totals <- dplyr::filter(hit_totals, HMM.Family != normalizing_HMM)

  # Optionally remove the normalizing HMM from the final plot altogether
  if (plot_normalizing_HMM == FALSE) {
    metannotate_data <- dplyr::filter(metannotate_data, HMM.Family != normalizing_HMM)

    if (nrow(metannotate_data) == 0) {
      stop(paste0("Looks like you have no data left after removing the normalizing_HMM; ",
                  "did you only include one HMM in your data? ",
                  "Probably best that you leave 'plot_normalizing_HMM = TRUE'."))
    }

  } else if (plot_normalizing_HMM != TRUE) {
    stop(paste0("'plot_normalizing_HMM' must be either TRUE or FALSE; you specified '",
                      plot_normalizing_HMM, "'."))
  }

  # Make or read in a plotting colour table; or generate auto-colours
  plotting_colour_data <- process_plotting_colours(metannotate_data, colouring_template_filename)

  # Make the plotting column into an ordered factor based on the plotting_colours order
  metannotate_data[,plotting_taxon_colname] <- factor(dplyr::pull(metannotate_data, plotting_taxon_colname),
                                                      levels = unique(dplyr::pull(plotting_colour_data,
                                                                                  plotting_taxon_colname)),
                                                      ordered = TRUE)

  if (dump_raw_data == TRUE) {
    # TODO - this is bad design; function should not output something totally different given a flag...
    futile.logger::flog.info("Dumping raw data in lieu of a ggplot")
    metannotate_plot <- metannotate_data

  } else {
    futile.logger::flog.info("Creating the ggplot")
    metannotate_plot <- generate_ggplot(metannotate_data = metannotate_data,
                                        hit_totals = hit_totals,
                                        plotting_colour_data = plotting_colour_data,
                                        plotting_taxon = plotting_taxon,
                                        normalizing_HMM = normalizing_HMM,
                                        ...)
  }

  return(metannotate_plot)
}
MetAnnotate/metannoviz documentation built on Aug. 2, 2020, 3:12 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
MetAnnotate/metannoviz
Process and visualize output from the MetAnnotate pipeline

R/visualize.R
In MetAnnotate/metannoviz: Process and visualize output from the MetAnnotate pipeline

Defines functions visualize generate_ggplot

Documented in generate_ggplot visualize

R Package Documentation

Browse R Packages

We want your feedback!

MetAnnotate/metannoviz Process and visualize output from the MetAnnotate pipeline

R/visualize.R In MetAnnotate/metannoviz: Process and visualize output from the MetAnnotate pipeline

Defines functions visualize generate_ggplot

Documented in generate_ggplot visualize

R Package Documentation

Browse R Packages

We want your feedback!

MetAnnotate/metannoviz
Process and visualize output from the MetAnnotate pipeline

R/visualize.R
In MetAnnotate/metannoviz: Process and visualize output from the MetAnnotate pipeline