# visualize.R
# Visualize metannotate data
# Copyright Jackson M. Tsuji, 2020 (Neufeld Lab)
#' Generate a ggplot of the MetAnnotate data
#'
#' @aliases metannotate_ggplot
#' @description Generates the ggplot of MetAnnotate data
#' @param metannotate_data Tibble of normalized metannotate data - see \code{\link{normalize}} and \code{\link{combine_replicates}}
#' @param hit_totals Tibble of total normalized hits per HMM/sample; this is generated in \code{\link{normalize}} and \code{\link{combine_replicates}}
#' @param plotting_colour_data Tibble of plot colours generated by \code{\link{process_plotting_colours}}
#' @param plotting_taxon The taxonomy rank to summarize bars/bubbles to; MUST MATCH the collapse taxon of the MetAnnotate table
#' @param normalizing_HMM the name of the normalizing HMM (e.g., 'rpoB')
#' @param plot_type Either 'bar' or 'bubble'
#' @param space ggplot setting; 'fixed' or 'free'
#' @param bubble_size_range numeric vector of length two with the small and large bubble sizes
#' @param alpha ggplot value; transparency (for bubble plots)
#' @param bubble_labels logical; show percent labels on the bubbles for bubble plots?
#' @return A ggplot of MetAnnotate data
#' @export
generate_ggplot <- function(metannotate_data, hit_totals, plotting_colour_data,
plotting_taxon, normalizing_HMM, plot_type = "bar",
space = "free", bubble_size_range = c(1,20), alpha = 0.8,
bubble_labels = TRUE) {
# Check metannotate data has been normalized
# TODO - consider making a more exhaustive data check
if ("percent_abundance" %in% colnames(metannotate_data) == FALSE) {
stop(paste0("Provided metannotate_data table does not contain the expected 'percent_abundance' column. "),
"Are you sure that you have already run normalize()?")
}
# Check taxon rank
if ((tolower(plotting_taxon) %in% dplyr::pull(TAXONOMY_NAMING, taxonomy)) == FALSE) {
stop(paste0("Taxon must be a standard seven-rank taxonomy entry; you provided '", taxon, "'."))
}
plotting_taxon_colname <- TAXONOMY_NAMING$metannotate_colnames[match(tolower(plotting_taxon),
TAXONOMY_NAMING$taxonomy)]
plotting_taxon_label <- paste0(substring(plotting_taxon, 1, 1) %>% toupper(),
substring(plotting_taxon, 2, nchar(plotting_taxon)))
# TODO - can this be moved down to the 'bubble' area without breaking anything?
if (plot_type == "bubble") {
metannotate_data$label <- round(metannotate_data$percent_abundance, digits = 1)
}
# Make the base plot
metannotate_plot <- ggplot(metannotate_data) +
theme_bw() +
# TODO - expose some theme elements to user or make fonts scalable automatically
theme(axis.title = element_text(size = 12),
strip.text = element_text(size = 11, face = "italic"),
strip.background = element_rect(fill = "#e6e6e6"),
axis.text = element_text(size = 10, colour = "black"),
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
axis.ticks = element_line(size = 0.5, colour = "black"),
axis.line = element_line(size = 0.5),
legend.text = element_text(size = 10, face = "italic"),
legend.title = element_text(size = 8),
legend.key = element_rect(colour = "transparent"),
legend.key.size = unit(6, "mm"),
legend.spacing = unit(1, "mm"),
legend.box.just = "left") +
xlab("Sample")
if (space == "fixed") {
metannotate_plot <- metannotate_plot +
facet_grid(HMM.Family ~ ., scales = "free", space = "fixed")
} else if (space == "free") {
metannotate_plot <- metannotate_plot +
facet_grid(HMM.Family ~ ., scales = "free", space = "free")
} else {
stop(paste0("'space' must be either 'free' or 'fixed'. You provided '", space, "'."))
}
if (plot_type == "bar") {
futile.logger::flog.debug("Generating barplot")
metannotate_plot <- metannotate_plot +
geom_bar(data = hit_totals, aes(x = Dataset, weight = percent_abundance), fill = "#808080") +
geom_bar(aes_string(x = "Dataset", weight = "percent_abundance",
fill = plotting_taxon_colname)) +
scale_fill_manual(values = plotting_colour_data$colour) +
theme(panel.grid = element_blank(),
panel.border = element_rect(colour = "transparent"),
panel.spacing.y = unit(3, "mm")) +
guides(fill = guide_legend(ncol = 1, title = element_blank())) +
ylab(paste0("Gene hits relative to ", normalizing_HMM, " (%; normalized)"))
} else if (plot_type == "bubble") {
futile.logger::flog.debug("Generating bubble plot")
# TODO - allow user to toggle which taxon rank to use
legend_taxon_colname <- "Closest.Homolog.Phylum"
legend_taxon <- "Phylum"
fill_colours <- dplyr::pull(metannotate_data, legend_taxon_colname) %>%
unique() %>%
length() %>%
choose_discrete_colour_scale()
metannotate_plot <- metannotate_plot +
geom_point(aes_string(x = "Dataset", y = plotting_taxon_colname,
size = "percent_abundance", fill = legend_taxon_colname),
shape = 21, alpha = alpha) +
scale_size_continuous(range = bubble_size_range) +
scale_fill_manual(values = fill_colours) +
theme(axis.text.y = element_text(size = 5, face = "italic")) +
guides(fill = guide_legend(title = legend_taxon)) +
ylab(paste0(plotting_taxon_label, " of closest homologue"))
if (bubble_labels == TRUE) {
metannotate_plot <- metannotate_plot +
geom_text(aes_string(x = "Dataset", y = plotting_taxon_colname, label = "label"),
size = 2) +
guides(size = FALSE)
} else if (bubble_labels == FALSE) {
metannotate_plot <- metannotate_plot +
guides(size = guide_legend(title = paste0("Gene hits relative to \n", normalizing_HMM, " (%; normalized)"),
override.aes = list(fill = "#4d4d4d")))
}
} else {
stop(paste0("'plot_type' must be either 'bar' or 'bubble'; ",
"you provided '", plot_type, "'."))
}
return(metannotate_plot)
}
#' Wrapper for convenient generation of MetAnnotate ggplot data
#'
#' @aliases plot metannotate_plotter
#' @description Wrapper to generate a ggplot of MetAnnotate data with subset, colours, labels, and so on
#' @param metannotate_data_normalized_list List output of \code{\link{combine_replicates}} - replicates must be collapsed
#' @param colouring_template_filename Filename of the colouring template you want to load
#' If the file does not exist, then this function will write a template to that file
#' If 'NA' is entered, then the function will auto-generate colours and continue on
#' @param top_x Numeric vector (length 1) giving the subsetting amount you desire.
#' If top_x >=1, the script will return the "top_x most abundant taxa" for each Dataset/HMM.Family
#' If top_x <1, the script will return "all taxa of (top_x * 100%) abundance or greater for each Dataset/HMM.Family -
#' but see below.
#' @param percent_mode If top_x <1, there are two different methods for keeping the most abundant organisms:
#' - "within_sample" -- the normalized % abundance relative to rpoB is used
#' - "within_HMM" -- the percent abundance of that taxon within the specific HMM gene hits is used.
#' You won't notice much of a different between these modes unless one of your HMMs has very few hits and you want to
#' show some of the taxa that were hit. This would be a good time to use 'within_HMM'.
#' @param normalizing_HMM Name of the normalizing HMM (e.g., "rpoB")]; specify 'auto' to attempt auto-detection
#' @param plot_normalizing_HMM Retain the normalizing_HMM in the final ggplot?
#' @param dump_raw_data Return the normalized and subsetted table in lieu of a ggplot
#' @param ... Other fine-tuned plotting options controlled by \code{\link{generate_ggplot}}
#' @return A ggplot of MetAnnotate data (or raw data; see above)
#' @export
visualize <- function(metannotate_data_normalized_list, colouring_template_filename = NA,
top_x = NA, percent_mode = "within_sample", normalizing_HMM = "auto",
plot_normalizing_HMM = TRUE, dump_raw_data = FALSE, ...) {
# # Example column names of the plotting table, if collapsed to family
# [1] "Dataset" "HMM.Family" "Closest.Homolog.Superkingdom"
# [4] "Closest.Homolog.Phylum" "Closest.Homolog.Class" "Closest.Homolog.Order"
# [7] "Closest.Homolog.Family" "percent_abundance"
# Check that the first input is actually a list
if (class(metannotate_data_normalized_list)[1] != "list") {
stop("Input object is not a list as expected. Are you sure that you have already run normalize()?")
}
# Extract list components
metannotate_data <- metannotate_data_normalized_list$metannotate_data
hit_totals <- tidyr::pivot_longer(metannotate_data_normalized_list$total_normalized_hits, -Dataset,
names_to = "HMM.Family", values_to = "percent_abundance")
hit_totals$HMM.Family <- factor(hit_totals$HMM.Family, levels = unique(hit_totals$HMM.Family), ordered = TRUE)
# Check metannotate data has been normalized
# TODO - consider making a more exhaustive data check
if ("percent_abundance" %in% colnames(metannotate_data) == FALSE) {
stop(paste0("Provided metannotate_data table does not contain the expected 'percent_abundance' column. "),
"Are you sure that you have already run normalize()?")
}
# Detect the taxonomy that the data has been collapsed to
plotting_taxon_colname <- TAXONOMY_NAMING$metannotate_colnames[
TAXONOMY_NAMING$metannotate_colnames %in% colnames(metannotate_data)] %>%
tail(n = 1)
plotting_taxon <- TAXONOMY_NAMING$taxonomy[match(plotting_taxon_colname,
TAXONOMY_NAMING$metannotate_colnames)]
futile.logger::flog.debug(paste0("Plotting input dataframe has been collapsed to the '", plotting_taxon, "' level."))
# Filter to the desired top_x cutoff
# TODO - longer-term move abundance filtration out of this script for clarity
metannotate_data <- filter_by_abundance(metannotate_data, top_x, percent_mode = percent_mode)
# Determine normalizing HMM for labelling on the plot
if (normalizing_HMM == "auto") {
normalizing_HMM <- dplyr::group_by(hit_totals, HMM.Family) %>%
dplyr::summarise(mean_abund = mean(percent_abundance)) %>%
dplyr::filter(mean_abund == 100) %>%
dplyr::pull(HMM.Family)
if (length(normalizing_HMM) != 1) {
stop("Auto-detection of the normalizing_HMM failed; you'll have to specify it manually.")
}
}
# Remove normalizing_HMM from the hit totals to avoid plotting an extraneous bar at 100%
hit_totals <- dplyr::filter(hit_totals, HMM.Family != normalizing_HMM)
# Optionally remove the normalizing HMM from the final plot altogether
if (plot_normalizing_HMM == FALSE) {
metannotate_data <- dplyr::filter(metannotate_data, HMM.Family != normalizing_HMM)
if (nrow(metannotate_data) == 0) {
stop(paste0("Looks like you have no data left after removing the normalizing_HMM; ",
"did you only include one HMM in your data? ",
"Probably best that you leave 'plot_normalizing_HMM = TRUE'."))
}
} else if (plot_normalizing_HMM != TRUE) {
stop(paste0("'plot_normalizing_HMM' must be either TRUE or FALSE; you specified '",
plot_normalizing_HMM, "'."))
}
# Make or read in a plotting colour table; or generate auto-colours
plotting_colour_data <- process_plotting_colours(metannotate_data, colouring_template_filename)
# Make the plotting column into an ordered factor based on the plotting_colours order
metannotate_data[,plotting_taxon_colname] <- factor(dplyr::pull(metannotate_data, plotting_taxon_colname),
levels = unique(dplyr::pull(plotting_colour_data,
plotting_taxon_colname)),
ordered = TRUE)
if (dump_raw_data == TRUE) {
# TODO - this is bad design; function should not output something totally different given a flag...
futile.logger::flog.info("Dumping raw data in lieu of a ggplot")
metannotate_plot <- metannotate_data
} else {
futile.logger::flog.info("Creating the ggplot")
metannotate_plot <- generate_ggplot(metannotate_data = metannotate_data,
hit_totals = hit_totals,
plotting_colour_data = plotting_colour_data,
plotting_taxon = plotting_taxon,
normalizing_HMM = normalizing_HMM,
...)
}
return(metannotate_plot)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.