R/plotFoldEnrichment.R

Defines functions plotFoldEnrichment

Documented in plotFoldEnrichment

#' plotFoldEnrichment
#' @description This function uses the results generated using \code{\link{diffEnrich}}
#' to generate a bar plot describing the fold enrichment of a set of given KEGG
#' pathways stratified by their enrichment in list 1 or list 2. Users can plot
#' all pathways based on the adjusted p-value threshold used in \code{\link{diffEnrich}}
#' and the top N pathways sorted by the adjusted p-value threshold used in \code{\link{diffEnrich}}.
#' \code{plotFoldEnrich} returns a ggplot2 object so users can add additional
#' customizations.
#' @param de_res Dataframe. Generated using \code{\link{diffEnrich}}
#' @param pval Numeric. Threshold for filtering pathways based on adjusted pvalue in de_res
#' @param N Numeric. Number of top pathways to plot after filtering based on pval
#'
#' @return ggplot object. If the user calls \code{plotFoldEnrich} and
#' assigns it to an object (see example) then no plot will print in viewer,
#' but if \code{plotFoldEnrich} is called without being assigned to an
#' object the plot will print to the viewer. Users can edit the ggplot object
#' as they would any other ggplot object (e.g. add title, theme, etc.).
#'
#' @details This function generates a grouped bar plot using ggplot2 and the
#' ggnewscale package. KEGG pathways are plotted on the y-axis and fold
#' enrichment is plotted on the x-axis. each KEGG pathway has a bar plotting
#' its fold enrichment in list 1 (red) and its fold enrichment in list 2 (blue).
#' The transparency of the bars correspond to the adjusted p-value for the
#' pathway's enrichment in the given list. The p-value presented as text to the
#' right of each pair of bars is the adjusted p-value associated with the
#' differential enrichment of the pathway between the two lists, and the pathways
#' are ordered from top to bottom by this p-value (i.e. smallest p-value on top
#' of plot, and largest p-value on bottom of plot).
#'
#' @import dplyr
#'         ggplot2
#' @importFrom reshape2 melt
#' @importFrom stats reorder
#' @importFrom rlang .data
#' @importFrom ggnewscale new_scale_fill
#' @export
#'
#' @examples
#' \dontrun{
#' plot <- plotFoldEnrichment(de_res = diff_enrich, pval = 0.05, N = 5)
#' }
plotFoldEnrichment <- function(de_res, pval, N){
  # library(dplyr)
  # library(ggplot2)
  ## Check arguments
  if(missing(de_res)){stop("Argument missing: de_res")}
  if(missing(pval)){stop("Argument missing: pval - please provide a threshold")}
  if(missing(N)){stop("Argument missing: N - if you'd like to plot top pathways, please provide a threshold and make sure N > 0")}
  if(N < 1){stop("Number of top genes (N) must be > 0")}
  if(inherits(de_res, "diffEnrich") != TRUE){stop("de_res must be an object of class 'diffEnrich'. Please generate this object using the diffEnrich function provided in this package.")}

  ###########################################################
  # Prepare and reshape data for plotting using ggplot
  ###########################################################

  ## Strip extra columns from de_res and filter based on pval. Then sort by pval.
  df <- de_res$de_table %>%
    dplyr::select(.data$KEGG_PATHWAY_ID, .data$KEGG_PATHWAY_description,
                  .data$fold_enrichment_list1, .data$fold_enrichment_list2,
                  .data$enrich_p_list1, .data$enrich_p_list2,
                  .data$odd_ratio, .data$diff_enrich_adjusted) %>%
    dplyr::mutate(KEGG_PATHWAY_description = sapply(strsplit(.data$KEGG_PATHWAY_description, split = " - "), function(x) x[1])) %>%
    dplyr::arrange(.data$diff_enrich_adjusted) %>%
    dplyr::filter(.data$diff_enrich_adjusted < pval) %>%
    dplyr::slice(1:N)

  if(dim(df)[1] < N){warning(paste0("'N' is larger than the number of significant pathways based on 'pval'. plotFoldEnrichment will only print the significant pathways. There were ", dim(df)[1]))}

  ## Melt data set
  df.melt <-reshape2::melt(df, id.vars = c('KEGG_PATHWAY_ID', 'KEGG_PATHWAY_description'))

  ## Clean up melted data frame
  df.ss <- df.melt %>%
    dplyr::filter(.data$variable %in% c("fold_enrichment_list1", "fold_enrichment_list2",
                                        "enrich_p_list1", "enrich_p_list2",
                                        "diff_enrich_adjusted"))

  ## get vector of pvals
  pvals <- subset(df.ss, df.ss$variable %in% c("enrich_p_list1", "enrich_p_list2"))

  ## Generate data set to be used for plotting
  bardat.tmp <- subset(df.ss, df.ss$variable %in% c("fold_enrichment_list1", "fold_enrichment_list2")) %>%
    dplyr::mutate(alpha = log10(pvals$value),
                  pvals = pvals$value) %>%
    dplyr::arrange(.data$pvals)

  bardat <- merge(bardat.tmp, df[, c(1,8)], by = "KEGG_PATHWAY_ID")

  ###########################################################
  # Generate plot
  ###########################################################
  # library(ggnewscale)
  # First, we'll make a plot and save it as a variable
  g <- ggplot(bardat, aes(x=stats::reorder(.data$KEGG_PATHWAY_description, -.data$diff_enrich_adjusted), y=.data$value)) +
    geom_bar(stat="identity", aes(col=.data$variable, group=.data$variable, fill=.data$pvals), position="dodge") +
    ylim(0, max(bardat$value) + 0.6) + xlab("") +
    coord_flip() +
    scale_fill_brewer(palette = "Set1",
                      name="",
                      breaks=c("fold_enrichment_list1", "fold_enrichment_list2"),
                      labels=c("Fold Enrichment in \nlist 1\n", "Fold enrichment in \nlist 2\n")) +
    scale_fill_continuous(trans = 'log10') +
    geom_text(data=subset(df.ss, df.ss$variable %in% c("diff_enrich_adjusted")),
              aes(x = .data$KEGG_PATHWAY_description, y = (max(bardat$value) + 0.3), label = round(.data$value, 4)))

  # Next, we'll take the coordinates of this layers data and match them back to the original data.
  ld <- ggplot2::layer_data(g)
  ld <- ld[, c("xmin", "xmax", "ymin", "ymax")]

  # Match back to original data
  matches <- match(ld$ymax, bardat$value)

  # Supplement with original data
  ld$pvals <- log10(bardat$pvals[matches])
  ld$descr <- bardat$KEGG_PATHWAY_description[matches]
  ld$vars <- bardat$variable[matches]

  ## Merge ld with df.ss
  df_ptext <- merge(ld, df.ss, by.x = "descr", by.y = "KEGG_PATHWAY_description")
  df_ptext <- subset(df_ptext, df_ptext$variable %in% c("diff_enrich_adjusted")) %>%
    dplyr::filter(!duplicated(.data$value))

  ## Format list p-values
  options(scipen = 0, digits = 1)
  lpval <-formatC(as.numeric(summary(bardat$pvals))[c(1,2,3,5,6)], digits = 1)

  ## decide how to print p-val text
  pt <- ifelse((dim(df)[1] >= N), N, dim(df)[1])

  ## Generate finale plot
  p <- ggplot(mapping = aes(xmin = .data$xmin, xmax = .data$xmax, ymin = .data$ymin, ymax = .data$ymax)) +
    geom_rect(data = ld[ld$vars == "fold_enrichment_list1", ], aes(fill = .data$pvals), color = 'black') +
    ylim(0, max(bardat$value) + 1.0) + xlab("") + ylab("Fold Enrichment") +
    scale_fill_gradient(low = "darkred", high = "white",
                        #trans = 'log10',
                        limits = c(min(ld$pvals), 0),
                        breaks = as.numeric(summary(ld$pvals))[c(1,2,3,5,6)],
                        labels = lpval,
                        name = paste0("P-values List 1", "\n", "gene count: ", de_res$de_table[1,6])) +
    ggnewscale::new_scale_fill() +
    geom_rect(data = ld[ld$vars == "fold_enrichment_list2", ], aes(fill = .data$pvals), color = 'black') +
    scale_fill_gradient(low =  "navy", high = "white",
                        #trans = 'log10',
                        limits = c(min(ld$pvals), 0),
                        breaks = as.numeric(summary(ld$pvals))[c(1,2,3,5,6)],
                        labels = lpval,
                        name = paste0("P-values List 2", "\n", "gene count: ", de_res$de_table[1,12])) +
    scale_x_continuous(breaks = seq_along(unique(ld$descr)),
                       labels = unique(ld$descr)) +
    geom_hline(yintercept=1.0, linetype ='dashed') +
    coord_flip() + theme_bw() +
    geom_text(data=df_ptext,
              aes(x = 1:pt, y = (max(bardat$value) + 0.5), label = sort(round(.data$value, 5), decreasing = TRUE)))
  return(p)
}

Try the diffEnrich package in your browser

Any scripts or data that you put into this service are public.

diffEnrich documentation built on June 28, 2022, 1:08 a.m.