R/plot_volcano_nvars.R

Defines functions plot_volcano_nvars

Documented in plot_volcano_nvars

#' Generate a set of volcano plots of genes from a differential expression (limma) analysis, for one or more contrasts
#'
#' Generate a set of volcano plots of genes from a differential expression (limma) analysis, for one or
#' more contrasts. These plots can be output to plotting windows, or to pdfs. The points can be colored
#' based on fold-change and p-value thresholds. Points can also be labeled with gene names, and the points
#' to be labeled can be set based on an ellipse oriented to the x- and y-axes.
#' @param topGenes.pairwise a list of data frames, each typically containing the output of a call to \code{topTable} for a single contrast. Each list element should be named with an identifier for the contrast, and must contain genes, log2 fold-change, and adjusted p-values. Can optionally include a "threshold" column, which should be boolean indicating genes passing significance thresholds.
#' @param my_cols a vector of colors for plotting points. First element provides the color for points not exceeding significance thresholds; second element provides the color for points exceeding significance thresholds.
#' @param file_prefix a character string. If provided, the function outputs pdfs of the plots, named "{file_prefix}.{list_element_name}.pdf".
#' @param plotdims a numeric vector, the size (in inches) of the plotting object. Either the size of the pdf, or the size of the plotting window.
#' @param color_by_threshold logical, whether to color points based on exceeding a threshold for logFC and p-value. Used only if fc_cut and p_cut are not NULL.
#' @param fc_cut numeric, the (absolute value) log2 fold-change threshold for determining significance of genes. This value is also plotted as vertical dotted lines. Setting to NULL removes the lines.
#' @param p_cut numeric, the p-value threshold for determining significance of genes. This value is also plotted as a horizontal dotted line. Setting to NULL removes the lines.
#' @param x_lim,y_lim either "auto", NULL, or numeric vectors. If "auto", x- and y-limits are determined from the data using \code{get_xy_lims}. If NULL, default plot limits are used. If provided as numeric vectors, the lower and upper limits of the plotting space along the x- and y-axes. Passed to \code{ggplot2::xlim}.
#' @param gene_labs logical, whether to include gene labels for genes with extreme logFC and p-value. If \code{TRUE}, genes with values outside the labeling ellipse will be labeled.
#' @param x_cut,y_cut numeric, the radii of the labeling ellipse along the x- and y-axes. Genes with values outside the ellipse are labeled with gene names. Default to 0, which results in all genes being labeled.
#' @param ... additional parameters passed to \code{pdf}.
#' @import ggplot2
#' @details A separate plot is generated for each element of topGenes.pairwise. 
#' @export
#' @usage \code{
#' plot_volcano_nvars(
#'      topGenes.pairwise, my_cols=c("darkcyan", "darkorange"),
#'      file_prefix=NULL, plotdims=c(9,9),
#'      color_by_threshold=TRUE, fc_cut=log2(1.5), p_cut=0.01,
#'      x_lim="auto", y_lim="auto",
#'      gene_labs=FALSE, x_cut=0, y_cut=0,
#'      ...)}
plot_volcano_nvars <-
  function(topGenes.pairwise, my_cols=c("darkcyan", "darkorange"),
           file_prefix=NULL, plotdims=c(9,9),
           color_by_threshold=TRUE, fc_cut=log2(1.5), p_cut=0.01,
           x_lim="auto", y_lim="auto",
           gene_labs=FALSE, x_cut=0, y_cut=0,
           ...) {
    if (color_by_threshold & (is.null(fc_cut) | is.null(p_cut)))
      stop("Cannot plot points by threshold with null values of fc_cut or p_cut.")
    if (identical(x_lim, "auto") | identical(y_lim, "auto")) {
      xy_lims <-
        get_xy_lims(topGenes.pairwise, pairwise=TRUE, min_x_abs=fc_cut, min_y2=-log10(p_cut))
      if (identical(x_lim, "auto")) x_lim <- xy_lims[["x"]]
      if (identical(y_lim, "auto")) y_lim <- xy_lims[["y"]]
    }
    
    for (i in names(topGenes.pairwise)) {
      topGenes.tmp <- topGenes.pairwise[[i]]
      colnames(topGenes.tmp) <- str_replace(colnames(topGenes.tmp), paste0(".", i), "") # strip out the de-ambiguation from the column names, if present
      topGenes.tmp$genes <- rownames(topGenes.tmp)
      if (color_by_threshold & is.null(topGenes.tmp$threshold))
        topGenes.tmp$threshold <-
        (abs(topGenes.tmp$logFC) > fc_cut) & (topGenes.tmp$adj.P.Val < p_cut)
      
      if (color_by_threshold) {
        volcano <-
          ggplot(data = topGenes.tmp,
                 aes(x=logFC, y=-log10(adj.P.Val), colour=threshold)) +
          geom_point(alpha=0.6, size=3, shape=16) +
          scale_colour_manual(values=my_cols)
      } else {
        volcano <-
          ggplot(data = topGenes.tmp,
                 aes(x=logFC, y=-log10(adj.P.Val))) +
          geom_point(alpha=0.6, size=3, shape=16, color=my_cols[1])
      }
      
      volcano <- volcano +
        theme(legend.position = "none") +
        xlab("log2 fold change") + ylab("-log10 Adj P")
      
      if (!is.null(fc_cut)) {
        volcano <- volcano + 
          geom_vline(xintercept = fc_cut, linetype="dotted", size=1.0) +
          geom_vline(xintercept = -fc_cut, linetype="dotted", size=1.0)
      }
      if (!is.null(p_cut)) {
        volcano <- volcano +
          geom_hline(yintercept = -log10(p_cut), linetype="dotted",size=1.0)
      }
      if (!is.null(x_lim)) {volcano <- volcano + xlim(x_lim)}
      if (!is.null(y_lim)) {volcano <- volcano + ylim(y_lim)}
      if (gene_labs) {
        volcano <- volcano +
          geom_text(data=topGenes.tmp[((topGenes.tmp$logFC^2)/(x_cut^2) + (log10(topGenes.tmp$adj.P.Val)^2)/(y_cut^2)) > 1,],
                    aes(label=genes),
                    color="black", size=3, vjust=1, hjust=0.5)}
      
      if (!is.null(file_prefix)) {
        pdf(file=paste(file_prefix, i, "pdf", sep="."), w=plotdims[1], h=plotdims[2], ...)
        on.exit(dev.off(), add=TRUE) # close plotting device on exit
      } else {
        quartz(plotdims[1],plotdims[2])
        volcano <- volcano + ggtitle(i)
      }
      
      print(volcano)
    }
  }
BenaroyaResearch/limmaTools documentation built on Dec. 17, 2021, 10:49 a.m.