#' Generate a set of volcano plots of genes from a differential expression (limma) analysis, for one or more contrasts, with point color or shape determined by a variable of interest.
#'
#' Generate a set of volcano plots of genes from a differential expression (limma) analysis, for one or
#' more contrasts, with point color or shape determined by a variable of interest. These plots can be
#' output to plotting windows, or to pdfs. The points can be colored on a continuous or discrete scale,
#' based on variables at the gene level. Points can also have shape determined by gene-level variables on
#' a discrete scale. Points can be labeled with gene names, and the points to be labeled can be set
#' based on an ellipse oriented to the x- and y-axes.
#' @param topGenes.pairwise a list of data frames, each typically containing the output of a call to \code{topTable} for a single contrast. Each list element should be named with an identifier for the contrast, and must contain genes, log2 fold-change, and adjusted p-values. If plotting shapes or colors by a variable, each list element must contain a column matching that variable.
#' @param file_prefix a character string. If provided, the function outputs pdfs of the plots, named "{file_prefix}.{list_element_name}.{colored_by_{color_by_var}.}{shape_by_{shape_by_var}.}pdf".
#' @param plotdims a numeric vector, the size (in inches) of the plotting object. Either the size of the pdf, or the size of the plotting window.
#' @param fc_cut numeric, the (absolute value) log2 fold-change threshold for determining significance of genes. This value is also plotted as vertical dotted lines. Setting to NULL removes the lines.
#' @param p_cut numeric, the p-value threshold for determining significance of genes. This value is also plotted as a horizontal dotted line. Setting to NULL removes the lines.
#' @param color_by_var (optional) character string or integer identifying the column in topGenes to color points by. If not provided, points are plotted in black.
#' @param color_by_var_levels (optional) character vector defining the order of elements in the variable used for coloring points; this order is used for the plot legend and to match the order of colors (if provided). If not provided, levels are taken from the factor levels (if color_by_var is a factor), or else are ordered by order of appearance in \code{topGenes}.
#' @param color_var_lab (optional) string to be used as the title for the color legends.
#' @param my_cols (optional) vector of colors to use for plotting. If \code{color_by_var} is numeric, should have two elements, providing the start and end points for a continuous color scale (generated by \code{scale_color_gradient}). If color_by_var is not numeric, should be a vector with one color for each level of \code{color_by_var}; if the number of values supplied is less than the numer of levels in color_by_var, additional values are interpolated using colorRampPalette. By default, uses a range from blue to red.
#' @param na_col color to use for NA values of \code{color_by_var}.
#' @param pch_by_var (optional) character string or integer identifying the column in topGenes to vary point shapes by. If not provided, points are plotted as dots.
#' @param pch_by_var_levels (optional) character vector defining the order of elements in the variable used for point shapes; this order is used for the plot legend and to match the order of shapes (if provided). If not provided, levels of the variable are ordered by order of appearance in topGenes.
#' @param pch_var_lab (optional) string to be used as the title for the point shape legends.
#' @param my_pch vector of shapes to use for plotting. Required if plotting points by shape; if not provided, all points will be plotted as dots. Must contain at least as many elements as the number of unique elements in \code{pch_by_var}.
#' @param x_lim,y_lim either "auto", NULL, or numeric vectors. If "auto", x- and y-limits are determined from the data using \code{get_xy_lims}. If NULL, default plot limits are used. If provided as numeric vectors, the lower and upper limits of the plotting space along the x- and y-axes. Passed to \code{ggplot2::xlim}.
#' @param gene_labs logical, whether to include gene labels for genes with extreme logFC and p-value. If \code{TRUE}, genes with values outside the labeling ellipse will be labeled.
#' @param x_cut,y_cut numeric, the radii of the labeling ellipse along the x- and y-axes. Genes with values outside the ellipse are labeled with gene names. Default to 0, which results in all genes being labeled.
#' @param point_order character string, specifying how to order the points. Currently accepted values are "random", which randomizes the order of the points, and "input", which sends the points to ggplot as they are in the input data frame. Defaults to "random".
#' @param ... additional parameters passed to \code{pdf}.
#' @import ggplot2
#' @export
#' @usage \code{
#' plot_volcano_byvar_nvars(
#' topGenes.pairwise, file_prefix=NULL, plotdims=c(9,9),
#' fc_cut=log2(1.5), p_cut=0.01,
#' color_by_var=NULL, color_by_var_levels=NULL, color_var_lab=NULL,
#' my_cols=c("blue","red"), na_col="grey50",
#' pch_by_var=NULL, pch_by_var_levels=NULL, pch_var_lab=NULL,
#' my_pch=NULL,
#' x_lim="auto", y_lim="auto",
#' gene_labs=FALSE, x_cut=0, y_cut=0,
#' point_order="random",
#' ...)}
plot_volcano_byvar_nvars <-
function(topGenes.pairwise, file_prefix=NULL, plotdims=c(9,9),
fc_cut=log2(1.5), p_cut=0.01,
color_by_var=NULL, color_by_var_levels=NULL, color_var_lab=NULL,
my_cols=c("blue","red"), na_col="grey50",
pch_by_var=NULL, pch_by_var_levels=NULL, pch_var_lab=NULL, my_pch=NULL,
x_lim="auto", y_lim="auto",
gene_labs=FALSE, x_cut=NULL, y_cut=NULL,
point_order="random",
...) {
plot_color_by_var <- !is.null(color_by_var)
plot_pch_by_var <- !is.null(pch_by_var)
if (identical(x_lim, "auto") | identical(y_lim, "auto")) {
xy_lims <- get_xy_lims(topGenes.pairwise, pairwise=TRUE, min_x_abs=fc_cut, min_y2=-log10(p_cut))
if (identical(x_lim, "auto")) x_lim <- xy_lims[["x"]]
if (identical(y_lim, "auto")) y_lim <- xy_lims[["y"]]
}
for (i in names(topGenes.pairwise)) {
topGenes.tmp <- topGenes.pairwise[[i]]
colnames(topGenes.tmp) <-
stringr::str_replace(
colnames(topGenes.tmp),
paste0(".", i), "") # strip out the de-ambiguation from the column names
topGenes.tmp$genes <- rownames(topGenes.tmp)
file_suffix <- "pdf"
color_scale <- NULL; color_labs <- NULL; color_points <- NULL
if (plot_color_by_var) {
if (!is.numeric(topGenes.tmp[[color_by_var]])) {
if (is.null(color_by_var_levels)) {
if (is.factor(topGenes.tmp[[color_by_var]])) {
color_by_var_levels <- levels(topGenes.tmp[[color_by_var]])
} else {
color_by_var_levels <- unique(na.omit(topGenes.tmp[[color_by_var]]))
}
}
topGenes.tmp[[color_by_var]] <- factor(topGenes.tmp[[color_by_var]], levels=color_by_var_levels)
if (length(my_cols) < length(color_by_var_levels))
my_cols <- colorRampPalette(colors=my_cols)(length(color_by_var_levels))
color_scale <- scale_color_manual(values=my_cols, na.value=na_col)
} else {
color_scale <- scale_color_gradient(low=my_cols[1], high=my_cols[2], na.value=na_col)
}
file_suffix <- paste0("color_by_", color_by_var, ".", file_suffix)
color_labs <- if (!is.null(color_var_lab)) labs(color=color_var_lab) else labs(color=color_by_var)
color_points <- geom_point(aes_string(color=color_by_var, fill=color_by_var))
}
pch_scale <- NULL; pch_labs <- NULL; pch_points <- NULL
if (plot_pch_by_var) {
if (is.null(pch_by_var_levels)) pch_by_var_levels <- unique(na.omit(topGenes.tmp[[pch_by_var]]))
topGenes.tmp[[pch_by_var]] <- factor(topGenes.tmp[[pch_by_var]], levels=pch_by_var_levels)
file_suffix <- paste0("pch_by_", pch_by_var, ".", file_suffix)
if (!is.null(my_pch)) pch_scale <- scale_shape_manual(values=my_pch)
pch_labs <- if (!is.null(pch_var_lab)) labs(shape=pch_var_lab) else labs(shape=pch_by_var)
pch_points <- geom_point(aes_string(shape=pch_by_var))
}
if (plot_color_by_var & plot_pch_by_var) {
plot_points <- geom_point(aes_string(color=color_by_var, shape=pch_by_var), alpha=0.6, size=3)
} else if (plot_color_by_var & !plot_pch_by_var) {
plot_points <- geom_point(aes_string(color=color_by_var), alpha=0.6, size=3)
} else if (!plot_color_by_var & plot_pch_by_var) {
plot_points <- geom_point(aes_string(shape=pch_by_var), alpha=0.6, size=3)
} else plot_points <- geom_point(alpha=0.6, size=3)
topGenes.tmp <-
miscHelpers::order_points(topGenes.tmp, method=point_order)
# generate volcano plot
volcano <-
ggplot(data = topGenes.tmp,
aes(x=logFC, y=-log10(adj.P.Val))) +
# theme(legend.position = "none") +
color_scale + color_labs +
pch_scale + pch_labs +
plot_points +
xlab("log2 fold change") + ylab("-log10 Adj P")
if (!is.null(fc_cut)) {
volcano <- volcano +
geom_vline(xintercept = fc_cut, linetype="dotted", size=1.0) +
geom_vline(xintercept = -fc_cut, linetype="dotted", size=1.0)
}
if (!is.null(p_cut)) {
volcano <- volcano +
geom_hline(yintercept = -log10(p_cut), linetype="dotted",size=1.0)
}
if (!is.null(x_lim)) {volcano <- volcano + xlim(x_lim)}
if (!is.null(y_lim)) {volcano <- volcano + ylim(y_lim)}
if (gene_labs) {
volcano <- volcano +
geom_text(
data=topGenes.tmp[((topGenes.tmp$logFC^2)/(x_cut^2) +
(log10(topGenes.tmp$adj.P.Val)^2)/(y_cut^2)) > 1,],
aes(label=genes),
color="black", size=3, vjust=1, hjust=0.5)}
if (!is.null(file_prefix)) {
pdf(file=paste(file_prefix, i, file_suffix, sep="."), w=plotdims[1], h=plotdims[2], ...)
on.exit(dev.off(), add=TRUE) # close plotting device on exit
} else {
quartz(plotdims[1],plotdims[2])
volcano <- volcano + ggtitle(i)
}
print(volcano)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.