#' @title Volcano Plot
#'
#' @description Once the differential analysis has been performed, it is possible to visualize the volcano plots employing this function.
#' @description The volcano plot is generated by the employment of ggplot2, setting xlimit and ylimit based on the data. If there are genes with pvalue equal to infinity, those are forced to the maximum value of the pvalue. If 'my_comparison' paramater is not provided (default: NULL), the function will extract the name of the first subfolder inside "./results" and use it. The volcano plots are saved in the a subfolder for each comparison (Figure 1).
#' @param DE_results DESeq2 results table. Accepts both a file (.tsv, .csv, tab-separated .txt) or a dataframe (see example below). Requires 'log2FoldChange' and 'padj' columns.
#' @param my_comparison The comparison to plot (control_vs_treatment, a_vs_b, ...). If the user is employing the whole workflow use exactly the name of the comparison indicated in the comparison dataframe.
#' @param highlight_genes A (optional) list of genes the user would like to highlight (label) in the volcano plot. It accepts a dataframe, a character vector or a path to a file in .txt format.
#' @param log2FC_thresh Threshold value for log2(Fold Change) to highlight genes as differentially expressed (default = 0).
#' @param padj_thresh Threshold value for adjusted p-value to highlight genes as significant (default = 0.05).
#' @param del_csv Specify the delimiter of the .csv file (default = ","). This is because opening .csv files with Excel messes up the format and changes the delimiter to ";".
#' @param outfolder The name to assign to the folder for output saving. (Default = "./results").
#' @return No return value. Files will be produced as part of normal execution.
#' @examples
#' \dontrun{
#' filename <- "./results/H460.2D_vs_H460.3D.2p/DE_H460.2D_vs_H460.3D.2p_allres.tsv"
#' volcanoplot(
#' DE_results = filename,
#' my_comparison = "H460.2D_vs_H460.3D.2p",
#' log2FC_thresh = 0,
#' padj_thresh = 0.05,
#' highlight_genes = NULL,
#' del_csv = ",",
#' outfolder = "./results"
#' )
#' }
#' @export
volcanoplot <- function(DE_results,
my_comparison = NULL,
highlight_genes = NULL,
log2FC_thresh = 0,
padj_thresh = 0.05,
del_csv = ",",
outfolder = "./results") {
if (is.data.frame(DE_results)) {
if (is.null(my_comparison)) {
stop("If DE results is a dataframe you must specify my comparison.")
}
res <- DE_results
} else if (grepl(".tsv", DE_results)) {
res_path <- DE_results
res <- read_tsv(res_path, col_types = cols())
} else if (grepl(".csv", DE_results)) {
res_path <- DE_results
res <- read_delim(res_path, col_types = cols(), delim = del_csv)
} else if (grepl(".txt", DE_results)) {
res_path <- DE_results
res <- read_delim(res_path, delim = "\t", col_types = cols())
} else {
warning("Provide a file .tsv, .csv, .txt tab-separated, or a data.frame")
}
if (is.null(my_comparison)) {
if (grepl("_vs_", res_path)) {
my_comparison <- str_match(res_path, pattern = paste0(outfolder, "(.*?)\\/"))[2]
title <- paste0("Volcano Plot for ", gsub("_", " ", my_comparison))
} else {
title <- paste0("Volcano Plot")
}
} else {
title <- paste0("Volcano Plot for ", gsub("_", " ", my_comparison))
}
volcanoData <- cbind.data.frame(res$log2FoldChange, -log10(res$padj))
colnames(volcanoData) <- c("logFC", "-log10(padj)")
rownames(volcanoData) <- res[[1]]
up <- res %>%
dplyr::filter(.data$log2FoldChange >= log2FC_thresh & .data$padj <= padj_thresh) %>%
dplyr::select(.data$genes) %>%
dplyr::pull()
down <- res %>%
dplyr::filter(.data$log2FoldChange <= -log2FC_thresh & .data$padj <= padj_thresh) %>%
dplyr::select(.data$genes) %>%
dplyr::pull()
if (!is.null(highlight_genes)) {
if (is.data.frame(highlight_genes)) {
highlight_genes <- dplyr::pull(highlight_genes)
} else if (is.character(highlight_genes) & !grepl(".txt", highlight_genes)[1]) {
highlight_genes <- highlight_genes
} else if (grepl(".txt", highlight_genes)) {
highlight_genes <- read_delim(highlight_genes, delim = "\t", col_types = cols(), col_names = F) %>% dplyr::pull()
}
labeled_genes_df <- volcanoData[rownames(volcanoData) %in% highlight_genes, ]
labeled_genes_up <- rownames(labeled_genes_df[labeled_genes_df$logFC > 0, ])
labeled_genes_down <- rownames(labeled_genes_df[labeled_genes_df$logFC < 0, ])
}
volcanoData$`-log10(padj)`[volcanoData$`-log10(padj)` == Inf] <- max(volcanoData$`-log10(padj)`[is.finite(volcanoData$`-log10(padj)`)])
if (abs(round(min(volcanoData$logFC, na.rm = T))) > round(max(volcanoData$logFC, na.rm = T))) {
xlim_n <- round(min(volcanoData$logFC, na.rm = T)) - 1
xlim_p <- abs(round(min(volcanoData$logFC, na.rm = T))) + 1
x_high_n <- xlim_n + 10
x_high_p <- max(volcanoData$logFC, na.rm = T) - 5
} else {
xlim_n <- -round(max(volcanoData$logFC, na.rm = T)) - 1
xlim_p <- round(max(volcanoData$logFC, na.rm = T)) + 1
x_high_n <- min(volcanoData$logFC, na.rm = T) - 5
x_high_p <- xlim_p - 10
}
p <- ggplot(volcanoData, aes(x = .data$logFC, y = .data$`-log10(padj)`)) +
theme_classic() +
ggtitle(label = title, subtitle = paste0("|logFC|>", log2FC_thresh, " & padj<", padj_thresh)) +
theme(
plot.title = element_text(size = (15), face = "bold.italic"),
legend.title = element_blank(),
legend.position = "bottom"
) +
geom_point(aes(colour = "Not_DE"), size = 0.8) +
geom_point(
data = volcanoData[rownames(volcanoData) %in% up, ],
aes(x = .data$logFC, y = .data$`-log10(padj)`, colour = "Upregulated"), size = 1.5
) +
geom_point(
data = volcanoData[rownames(volcanoData) %in% down, ],
aes(x = .data$logFC, y = .data$`-log10(padj)`, colour = "Downregulated"), size = 1.5
) +
{
if (!is.null(highlight_genes)) {
geom_text_repel(
data = volcanoData[rownames(volcanoData) %in% labeled_genes_up, ], size = 3, segment.size = 0.15,
xlim = c(x_high_p, NA), ylim = c(max(volcanoData$`-log10(padj)`) / 3, NA),
segment.color = "grey50", direction = "y", box.padding = 1, max.overlaps = 50,
aes(x = .data$logFC, y = .data$`-log10(padj)`, label = labeled_genes_up)
)
}
} +
{
if (!is.null(highlight_genes)) {
geom_text_repel(
data = volcanoData[rownames(volcanoData) %in% labeled_genes_down, ], size = 3, segment.size = 0.15,
xlim = c(NA, x_high_n), ylim = c(max(volcanoData$`-log10(padj)`) / 3, NA),
segment.color = "grey50", direction = "y", box.padding = 1, max.overlaps = 50,
aes(x = .data$logFC, y = .data$`-log10(padj)`, label = labeled_genes_down)
)
}
} +
{
if (!is.null(highlight_genes)) geom_point(data = labeled_genes_df, pch = 10)
} +
scale_fill_continuous(guide = guide_legend()) +
scale_color_manual(
aesthetics = "colour", values = c("Not_DE" = "grey79", "Downregulated" = "#66C2A5", "Upregulated" = "#D53E4F"),
breaks = c("Downregulated", "Upregulated")
) +
geom_vline(xintercept = log2FC_thresh, col = "grey", linetype = "dotted", size = 0.5) +
geom_vline(xintercept = -log2FC_thresh, col = "grey", linetype = "dotted", size = 0.5) +
geom_hline(yintercept = -log10(padj_thresh), col = "grey", linetype = "dotted", size = 0.5) +
scale_y_continuous(expand = expansion(mult = c(0, 0.01))) +
scale_x_continuous(expand = expansion(mult = c(0.01, 0.01)), limits = c(xlim_n, xlim_p))
my_volcano_dir <- file.path(outfolder, my_comparison, paste0("filtered_DE", "_thFC", log2FC_thresh, "_thPval", padj_thresh))
if (!dir.exists(my_volcano_dir)) dir.create(my_volcano_dir, recursive = T)
if (!is.null(highlight_genes)) {
png(paste0(my_volcano_dir, "/", "volcanoplot_highlighted.png"), width = 2200, height = 2200, res = 300)
print(p)
dev.off()
} else {
png(paste0(my_volcano_dir, "/", "volcano.png"), width = 2200, height = 2200, res = 300)
print(p)
dev.off()
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.