Nothing
#' @title Plot volcanoplot
#' @description This function processes the summary statistics table generated by differential expression analysis
#' like \code{limma} or \code{DESeq2} to show on the volcano plot with the highlight gene set option (like disease
#' related genes from Disease vs Healthy comparison).
#'
#' @param data Summary statistics table or a list contain multiple summary statistics tables from limma or DEseq2, where each row is a gene.
#' @param comp.names A character vector that contains the comparison names which correspond to the same order as \code{data}. Required if \code{data} is list. No default.
#' @param geneset Summary statistic table that contains the genes which needed to be highlighted, the gene name format (in row names)
#' needs to be consistent with the main summary statistics table). For example, this summary statistics
#' table could be the output summary statistics table from the Disease vs Healthy comparison (Only contains
#' the subsetted significant genes to be highlighted).
#' @param geneset.FCflag The column name of fold change in \code{geneset}, Default = "logFC".
#' @param highlight.1 Genes to be highlighted, in the format of a vector consists of gene names. The gene name format
#' needs to be consistent to the main summary statistics table.
#' @param highlight.2 Genes to be highlighted, in the format of a vector consists of gene names. The gene name format
#' needs to be consistent to the main summary statistics table.
#' @param upcolor The color of the gene names in \code{highlight.1} or the positive fold change gene
#' in \code{geneset}, default = "#FDE725FF" (viridis color palette).
#' @param downcolor The color of the gene names in \code{highlight.2} or the negative fold change gene
#' in \code{geneset}, default = "#440154FF" (viridis color palette).
#' @param plot.save.to The file name and address where to save the volcano plot, e.g. "~/address_to_folder/volcano_plot.png".
#' @param xlim Range of x axis. Default = \code{c(-3,3)}.
#' @param ylim Range of x axis. Default = \code{c(0,6)}.
#' @param FCflag Column name of log2FC in the summary statistics table. Default = "logFC".
#' @param FDRflag Column name of FDR in the summary statistics table. Default = "adj.P.Val".
#' @param highlight.FC.cutoff Fold change cutoff line want to be shown on the plot. Default = 1.5.
#' @param highlight.FDR.cutoff FDR cutoff shades want to be shown on the plot. Default = 0.05.
#' @param title The plot title. Default "Volcano plot".
#' @param xlab The label for x-axis. Default "log2 Fold Change".
#' @param ylab The label for y-axis. Default "log10(FDR)".
#'
#'
#' @return The function return a volcano plot as a ggplot object.
#'
#' @importFrom ggplot2 ggsave ggplot geom_point geom_vline xlim ylim labs theme geom_hline xlab ylab ggtitle facet_wrap
#' @importFrom dplyr %>%
#' @importFrom tibble as_tibble
#' @importFrom tidyr separate
#' @importFrom purrr map
#' @importFrom rlang .data
#'
#' @export plot_volcano
#'
#' @references Xingpeng Li & Tatiana Gelaf Romer & Olya Besedina, RVA - RNAseq Visualization Automation tool.
#'
#' @details The function takes the summary statistics table and returns a ggplot, with the option to highlight
#' genes, e.g. disease signature genes, the genes which are up-regulated and down-regulated in diseased subjects.
#'
#'
#' @examples
#' plot_volcano(data = Sample_summary_statistics_table,
#' geneset = Sample_disease_gene_set)
#'
#' plot_volcano(data = list(Sample_summary_statistics_table, Sample_summary_statistics_table1),
#' comp.names = c("A", "B"),
#' geneset = Sample_disease_gene_set)
plot_volcano <- function(data=data,
comp.names = NULL,
geneset = NULL,
geneset.FCflag = "logFC",
highlight.1 = NULL,
highlight.2 = NULL,
upcolor = "#FF0000",
downcolor = "#0000FF",
plot.save.to = NULL,
xlim = c(-4,4),
ylim = c(0,12),
FCflag = "logFC",
FDRflag = "adj.P.Val",
highlight.FC.cutoff = 1.5,
highlight.FDR.cutoff = 0.05,
title= "Volcano plot",
xlab = "log2 Fold Change",
ylab = "log10(FDR)"
){
message(paste0("\n\n Running plot volcano... Please make sure gene id type(rownames) of `data` consistent to that of `geneset` (if provided). \n\n"))
suppressWarnings({
suppressMessages({
validate.geneset(data, geneset, highlight.1, highlight.2)
if(is.data.frame(geneset)) {
if(inherits(data, 'list')){
#proposed change here instead of referrin to rownames of data[[1]], gonna take intersect of all rows present
geneset.FC.thresh <- geneset[,geneset.FCflag] > 0
geneset.genes <- rownames(geneset)
#collect all rownames from list of dataframes and find intersect amongst all of them
rname_colec <- map(data,rownames)
common_rnames <- Reduce(intersect,rname_colec)
message(paste0("\n\n Provided input list had a total of ", length(common_rnames),
" in common, non-common gene id will not be considered. \n\n"))
highlight.1 <- geneset.genes[geneset.FC.thresh] %>%
as.character %>%
intersect(common_rnames) # need to add to validator to compare the gene names consistent across the data.list
highlight.2 <- geneset.genes[!geneset.FC.thresh] %>%
as.character %>%
intersect(common_rnames)
} else {
geneset.FC.thresh <- geneset[,geneset.FCflag] > 0
geneset.genes <- rownames(geneset)
highlight.1 <- geneset.genes[geneset.FC.thresh] %>%
as.character %>%
intersect(rownames(data)) # need to add to validator to compare the gene names consistent across the data list
highlight.2 <- geneset.genes[!geneset.FC.thresh] %>%
as.character %>%
intersect(rownames(data))
}
}
if(inherits(data, "list")){
#check if same length of the data list names are provided
if(!is.null(comp.names) & length(data) != length(comp.names)){
message("Please make sure the provided summary statistics list 'dat.list' has the same length as 'comp.names'.")
return(NULL)
}
data.n <- set_names(data,comp.names)
data.p <- map(data.n, as_tibble, rownames = "gene") %>%
bind_rows(.id = "Comparisons.ID")
data.p$Comparisons.ID = factor(data.p$Comparisons.ID, levels = comp.names)
}else{
data.p <- data
data.p$gene <- rownames(data.p)
}
#log transform the cutoff lines on the plot
highlight.FC.cutoff <- log2(highlight.FC.cutoff)
highlight.FDR.cutoff <- -log10(highlight.FDR.cutoff)
#rename the column names for plotting
colnames(data.p)[which(names(data.p) == FCflag)] <- "FC"
colnames(data.p)[which(names(data.p) == FDRflag)] <- "FDR"
p <- ggplot() +
geom_point(data=data.p, aes(x = .data$FC, y = -log10(.data$FDR)),color="grey") +
geom_point(data=data.p[data.p$gene %in% highlight.1,],
aes(x = .data$FC, y = -log10(.data$FDR)),
color=upcolor,
size=1.5,
shape = 21) +
geom_point(data=data.p[data.p$gene %in% highlight.2,],
aes(x = .data$FC, y = -log10(.data$FDR)),
color=downcolor,
size=1.5,
shape = 21) +
geom_vline(xintercept=-log2(highlight.FC.cutoff), linetype="dashed", colour = "black") +
geom_vline(xintercept=log2(highlight.FC.cutoff), linetype="dashed", colour = "black") +
geom_hline(yintercept= -log10(highlight.FDR.cutoff),color = "grey") +
ylim(ylim) +
xlim(xlim) +
ggtitle(title) +
xlab(xlab) +
ylab(ylab)
if(inherits(data, "list")){
p <- p + facet_wrap(facets = "Comparisons.ID", nrow = 1)
}
if(is.null(plot.save.to)){
p
}else{
ggsave(filename = plot.save.to,
plot = p,
dpi = 300,
units = "in",
device='png')
}
data.is.list <- inherits(data, "list")
if(data.is.list) {
validate.comp.names(comp.names, data)
}
return(p)
})
})
}
# make a warning in geneset.type to say that row names must be gene names;
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.