Nothing
#' @title Check number of DE genes at different cutoff combinations
#' @description This function processes summary statistics table generated by differential expression analysis
#' like \code{limma} or \code{DESeq2} to evaluate the number of differntially expressed genes with different FDR and
#' fold change cutoff.
#'
#' @param data Summary statistics table or a list of summary statistics tables from limma or DEseq2, where each row is a gene.
#' @param comp.names A character vector that contains the comparison names which correspond to the same order as \code{data}.
#' @param FCflag The column name of the log2FC in the summary statistics table. Default = "logFC".
#' @param FDRflag The column name of the False Discovery Rate (FDR) in the summary statistics table. Default = "adj.P.Val".
#' @param FCmin The minimum starting fold change cutoff to be checked, so the minimum fold change cutoff to be evaluated
#' will be FCmin + FCstep, FCmin default = 1.
#' @param FCmax The maximum fold change cutoff to be checked, default = 2.
#' @param FCstep The step from the minimum to maximum fold change cutoff, one step increase at a time, default = 0.01.
#' @param p.min The minimum starting FDR cutoff to be checked, so the minimum fold change cutoff to be evaluated
#' will be p.min + p.step, p.min default = 0.
#' @param p.max The maximum FDR cutoff to be checked, default = 0.2.
#' @param p.step The step from the minimum to maximum fold change cutoff, one step increase at a time, default = 0.005.
#' @param plot.save.to The address where to save the plot from simplified cutoff combination with FDR of 0.01, 0.05, 0.1, and 0.2.
#' @param gen.3d.plot Whether generate a 3d plotly object to visualize the result, only applys to single dataframe input, default = F.
#' @param gen.plot Whether generate a plot to visualize the result, default = T.
#'
#'
#' @return If the input \code{data} is a data list, then a multi-facet ggplot plot object which contains each
#' of the summary statistics table will be returned; otherwise, if the input \code{data} is a data frame, then the function will return a list which contains 3 elements:
#' \item{df.sub}{A dataframe, which contains the number of genes(3rd column) with FDR (1st column), Fold Change (2nd column)}
#' \item{plot3d}{A plotly object to show the 3d illustration of all possible cutoff selectiosn and
#' the number of DE genes in the 3d surface}
#' \item{gp}{A ggplot object to show the simplified cutoff combination result}
#'
#'
#' @importFrom dplyr %>% filter as_tibble mutate bind_rows
#' @importFrom ggplot2 ggsave ggplot geom_bar geom_text aes theme_minimal labs position_dodge element_text scale_fill_viridis_d facet_grid
#' @importFrom haven as_factor
#' @importFrom purrr map2 set_names map
#' @importFrom plotly plot_ly add_markers layout
#' @importFrom rlang .data
#'
#' @export plot_cutoff
#'
#' @references Xingpeng Li & Olya Besedina, RVA - RNAseq Visualization Automation tool.
#'
#' @details The function takes the summary statistics and returns a list which contains 3 objects: a table which describes the number
#' of DE genes with different cutoff combinations of FDR and fold change, a ggplot object which depicts a simplified version of
#' cutoff selection combination, and a plotly 3d visulization object which depicts a high resolution of cutoff combinations.
#' The default range of the fold change is from 1 to 2, and p value is from 0 to 0.2, with the step of 0.01
#' for FC and 0.005 for FDR.
#'
#'
#' @examples
#' plot_cutoff(Sample_summary_statistics_table)
#'
#'plot_cutoff(data = list(Sample_summary_statistics_table, Sample_summary_statistics_table1),
#' comp.names = c("A", "B"))
#'
#'
#'
plot_cutoff <- function(data = data,
comp.names = NULL,
FCflag = "logFC",
FDRflag = "adj.P.Val",
FCmin = 1.2,
FCmax = 2,
FCstep = 0.1,
p.min = 0,
p.max = 0.2,
p.step = 0.01,
plot.save.to = NULL,
gen.3d.plot=TRUE,
gen.plot=TRUE){
suppressWarnings({
suppressMessages({
validate.FC(FCmin, FCmax, FCstep)
validate.pvals(p.min, p.max, p.step)
plot.save.exists <- !is.null(plot.save.to)
FCs <- seq(from = FCmin, to = FCmax, by = FCstep)
if(inherits(data, "list")) {
validate.comp.names(comp.names, data)
message(paste0("The p.step parameters are",
" ignored with list inputs for simplified output."))
pvalues <- c(0.01, 0.05, 0.1, 0.2)
df <- map(data, plot_cutoff_single,
FCflag, FDRflag,
FCs,
pvalues) %>%
set_names(comp.names) %>%
bind_rows(, .id = "Comparisons.ID") %>%
mutate(Comparisons.ID = factor(.data$Comparisons.ID,levels = comp.names))
} else {
pvalues <- seq(from = p.min, to = p.max, by = p.step)
df <- plot_cutoff_single(data,
FCflag,
FDRflag,
FCs,
pvalues)
}
if(inherits(data, "list")) {
gp <- get.cutoff.ggplot(df, FCflag, FDRflag)
gp <- gp + facet_grid(Comparisons.ID ~ .)
if(gen.3d.plot) {
warning(paste0("Although gen.3d.plot = T, no 3D plots are",
" generated when the input is a list. To",
" create 3D plots for a list of dataframes,",
" run plot_cutoff on each dataframe separately.")
)
}
if(!gen.plot) {
return(df)
}
if(plot.save.exists) {
ggsave(filename = plot.save.to,
plot = gp,
dpi = 300,
units = "in",
device='png')
}
return(gp)
}
#following only run when input is a single dataframe
produce.cutoff.message(data,
FCmin, FCmax, FCstep,
FDRflag, p.min, p.max, p.step)
plot3d <- NULL
if(gen.3d.plot){
plot3d <- make.cutoff.plotly(df)
}
if(gen.plot) {
df.sub <- df %>%
filter(.data$FC %in% FCs, .data$pvalue %in% c(0.01, 0.05, 0.1, 0.2))
gp <- get.cutoff.ggplot(df.sub, FCflag, FDRflag)
if(!plot.save.exists) {
warning(paste0("Plot file name not specified, a plot in",
" ggplot object will be output to the second",
" object of the return list!")
)
} else {
ggsave(filename = plot.save.to,
plot = gp,
dpi = 300,
units = "in",
device='png')
}
}
return(list(df, plot3d, gp))
})
})
}
#' @title Create plotly object for number of DE genes at different cutoff combinations
#' @description This function processes summary statistics table generated by differential expression analysis
#' like \code{limma} or \code{DESeq2} to produce an interactibe visual object
#' which depicts the number of differntially expressed genes with different FDR and
#' fold change cutoff.
#'
#' @param df Summary statistics table from limma or DEseq2, where each row is a gene.
#'
#' @importFrom dplyr %>% filter as_tibble mutate bind_rows
#' @importFrom plotly plot_ly add_markers layout
#' @importFrom rlang .data
#'
make.cutoff.plotly <- function(df) {
df0 <- df %>% mutate(FC = as.numeric(as.character(.data$FC)),
pvalue = as.numeric(as.character(.data$pvalue)))
plot_ly(data = df0,
x = ~FC,
y = ~pvalue,
z = ~Number_of_Genes,
opacity = 0.5,
type = "scatter3d",
mode = "markers",
color = ~Number_of_Genes,
size = 1) %>%
add_markers() %>%
layout(scene = list(xaxis = list(title = 'Fold Change'),
yaxis = list(title = 'FDR'),
zaxis = list(title = 'Number of DE genes')))
}
#' @title Create plotly object for number of DE genes at different cutoff combinations
#' @description This function processes summary statistics table generated by differential expression analysis
#' like \code{limma} or \code{DESeq2} and produces a table which contains gene counts
#' for each of the pvalue and FC combination
#'
#' @param datin Summary statistics table from limma or DEseq2, where each row is a gene.
#' @param pvalues A set of p-values for FDR cutoff to be checked.
#' @param FCs A set of fold change cutoff to be checked.
#' @param FCflag The column name of the log2FC in the summary statistics table.
#' @param FDRflag The column name of the False Discovery Rate (FDR) in the summary statistics table.
#'
plot_cutoff_single <- function(datin,
FCflag,
FDRflag,
FCs,
pvalues) {
validate.stats(datin, FCflag, FDRflag)
produce.cutoff.warning(datin, FDRflag)
df <- get.cutoff.df(datin, pvalues, FCs, FCflag, FDRflag)
return(df)
}
#' @title Create ggplot object for number of differntially expressed genes with
#' different FDR and fold change cutoff.
#' @description This function processes dataframe from plot_cutoff_single function
#' and produces a ggplot object which depicts the number of differntially expressed
#' genes with different FDR and fold change cutoff.
#'
#' @param df Dataframe from plot_cutoff_single.
#' @param FCflag The column name of the log2FC in the summary statistics table.
#' @param FDRflag The column name of the False Discovery Rate (FDR) in the summary statistics table.
#' @importFrom ggplot2 ggsave ggplot geom_bar geom_text aes theme_minimal labs position_dodge element_text scale_fill_viridis_d facet_grid
get.cutoff.ggplot <- function(df, FCflag, FDRflag) {
ggplot(df, aes(x=.data$FC, y=.data$Number_of_Genes, fill=.data$pvalue)) +
geom_bar(stat="identity", position=position_dodge()) +
geom_text(aes(label=.data$Number_of_Genes), vjust=-0.3, color="black",
position = position_dodge(0.9), size=3.5) +
labs(x = FCflag, fill = FDRflag) +
ylab("Number of Genes under cutoff") +
xlab("FC") +
labs(fill = 'FDR') +
theme(strip.text.y = element_text(angle = 0)) +
scale_fill_viridis_d()
}
#a simpler version to output in result in dataframe
#' @title Create ggplot object for number of differntially expressed genes with
#' different FDR and fold change cutoff.
#' @description This function processes dataframe from plot_cutoff_single function
#' and produces a ggplot object which depicts the number of differntially expressed
#' genes with different FDR and fold change cutoff.
#'
#' @param datin Dataframe from plot_cutoff_single.
#' @param FCflag The column name of the log2FC in the summary statistics table.
#' @param FDRflag The column name of the False Discovery Rate (FDR) in the summary statistics table.
#' @param pvalues A set of p-values for FDR cutoff to be checked.
#' @param FCs A set of fold change cutoff to be checked.
#'
#' @importFrom dplyr %>% filter as_tibble mutate bind_rows
#' @importFrom haven as_factor
#' @importFrom purrr map2 set_names map
#' @importFrom rlang .data
#' @importFrom stats complete.cases
get.cutoff.df <- function(datin,
pvalues,
FCs,
FCflag = "logFC",
FDRflag = "adj.P.Val") {
df <- expand.grid(list(pvalues, FCs)) %>%
as_tibble %>%
stats::setNames(c("pvalue","FC"))
datin <- datin[,c(FDRflag,FCflag)] %>% stats::setNames(c("DAT_FDR","DAT_FC"))
datin <- datin[complete.cases(datin), ] #only plot complete cases in the result
get.gene.num <- function(FC, pvalue, FCflag, FDRflag){
num.pass <- datin %>%
filter(.data$DAT_FDR <= pvalue , abs(.data$DAT_FC) >= log2(FC)) %>%
nrow
return(num.pass)
}
df$Number_of_Genes <- unlist(map2(df$FC, df$pvalue, get.gene.num))
df <- df %>%
mutate(pvalue = as.factor(.data$pvalue),
FC = as.factor(.data$FC),
Number_of_Genes = as.numeric(.data$Number_of_Genes))
return(df)
}
#' @title Create a warning about pvalue or FDR minimum value
#' @description This function processes summary statistics table generated by differential expression analysis
#' like \code{limma} or \code{DESeq2} and produces a warning about pvalue or FDR minimum value
#'
#' @param data Summary statistics table from limma or DEseq2, where each row is a gene.
#' @param FDRflag The column name of the False Discovery Rate (FDR) in the summary statistics table.
produce.cutoff.warning <- function(data,
FDRflag) {
if(min(data[,FDRflag]) > 0.05){
warning(paste0("This summary statistics table has a minimum",
" pvalue or FDR of ", min(data[,FDRflag]),
", it is likely there are no DE genes in this",
" summary statistics table."))
return(TRUE)
}
FALSE
}
#' @title Create a message about fold change and pvalues used to produce the plot.
#' @description This function processes summary statistics table generated by differential expression analysis
#' like \code{limma} or \code{DESeq2} and produces a message about pvalues and fold change used.
#'
#' @param data Summary statistics table from limma or DEseq2, where each row is a gene.
#' @param FDRflag The column name of the False Discovery Rate (FDR) in the summary statistics table.
#' @param FCmin The minimum starting fold change cutoff to be checked, so the minimum fold change cutoff to be evaluated
#' will be FCmin + FCstep, FCmin default = 1.
#' @param FCmax The maximum fold change cutoff to be checked, default = 2.
#' @param FCstep The step from the minimum to maximum fold change cutoff, one step increase at a time, default = 0.01.
#' @param p.min The minimum starting FDR cutoff to be checked, so the minimum fold change cutoff to be evaluated
#' will be p.min + p.step, p.min default = 0.
#' @param p.max The maximum FDR cutoff to be checked, default = 0.2.
#' @param p.step The step from the minimum to maximum fold change cutoff, one step increase at a time, default = 0.005.
produce.cutoff.message <- function(data,
FCmin, FCmax, FCstep,
FDRflag, p.min, p.max, p.step) {
if(!produce.cutoff.warning(data, FDRflag)) {
message(paste0("With the fold change from ",FCmin," to ",FCmax,
" using a step of ", FCstep," and with ", FDRflag,
" values ranging from ",p.min," to ",p.max,
" with a step of ", p.step, ", \nIn total,",
((FCmax - FCmin)/FCstep)*((p.max-p.min)/p.step),
" cutoff combinations can be visualized in the 3d plot.\n"))
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.