#' @title Plot number of DE genes that is common across combinations
#'
#' @description
#' \code{ggplot2_intersectPlot} function generates and plots the list of
#' differentially expressed (DE) genes that are found in all
#' combinations at any particular replicate level. Often in small-scale
#' RNA-seq experiments, the inclusion or exclusion of any paricular sample can
#' result in a very different list of DE genes. To reduce the influence of any
#' particular sample in the entire dataset analysis, it may be desirable to
#' identify the list of DE genes that are enriched regardless of any specific
#' sample(s) inclusion. This approach may be most useful analyzing the list of
#' common DE genes at the greatest possible replicate to take advantage of the
#' robust feature as well as employing typically the longest list of DE genes.
#'
#' @details
#' Similar to how increasing number of detected DE genes can be found with more
#' biological replicates, the list of common DE genes is expected to increase
#' with more replicates. This eventually levels off as majority of DE genes have
#' been found.
#'
#' @param deg The list of DE genes generated by one of ERSSA::DE_*.R scripts.
#' @param path Path to which the plot will be saved. Default to current working
#' directory.
#' @param save_plot Boolean. Whether to save plot to drive. Default to TRUE.
#'
#' @return A list is returned containing:
#' \itemize{
#' \item{gg_object} {the ggplot2 object, which can then be further
#' customized.}
#' \item{intersect.dataframe} {the tidy table version used for plotting.}
#' \item{deg_dataframe} {the tidy table version of DEG numbers for
#' plotting mean.}
#' \item{intersect_genes} {list of vectors containing DE genes with vector
#' name indicating the associated replicate level.}
#' \item{full_num_DEG} {The number of DE genes with all samples included.}
#' }
#' @author Zixuan Shao, \email{Zixuanshao.zach@@gmail.com}
#'
#' @examples
#' # load edgeR deg object generated by erssa_edger using example dataset
#' # example dataset containing 1000 genes, 4 replicates and 5 comb. per rep.
#' # level
#' data(deg.partial, package = "ERSSA")
#'
#' gg_intersect = ggplot2_intersectPlot(deg.partial)
#'
#' @references
#' H. Wickham. ggplot2: Elegant Graphics for Data Analysis.
#' Springer-Verlag New York, 2009.
#'
#' @export
#'
#' @import ggplot2
ggplot2_intersectPlot = function(deg=NULL, path='.', save_plot=TRUE){
if (is.null(deg)){
stop('Missing required deg argument in ggplot2_intersectPlot function')
}
# all rep levels except full
all_rep_levels = names(deg)[1:length(deg)-1]
# calculate list of intersect genes
inters_genes = lapply(all_rep_levels, function(rep_i){
inters_genes_i = Reduce(intersect, deg[[rep_i]])
return(inters_genes_i)
})
names(inters_genes) = all_rep_levels
# intersecting genes dataframe for plotting
rep_levels = sapply(all_rep_levels, function(x)
strsplit(x, split='_')[[1]][2])
inters_genes_num = sapply(inters_genes, function(x) length(x))
inters_genes_df = data.frame(replicate_number=rep_levels,
num_intersect_genes=inters_genes_num)
inters_genes_df$replicate_number = factor(inters_genes_df$replicate_number,
levels = unique(
inters_genes_df$replicate_number))
# collapse deg list and convert to tidy format for plotting
deg_one_list = unlist(deg, recursive = FALSE)
deg_one_list[['full.comb_1']]=NULL
num_DEG = vapply(deg_one_list, length, FUN.VALUE = numeric(1),
USE.NAMES = FALSE)
rep_level = vapply(names(deg_one_list), function(x) {
rep_i = strsplit(x, split='\\.')[[1]][1]
rep_i = strsplit(rep_i, split='\\_')[[1]][2]
}, FUN.VALUE = character(1), USE.NAMES = FALSE)
comb_ID = vapply(names(deg_one_list), function(x) {
comb_i = strsplit(x, split='\\.')[[1]][2]
comb_i = strsplit(comb_i, split='\\_')[[1]][2]
}, FUN.VALUE = character(1), USE.NAMES = FALSE)
# number of deg in full dataset
full_num_DEG = length(deg$full$comb_1)
# create data frame for plotting
deg_df = data.frame(num_DEG = num_DEG, rep_level = rep_level,
comb_ID = comb_ID)
deg_df$rep_level = factor(deg_df$rep_level, levels =
unique(deg_df$rep_level))
# plot
gg = ggplot(inters_genes_df, aes_(x=~replicate_number,
y=~num_intersect_genes))+
geom_col(width=0.7) +
theme_bw(base_size=14) +
stat_summary(data = deg_df, aes(rep_level, num_DEG,
colour="Mean"),
group=1, fun.y='mean', geom='line', size=1) +
geom_hline(aes(yintercept=full_num_DEG, color =
"Full dataset"), size=0.75,
linetype="dashed", show.legend = TRUE) +
scale_colour_manual(values=c("Full dataset"="blue",
"Mean"='red')) +
labs(x='Replicate number',
y='Number of intersecting DE genes', colour="") +
geom_text(aes_(label = ~num_intersect_genes),
vjust = ifelse(inters_genes_df$num_intersect_genes >= 0,
-0.2, 1.2)) +
guides(color = guide_legend(title = 'DE genes',
override.aes = list(linetype = c("dashed", "solid"))))
if (save_plot==TRUE){
# create dir to save results
folder_path = file.path(path)
dir.create(folder_path, showWarnings = FALSE)
# save plot
ggsave(filename=file.path(path,'ERSSA_plot_3_IntersectDEGenes.png'),
plot=gg, dpi=300, width = 20,
height = 15, units = "cm")
}
return(list(gg_object=gg, intersect.dataframe= inters_genes_df,
deg_dataframe = deg_df,
intersect_genes= inters_genes, full_num_DEG=full_num_DEG))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.