#' @title Plot TPR and FPR of each combination
#'
#' @description
#' \code{ggplot2_TPR_FPR} function uses the full dataset list of DE genes as the
#' ground truth to calculate the True Positive Rate (TPR) and False Positive
#' Rate (FPR) for each sample combinations tested. The TPR and FPR are then
#' plotted with FPR on x-axis and TPR on y-axis similar to a ROC curve.
#'
#' @details
#' Using the list of DE genes generated from the full dataset as the ground
#' truth should be done with caution. Since the true list of DE genes is not
#' known, this is the best alternative. This plot enables the visualization of
#' the sensitivity and (1-specificity) of the DE gene detection at the tested
#' replicate levels. At a sufficient replicate level, a relatively high TPR can
#' be reached with reasonable low FPR. Such a replicate level is sufficient for
#' most studies as additional replicates produce little improvement in TPR.
#'
#' @param deg The list of DE genes generated by one of ERSSA::DE_*.R scripts.
#' @param count_table.filtered The filtered count table with non- and
#' low-expression genes removed. Used to identify the genes found to be non-DE.
#' @param stat The statistics used to summarize TPR and FPR at each replicate
#' level. Options include 'mean' and 'median'. Default = 'mean'.
#' @param path Path to which the plot will be saved. Default to current working
#' directory.
#' @param save_plot Boolean. Whether to save plot to drive. Default to TRUE.
#'
#' @return A list is returned containing:
#' \itemize{
#' \item{gg_object} {the ggplot2 object, which can then be further
#' customized.}
#' \item{TPR_FPR.dataframe} {the tidy table used for plotting.}
#' \item{list_TP_FP_genes} {lists of TP and FP genes. Follow the format of
#' deg object with each comb_n now as a list contain two vectors, one for
#' each of TP and FP list of DE genes}
#' }
#' @author Zixuan Shao, \email{Zixuanshao.zach@@gmail.com}
#'
#' @examples
#' # load edgeR deg object generated by erssa_edger using example dataset
#' # example dataset containing 1000 genes, 4 replicates and 5 comb. per rep.
#' # level
#' data(deg.partial, package = "ERSSA")
#' data(count_table.filtered.partial, package = "ERSSA")
#'
#' gg_TPR_FPR = ggplot2_TPR_FPRPlot(deg.partial, count_table.filtered.partial)
#'
#' @references
#' H. Wickham. ggplot2: Elegant Graphics for Data Analysis.
#' Springer-Verlag New York, 2009.
#'
#' @export
#'
#' @importFrom grDevices colorRampPalette
#' @importFrom RColorBrewer brewer.pal
#' @importFrom stats aggregate
#' @import ggplot2
ggplot2_TPR_FPRPlot = function(deg=NULL, count_table.filtered=NULL,
stat='mean', path='.', save_plot=TRUE){
if (is.null(deg)){
stop('Missing required deg argument in ggplot2_TPR_FPRPlot function')
} else if (is.null(count_table.filtered)){
stop(paste0('Missing required count_table.filtered argument in ',
'ggplot2_TPR_FPRPlot function'))
} else if (!(is.data.frame(count_table.filtered))){
stop('count_table is not an expected data.frame object')
} else if (!(stat %in% c('mean', 'median'))){
stop(paste0('Only mean or median summary statistics supported in',
'ggplot2_TPR_FPRPlot function'))
} else if (length(unique(sapply(count_table.filtered, class)))!=1){
stop(paste0('More than one data type detected in count table, please ',
'make sure count table contains only numbers and that the ',
'list of gene names is the data.frame index'))
}
# DEG list at full
full_deg = deg$full$comb_1
# non-DEG list at full
full_nondeg = rownames(count_table.filtered)[which(!(
rownames(count_table.filtered) %in% full_deg))]
# calculate TP and FP genes in each test
list_TP_FP_genes = lapply(deg, function(rep_i) {
lapply(rep_i, function(comb_i) {
TP_genes_i = comb_i[comb_i %in% full_deg]
FP_genes_i = comb_i[!(comb_i %in% full_deg)]
TPR_i = length(TP_genes_i)/length(full_deg)
FPR_i = length(FP_genes_i)/length(full_nondeg)
return(list(TP_genes=TP_genes_i, FP_genes=FP_genes_i))
})
})
# TP_FP_genes is a list of tests each containing two lists of
# TP and FP genes
TP_FP_genes = unlist(list_TP_FP_genes, recursive = FALSE)
# calculate TPR, FPR
TPR = vapply(TP_FP_genes, function(x) {
TPR_i = length(x$TP_genes)/length(full_deg)
}, FUN.VALUE = numeric(1), USE.NAMES = FALSE)
FPR = vapply(TP_FP_genes, function(x) {
FPR_i = length(x$FP_genes)/length(full_nondeg)
}, FUN.VALUE = numeric(1), USE.NAMES = FALSE)
replicate = vapply(names(TP_FP_genes), function(x) {
rep_i = strsplit(x, split='\\.')[[1]][1]
}, FUN.VALUE = character(1), USE.NAMES = FALSE)
combination = vapply(names(TP_FP_genes), function(x) {
comb_i = strsplit(x, split='\\.')[[1]][2]
}, FUN.VALUE = character(1), USE.NAMES = FALSE)
# dataframe for plotting
TPR_FPR.dataframe = data.frame(TPR=TPR, FPR=FPR, replicate=replicate,
combination=combination)
TPR_FPR.dataframe$replicate=factor(TPR_FPR.dataframe$replicate,
levels=unique(TPR_FPR.dataframe$replicate))
# plot
getPalette = colorRampPalette(
brewer.pal(9, "Spectral"))
rep_colors = getPalette(length(names(deg)))
rep_colors = c(rep_colors[1:length(rep_colors)-1],'#999999')
gg = ggplot(TPR_FPR.dataframe,
aes(FPR, TPR, colour=replicate)) +
geom_point(size=2) +
theme_bw(base_size=14) +
labs(x='FPR', y="TPR", colour ='Number of \nreplicates') +
geom_point(data=aggregate(TPR_FPR.dataframe[,1:2],
list(TPR_FPR.dataframe$replicate),
FUN=stat),
size=3, shape=23,
color='black', fill=rep_colors,
stroke = 1) +
scale_color_manual(values = rep_colors) +
ylim(0,1)
if (save_plot==TRUE){
# create dir to save results
folder_path = file.path(path)
dir.create(folder_path, showWarnings = FALSE)
# save plot
ggsave(filename=file.path(path,'ERSSA_plot_4_FPRvTPRPlot.png'),
plot=gg, dpi=300, width = 20,
height = 15, units = "cm")
}
return(list(gg_object=gg, TPR_FPR.dataframe = TPR_FPR.dataframe,
list_TP_FP_genes = list_TP_FP_genes))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.