R/plot_TPR_FPR.R

Defines functions ggplot2_TPR_FPRPlot

Documented in ggplot2_TPR_FPRPlot

#' @title Plot TPR and FPR of each combination
#'
#' @description
#' \code{ggplot2_TPR_FPR} function uses the full dataset list of DE genes as the
#'  ground truth to calculate the True Positive Rate (TPR) and False Positive
#'  Rate (FPR) for each sample combinations tested. The TPR and FPR are then
#'  plotted with FPR on x-axis and TPR on y-axis similar to a ROC curve.
#'
#' @details
#' Using the list of DE genes generated from the full dataset as the ground
#' truth should be done with caution. Since the true list of DE genes is not
#' known, this is the best alternative. This plot enables the visualization of
#' the sensitivity and (1-specificity) of the DE gene detection at the tested
#' replicate levels. At a sufficient replicate level, a relatively high TPR can
#' be reached with reasonable low FPR. Such a replicate level is sufficient for
#' most studies as additional replicates produce little improvement in TPR.
#'
#' @param deg The list of DE genes generated by one of ERSSA::DE_*.R scripts.
#' @param count_table.filtered The filtered count table with non- and
#' low-expression genes removed. Used to identify the genes found to be non-DE.
#' @param stat The statistics used to summarize TPR and FPR at each replicate
#' level. Options include 'mean' and 'median'. Default = 'mean'.
#' @param path Path to which the plot will be saved. Default to current working
#' directory.
#' @param save_plot Boolean. Whether to save plot to drive. Default to TRUE.
#'
#' @return A list is returned containing:
#'  \itemize{
#'   \item{gg_object} {the ggplot2 object, which can then be further
#'   customized.}
#'   \item{TPR_FPR.dataframe} {the tidy table used for plotting.}
#'   \item{list_TP_FP_genes} {lists of TP and FP genes. Follow the format of
#'   deg object with each comb_n now as a list contain two vectors, one for
#'   each of TP and FP list of DE genes}
#' }
#' @author Zixuan Shao, \email{Zixuanshao.zach@@gmail.com}
#'
#' @examples
#' # load edgeR deg object generated by erssa_edger using example dataset
#' # example dataset containing 1000 genes, 4 replicates and 5 comb. per rep.
#' # level
#' data(deg.partial, package = "ERSSA")
#' data(count_table.filtered.partial, package = "ERSSA")
#'
#' gg_TPR_FPR = ggplot2_TPR_FPRPlot(deg.partial, count_table.filtered.partial)
#'
#' @references
#' H. Wickham. ggplot2: Elegant Graphics for Data Analysis.
#' Springer-Verlag New York, 2009.
#'
#' @export
#'
#' @importFrom grDevices colorRampPalette
#' @importFrom RColorBrewer brewer.pal
#' @importFrom stats aggregate
#' @import ggplot2


ggplot2_TPR_FPRPlot = function(deg=NULL, count_table.filtered=NULL,
                               stat='mean', path='.', save_plot=TRUE){

    if (is.null(deg)){
        stop('Missing required deg argument in ggplot2_TPR_FPRPlot function')
    } else if (is.null(count_table.filtered)){
        stop(paste0('Missing required count_table.filtered argument in ',
                    'ggplot2_TPR_FPRPlot function'))
    } else if (!(is.data.frame(count_table.filtered))){
        stop('count_table is not an expected data.frame object')
    } else if (!(stat %in% c('mean', 'median'))){
        stop(paste0('Only mean or median summary statistics supported in',
                    'ggplot2_TPR_FPRPlot function'))
    } else if (length(unique(sapply(count_table.filtered, class)))!=1){
        stop(paste0('More than one data type detected in count table, please ',
                    'make sure count table contains only numbers and that the ',
                    'list of gene names is the data.frame index'))
    }

    # DEG list at full
    full_deg = deg$full$comb_1

    # non-DEG list at full
    full_nondeg = rownames(count_table.filtered)[which(!(
        rownames(count_table.filtered) %in% full_deg))]

    # calculate TP and FP genes in each test
    list_TP_FP_genes = lapply(deg, function(rep_i) {

        lapply(rep_i, function(comb_i) {

            TP_genes_i = comb_i[comb_i %in% full_deg]
            FP_genes_i = comb_i[!(comb_i %in% full_deg)]
            TPR_i = length(TP_genes_i)/length(full_deg)
            FPR_i = length(FP_genes_i)/length(full_nondeg)

            return(list(TP_genes=TP_genes_i, FP_genes=FP_genes_i))
        })

    })

    # TP_FP_genes is a list of tests each containing two lists of
    # TP and FP genes
    TP_FP_genes = unlist(list_TP_FP_genes, recursive = FALSE)

    # calculate TPR, FPR
    TPR = vapply(TP_FP_genes, function(x) {
        TPR_i = length(x$TP_genes)/length(full_deg)
    }, FUN.VALUE = numeric(1), USE.NAMES = FALSE)

    FPR = vapply(TP_FP_genes, function(x) {
        FPR_i = length(x$FP_genes)/length(full_nondeg)
    }, FUN.VALUE = numeric(1), USE.NAMES = FALSE)

    replicate = vapply(names(TP_FP_genes), function(x) {
        rep_i = strsplit(x, split='\\.')[[1]][1]
    }, FUN.VALUE = character(1), USE.NAMES = FALSE)

    combination = vapply(names(TP_FP_genes), function(x) {
        comb_i = strsplit(x, split='\\.')[[1]][2]
    }, FUN.VALUE = character(1), USE.NAMES = FALSE)


    # dataframe for plotting
    TPR_FPR.dataframe = data.frame(TPR=TPR, FPR=FPR, replicate=replicate,
                                   combination=combination)
    TPR_FPR.dataframe$replicate=factor(TPR_FPR.dataframe$replicate,
                                   levels=unique(TPR_FPR.dataframe$replicate))

    # plot
    getPalette = colorRampPalette(
        brewer.pal(9, "Spectral"))
    rep_colors = getPalette(length(names(deg)))
    rep_colors = c(rep_colors[1:length(rep_colors)-1],'#999999')

    gg = ggplot(TPR_FPR.dataframe,
                aes(FPR, TPR, colour=replicate)) +
        geom_point(size=2) +
        theme_bw(base_size=14) +
        labs(x='FPR', y="TPR", colour ='Number of \nreplicates') +
        geom_point(data=aggregate(TPR_FPR.dataframe[,1:2],
                                  list(TPR_FPR.dataframe$replicate),
                                  FUN=stat),
                   size=3, shape=23,
                   color='black', fill=rep_colors,
                   stroke = 1) +
        scale_color_manual(values = rep_colors) +
        ylim(0,1)


    if (save_plot==TRUE){

        # create dir to save results
        folder_path = file.path(path)
        dir.create(folder_path, showWarnings = FALSE)

        # save plot
        ggsave(filename=file.path(path,'ERSSA_plot_4_FPRvTPRPlot.png'),
               plot=gg, dpi=300, width = 20,
               height = 15, units = "cm")
    }


    return(list(gg_object=gg, TPR_FPR.dataframe = TPR_FPR.dataframe,
                list_TP_FP_genes = list_TP_FP_genes))
}

Try the ERSSA package in your browser

Any scripts or data that you put into this service are public.

ERSSA documentation built on Nov. 8, 2020, 7:44 p.m.