# R/plot_marginalPlot.R
#
# Defines functions: ggplot2_marginPlot
#
# Documented in: ggplot2_marginPlot

#' @title Plot percent increase in detection of DE genes across replicate levels
#'
#' @description
#' \code{ggplot2_marginPlot} function plots the percent change in number of DE
#' genes identified at each step-wise increase in replicate level.
#'
#' @details
#' The percent change is calculated as (margin*100%)/(num. of DE genes at the
#' lower replicate level). The results are visualized as bar plots. Either mean
#' or median can be used for the calculation. If the mean/median number of DE
#' genes at the lower replicate level is 0, the percent change for that step is
#' not finite (\code{Inf} or \code{NaN}).
#'
#' @param deg The list of DE genes generated by one of ERSSA::DE_*.R scripts.
#' Must be a named list containing at least two replicate levels; the levels
#' are compared pair-wise in the order they appear in the list.
#' @param stat The statistic used for plotting. Options include 'mean',
#' 'median'. Default='median'. Any other value raises an error.
#' @param path Path to which the plot will be saved. Default to current working
#' directory. The directory (including any missing parent directories) is
#' created if it does not exist.
#' @param save_plot Boolean. Whether to save plot to drive. Default to TRUE.
#'
#' @return A list is returned containing:
#'  \describe{
#'   \item{gg_object}{the ggplot2 object, which can then be further
#'   customized.}
#'   \item{marg_diff.dataframe}{the tidy table version of percent changes for
#'   plotting.}
#' }
#' @author Zixuan Shao, \email{Zixuanshao.zach@@gmail.com}
#'
#' @examples
#' # load edgeR deg object generated by erssa_edger using example dataset
#' # example dataset containing 1000 genes, 4 replicates and 5 comb. per rep.
#' # level
#' data(deg.partial, package = "ERSSA")
#'
#' gg_margin = ggplot2_marginPlot(deg.partial)
#'
#' @references
#' H. Wickham. ggplot2: Elegant Graphics for Data Analysis.
#' Springer-Verlag New York, 2009.
#'
#' @export
#'
#' @import ggplot2
#' @importFrom stats median

ggplot2_marginPlot <- function(deg=NULL, stat='median', path='.',
                               save_plot=TRUE){

    if (is.null(deg)){
        stop('Missing required deg argument in ggplot2_marginPlot function')
    }

    # validate the statistic once, before any work is done; the message is
    # assembled from pieces so it does not carry a line break and source
    # indentation into the error text
    if (!is.character(stat) || length(stat) != 1 ||
        !stat %in% c('mean', 'median')){
        stop('Only mean or median currently supported for plotting marginal ',
             'difference in Num. of DE genes.')
    }

    # a step-wise change needs two levels; seq(1, length(deg)-1) would count
    # down to 0 for a single level, so reject that input explicitly
    level_names <- names(deg)
    n_levels <- length(deg)
    if (n_levels < 2 || is.null(level_names)){
        stop('deg must be a named list with at least two replicate levels ',
             'to plot marginal difference in Num. of DE genes.')
    }

    # positions of the lower replicate level of every step
    lower <- seq_len(n_levels - 1)

    # create name for the step-change in replicate level
    name <- paste0(level_names[lower], ' \u2192 ', level_names[lower + 1])

    # mean/median number of DE genes at each replicate level, computed once
    # per level and accessed by position
    summarise_fun <- if (stat == 'mean') mean else median
    level_stat <- vapply(deg, function(level) {
        as.numeric(summarise_fun(lengths(level)))
    }, FUN.VALUE = numeric(1), USE.NAMES = FALSE)

    # calculate percent difference relative to the lower replicate level
    percent_diff <- (level_stat[lower + 1] - level_stat[lower]) * 100 /
        level_stat[lower]

    # round the diff and format as a bar label
    rounded_per_diff <- paste0(round(percent_diff, 1), '%')

    # dataframe for plotting; the factor levels pin the bar order to the
    # order of the replicate levels instead of alphabetical order
    per_diff_df <- data.frame(replicate=name, per_diff=percent_diff,
                              rounded_per_diff=rounded_per_diff)
    per_diff_df$replicate <- factor(per_diff_df$replicate, levels =
                                        per_diff_df$replicate)

    # plot; labels sit above positive bars and below negative bars
    gg <- ggplot(per_diff_df, aes_string(x = 'replicate', y='per_diff')) +
        geom_col(width=0.7) +
        theme_bw(base_size=14) +
        labs(x='', y=paste0('Percent change in ',stat,
                            ' number of DE genes')) +
        geom_text(aes(label = rounded_per_diff), vjust =
                      ifelse(per_diff_df$per_diff >= 0, -0.2, 1.2)) +
        theme(axis.text.x = element_text(angle = 45, hjust = 1))

    if (save_plot==TRUE){

        # create dir to save results; recursive so that a nested path whose
        # parents do not exist yet can still be written to
        folder_path <- file.path(path)
        dir.create(folder_path, showWarnings = FALSE, recursive = TRUE)

        # save plot
        ggsave(filename= file.path(folder_path,
                                   'ERSSA_plot_2_MarginalNumOfDEGenes.png'),
               plot=gg, dpi=300, width = 20,
               height = 15, units = "cm")

    }

    list(gg_object=gg, marg_diff.dataframe = per_diff_df)
}

# Try the ERSSA package in your browser
#
# Any scripts or data that you put into this service are public.
#
# ERSSA documentation built on Nov. 8, 2020, 7:44 p.m.