# R/plot_dotplot.R

# Defines functions: ggplot2_dotplot

# Documented in: ggplot2_dotplot

#' @title Plot number of DE genes
#'
#' @description
#' \code{ggplot2_dotplot} function plots the number of differentially expressed
#' (DE) genes in each test.
#'
#' @details
#' The number of DE genes are plotted as dots grouped by the associated
#' replicate level. At each replicate level, a boxplot is drawn to mainly show
#' the first and third quartiles as well as the median. Additionally,
#' a red line is drawn representing the mean at each replicate level. A
#' horizontal dashed blue line represents the number of DE genes found with
#' all samples.
#'
#' @param deg The list of DE genes generated by one of ERSSA::DE_*.R scripts.
#' @param path Path to which the plot will be saved. Default to current working
#' directory.
#' @param save_plot Boolean. Whether to save plot to drive. Default to TRUE.
#'
#' @return A list is returned containing:
#'  \describe{
#'   \item{gg_object}{the ggplot2 object, which can then be further
#'   customized.}
#'   \item{deg_dataframe}{the tidy table version of DEG numbers for plotting.}
#'   \item{full_num_DEG}{the number of DE genes with all samples included.}
#' }
#' @author Zixuan Shao, \email{Zixuanshao.zach@@gmail.com}
#'
#' @examples
#' # load edgeR deg object generated by erssa_edger using example dataset
#' # example dataset containing 1000 genes, 4 replicates and 5 comb. per rep.
#' # level
#' data(deg.partial, package = "ERSSA")
#'
#' gg_dot = ggplot2_dotplot(deg.partial)
#'
#' @references
#' H. Wickham. ggplot2: Elegant Graphics for Data Analysis.
#' Springer-Verlag New York, 2009.
#'
#' @export
#'
#' @import ggplot2


ggplot2_dotplot = function(deg=NULL, path='.', save_plot=TRUE){
    # Plot the number of DE genes found in every entry of `deg`, grouped by
    # replicate level, and optionally save the figure as a PNG under `path`.
    # Returns a list with the ggplot object (gg_object), the tidy data frame
    # used for plotting (deg_dataframe) and the DE gene count of the full
    # dataset (full_num_DEG).

    if (is.null(deg)){
        stop('Missing required deg argument in ggplot2_dotplot function')
    }

    # fail early with a clear message instead of an opaque replacement error
    # further down when deg is not a list
    if (!is.list(deg)){
        stop('deg argument must be a list in ggplot2_dotplot function')
    }

    # number of deg in full dataset; `[[` matches names exactly whereas `$`
    # would partially match any element whose name merely starts with 'full'
    full_num_DEG = length(deg[['full']][['comb_1']])

    # collapse deg list one level (entry names become '<outer>.<inner>') and
    # drop the full-dataset entry so only the subsampled tests remain
    deg_one_list = unlist(deg, recursive = FALSE)
    deg_one_list[['full.comb_1']] = NULL

    num_DEG = vapply(deg_one_list, length, FUN.VALUE = numeric(1),
                     USE.NAMES = FALSE)

    # return the token following '_' in the idx-th '.'-separated field of an
    # entry name; NA when the name does not have that field/token
    parse_id = function(entry_name, idx) {
        field = strsplit(entry_name, split='\\.')[[1]][idx]
        strsplit(field, split='\\_')[[1]][2]
    }

    entry_names = names(deg_one_list)
    rep_level = vapply(entry_names, parse_id, idx = 1,
                       FUN.VALUE = character(1), USE.NAMES = FALSE)
    comb_ID = vapply(entry_names, parse_id, idx = 2,
                     FUN.VALUE = character(1), USE.NAMES = FALSE)

    # create data frame for plotting; factor levels follow order of first
    # appearance so replicate levels keep the order found in deg
    deg_df = data.frame(num_DEG = num_DEG, rep_level = rep_level,
                        comb_ID = comb_ID)
    deg_df$rep_level = factor(deg_df$rep_level, levels =
                                  unique(deg_df$rep_level))

    # plot DE genes at each replicate level
    # NOTE(review): `size` on line layers is reported as deprecated in favour
    # of `linewidth` by recent ggplot2 releases; kept as-is here so older
    # ggplot2 versions render the same line widths - confirm minimum version.
    gg = ggplot(deg_df, aes(rep_level, num_DEG)) +
        # outliers are hidden in the boxplot because every point is already
        # drawn by the jitter layer
        geom_boxplot(outlier.shape = NA) +
        geom_jitter(height = 0, width=0.1) +
        theme_bw(base_size=14) +
        # reference line: number of DE genes found with the full dataset
        geom_hline(aes(yintercept=full_num_DEG,
                       color = "Full dataset"), size=0.75,
                   linetype="dashed", show.legend = TRUE) +
        # mean of num_DEG per replicate level, joined as one line (group=1);
        # `fun` replaces the deprecated `fun.y` argument
        stat_summary(aes(rep_level, num_DEG, colour="Mean"),
                     group=1, fun='mean', geom='line', size=1) +
        scale_colour_manual(values=c("Mean"="red", "Full dataset"="blue"))+
        labs(x='Replicate number', y='Number of DE genes', colour="") +
        # linetypes are given in legend-key order; presumably
        # "Full dataset" then "Mean" - verify against the rendered legend
        guides(color = guide_legend(
            override.aes = list(linetype = c("dashed", "solid"))))

    if (save_plot==TRUE){

        # create dir to save results, including any missing parent
        # directories so that a nested path does not make ggsave fail
        if (!dir.exists(path)){
            dir.create(path, recursive = TRUE, showWarnings = FALSE)
        }

        # save plot
        ggsave(filename=file.path(path,'ERSSA_plot_1_NumOfDEGenes.png'),
               plot=gg, dpi=300, width = 20,
               height = 15, units = "cm")

    }

    return(list(gg_object=gg, deg_dataframe = deg_df,
                full_num_DEG = full_num_DEG))
}
# zshao1/ERSSA documentation built on July 19, 2023, 9:20 p.m.