R/PAC_jitter.R

Defines functions PAC_jitter

Documented in PAC_jitter

#' Plots jitter plot from PAC object
#'
#' \code{PAC_jitter} Plots a jitter plot using the information in the Anno and
#' Counts tables in a PAC object.
#'
#' Given a PAC object with grouped summaries the function will use column(s) in
#' the Anno object to group the Counts table by row and then plot a jitter plot
#' based on that
#'
#' @family PAC analysis
#'
#' @seealso \url{https://github.com/Danis102} for updates on the current
#'   package.
#'
#' @param PAC PAC-object containing Anno and a summary list-folder with
#'   summarized dataframe(s) for example generated by PAC_summary. The function
#'   can also be applied on a dataframe containing a column with annotation
#'   groupings (e.g. biotype) and a column with summarized data (e.g. log2 fold
#'   changes). Rows should be unique sequences as the rownames of a PAC summary
#'   object.
#'
#' @param type Character. If type="jitter" (default) the jitter-plots will be
#'   returned. If type="violine", a violin-plot will instead the returned.
#'
#' @param box Logical whether a boxplot should be plotted or not (default=TRUE)
#'   
#' @param summary_target List with: 1st object being a character vector of the
#'   target dataframe in summary and 2nd object being a character vector of the
#'   target column(s) in that dataframe. In case of input being a dataframe,
#'   summary_target can be a character vector indicating the column with the
#'   summarized data.
#'
#' @param anno_target Character vector with the name of the target column in
#'   Anno or the name of the annotation column in case of input being a
#'   dataframe.
#'
#' @param limits Integer vector with the y-limits to be parsed to ggplot2.
#'
#' @param ypos_n Integer setting the y position of the n counts to be parsed to
#'   ggplot2.
#'
#' @param colors Character vector with color codes to be parsed to ggplot2.
#'
#' @return A plot-list object with jitter plots generated by ggplot2.
#'
#' @examples
#' 
#' ## Prepare
#' load(system.file("extdata", "drosophila_sRNA_pac_filt_anno.Rdata", 
#'                   package = "seqpac", mustWork = TRUE))
#' pac <- PAC_norm(pac, norm="cpm")
#' 
#' pac <- PAC_summary(PAC=pac, norm = "cpm", type = "log2FC", 
#'                    pheno_target=list("stage"))
#' pac <- PAC_summary(PAC=pac, norm = "cpm", type = "percentgrand", 
#'                    pheno_target=list("stage"))
#' 
#' ## Jitter plots
#' plots_FC <- PAC_jitter(pac, summary_target=list("Log2FC_stage"), 
#'                        anno_target=list("Biotypes_mis0"))
#' plots_FCgrand <- PAC_jitter(pac, summary_target=list("percGrand_stage"), 
#'                             anno_target=list("Biotypes_mis0"))
#' 
#' cowplot::plot_grid(plotlist=plots_FC, nrow = 3, ncol = 1)
#' cowplot::plot_grid(plotlist=plots_FCgrand, nrow = 3, ncol = 1) 
#'
#' ## Violin plots instead
#' plots_FC <- PAC_jitter(pac, type="violin", 
#'                        summary_target=list("Log2FC_stage"), 
#'                        anno_target=list("Biotypes_mis0"))
#' plots_FCgrand <- PAC_jitter(pac, type="violin", 
#'                             summary_target=list("percGrand_stage"), 
#'                             anno_target=list("Biotypes_mis0"))
#'
#' cowplot::plot_grid(plotlist=plots_FC, nrow = 3, ncol = 1)
#' cowplot::plot_grid(plotlist=plots_FCgrand, nrow = 3, ncol = 1)  
#'
#'
#' ## Violin with changed biotype order
#' new_order  <- as.character(unique(anno(pac)$Biotypes_mis0))[c(2,4,3,6,7,5,1)]
#' plots_FC <- PAC_jitter(pac, type="violin", 
#'                        summary_target=list("Log2FC_stage"), 
#'                        anno_target=list("Biotypes_mis0", new_order))
#' cowplot::plot_grid(plotlist=plots_FC, nrow = 3, ncol = 1)
#' 
#' 
#' @export
#'
PAC_jitter <- function(PAC, summary_target=NULL, anno_target=NULL, 
                       type="jitter", limits=NULL, ypos_n=NULL, colors=NULL, 
                       box=TRUE){
  
   values <- biotype <-  NULL
  
  ## Check S4
  if(isS4(PAC)){
    tp <- "S4"
    PAC <- as(PAC, "list")
  }else{
    tp <- "S3"
  }
  
  if(!PAC_check(PAC)){
    stop("Input was not a PAC object.")
    }
  
  if(is.null(summary_target[[1]])){
    stop(
      "\nYou need to specify a target in PAC@summary with summary_target.")
    }
  if(is.null(names(PAC$summary[[summary_target[[1]]]]))){
    stop(
      "\nYou need to specify a valid summary_target.",
      "\n(Hint: Double check correct object name in",
      "\nPAC@summary or rerun the 'PAC_summary' function.)"
      )
    }
  if(length(summary_target)==1){
    summary_target[[2]] <- names(PAC$summary[[summary_target[[1]]]])
    }
  
  ann <- PAC$Anno
  if(is.null(anno_target[[1]])){
    warning(
      "No anno_target was specified.",
      "\nJitter plot will not be divided into biotypes.")
    ann$All <- "All"
    anno_target <- list("All", "All")}
  if(length(colnames(PAC$Anno)[anno_target[[1]]]) <1){
    stop(
      "\nYou need to specify a valid anno_target.",
      "\n(Hint: Double check correct column name in PAC@Anno.)")
    }
  if(length(anno_target) ==1){
    anno_target[[2]] <- unique(PAC$Anno[,anno_target[[1]]])
    }
  ann_filt <- ann[,anno_target[[1]]] %in% anno_target[[2]]
  df <- PAC$summary[[summary_target[[1]]]]
  df <- df[,colnames(df) %in% summary_target[[2]],drop=FALSE]
  df <- df[ann_filt,,drop=FALSE]
  ann <- ann[,anno_target[[1]], drop=FALSE]
  ann <- ann[ann_filt,,drop=FALSE]
  stopifnot(identical(rownames(ann), rownames(df))) 
  
  if(grepl("percent", summary_target[[1]])){
    cutoff <- 100
  } else {
      cutoff <- 0
  }
  perc_up <- stats::aggregate(
    df, list(as.character(ann[,1])), function(x){ 
      perc_up <- sum(as.numeric(x > 0))/length(x)
  perc_up <- round(perc_up, digits=3)*100
  return(perc_up)
  })
  perc_up <-  perc_up[match(
    as.character(anno_target[[2]]), as.character(perc_up$Group.1)),]
  ## Make graphs
  if(is.null(colors)){
    bio <- as.character(unique(ann[,1]))
    n_extra  <- sum(bio %in% c("no_anno", "other"))
    colfunc <- grDevices::colorRampPalette(c("#094A6B", "#FFFFCC", "#9D0014"))
    if(n_extra==1){colors <- c(colfunc(length(bio)-1), "#6E6E6E")}
    if(n_extra==2){colors <- c(colfunc(length(bio)-2), "#6E6E6E", "#BCBCBD")}
    if(n_extra==0){colors <- colfunc(length(bio))}
  }
  plt_lst <- as.list(seq.int(ncol(df)))
  names(plt_lst) <- colnames(df)
  plt_lst <- plt_lst[match(summary_target[[2]], names(plt_lst))]
  plot_lst <- lapply(plt_lst, function(num){ 
    data <- data.frame(biotype=ann[,1], values=df[,num])
    data$biotype <- factor(data$biotype, levels=anno_target[[2]])
    mima  <- c(min(data$values), max(data$values))
    max_lim <- max(sqrt(mima^2))*1.3
    if(is.null(ypos_n)){ypos_n <- max(sqrt(mima^2))*1.35}
    if(is.null(limits)){limits <- c((mima[1]*1.2),  max_lim)}
    if(is.null(limits)){limits <- c((mima[1]*1.2),  max_lim)}
    
    if(type=="jitter"){
      p <- ggplot2::ggplot(
        data, ggplot2::aes(x=biotype, y=values, col=biotype, fill=biotype))+
        ggplot2::geom_hline(yintercept=0, col="#707177", cex=0.6) +
        ggplot2::geom_jitter(position=ggplot2::position_jitter(0.2), cex=1.5)+
        ggplot2::stat_summary(geom = "crossbar", fun="median", 
                              fun.max = "median", fun.min = "median",
                              width=0.7, cex=0.4, position = "identity", 
                              col="Black") +
        ggplot2::geom_text(stat="count", ggplot2::aes(
            label=paste0("n=", ggplot2::after_stat(count) , "\nup:", perc_up[,num+1], "%")), 
            size=3.5, y=ypos_n, col="Black") +
        ggplot2::labs(title=paste0(colnames(df)[num]) , x="Biotype" , 
                      y =  paste0(summary_target[[1]]) ) +
        ggplot2::theme_classic()+
        ggplot2::coord_cartesian(ylim =limits) +
        ggplot2::theme(legend.position="none", 
                       axis.title.x = ggplot2::element_text(size=15), 
                       axis.text.x = ggplot2::element_text(angle = 45, 
                                                           hjust = 0.95, 
                                                           size=13), 
                       axis.title.y = ggplot2::element_text(size=15) , 
                       axis.text.y = ggplot2::element_text(size=13))+
        ggplot2::scale_color_manual(values=colors)

      if(box==TRUE){
        p <- p+ggplot2::geom_boxplot(width=0.3, fill="white", col="black", 
                                     alpha=0.7,  outlier.shape = NA)
        }

    }
    if(type=="violin"){
      p <- ggplot2::ggplot(
        data, ggplot2::aes(x=biotype, y=values, col=biotype, fill=biotype))+
        ggplot2::geom_hline(yintercept=0, col="#707177", cex=0.6) +
        ggplot2::geom_violin(width=0.9, trim=TRUE, scale="width", 
                             color="black")+
        ggplot2::geom_text(stat="count", ggplot2::aes(
          label=paste0("n=", ggplot2::after_stat(count) , "\nup:", perc_up[,num+1], "%")), 
          size=3.5, y=ypos_n, col="Black") +
        ggplot2::labs(title=paste0(colnames(df)[num]) , x="Biotype" , 
                      y =  paste0(summary_target[[1]]) ) +
        ggplot2::theme_classic()+
        ggplot2::scale_y_continuous(limits =limits) +
        ggplot2::theme(legend.position="none", 
                       axis.title.x = ggplot2::element_text(size=15), 
                       axis.text.x = ggplot2::element_text(angle = 45, 
                                                           hjust = 0.95, 
                                                           size=13), 
                       axis.title.y = ggplot2::element_text(size=15),
                       axis.text.y = ggplot2::element_text(size=13))+
        ggplot2::scale_fill_manual(values=colors)
 
      if(box==TRUE){
        p <- p + ggplot2::geom_boxplot(width=0.3, fill="white", col="black", 
                                       alpha=0.7,  outlier.shape = NA)}
    }
    return(p)
  })
  return(plot_lst)
}
Danis102/seqpac documentation built on Aug. 26, 2023, 10:15 a.m.