R/PAC_pie.R

Defines functions PAC_pie

Documented in PAC_pie

#' Pie plot from PAC
#'
#' \code{PAC_pie} analyses nucleotide bias.
#'
#' Given a PAC object the function will summarize the counts into percent
#' biotype and plot a pie chart.
#'
#' @family PAC analysis
#'
#' @seealso \url{https://github.com/Danis102} for updates on the current
#'   package.
#'
#' @param PAC PAC-list object.
#' 
#' @param anno_target List with: 
#'                          1st object being character vector of target
#'                          column(s) in Anno, 2nd object being a character
#'                          vector of the target biotype(s) in the target column
#'                          (1st object). Important, the 2nd object is order
#'                          sensitive, meaning that categories will appear in
#'                          the same order in the pie. (default=NULL)
#'
#'
#' @param pheno_target List with: 
#'                          1st object being character vector of target
#'                          column(s) in Pheno, 2nd object being a character
#'                          vector of the target group(s) in the target column
#'                          (1st object). Important, the 2nd object is order
#'                          sensitive, meaning that categories will appear in
#'                          the same order in the pie. (default=NULL)
#'                          
#' @param summary Character defining whether to stack individual samples or
#'   using a mean of samples. If summary="samples" individual samples will be
#'   plotted (default). If summary="pheno" means of the sample groups targeted
#'   by the pheno_target input will be plotted. If summary="all" mean of all
#'   samples will be plotted.
#'
#' @param colors Character vector RGB color codes as generated by for example
#'   grDevices::colorRampPalette. If colors=NULL (default), colors will be
#'   generated using a default color palette. Important: In default mode,
#'   categories named "other" or no_anno" will automatically receive a grey
#'   color.
#'
#' @param angle Integer (positive or negative) that sets the rotation of
#'   the pie.
#'   
#' @param labels Character that sets what labels to plot in the actual pie
#'   besides the legend. If lables="all" (default), then the anno_target variables will be
#'   combined with percentage. If lables="percent" only percentages will be
#'   plotted with the pie. If labels="none", no lables will be present
#'   besides the legend.
#'   
#' @param no_anno Logical whether PAC sequences without an annotation should be
#'   removed prior to plotting. Specifically, if no_anno=FALSE, then sequences
#'   annotated with "no_anno" in the anno_target column will be removed prior to
#'   plotting. When no_anno=TRUE (default), then all sequences will be included
#'   (unless excluded in the anno_target object).
#'   
#' @return A pie plot
#'
#' @examples
#' 
#' 
#' ##########################################
#' ### Pie charts in seqpac 
#' ##----------------------------------------
#' 
#' load(system.file("extdata", "drosophila_sRNA_pac_filt_anno.Rdata", 
#'                   package = "seqpac", mustWork = TRUE))
#' 
#' # Choose an anno_target and plot samples (summary="samples"; default)
#' output_pie <- PAC_pie(pac, anno_target=list("Biotypes_mis0"))
#' output_pie[[1]]
#' output_pie[[6]]
#' 
#' # Summary="all" will give a mean of all samples:
#' PAC_pie(pac, anno_target=list("Biotypes_mis0"), summary="all")
#' # Rotate:
#' PAC_pie(pac, anno_target=list("Biotypes_mis0"), summary="all", angle=180)
#' 
#' 
#' #  Make ordered pie charts of grand mean percent of all samples
#' ord_bio <- as.character(sort(unique(anno(pac)$Biotypes_mis3)), 
#'                                     unique(anno(pac)$Biotypes_mis0))
#' output_pie_1 <- PAC_pie(pac, anno_target=list("Biotypes_mis0", ord_bio), 
#'                         summary="all")
#' output_pie_2 <- PAC_pie(pac, anno_target=list("Biotypes_mis3", ord_bio), 
#'                         summary="all")
#' cowplot::plot_grid(plotlist=c(output_pie_1, output_pie_2), nrow=2, 
#'                    scale = 1.0)
#' 
#' #  Shortcut to remove no annotations ("no_anno") in the anno_target column
#' PAC_pie(pac, anno_target=list("Biotypes_mis3"), summary="all", no_anno=TRUE)
#' PAC_pie(pac, anno_target=list("Biotypes_mis3"), summary="all", no_anno=FALSE)
#' 
#' @export


PAC_pie <- function(PAC, anno_target=NULL, pheno_target=NULL, colors=NULL, 
                    labels="all", no_anno=TRUE, summary="sample", angle=-25){
  
  ## Check S4
  if(isS4(PAC)){
    tp <- "S4"
    PAC <- as(PAC, "list")
  }else{
    tp <- "S3"
  }
  
  quiet <- function(x) { 
    sink(tempfile()) 
    on.exit(sink()) 
    invisible(force(x)) 
    }
  
  stopifnot(PAC_check(PAC))
  types <- variables <- NULL
  
  ## Prepare targets
  if(!is.null(pheno_target)){ 
    if(length(pheno_target)==1){ 
      pheno_target[[2]] <- as.character(unique(PAC$Pheno[,pheno_target[[1]]]))
    }
  }else{
    PAC$Pheno$eXtra_Col <- rownames(PAC$Pheno)
    pheno_target <- list(NA)
    pheno_target[[1]] <-  "eXtra_Col"
    pheno_target[[2]] <-  PAC$Pheno$eXtra_Col
    }
  if(!is.null(anno_target)){ 
    if(length(anno_target)==1){ 
      anno_target[[2]] <- as.character(unique(PAC$Anno[,anno_target[[1]]]))
    }
  }

  ## Subset
  PAC_sub <- PAC_filter(PAC, subset_only=TRUE, pheno_target=pheno_target, 
                        anno_target=anno_target)
  anno <- PAC_sub$Anno
  pheno <- PAC_sub$Pheno
  data <- PAC_sub$Counts
  
  ### Removing no_Anno
  if(no_anno==FALSE){
    data <- data[!as.character(anno[, anno_target[[1]]]) == "no_anno",]
    anno <- anno[!as.character(anno[, anno_target[[1]]]) == "no_anno",]
  }
  
  ### Totaling all counts per sample and sums each per biotype
  if(summary=="all"){
    tot_cnts <- mean(colSums(data))
    names(tot_cnts) <- "all"
    data_shrt <- stats::aggregate(data, list(anno[, anno_target[[1]]]), "sum")
    data_shrt <- data.frame(Group.1=data_shrt[,1], 
                            all= rowMeans(data_shrt[,-1]))
    
  }else{
  
    if(summary %in% c("sample","samples","pheno", "Pheno")){
      tot_cnts <- colSums(data)
      data_shrt <- stats::aggregate(data, list(anno[, anno_target[[1]]]), "sum")
    }
    
    if(summary %in% c("pheno", "Pheno")){
      x <- split(pheno, pheno[, pheno_target[[1]]])
      data_pheno_shrt <- as.data.frame(data_shrt$Group.1)
     for(i in seq.int(length(x))){
       names <- as.data.frame(x[i])
       names <- rownames(names)
       data_pheno <- data_shrt[, colnames(data_shrt) %in% names]
       data_pheno <- rowMeans(data_pheno)
       data_pheno_shrt <- as.data.frame(cbind(data_pheno_shrt,data_pheno))}
    colnames(data_pheno_shrt) <- c("Group.1", names(x))
    data_shrt <- data_pheno_shrt
    tot_cnts <- colSums(data_shrt[,-1])
    }
  }  
  
  data_shrt_perc <- data_shrt
  data_shrt_perc[,-1] <- "NA"
  for (i in seq.int(length(tot_cnts))){ 
    data_shrt_perc[,1+i]   <- data_shrt[,1+i]/tot_cnts[i]
  }
  
  data_long_perc <- reshape2::melt(data_shrt_perc, id.vars="Group.1")
  colnames(data_long_perc) <- c("Category", "Sample", "Percent")
  data_long_perc$Percent <- data_long_perc$Percent*100

  ## Fix order
  # Anno
  bio <- anno_target[[2]] 
  extra  <- which(bio %in% c("no_anno", "other"))
  bio <- bio[c(extra, (seq.int(length(bio)))[!seq.int(length(bio)) %in% extra])] 
  data_long_perc$Category <- factor(as.character(data_long_perc$Category), 
                                    levels=bio)
    
  # Pheno
  if(is.null(pheno_target)){
    data_long_perc$Sample <- factor(
      as.character(data_long_perc$Sample), 
      levels=as.character(unique(data_long_perc$Sample)))
  }else{
    if(summary %in% c("sample","samples")){
    stopifnot(
      any(!rownames(PAC$Pheno) %in% as.character(data_long_perc$Sample))==FALSE
      )
    sampl_ord <- do.call("c", split(rownames(PAC$Pheno), 
                                    factor(PAC$Pheno[,pheno_target[[1]]], 
                                           levels=pheno_target[[2]])))
    data_long_perc$Sample <- factor(as.character(data_long_perc$Sample), 
                                    levels=as.character(sampl_ord))
    data_long_perc <- data_long_perc[!is.na(data_long_perc$Sample),,drop=FALSE]
    }
  }

  ### Plot data
  if(is.null(colors)){
    n_extra  <- length(extra)
    colfunc <- grDevices::colorRampPalette(c("#094A6B", "#EBEBA6", "#9D0014"))
    if(n_extra==1){colors <- c(colfunc(length(bio)-1), "#6E6E6E")}
    if(n_extra==2){colors <- c(colfunc(length(bio)-2), "#6E6E6E", "#BCBCBD")}
    if(n_extra==0){colors <- colfunc(length(bio))}
  }
  
  prec_lst <- split(data_long_perc, data_long_perc$Sample)   
  prec_lst <- lapply(prec_lst, function(x){
    x <- x[match(levels(x$Category), x$Category),,drop=FALSE]
    x$Category <- factor(levels(x$Category), levels=levels(x$Category))
    x$Percent[is.na(x$Percent)] <- 0
    return(x)
  })
  plt_lst<- quiet(lapply(prec_lst, function(x){
    # setup labels
    if(labels=="all"){
      labs <- paste0(x$Category, "_", round(x$Percent, digits=0), "%")
    }
    if(labels=="percent"){  
      labs <- paste0(round(x$Percent, digits=0), "%")
    }
    if(labels=="none"){ 
      labs <- NA 
    }
    # plot and record
    p1  <- graphics::pie(x$Percent, 
               labels=labs, 
               col=rev(colors), init.angle = angle)
    print(p1)
    rp <- grDevices::recordPlot()
    return(rp)
    }))
  graphics::plot.new()
  df <- data.frame(types=prec_lst[[1]]$Category, 
                   variables=prec_lst[[1]]$Category, 
                   levels=levels(prec_lst[[1]]$Category)) 
  leg <- cowplot::get_legend(ggplot2::ggplot(
    df, ggplot2::aes(x=types, fill=variables)) + 
                               ggplot2::geom_bar(color="black") + 
                               ggplot2::scale_fill_manual(values=rev(colors)))
  return(c(plt_lst, list(legend=leg)))
}
Danis102/seqpac documentation built on Aug. 26, 2023, 10:15 a.m.