# R/barplotNul.R

# Defines functions barplotNul

# Documented in barplotNul

#' Bar plot of number of null counts per sample
#'
#' Draws a bar plot of the percentage of null counts per sample, with a dashed
#' horizontal line at the percentage of features dropped by \code{removeNul()}
#' (presumably the features with only null counts). A second figure with two
#' diagnostic panels (number of null counts per feature among the features kept
#' by \code{removeNul()}, and distribution of the non-zero feature means on a
#' log10 scale) is always written to
#' \code{figures/<versionName>-diagLowCounts.pdf}, whatever the value of
#' \code{out}.
#'
#' @param counts matrix of counts (features in rows, samples in columns)
#' @param group condition from which each sample belongs: either a vector/factor
#'   with one element per sample, or a data.frame/matrix with one row per sample
#'   whose columns are pasted together with \code{"-"} to build the group labels
#' @param out \code{TRUE} to export the bar plot to
#'   \code{figures/<versionName>-barplotNull.pdf}, \code{FALSE} to draw it on
#'   the current graphics device
#' @param col colors of the bars (one per group)
#' @param versionName versionName of the project
#' @return Called for its side effects (the plots); the value returned is the
#'   one of the final \code{dev.off()} call
#' @author Marie-Agnes Dillies and Hugo Varet

# created Feb 7th, 2012
# modified April 30th, 2012 (target$group instead of target)
# modified Sept 27th, 2012 (size of cex names)
# modified Oct 30th, 2012 (png)
# modified Jan 16th, 2013 (pdf, graphDir)
# modified Sept 20th, 2013 (legend)
# modified Sept 24th, 2013 (colors)
# modified Oct 25th, 2013 (modification for multiple factors)
# modified Mar 17th, 2014 (added abline for % of features with only null counts)
# modified Mar 21st, 2014 (removed outputfile argument)
# modified Aug 5th, 2014 (removed graphDir argument)
# modified July 3rd, 2015 (added diagnostic plots)
# modified Oct 19th, 2017 (percentage instead of proportion)
# modified August 26th, 2019 (ggplot2)

barplotNul <- function(counts, group, out=TRUE, col=c("lightblue","orange","MediumVioletRed","SpringGreen"), versionName="."){

  # Track whether this function currently owns an open pdf device, so that the
  # device is closed even when building or printing a plot fails.
  pdfOpen <- FALSE
  on.exit(if (pdfOpen) dev.off(), add=TRUE)

  nFeatures <- nrow(counts)
  # removeNul() is needed for both figures: evaluate it only once.
  countsNoNul <- removeNul(counts)

  # ---- Figure 1: percentage of null counts per sample ----
  percentage <- colSums(counts == 0)*100/nFeatures
  percentage.allNull <- (nFeatures - nrow(countsNoNul))*100/nFeatures

  # One label per sample: a plain vector/factor is used as is, while the columns
  # of a data.frame/matrix (several factors) are collapsed with "-".
  if (is.null(dim(group))) {
    groupLabels <- as.character(group)
  } else {
    groupLabels <- apply(group, 1, paste, collapse="-")
  }
  # Keep the groups in their order of appearance rather than alphabetical order.
  groupFactor <- factor(groupLabels, levels=unique(groupLabels))

  d <- data.frame(percentage=percentage,
                  sample=factor(names(percentage), levels=names(percentage)),
                  group=groupFactor)
  # size= is kept (rather than linewidth=) for compatibility with ggplot2 < 3.4.0
  pBar <- ggplot(d, aes(x=.data$sample, y=.data$percentage, fill=.data$group)) +
    geom_bar(stat="identity", show.legend=TRUE) +
    labs(fill="") +
    scale_fill_manual(values=col) +
    xlab("Samples") +
    ylab("Percentage of null counts") +
    scale_y_continuous(expand=expansion(mult=c(0.01, 0.05))) +
    ggtitle(paste(versionName, "Percentage of null counts per sample", sep=" - ")) +
    theme(axis.text.x=element_text(angle=90, hjust=1, vjust=0.5)) +
    geom_hline(yintercept=percentage.allNull, linetype="dashed", color="black", size=1)

  if (out) {
    pdf(file=paste0("figures/", versionName, "-barplotNull.pdf"), width=min(14,7+3*ncol(counts)/10), height=7)
    pdfOpen <- TRUE
  }
  print(pBar)
  if (pdfOpen) {
    pdfOpen <- FALSE
    dev.off()
  }

  # ---- Figure 2: diagnostic plots (always exported, independently of out) ----
  # Left panel: among the features kept by removeNul(), how many features have
  # 1, 2, ... null counts (features without any null count are not shown).
  tab <- table(rowSums(countsNoNul == 0))
  tab <- tab[names(tab) != "0"]
  d <- data.frame(x=factor(names(tab), levels=names(tab)), tab=as.numeric(tab))
  p1 <- ggplot(d, aes(x=.data$x, y=.data$tab)) +
    geom_bar(stat="identity", show.legend=FALSE) +
    xlab("Number of null counts") +
    ylab("Number of features") +
    scale_y_continuous(expand=expansion(mult=c(0.01, 0.05))) +
    ggtitle(paste(versionName, "Number of null counts per feature", sep=" - "))
      # + theme(axis.text.x=element_text(angle=90, hjust=1, vjust=0.5))

  # Right panel: histogram of the feature means; zero means are dropped because
  # they cannot be displayed on the log10 axis.
  d <- data.frame(m=rowMeans(counts))
  d <- d[which(d$m != 0),, drop=FALSE]
  p2 <- ggplot(data=d, aes(x=.data$m)) +
    geom_histogram(bins=50) +
    scale_x_continuous(trans = log10_trans(),
                       breaks = trans_breaks("log10", function(x) 10^x),
                       labels = trans_format("log10", math_format(~10^.x))) +
    scale_y_continuous(expand=expansion(mult=c(0.01, 0.05))) +
    xlab("Mean") +
    ylab("Frequency") +
    ggtitle(paste(versionName, "Distribution of the feature means", sep=" - "))

  pdf(file=paste0("figures/", versionName, "-diagLowCounts.pdf"), width=14, height=7)
  pdfOpen <- TRUE
  grid.arrange(p1, p2, ncol=2, nrow=1)
  pdfOpen <- FALSE
  dev.off()
}
# biomics-pasteur-fr/RNADiff documentation built on Aug. 27, 2020, 12:44 a.m.