R/diagSizeFactors.R

Defines functions diagSizeFactors

Documented in diagSizeFactors

#' Check the right value of sizeFactors
#'
#' Check the right value of sizeFactors
#'
#' @param dds a \code{DESeqDataSet} object
#' @param group vector of the condition from which each sample belongs
#' @param col colors of the points
#' @param out \code{TRUE} to export the figure
#' @param versionName versionName of the project
#' @return Histograms
#' @author Marie-Agnes Dillies and Hugo Varet

# created April 2nd, 2013
# modified Sept 24th, 2013 (diagDir > graphDir)
# modified Sept 26th, 2013 (option out)
# modified Oct 29th, 2013 (deleted target argument)
# modified Feb 6th, 2014 (fix x-axis)
# modified Mar 21st, 2014 (removed outputfile argument)
# modified July 30th, 2014 (added size factors vs total # of counts diagnostic plot)
# modified Aug 5th, 2014 (removed graphDir argument)
# modified July 19th, 2016 (total number of reads in millions)
# modified May 2nd, 2017 (add labels and colors)
# modified August 26th, 2019 (ggplot2)

diagSizeFactors <- function(dds, group, col=c("lightblue","orange","MediumVioletRed","SpringGreen"), 
                            out = TRUE, versionName="."){
  group <- data.frame(group=apply(group, 1, paste, collapse="-"))
  group$group <- factor(group$group, levels=unique(group$group))
  if (out) pdf(file=paste0("figures/", versionName, "-diagSizeFactors.pdf"))
  geomeans <- exp(rowMeans(log(counts(dds))))
  samples <- colnames(counts(dds))
  counts.trans <- log2(counts(dds)/geomeans)
  counts.trans <- counts.trans[which(!is.na(apply(counts.trans, 1, sum))),]
  for (j in 1:ncol(dds)){
    d <- data.frame(x=counts.trans[,j])
    print(ggplot(data=d, aes(x=.data$x)) +
            geom_histogram(bins=100) +
            scale_y_continuous(expand=expansion(mult=c(0.01, 0.05))) +
            xlab(expression(log[2]~(counts/geometric~mean))) +
            ylab("Frequency") +
            ggtitle(paste0(versionName," - Size factors diagnostic - Sample ", samples[j])) +
            geom_vline(xintercept=log2(sizeFactors(dds)[j]), linetype="dashed", color="red", size=1))
  }
  if (out) dev.off()
  # total read counts vs size factors
  if (out) pdf(file=paste0("figures/", versionName, "-diagSizeFactorsTC.pdf"), width=8)
  d <- data.frame(sf=sizeFactors(dds), 
                  libsize=colSums(counts(dds))/1e6, 
                  group, 
                  sample=factor(colnames(dds), levels=colnames(dds)))
  print(ggplot(data=d, aes(x=.data$sf, y=.data$libsize, color=.data$group, label=.data$sample)) + 
    geom_point(show.legend=TRUE, size=3) +
    scale_colour_manual(values=col) +
    labs(color="") +
    geom_text_repel(show.legend=FALSE, size=5, point.padding=0.2) +
    xlab("Size factors") +
    ylab("Total number of reads (millions)") +
    ggtitle("Diagnostic: size factors vs total number of reads") +
    geom_abline(slope=coefficients(lm(libsize ~ sf + 0, data=d)), intercept=0, show.legend=FALSE, linetype="dashed", color="grey"))
  if (out) dev.off()
}
biomics-pasteur-fr/RNADiff documentation built on Aug. 27, 2020, 12:44 a.m.