R/PCAPlot.R

Defines functions PCAPlot

Documented in PCAPlot

#' PCA of samples (for DESeq2 object)
#'
#' PCA of samples (for DESeq2 object)
#'
#' @param dds a \code{DESeqDataSet} object
#' @param group vector of the condition from which each sample belongs
#' @param n number of features to keep among the most variant
#' @param type.trans type of transformation to use (\code{"VST"} or \code{"rlog"})
#' @param out \code{TRUE} to export the figure
#' @param col colors of the bars
#' @param axesList list of couples of axes on which projecting the samples
#' @param versionName versionName of the project
#' @return A plot of the two first principal components and the coordinates of the samples
#' @author Marie-Agnes Dillies and Hugo Varet

# created March 27th, 2013
# modified Sept 19th, 2013 (adapted for DESeq2)
# modified Sept 20th, 2013 (estimateDispersions now run in the main script, better display of labels)
# modified Sept 24th, 2013 (colors)
# modified Sept 30th, 2013 (add type.norm)
# modified Oct 8th, 2013 (add lightgray lines on the plot)
# modified Oct 14th, 2013 (type.norm > type.trans and plotPCA > PCAPlot)
# modified Oct 25th, 2013 (modification for multiple factors)
# modified Mar 21st, 2014 (removed outputfile argument)
# modified April 2nd, 2014 (use of varianceStabilizingTransformation() and rlogTransformation() instead of getVarianceStabilizedData() and rlogData())
# modified July 30th, 2014 (remove axes argument)
# modified Aug 5th, 2014 (removed graphDir argument)
# modified March 9th, 2015 (diagnostic eigen values)
# modified June 15th, 2015 (take the mininimum between 500 and the number of features)
# modified April 04th, 2016 (simplification with tmpFunction(), add axesList argument)
# modified August 26th, 2019 (ggplot2)

PCAPlot <- function(dds, group, n=min(500,nrow(counts(dds))), type.trans=c("VST","rlog"), out=TRUE,
                    col=c("lightblue","orange","MediumVioletRed","SpringGreen"), axesList=list(c(1,2), c(1,3)),
                    versionName="."){

  type.trans <- type.trans[1]
  group <- data.frame(group=apply(group, 1, paste, collapse="-"))
  group$group <- factor(group$group, levels=unique(group$group))
  
  if (out) pdf(file=paste0("figures/", versionName, "-PCA.pdf"), width=8)
  # normalisation vst ou rlog
  if (type.trans == "VST") counts.trans <- assay(varianceStabilizingTransformation(dds))
  else counts.trans <- assay(rlogTransformation(dds))

  # PCA sur les 500 features les plus variants (pour reduire la dimension)
  rv = apply(counts.trans, 1, var, na.rm=TRUE)
  pca = prcomp(t(counts.trans[order(rv, decreasing = TRUE), ][1:n,]))
  prp <- pca$sdev^2 * 100 / sum(pca$sdev^2)
  prp <- round(prp,2)

  tmpFunction <- function(axes=c(1, 2)){
    index1 <- axes[1]
    index2 <- axes[2]
    d <- data.frame(x=pca$x[,index1], 
                    y=pca$x[,index2], 
                    group=group$group, 
                    sample=factor(rownames(pca$x), levels=rownames(pca$x)))
    ggplot(data=d, aes(x=.data$x, y=.data$y, color=group, label=sample)) + 
      geom_point(show.legend=TRUE, size=3) +
      labs(color="") +
      scale_colour_manual(values=col) +
      geom_text_repel(show.legend=FALSE, size=5, point.padding=0.2) +
      xlab(paste0("PC", index1, " (",prp[index1],"%)")) +
      ylab(paste0("PC", index2, " (",prp[index2],"%)")) +
      ggtitle(paste(versionName, "Principal Component Analysis", sep=" - "))
  }
  for (axes in axesList) print(tmpFunction(axes))
  if (out) dev.off()

  pdf(paste0("figures/", versionName, "-diagEigenValuesPCA.pdf"))
  d <- data.frame(pc=factor(1:length(pca$sdev)), eg=pca$sdev^2)
  d <- d[1:min(15, nrow(d)),]
  print(ggplot(d, aes(x=.data$pc, y=.data$eg)) +
          geom_bar(stat="identity", show.legend=FALSE) +
          scale_y_continuous(expand=expansion(mult=c(0.01, 0.05))) +
          xlab("Principal component") + 
          ylab("Eigen value") +
          ggtitle(paste(versionName, "Eigen values of the PCA", sep=" - ")))
  dev.off()
  
  return(invisible(pca$x))
}
biomics-pasteur-fr/RNADiff documentation built on Aug. 27, 2020, 12:44 a.m.