R/DESeq2.R

#*  Copyright (C) 2018 the DEUS contributors.
#*  Website: https://github.com/timjeske/DEUS
#*
#*  This file is part of the DEUS R package.
#*
#*  The DEUS R package is free software: you can redistribute it and/or modify
#*  it under the terms of the GNU General Public License as published by
#*  the Free Software Foundation, either version 3 of the License, or
#*  (at your option) any later version.
#*
#*  This program is distributed in the hope that it will be useful,
#*  but WITHOUT ANY WARRANTY; without even the implied warranty of
#*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#*  GNU General Public License for more details.
#*
#*  You should have received a copy of the GNU General Public License
#*  along with this program.  If not, see <http://www.gnu.org/licenses/>.


#' Run differential expression analysis based on DESeq2
#'
#' The function provides a reference implementation of differential expression analysis using \link[DESeq2]{DESeq}.
#' It is possible to replace this function by any other method generating at least the log2 fold change, a p-value and an IHW p-value for each unique sequence to be compatible with downstream functions.
#'
#' @param count_table The count table generated by \link[DEUS]{createCountTableFromFastQs}
#' @param pheno_info A data frame with sample identifiers as row names including the columns used for the design formula.
#' The sample identifiers must be identical to those in the count_table.
#' @param design Design formula for differential expression analysis
#' @param contrast Argument to specify the comparison for which the results table will be generated (corresponds to 'contrast' argument in \link[DESeq2]{results} and \link[DESeq2]{lfcShrink}). Required if interaction terms are part of the design formula.
#' @param effect_name Argument to specify the comparision for which the results table will be generated (corresponds to 'name' argument in \link[DESeq2]{results} and 'coef' argument in \link[DESeq2]{lfcShrink}). Required if interaction terms are part of the design formula.
#' @param shrink_type DESeq2 shrinkage estimator (corresponds to 'type' argument in \link[DESeq2]{lfcShrink}).
#' @param out_dir Directory to save sample distance map, PCA and MA plot.
#' @param prefix Prefix for the names of the PDF files containing the sample distance map, the PCA and the MA plot.
#' @return Returns a list consisting of 'normCounts' and the 'deResult'.
#' 'normCounts' is a data frame of normalized counts with sequences as row names.
#' 'deResult' is a data frame with sequences as row names and the columns 'pvalue', 'padj', 'baseMean', 'log2FoldChange', 'lfcSE', 'stat', 'IHWPval'.
#' See \link[DESeq2]{results} and \link[IHW]{ihw.default} for further documentation on the columns.
#' @export

runDESeq2 <- function(count_table, pheno_info, design, contrast, effect_name, shrink_type = "normal", out_dir, prefix = "DESeq2") {

  if( any(grepl(":", design)) && missing(contrast) && missing(effect_name))  {
    stop("Design formula has interaction terms but no constrast or effect_name argument provided! Please change your design formular or provide a contrast of effect_name argument.",call.=T)
  }

  dds <- DESeq2::DESeqDataSetFromMatrix(countData = count_table, colData = pheno_info, design = design)

  if(missing(contrast) && missing(effect_name)) {
    dds <- DESeq2::DESeq(dds, betaPrior=TRUE)
    res <- DESeq2::results(dds)
  } else if(!missing(contrast)) {
    dds <- DESeq2::DESeq(dds)
    if(any(grepl(":", design))) {
      if(shrink_type != "ashr") {
        warning("The shrinkage estimator was set as 'ashr' due to given design formular with interaction terms.")
      }
      shrink_type = "ashr"
    }
    if(shrink_type == "apeglm") {
      warning("The shrinkage estimator 'apeglm' cannot be used together with the contrast argument. Will use the 'normal' shrinkage estimator.")
      shrink_type = "normal"
    }
    res <- DESeq2::lfcShrink(dds, contrast = contrast, type=shrink_type)
  } else if (!missing(effect_name)) {
    dds <- DESeq2::DESeq(dds)
    if(any(grepl(":", design))) {
      if(shrink_type != "apgelm") {
        warning("The shrinkage estimator was set as 'apeglm' due to given design formular with interaction terms.")
      }
      shrink_type = "apeglm"
    }
    if(shrink_type == "ashr") {
      warning("The shrinkage estimator 'ashr' cannot be used together with the effect_name argument. Will use the 'normal' shrinkage estimator.")
      shrink_type = "normal"
    }
    res <- DESeq2::lfcShrink(dds, coef = effect_name, type=shrink_type)
  }

  # reorder columns as pvalue, padj, baseMean, log2FoldChange, lfcSE, stat
  pval_idx <- which("pvalue" == colnames(res))
  pval_adj_idx <- which("padj" == colnames(res))
  other_idx <- intersect(which("pvalue" != colnames(res)), which( "padj" != colnames(res)))
  res <- res[, c(pval_idx, pval_adj_idx, other_idx)]
  res$"IHWPvalue"=IHW::adj_pvalues(IHW::ihw(res$pvalue~res$baseMean, data=res,alpha=0.05))

  if(!missing(out_dir)) {
    # plot sample distance
    rld <- DESeq2::rlog(dds, blind=FALSE)
    plotSampleDistanceMap(rld,out_dir,prefix)

    # plot PCA
    plotPCA(rld,pheno_info,out_dir,prefix)

    # plot MA
    pdf(paste(out_dir,paste(prefix, "MAplot_shrunken.pdf", sep="_"),sep="/"), onefile=FALSE)
    DESeq2::plotMA(res, main = paste(prefix, "MA plot", sep=" "), ylim=c(-4,4))
    dev.off()
  }

  # rename pvalue to Pvalue and log2FoldChange to Log2FoldChange
  res <- as.data.frame(res)
  colnames(res) <- gsub("pvalue", "Pvalue", colnames(res))
  colnames(res) <- gsub("log2FoldChange", "Log2FoldChange", colnames(res))
  counts_norm<-DESeq2::counts(dds, normalized=TRUE)
  newList <- list("norm_counts" = counts_norm, "de_result" = res)
  return(newList)
}

#' Plot sample distance heatmap
#'
#' This function is internally called by \link[DEUS]{runDESeq2}
#'
#' @param rld Count table after 'regularized log transformation' by \link[DESeq2]{rlog}
#' @param out_dir Directory for PDF file including the sample distance heatmap
#' @param prefix Prefix for name of PDF file
#' @return DESeq2_sample_dist.pdf in output directory
#' @export

plotSampleDistanceMap <- function(rld, out_dir,prefix="DESeq2") {
  pdf(paste(out_dir, paste(prefix, "sample_dist.pdf", sep="_"), sep="/"), onefile=FALSE)
  sampleDists <- dist(t(SummarizedExperiment::assay(rld)))
  sampleDistMatrix <- as.matrix(sampleDists)
  rownames(sampleDistMatrix) <- colnames(rld)
  colnames(sampleDistMatrix) <- NULL
  colors <- colorRampPalette( rev(RColorBrewer::brewer.pal(9, "Blues")) )(255)
  pheatmap::pheatmap(sampleDistMatrix,
           clustering_distance_rows=sampleDists,
           clustering_distance_cols=sampleDists,
           col=colors)
  dev.off()
}

#' Plot result of PCA
#'
#' This function is internally called by \link[DEUS]{runDESeq2}
#'
#' @param rld Count table after 'regularized log transformation' by \link[DESeq2]{rlog}
#' @param pheno_info A data frame with sample identifiers as row names including maximal two columns defining (sub-)conditions.
#' The sample identifiers must be identical to those in the rld table.
#' @param out_dir Directory for PDF file including the PCA plot
#' @param prefix Prefix for name of PDF file
#' @return DESeq2_PCA.pdf in output directory
#' @export

plotPCA <- function(rld, pheno_info, out_dir, prefix = "DESeq2") {
  mypheno_info <- pheno_info[, !(names(pheno_info) %in% c("sample")), drop=FALSE]
  pdf(paste(out_dir,paste(prefix,"PCA.pdf",sep="_"), sep="/"), onefile=FALSE)
  if(ncol(mypheno_info) > 1) {
    data <- DESeq2::plotPCA(rld, intgroup=colnames(mypheno_info), returnData=TRUE)
    percentVar <- round(100 * attr(data, "percentVar"))
    plt <- ggplot2::ggplot(data, ggplot2::aes(PC1, PC2, color=mypheno_info[,1], shape=mypheno_info[,2])) +
      ggplot2::geom_point(size=3) +
      ggplot2::xlab(paste0("PC1: ",percentVar[1],"% variance")) +
      ggplot2::ylab(paste0("PC2: ",percentVar[2],"% variance")) +
      ggplot2::labs(color=colnames(mypheno_info)[1]) +
      ggplot2::labs(shape=colnames(mypheno_info)[2])
  } else {
    plt <- DESeq2::plotPCA(rld, intgroup=colnames(mypheno_info))
  }
  print(plt)
  dev.off()
}
timjeske/DEUS documentation built on June 6, 2019, 12:59 p.m.