R/exportComplete.DESeq2.R

Defines functions exportComplete.DESeq2

Documented in exportComplete.DESeq2

#' Export complete data and results from DESeq2
#'
#' Export complete data and results from DESeq2
#'
#' @param dds a \code{DESeqDataSet}
#' @param results list of results of \code{results(dds,...)} with chosen parameters
#' @param alpha threshold to apply to the FDR
#' @param group vector of the condition from which each sample belongs
#' @param cooksCutoff Cook's distance threshold for detecting outliers (\code{Inf}
#' to disable the detection, \code{NULL} to keep DESeq2 threshold)
#' @param conds biological conditions of the experiment
#' @param versionName versionName of the project
#' @param info \code{data.frame} containing information about features
#' @param export \code{FALSE} to avoid creating the Excel files (gain of time)
#' @return A list of \code{data.frame} containing the results of the differential analysis (counts, FC, log2FC, p-value, etc.)
#' @author Marie-Agnes Dillies and Hugo Varet

# created Feb 14th, 2012
# modified March 9th, 2012 (order of cond1 and cond2)
# modified August 1st (add info as input parameter)
# modified October 17th (add norm as input parameter)
# modified April 4th 2013 (input cds instead of counts and norm)
# modified Sept 19th 2013 (adapted to DESeq2, less information is provided)
# modified Sept 20th 2013 (add FC, and mean per condition)
# modified Sept 24th 2013 (projectName > versionName and tabDir)
# modified Sept 27th 2013 (ajout colonne outlier)
# modified Oct 10th 2013 (fixed bug when selecting columns and simplified the construction of complete)
# modified Oct 14th 2013 (added all.x=TRUE when merging rownames and info)
# modified Oct 18th 2013 (added indepFiltering and Cook's cut-off)
# modified Oct 28th 2013 (renamed the function exportComplete > exportComplete2conds)
# modified Oct 28th 2013 (deleted target argument)
# modified Nov 6th 2013 (removed moderated logFC, replaced by shrunk logFC in DESeq2)
# modified Nov 8th 2013 (cond1 and cond2 args becomes conds)
# modified Nov 8th, 2013 (res is now a list)
# modified Nov 13th, 2013 (also export complete.complete)
# modified Nov 14th, 2013 (removed alpha argument which was useless)
# modified Dec 4th, 2013 (provide only samples of interest in output files)
# modified Jan 9th, 2014 (added merge several times)
# modified Feb 3nd, 2014 (.99 instead of .75 for the quantile of the Cook's cut-off)
# modified Feb 5th, 2014 (added an argument to avoid creating the Excel files)
# modified Feb 14th, 2014 (optimized the creation of the complete list)
# modified Mar 26th, 2014 (baseMean, FC and log2FC now rounded)
# modified May 5th, 2014 (added print(name) in the loop)
# modified July 31th, 2014 (modified names in the output data frame and removed adjMethod argument)
# modified Aug 5th, 2014 (removed tabDir argument)
# modified Aug 5th, 2014 (export of diff tables now in this function)
# modified Oct 27th, 2014 (export counts and normalized counts)
# modified Dec 15th, 2014 (check there is not duplicated IDs in info)
# modified June 23rd, 2016 (quote=FALSE when exporting the tables)
# modified Sept 24th, 2019 (add stat column)

exportComplete.DESeq2 <- function(dds, results, alpha=0.05, group=NULL, cooksCutoff, conds=NULL,
                                  versionName=".", info = NULL, export=TRUE){
  
  names(results) <- gsub("_", " ", names(results))
  
  if (is.null(info)) info <- data.frame(Id=rownames(results[[1]])) else names(info)[1] <- "Id"
  if (any(duplicated(info[,1]))) stop("Duplicated IDs in the annotations")
  
  # raw and normalized counts
  write.table(counts(dds), file=paste0("tables/", versionName,".counts.xls"), sep="\t", row.names=TRUE, col.names=NA, quote=FALSE)
  write.table(round(counts(dds, normalized=TRUE)), file=paste0("tables/", versionName,".normCounts.xls"), sep="\t", row.names=TRUE, col.names=NA, quote=FALSE)
  counts <- data.frame(Id=rownames(counts(dds)), counts(dds), round(counts(dds, normalized=TRUE)))
  colnames(counts) <- c("Id", colnames(counts(dds)), paste0("norm.", colnames(counts(dds))))
  # baseMean avec identifiant
  bm <- data.frame(Id=rownames(results[[1]]),baseMean=round(results[[1]][,"baseMean"],2))
  # merge des info, comptages et baseMean selon l'Id
  base <- merge(merge(info, counts, by="Id", all.y=TRUE), bm, by="Id")
  
  tmp <- base[,paste("norm", colnames(counts(dds)), sep=".")]
  for (cond in conds){
    base[,cond] <- round(apply(as.data.frame(tmp[,group==cond]),1,mean),0)
  }
  complete.complete <- base
  
  complete <- vector("list",length(results)); names(complete) <- names(results);
  for (name in names(results)){
    print(name)
    complete.name <- base
    conds.supp <- setdiff(conds, gsub("\\(|\\)","",unlist(strsplit(name," vs "))))
    if (length(conds.supp)>0){
      complete.name <- complete.name[,-which(names(complete.name) %in% conds.supp)]
      samples.supp <- colnames(counts(dds))[group %in% conds.supp]
      col.supp <- c(samples.supp, paste("norm", samples.supp, sep="."))
      complete.name <- complete.name[,-which(names(complete.name) %in% col.supp)]
    }
    
    # ajout d'elements depuis results
    res.name <- data.frame(Id=rownames(results[[name]]), FC=round(2^(results[[name]][,"log2FoldChange"]), 3),
                           log2FoldChange=round(results[[name]][,"log2FoldChange"], 3), stat=round(results[[name]][,"stat"], 3),
                           pvalue=results[[name]][,"pvalue"], padj=results[[name]][,"padj"])
    complete.name <- merge(complete.name, res.name, by="Id")
    
    # ajout d'elements depuis mcols(dds)
    mcols.add <- data.frame(Id=rownames(counts(dds)),dispGeneEst=mcols(dds)$dispGeneEst,
                            dispFit=mcols(dds)$dispFit,dispMAP=mcols(dds)$dispMAP,
                            dispersion=mcols(dds)$dispersion,betaConv=mcols(dds)$betaConv,
                            maxCooks=mcols(dds)$maxCooks)
    if (is.null(cooksCutoff)){
      m <- nrow(attr(dds,"modelMatrix"))
      p <- ncol(attr(dds,"modelMatrix"))
      cooksCutoff <- qf(.99, p, m - p)
    }
    mcols.add$outlier <- ifelse(mcols(dds)$maxCooks > cooksCutoff,"Yes","No")
    complete.name <- merge(complete.name, mcols.add, by="Id")
    complete[[name]] <- complete.name
    
    # s?lection des up et down
    up.name <- complete.name[which(complete.name$padj <= alpha & complete.name$betaConv & complete.name$log2FoldChange>=0),]
    up.name <- up.name[order(up.name$padj),]
    down.name <- complete.name[which(complete.name$padj <= alpha & complete.name$betaConv & complete.name$log2FoldChange<=0),]
    down.name <- down.name[order(down.name$padj),]
    
    name <- gsub(" ","",name)
    if (export){
      write.table(complete.name, file=paste0("tables/", versionName,".",name,".complete.xls"), sep="\t", row.names=FALSE, dec=".", quote=FALSE)
      write.table(up.name, file=paste0("tables/", versionName,".",name,".up.xls"), row.names=FALSE, sep="\t", dec=".", quote=FALSE)
      write.table(down.name, file=paste0("tables/", versionName,".",name,".down.xls"), row.names=FALSE, sep="\t", dec=".", quote=FALSE)    
    }
    
    keep <- c("FC","log2FoldChange","padj")
    complete.complete[,paste(name,keep,sep=".")] <- complete.name[,keep]
  }
  
  if (length(results)>=2 & export){
    write.table(complete.complete, file=paste0("tables/", versionName,".complete.xls"),
                sep="\t", row.names=FALSE, dec=".", quote=FALSE)
  }
  
  return(complete)
}
biomics-pasteur-fr/RNADiff documentation built on Aug. 27, 2020, 12:44 a.m.