R/exportComplete.edgeR.R

Defines functions exportComplete.edgeR

Documented in exportComplete.edgeR

#' Export complete data and results from edgeR
#'
#' Export complete data and results from edgeR
#'
#' @param dge a \code{DGEList} object
#' @param res list of results of \code{topTags(glmLRT(glmFit(dge, design)),...)$table}
#' @param alpha threshold to apply to the FDR
#' @param group vector of the condition from which each sample belongs
#' @param adjMethod p-value adjustment method for multiple testing
#' @param conds biological conditions of the experiment
#' @param versionName versionName of the project
#' @param info \code{data.frame} containing information about features
#' @param export \code{FALSE} to avoid creating the Excel files (gain of time)
#' @return A list of \code{data.frame} containing the results of the differential analysis (counts, FC, log2FC, p-value, etc.)
#' @author Marie-Agnes Dillies and Hugo Varet

# created Nov 14th, 2014
# modified Dec 4th, 2013 (provide only samples of interest in output files)
# modified Jan 10th, 2014 (added merge several times)
# modified Feb 5th, 2014 (added an argument to avoid creating the Excel files)
# modified Feb 14th, 2014 (optimized the creation of the complete list)
# modified Feb 18th, 2014 (select individuals concerned by the comparison)
# modified Mar 26th, 2014 (baseMean, FC and log2FC now rounded)
# modified May 5th, 2014 (fixed a bug when calculating baseMean)
# modified May 5th, 2014 (added print(name) in the loop)
# modified July 31th, 2014 (modified names in the output data frame and removed adjMethod argument)
# modified Aug 5th, 2014 (removed tabDir argument)
# modified Aug 5th, 2014 (export of diff tables now in this function)
# modified Oct 27th, 2014 (export counts and normalized counts)
# modified Dec 15th, 2014 (check there is not duplicated IDs in info)
# modified June 23rd, 2016 (quote=FALSE when exporting the tables)

exportComplete.edgeR <- function(dge, res, alpha=0.05, group=NULL, adjMethod, conds=NULL,
                                 versionName=".", info = NULL, export=TRUE){

  names(res) <- gsub("_"," ",names(res))

  if (is.null(info)) info <- data.frame(Id=rownames(res[[1]])) else names(info)[1] <- "Id"
  if (any(duplicated(info[,1]))) stop("Duplicated IDs in the annotations")
  
  # raw and normalized counts
  write.table(dge$counts, file=paste0("tables/", versionName,".counts.xls"), sep="\t", row.names=TRUE, col.names=NA, quote=FALSE)
  write.table(round(normCounts.edgeR(dge)), file=paste0("tables/", versionName,".normCounts.xls"), sep="\t", row.names=TRUE, col.names=NA, quote=FALSE)
  counts <- data.frame(Id=rownames(dge$counts), dge$counts, round(normCounts.edgeR(dge)))
  colnames(counts) <- c("Id", colnames(dge$counts), paste0("norm.", colnames(dge$counts)))

  # merge des info, comptages et baseMean selon l'Id
  base <- merge(info, counts, by="Id", all.y=TRUE)
  
  tmp <- base[,paste("norm", colnames(dge$counts), sep=".")]
  base$baseMean <- round(apply(tmp,1,mean),digits=2)
  for (cond in conds){
    base[,cond] <- round(apply(as.data.frame(tmp[,group==cond]),1,mean),digits=0)
  }
  complete.complete <- base
    
  complete <- vector("list",length(res)); names(complete) <- names(res);
  for (name in names(res)){
    print(name)
	complete.name <- base
    conds.supp <- setdiff(conds, gsub("\\(|\\)","",unlist(strsplit(name," vs "))))
    if (length(conds.supp)>0){
      complete.name <- complete.name[,-which(names(complete.name) %in% conds.supp)]
      samples.supp <- colnames(dge$counts)[group %in% conds.supp]
      col.supp <- c(samples.supp, paste0("norm.", samples.supp))
      complete.name <- complete.name[,-which(names(complete.name) %in% col.supp)]
    }
    
    # ajout d'elements depuis res
    res.name <- data.frame(Id=rownames(res[[name]]),FC=round(2^(res[[name]][,"logFC"]),3),
                           log2FoldChange=round(res[[name]][,"logFC"],3),pvalue=res[[name]][,"PValue"],
						   padj=res[[name]][,"FDR"])
    complete.name <- merge(complete.name, res.name, by="Id")
    
    # ajout d'elements depuis dge
    dge.add <- data.frame(Id=rownames(dge$counts),tagwise.dispersion=dge$tagwise.dispersion,
                          trended.dispersion=dge$trended.dispersion)
    complete.name <- merge(complete.name, dge.add, by="Id")
    complete[[name]] <- complete.name

    # select up and down
    up.name <- complete.name[which(complete.name$padj <= alpha & complete.name$log2FoldChange>=0),]
    up.name <- up.name[order(up.name$padj),]
    down.name <- complete.name[which(complete.name$padj <= alpha & complete.name$log2FoldChange<=0),]
    down.name <- down.name[order(down.name$padj),]
	
    name <- gsub(" ","",name)

    if (export){
      write.table(complete.name, file=paste0("tables/", versionName,".",name,".complete.xls"), sep="\t", row.names=FALSE, dec=".", quote=FALSE)
      write.table(up.name, file=paste0("tables/", versionName,".",name,".up.xls"), row.names=FALSE, sep="\t", dec=".", quote=FALSE)
      write.table(down.name, file=paste0("tables/", versionName,".",name,".down.xls"), row.names=FALSE, sep="\t", dec=".", quote=FALSE) 
    }
    
    keep <- c("FC","log2FoldChange","padj")
    complete.complete[,paste(name,keep,sep=".")] <- complete.name[,keep]
  }
  
  if (length(res)>=2 & export){
    write.table(complete.complete, file=paste0("tables/", versionName,".complete.xls"),
                sep="\t", row.names=FALSE, dec=".", quote=FALSE)
  }
  
  return(complete)
}
biomics-pasteur-fr/RNADiff documentation built on Aug. 27, 2020, 12:44 a.m.