R/writeEmapFile.R

Defines functions writeEmapFile

Documented in writeEmapFile

#' Writes enrichment file for use in Cytoscape EnrichmentMap
#'
#' Reads one GSEA results table per population comparison, joins the tables on
#' gene set, keeps the gene sets passing the NES and FDR cutoffs, and writes a
#' tab-delimited file holding the gene set name, its description and the first
#' two retained statistic columns of the first comparison.
#'
#' Each results file must have at least six columns, the first of which is
#' named \code{Geneset}; columns 4-6 must include \code{NES} and \code{FDR}.
#' Only the first and (if present) second comparison are used for filtering;
#' any further comparisons are joined but do not affect which rows are kept.
#'
#' @param results_file (list or char) file paths to results.txt for each
#'    population comparison (generated by setupGSEArun.R). Must contain at
#'    least one path.
#' @param ENRICH_NES (numeric) NES cutoff to select validated selection-enriched
#'    pathways (default=3). Gene sets with NES >= cutoff are kept.
#' @param out_file (char) name of output file.
#' @param ENRICH_FDR (numeric) FDR cutoff applied together with ENRICH_NES
#'    (default=0.05). Gene sets with FDR <= cutoff are kept.
#'
#' @return none (invisible NULL). Output is written to \code{out_file} exactly
#'    as given; supply a full path to place it in a specific directory
#'    (e.g., CEU_YRI directory).
#' @export
#'
writeEmapFile <- function(results_file, ENRICH_NES=3, out_file,
                          ENRICH_FDR=0.05) {

  # Fail early on unusable arguments, before any file is read or joined
  if (length(results_file) == 0) {
    stop("results_file must contain at least one file path", call.=FALSE)
  }
  if (missing(out_file)) {
    stop("out_file must be supplied", call.=FALSE)
  }

  # Read in GSEA results files formatted for EnrichmentMap
  # (sep="" stops cat() from putting a space in front of every later line)
  cat(sprintf("* Reading in GSEA results file %s\n", results_file), sep="")
  resFiles <- lapply(results_file, function(x) {
    read.delim(x, header=TRUE, as.is=TRUE)
  })
  for (i in seq_along(resFiles)) {
    # Column 1 is taken twice: once as the gene set name and once as the
    # description; columns 5, 6 and 4 carry the statistics
    resFiles[[i]] <- resFiles[[i]][, c(1, 1, 5, 6, 4)]
    colnames(resFiles[[i]])[2] <- "Description"
    # Suffix statistic columns with the comparison index (e.g., NES_1, FDR_1)
    colnames(resFiles[[i]])[3:5] <- paste(colnames(resFiles[[i]])[3:5], i,
                                          sep="_")
  }

  # Combine results from the population comparisons, keeping every gene set
  # of the first comparison (left join)
  resComb <- Reduce(
    function(x, y) dplyr::left_join(x, y, by=c("Geneset", "Description")),
    resFiles
  )

  # Select for enriched pathways
  resComb_enrich <- dplyr::filter(
    resComb,
    NES_1 >= ENRICH_NES & FDR_1 <= ENRICH_FDR
  )
  if (length(resFiles) > 1) { # filter by second population analysis if run
    resComb_enrich <- dplyr::filter(
      resComb_enrich,
      NES_2 >= ENRICH_NES & FDR_2 <= ENRICH_FDR
    )
  }

  # Remove gene set annotation details from Description column
  # (everything from the first "%" onwards)
  # Needed for AutoAnnotate app to properly annotate aggregated gene sets
  resComb_enrich$Description <- gsub("\\%.*", "", resComb_enrich$Description)

  # Write out file for EnrichmentMap input: gene set, description and the
  # first two statistic columns of the first comparison
  EMout <- resComb_enrich[, 1:4]
  cat(sprintf("* Writing out formatted GSEA file to %s.\n", out_file))
  write.table(EMout, out_file, col.names=TRUE, row.names=FALSE, quote=FALSE,
              sep="\t")
}
BaderLab/POPPATHR documentation built on Dec. 17, 2021, 9:53 a.m.