Function Enrichment Analysis: GO and KEGG

  enrichDatabases<-c(
                     #"geneontology_Biological_Process",
                     #"geneontology_Cellular_Component",
                     #"geneontology_Molecular_Function",
                     "geneontology_Biological_Process_noRedundant",
                     "geneontology_Cellular_Component_noRedundant",
                     "geneontology_Molecular_Function_noRedundant",
                     "pathway_KEGG"
                     #"pathway_Wikipathway",
                     #"network_miRNA_target",
                     #"network_PPI_BIOGRID",
                     #"network_Transcription_Factor_target"
  )
  if(dataForReport[["genome"]]=="mm10" || dataForReport[["genome"]]=="mm9") { #only support mouse and human at this time
    organism="mmusculus"
  }  else if(dataForReport[["genome"]]=="hg19" || dataForReport[["genome"]]=="hg38") {
    organism="hsapiens"
  } else {
    stop(paste0("Not supporting this genome to run WebGestaltR: ", dataForReport[["genome"]]))
  }
  projectName=WebGestaltR:::sanitizeFileName(basename(dataForReport[["reportOut"]]))
  selectedColumns=c("Tumor_Sample_Barcode",dataForReport$vIdCol,dataForReport$vafCol)
  functionEnrichResultDir=paste0(dataForReport$reportOutDir,"/FunctionEnrichmentAnalysis")
  dir.create(functionEnrichResultDir,showWarnings=FALSE)
if ("performFunctionEnrichmentAnalysis" %in% names(dataForReport) && dataForReport[["performFunctionEnrichmentAnalysis"]]) {
  cat(paste0("\n## All genes with mutations\n\n  "))

  mafNoSynSelectedColumns=dataForReport$maf@data[,..selectedColumns]
  geneFile=paste0(functionEnrichResultDir,"/",projectName,".mafNoSynSelectedColumns.maf")
  write.table(mafNoSynSelectedColumns,geneFile,sep="\t",row.names=FALSE,quote=FALSE)

  #pass parameters in by redefine commandArgs. https://stackoverflow.com/questions/14525580/how-to-pass-command-line-arguments-when-calling-source-on-an-r-file-within-ano
  commandArgs <- function(...) {c(organism,projectName,geneFile,functionEnrichResultDir,"genesymbol","genome")}
  source("~/source/ngsperl/lib/Annotation/WebGestaltR.r")
  rm(commandArgs) #del new to restore orginal commandArgs
}
if ("performFunctionEnrichmentAnalysis" %in% names(dataForReport) && dataForReport[["performFunctionEnrichmentAnalysis"]]) {

  annotationResultFiles=paste0(functionEnrichResultDir,"/Project_",projectName,"_",enrichDatabases,"/enrichment_results_",projectName,"_",enrichDatabases,".txt")
  annoFiles=data.frame(V1=annotationResultFiles,V2=projectName)
  deseq2Files=data.frame(V1=geneFile,V2=projectName)

  oldwd=getwd()
  setwd(functionEnrichResultDir)
  file.copy("~/source/ngsperl/lib/Annotation/WebGestaltDeseq2.rmd","WebGestaltDeseq2.rmd")
  source("~/source/ngsperl/lib/Annotation/WebGestaltDeseq2.r")
  setwd(oldwd)
}
#Genes with mutations in each group
if ("performFunctionEnrichmentAnalysis" %in% names(dataForReport) && 
    dataForReport[["performFunctionEnrichmentAnalysis"]] &&
    "performGroupSummary" %in% names(dataForReport) &&
    dataForReport$performGroupSummary) {
  cat(paste0("\n## Genes with mutations in each group\n\n  "))

  for (clinicalFeaturesOne in dataForReport[["clinicalFeatures"]]) {
    for (groupOne in unique(as.character(dataForReport[["maf"]]@data[[clinicalFeaturesOne]]))) {
        clinQueryText=paste0(clinicalFeaturesOne," %in% '",groupOne,"'")
        mafGroupOne = subsetMaf(maf = dataForReport[["maf"]], clinQuery = clinQueryText)

        mutationGenes=unique(mafGroupOne@data[["Hugo_Symbol"]])
        if (length(mutationGenes) <= 50) {
          warning(paste0("Too few mutation genes. Skip function enrichment analysis for ",clinicalFeaturesOne,": ",groupOne
          )
        )
        next
        }
      cat(paste0("\n### ",clinicalFeaturesOne,": ",groupOne,"\n\n  "))

      ##########################
      #do annotation
      ##########################
      mafNoSynSelectedColumns=mafGroupOne@data[,..selectedColumns]
      projectNameGroup=paste0(projectName,"_",clinicalFeaturesOne,"_",groupOne)
      geneFile=paste0(functionEnrichResultDir,"/",projectNameGroup,".mafNoSynSelectedColumns.maf")
      write.table(mafNoSynSelectedColumns,geneFile,sep="\t",row.names=FALSE,quote=FALSE)

      #pass parameters in by redefine commandArgs. https://stackoverflow.com/questions/14525580/how-to-pass-command-line-arguments-when-calling-source-on-an-r-file-within-ano
      commandArgs <- function(...) {c(organism,projectNameGroup,geneFile,functionEnrichResultDir,"genesymbol","genome")}
      source("~/source/ngsperl/lib/Annotation/WebGestaltR.r")
      rm(commandArgs) #del new to restore orginal commandArgs

      #################################
      #make tables
      #################################
      annotationResultFiles=paste0(functionEnrichResultDir,"/Project_",projectNameGroup,"_",enrichDatabases,"/enrichment_results_",projectNameGroup,"_",enrichDatabases,".txt")
      annoFiles=data.frame(V1=annotationResultFiles,V2=projectNameGroup)
      deseq2Files=data.frame(V1=geneFile,V2=projectNameGroup)

      oldwd=getwd()
      setwd(functionEnrichResultDir)
      file.copy("~/source/ngsperl/lib/Annotation/WebGestaltDeseq2.rmd","WebGestaltDeseq2.rmd",overwrite = TRUE)
      source("~/source/ngsperl/lib/Annotation/WebGestaltDeseq2.r")
      setwd(oldwd)

    }
  }

}


slzhao/mafreport documentation built on April 29, 2023, 8:39 a.m.