R/topGOalgo.R

Defines functions whichAlgorithms whichTests .sigGroups.classic .sigGroups.elim .sigGroups.weight .sigGroups.weight01 .sigGroups.parentChild .sigGroups.LEA .sigGroups.classicGsea .sigGroups.gseaElim

Documented in whichAlgorithms whichTests

######################################################################
## file containing all the algorithms for scoring GO terms.


## High-level function for runing the GO algorithms


.algoComp <- rbind(c(1, 0, 1, 1, 1, 0, 1, 1, 0),
                   c(1, 0, 1, 1, 1, 0, 1, 1, 0),
                   c(1, 0, 0, 0, 0, 0, 0, 0, 0),
                   c(1, 0, 1, 1, 1, 0, 1, 1, 0),
                   c(1, 0, 1, 1, 1, 0, 1, 1, 0),
                   c(1, 0, 0, 0, 0, 0, 0, 0, 0),
                   c(0, 0, 0, 0, 0, 0, 0, 0, 1),
                   c(0, 0, 0, 0, 0, 0, 0, 0, 1))
rownames(.algoComp) <- c("classic", "elim", "weight", "weight01", "lea", "parentchild",'classicgsea','elimgsea')
colnames(.algoComp) <- c("fisher", "z", "ks", "t", "globaltest", "category", "sum", "ks.ties","ks.csw")

.testNames <- c("GOFisherTest" , "GOKSTest", "GOtTest", "GOglobalTest", "GOSumTest", "GOKSTiesTest","GOKSCSWTest")
names(.testNames) <- c("fisher", "ks", "t", "globaltest", "sum", "ks.ties","ks.csw")

.algoClass <- c("classic", "elim", "weight", "weight01", "lea", "parentchild","classicgsea","elimgsea")
names(.algoClass) <- c("classic", "elim", "weight", "weight01", "lea", "parentchild","classicgsea","elimgsea")


## functions to extract the information from the .algoComp
whichAlgorithms <- function() {
  rownames(.algoComp)
}

whichTests <- function() {
  colnames(.algoComp)[colSums(.algoComp) > 0]
}

## function runTest(topONTdata, "elim", "KS", ...)
## ... are parameters for the test statistic

#' @name getSigGroups
#' @rdname getSigGroups
#' @title Interfaces for running the enrichment tests
#' @aliases getSigGroups runTest getSigGroups-methods getSigGroups,topONTdata,classicCount-method getSigGroups,topONTdata,classicScore-method getSigGroups,topONTdata,classicExpr-method getSigGroups,topONTdata,leaCount-method getSigGroups,topONTdata,leaScore-method getSigGroups,topONTdata,leaExpr-method getSigGroups,topONTdata,elimCount-method getSigGroups,topONTdata,elimScore-method getSigGroups,topONTdata,elimExpr-method getSigGroups,topONTdata,weightCount-method getSigGroups,topONTdata,weight01Count-method getSigGroups,topONTdata,weight01Score-method getSigGroups,topONTdata,weight01Expr-method getSigGroups,topONTdata,parentChild-method getSigGroups,topONTdata,pC-method runTest,topONTdata,character,character-method runTest,topONTdata,missing,character-method whichAlgorithms whichTests
#' @description These function are used for dispatching the specific algorithm for a given \code{topONTdata} object and a test statistic.
#' @usage
#' getSigGroups(object, test.stat, ...)
#' runTest(object, algorithm, statistic, ...)
#' whichAlgorithms()
#' whichTests()
#' 
#' @param object An object of class \code{topONTdata} This object contains all data necessary for runnig the test. 
#' @param test.stat An object of class \code{groupStats}. This object defines the test statistic.
#' @param algorithm Character string specifing which algorithm to use.   
#' @param statistic Character string specifing which test to use.    
#' @param \dots Other parameters. In the case of \code{runTest} they are used for defining the test statistic
#' 
#' @details
#'   The \code{runTest} function can be used only with a predefined set of
#'   test statistics and algorithms. The algorithms and the statistical
#'   tests which are accessible via the \code{runTest} function are shown
#'   by the \code{whichAlgorithms()} and \code{whichTests()} functions.
#' 
#'   The runTest function is a warping of the \code{getSigGroups} and the
#'   initialisation of a \code{groupStats} object functions.
#'   
#'   ...
#' 
#' 
#' @return An object of class \code{topONTresult}
#' @author Adrian Alexa
#' 
#' @seealso
#'   \code{\link{topONTdata-class}},
#'   \code{\link{groupStats-class}},
#'   \code{\link{topONTresult-class}}
#' 
#' 
#' 
#' @examples
#' 
#' ## load a sample topONTdata object
#' data(ONTdata)
#' GOdata
#' 
#' ##############################
#' ## getSigGroups interface
#' ##############################
#' 
#' ## define a test statistic
#' test.stat <- new("classicCount", testStatistic = GOFisherTest, name = "Fisher test")
#' ## perform the test
#' resultFis <- getSigGroups(GOdata, test.stat)
#' resultFis
#' 
#' 
#' 
#' ##############################
#' ## runTest interface
#' ##############################
#' 
#' ## Enrichment analysis by using the "classic" method and Fisher's exact test
#' resultFis <- runTest(GOdata, algorithm = "classic", statistic = "fisher")
#' resultFis
#' 
#' ## weight01 is the default algorithm 
#' weight01.fisher <- runTest(GOdata, statistic = "fisher")
#' weight01.fisher
#' 
#' 
#' ## not all combinations are possible!
#' # weight.ks <- runTest(GOdata, algorithm = "weight", statistic = "t")
#' 
#' @keywords methods


if(!isGeneric("runTest")) 
  setGeneric("runTest", function(object, algorithm, statistic, ...) standardGeneric("runTest"))

setMethod("runTest",
          signature(object = "topONTdata", algorithm = "character", statistic = "character"),
          function(object, algorithm, statistic, ...) { ## ... parameters for the test statistic
            #browser()
            statistic <- tolower(statistic)
            algorithm <- tolower(algorithm)
            ## we check if the algorithm support the given test statistic
            if(.algoComp[algorithm, statistic] == 0)
              stop("The algorithm doesn't support the given test statistic")

            algorithm <- .algoClass[algorithm]
            ## strong asumtions! 
            if(algorithm == "parentchild"){
              testType <- "pC"
            }else if(grepl('gsea',ignore.case = T,algorithm)){
              testType <-paste(sub(pattern = 'gsea',replacement = '',ignore.case = T,algorithm),'Gsea',sep = '')
            }else{
              testType <- as.character(findMethodSignatures(.testNames[statistic]))
              testType <- sub("classic", algorithm, testType) 
            }
            
            if(!isClass(testType))
              stop("Error in defining the test statistic Class")
            
            test.stat <- new(testType, testStatistic = get(.testNames[statistic]), name = statistic, ...)

            return(getSigGroups(object, test.stat))
          })

##resultKS <- runTest(GOdata, algorithm = "classic", statistic = "KS")

## dispatch method for weight01 (or topGO) algorithm - the default algorithm
setMethod("runTest",
          signature(object = "topONTdata", algorithm = "missing", statistic = "character"),
          function(object, statistic, ...) { ## ... parameters for the test statistic
            return(runTest(object, "weight01", statistic, ...))
          })



######################################################################
######################################################################
## This function is use for dispatching each algorithm
## probably more checking could be done in the end!

if(!isGeneric("getSigGroups")) 
  setGeneric("getSigGroups", function(object, test.stat, ...) standardGeneric("getSigGroups"))


########################## CLASSIC algorithm ##########################
## dispatch method for classic algorithm
setMethod("getSigGroups",
          signature(object = "topONTdata", test.stat = "classicCount"),
          function(object, test.stat) { ## ... parameters for each algorithm
            
            ## update the test.stat object
            allMembers(test.stat) <- genes(object)
            sigMembers(test.stat) <- sigGenes(object)

            ## first take aside nodes that don't have any sig members
            x <- termStat(object)
            index <- x$Significant == 0
            not.sigTerms <- rownames(x)[index]
            .sigTerms <- rownames(x)[!index]

            cat("\n\t\t\t -- Classic Algorithm -- \n")
            cat(paste("\n\t\t the algorithm is scoring ", length(.sigTerms), " nontrivial nodes\n", sep =""))
            cat("\t\t parameters: \n")
            cat("\t\t\t test statistic: ", Name(test.stat), "\n")

            ## check if there is at least one nontrivial GO term !!!
            if(length(.sigTerms) > 0) {
              ## apply the algorithm
              algoRes <- .sigGroups.classic(genesInTerm(object, .sigTerms), test.stat)
            } else {
              algoRes <- numeric()
              warning("No enrichment can pe performed - there are no feasible terms!")
            }
            
            aux <- rep(1, length(not.sigTerms))
            names(aux) <- not.sigTerms
            algoRes <- c(algoRes, aux)
  
            ## STORE THE RESULTS
            .whichAlgorithm <- "classic"
            attr(.whichAlgorithm, "testClass") <- as.character(class(test.stat))
            return(new("topONTresult",
                       description = paste(description(object), "\nOntology:", ontology(object), sep = " "),
                       score = algoRes, testName = Name(test.stat),
                       algorithm = .whichAlgorithm,
                       geneData = c(.getGeneData(object), SigTerms = length(.sigTerms))))
          })

setMethod("getSigGroups",
          signature(object = "topONTdata", test.stat = "classicScore"),
          function(object, test.stat) {
            
            ## update the test.stat object if there is the case
            if(length(allScore(test.stat)) == 0) {
              allMembers(test.stat) <- genes(object)
              score(test.stat) <- geneScore(object, use.names = TRUE)
            }
            
            GOlist <- genesInTerm(object)
            cat("\n\t\t\t -- Classic Algorithm -- \n")
            cat(paste("\n\t\t the algorithm is scoring ", length(GOlist), " nontrivial nodes\n", sep =""))
            cat("\t\t parameters: \n")
            cat("\t\t\t test statistic: ", Name(test.stat), "\n")
            cat("\t\t\t score order: ", ifelse(scoreOrder(test.stat), "decreasing", "increasing"), "\n")
            
            algoRes <- .sigGroups.classic(GOlist, test.stat)
            
            ## STORE THE RESULTS
            .whichAlgorithm <- "classic"
            attr(.whichAlgorithm, "testClass") <- as.character(class(test.stat))
            return(new("topONTresult",
                       description = paste(description(object), "\nOntology:", ontology(object), sep = " "),
                       score = algoRes, testName = Name(test.stat),
                       algorithm = .whichAlgorithm,
                       geneData = c(.getGeneData(object), SigTerms = length(GOlist))))
          })


setMethod("getSigGroups",
          signature(object = "topONTdata", test.stat = "classicExpr"),
          function(object, test.stat) {

            if(length(allMembers(test.stat)) == 0)
              stop("In function getSigGroups: no expression data found")

            GOlist <- genesInTerm(object)
            cat("\n\t\t\t -- Classic Algorithm -- \n")
            cat(paste("\n\t\t the algorithm is scoring ", length(GOlist), " nontrivial nodes\n", sep =""))
            cat("\t\t parameters: \n")
            cat("\t\t\t test statistic: ", Name(test.stat), "\n")
            
            algoRes <- .sigGroups.classic(GOlist, test.stat)

            .whichAlgorithm <- "classic"
            attr(.whichAlgorithm, "testClass") <- as.character(class(test.stat))
            return(new("topONTresult",
                       description = paste(description(object), "\nOntology:", ontology(object), sep = " "),
                       score = algoRes, testName = Name(test.stat),
                       algorithm = .whichAlgorithm,
                       geneData = c(.getGeneData(object), SigTerms = length(GOlist))))
          })






########################## ELIM algorithm ##########################
## dispatch method for elim algorithm
## to set the test statistic
## test.stat <- new("elimCount", testStatistic = GOFisherTest, cutOff = 0.05,  name = "Fisher test")
setMethod("getSigGroups",
          signature(object = "topONTdata", test.stat = "elimCount"),
          function(object, test.stat, ...) { ## ... parameters for each algorithm
            
            ## update the test.stat object
            allMembers(test.stat) <- genes(object)
            sigMembers(test.stat) <- sigGenes(object)
            
            ## first take aside nodes that don't have any sig members
            x <- termStat(object)
            index <- x$Significant == 0
            not.sigTerms <- rownames(x)[index]
            .sigTerms <- rownames(x)[!index]

            cat("\n\t\t\t -- Elim Algorithm -- \n")
            cat(paste("\n\t\t the algorithm is scoring ", length(.sigTerms), " nontrivial nodes\n", sep =""))
            cat("\t\t parameters: \n")
            cat("\t\t\t test statistic: ", Name(test.stat), "\n")
            cat("\t\t\t cutOff: ", cutOff(test.stat), "\n")

            ## check if there is at least one nontrivial GO term !!!
            if(length(.sigTerms) > 0) {
              ## restrict the graph
              g <- subGraph(.sigTerms, graph(object))
              ## apply the algorithm
              algoRes <- .sigGroups.elim(g, test.stat)
            } else {
              algoRes <- numeric()
              warning("No enrichment can pe performed - there are no feasible terms!")
            }
              
            aux <- rep(1, length(not.sigTerms))
            names(aux) <- not.sigTerms
            algoRes <- c(algoRes, aux)
  
            .whichAlgorithm <- "elim"
            attr(.whichAlgorithm, "testClass") <- as.character(class(test.stat))
            return(new("topONTresult",
                       description = paste(description(object), "\nOntology:", ontology(object), sep = " "),
                       score = algoRes,
                       testName = paste(Name(test.stat), cutOff(test.stat), sep = " : "), 
                       algorithm = .whichAlgorithm,
                       geneData = c(.getGeneData(object), SigTerms = length(.sigTerms))))
          })


setMethod("getSigGroups",
          signature(object = "topONTdata", test.stat = "elimScore"),
          function(object, test.stat, ...) { ## ... parameters for each algorithm
            #browser()
            ## update the test.stat object if there is the case
            if(length(allScore(test.stat)) == 0) {
              allMembers(test.stat) <- genes(object)
              score(test.stat) <- geneScore(object, use.names = TRUE)
            }

            GOlist <- usedGO(object)
            cat("\n\t\t\t -- Elim Algorithm -- \n")
            cat(paste("\n\t\t the algorithm is scoring ", length(GOlist), " nontrivial nodes\n", sep =""))
            cat("\t\t parameters: \n")
            cat("\t\t\t test statistic: ", Name(test.stat), "\n")
            cat("\t\t\t cutOff: ", cutOff(test.stat), "\n")
            cat("\t\t\t score order: ", ifelse(scoreOrder(test.stat), "decreasing", "increasing"), "\n")
            
            ## apply the algorithm
            algoRes <- .sigGroups.elim(graph(object), test.stat)
            
            .whichAlgorithm <- "elim"
            attr(.whichAlgorithm, "testClass") <- as.character(class(test.stat))
            return(new("topONTresult",
                       description = paste(description(object), "\nOntology:", ontology(object), sep = " "),
                       score = algoRes,
                       testName = paste(Name(test.stat), cutOff(test.stat), sep = " : "), 
                       algorithm = .whichAlgorithm,
                       geneData = c(.getGeneData(object), SigTerms = length(GOlist))))

          })


setMethod("getSigGroups",
          signature(object = "topONTdata", test.stat = "elimExpr"),
          function(object, test.stat, ...) { ## ... parameters for each algorithm
            
            ## update the test.stat object if there is the case
            if(length(allMembers(test.stat)) == 0) 
              stop("No expression data found")
            
            GOlist <- usedGO(object)
            cat("\n\t\t\t -- Elim Algorithm -- \n")
            cat(paste("\n\t\t the algorithm is scoring ", length(GOlist), " nontrivial nodes\n", sep =""))
            cat("\t\t parameters: \n")
            cat("\t\t\t test statistic: ", Name(test.stat), "\n")
            cat("\t\t\t cutOff: ", cutOff(test.stat), "\n")
            
            ## apply the algorithm
            algoRes <- .sigGroups.elim(graph(object), test.stat)
            
            .whichAlgorithm <- "elim"
            attr(.whichAlgorithm, "testClass") <- as.character(class(test.stat))
            return(new("topONTresult",
                       description = paste(description(object), "\nOntology:", ontology(object), sep = " "),
                       score = algoRes,
                       testName = paste(Name(test.stat), cutOff(test.stat), sep = " : "), 
                       algorithm = .whichAlgorithm,
                       geneData = c(.getGeneData(object), SigTerms = length(GOlist))))
          })




########################## weight01(topGO) algorithm ##########################
## dispatch method for weight01 algorithm
## to set the test statistic
## test.stat <- new("weight01Count", testStatistic = GOFisherTest, name = "Fisher test")
setMethod("getSigGroups",
          signature(object = "topONTdata", test.stat = "weight01Count"),
          function(object, test.stat, ...) { ## ... parameters for each algorithm
            
            ## update the test.stat object
            allMembers(test.stat) <- genes(object)
            sigMembers(test.stat) <- sigGenes(object)
            
            ## first take aside nodes that don't have any sig members
            x <- termStat(object)
            index <- x$Significant == 0
            not.sigTerms <- rownames(x)[index]
            .sigTerms <- rownames(x)[!index]

            cat("\n\t\t\t -- Weight01 Algorithm -- \n")
            cat(paste("\n\t\t the algorithm is scoring ", length(.sigTerms), " nontrivial nodes\n", sep =""))
            cat("\t\t parameters: \n")
            cat("\t\t\t test statistic: ", Name(test.stat), "\n")

            ## check if there is at least one nontrivial GO term !!!
            if(length(.sigTerms) > 0) {
              ## restrict the graph
              g <- subGraph(.sigTerms, graph(object))
              ## apply the algorithm
              algoRes <- .sigGroups.weight01(g, test.stat)
            } else {
              algoRes <- numeric()
              warning("No enrichment can pe performed - there are no feasible terms!")
            }
              
            aux <- rep(1, length(not.sigTerms))
            names(aux) <- not.sigTerms
            algoRes <- c(algoRes, aux)
  
            .whichAlgorithm <- "weight01"
            attr(.whichAlgorithm, "testClass") <- as.character(class(test.stat))
            return(new("topONTresult",
                       description = paste(description(object), "\nOntology:", ontology(object), sep = " "),
                       score = algoRes, testName = Name(test.stat),
                       algorithm = .whichAlgorithm,
                       geneData = c(.getGeneData(object), SigTerms = length(.sigTerms))))

          })


setMethod("getSigGroups",
          signature(object = "topONTdata", test.stat = "weight01Score"),
          function(object, test.stat, ...) { ## ... parameters for each algorithm

            ## update the test.stat object if there is the case
            if(length(allScore(test.stat)) == 0) {
              allMembers(test.stat) <- genes(object)
              score(test.stat) <- geneScore(object, use.names = TRUE)
            }

            GOlist <- usedGO(object)
            cat("\n\t\t\t -- Weight01 Algorithm -- \n")
            cat(paste("\n\t\t the algorithm is scoring ", length(GOlist), " nontrivial nodes\n", sep =""))
            cat("\t\t parameters: \n")
            cat("\t\t\t test statistic: ", Name(test.stat), "\n")
            cat("\t\t\t score order: ", ifelse(scoreOrder(test.stat), "decreasing", "increasing"), "\n")
            
            ## apply the algorithm
            algoRes <- .sigGroups.weight01(graph(object), test.stat)
            
            .whichAlgorithm <- "weight01"
            attr(.whichAlgorithm, "testClass") <- as.character(class(test.stat))
            return(new("topONTresult",
                       description = paste(description(object), "\nOntology:", ontology(object), sep = " "),
                       score = algoRes, testName = Name(test.stat),
                       algorithm = .whichAlgorithm,
                       geneData = c(.getGeneData(object), SigTerms = length(GOlist))))
          })


setMethod("getSigGroups",
          signature(object = "topONTdata", test.stat = "weight01Expr"),
          function(object, test.stat, ...) { ## ... parameters for each algorithm
            
            ## update the test.stat object if there is the case
            if(length(allMembers(test.stat)) == 0) 
              stop("No expression data found")
            
            GOlist <- usedGO(object)
            cat("\n\t\t\t -- Weight01 Algorithm -- \n")
            cat(paste("\n\t\t the algorithm is scoring ", length(GOlist), " nontrivial nodes\n", sep =""))
            cat("\t\t parameters: \n")
            cat("\t\t\t test statistic: ", Name(test.stat), "\n")
            
            ## apply the algorithm
            algoRes <- .sigGroups.weight01(graph(object), test.stat)
            
            .whichAlgorithm <- "weight01"
            attr(.whichAlgorithm, "testClass") <- as.character(class(test.stat))
            return(new("topONTresult",
                       description = paste(description(object), "\nOntology:", ontology(object), sep = " "),
                       score = algoRes, testName = Name(test.stat),
                       algorithm = .whichAlgorithm,
                       geneData = c(.getGeneData(object), SigTerms = length(GOlist))))
          })


########################## WEIGHT algorithm ##########################
## dispatch method for the weight algorithm
## to set the test statistic
## test.stat <- new("weightCount", testStatistic = GOFisherTest, name = "Fisher test", ...)
setMethod("getSigGroups",
          signature(object = "topONTdata", test.stat = "weightCount"),
          function(object, test.stat, ...) { ## ... parameters for each algorithm
            
            ## update the test.stat object
            allMembers(test.stat) <- genes(object)
            sigMembers(test.stat) <- sigGenes(object)
            
            ## first take aside nodes that don't have any sig members
            x <- termStat(object)
            index <- x$Significant == 0
            not.sigTerms <- rownames(x)[index]
            .sigTerms <- rownames(x)[!index]
            
            cat("\n\t\t\t -- Weight Algorithm -- \n")
            cat(paste("\n\t\t The algorithm is scoring ", length(.sigTerms), " nontrivial nodes\n", sep =""))
            cat("\t\t parameters: \n")
            cat("\t\t\t test statistic: ", Name(test.stat), "\n")
            
            ## check if there is at least one nontrivial GO term !!!
            if(length(.sigTerms) > 0) {
              ## restrict the graph
              g <- subGraph(.sigTerms, graph(object))
              ## apply the algorithm
              algoRes <- .sigGroups.weight(g, test.stat)
            } else {
              algoRes <- numeric()
              warning("No enrichment can pe performed - there are no feasible terms!")
            }
            
            aux <- rep(1, length(not.sigTerms))
            names(aux) <- not.sigTerms
            algoRes <- c(algoRes, aux)

            .whichAlgorithm <- "weight"
            attr(.whichAlgorithm, "testClass") <- as.character(class(test.stat))
            return(new("topONTresult",
                       description = paste(description(object), "\nOntology:", ontology(object), sep = " "),
                       score = algoRes, testName = Name(test.stat),
                       algorithm = .whichAlgorithm,
                       geneData = c(.getGeneData(object), SigTerms = length(.sigTerms))))
          })



########################## Parent-Child algorithm ##########################
## dispatch method for parent-child algorithm
##
## to set the test statistic
## test.stat <- new("pC", testStatistic = GOFisherTest, joinFun = "intersect", name = "Fisher test")
setMethod("getSigGroups",
          signature(object = "topONTdata", test.stat = "pC"),
          function(object, test.stat, ...) { ## ... parameters for each algorithm
            
            ## first take aside nodes that don't have any sig members
            x <- termStat(object)
            index <- x$Significant == 0
            not.sigTerms <- rownames(x)[index]
            .sigTerms <- rownames(x)[!index]

            cat("\n\t\t\t -- Parent-Child Algorithm -- \n")
            cat(paste("\n\t\t the algorithm is scoring ", length(.sigTerms), " nontrivial nodes\n", sep =""))
            cat("\t\t parameters: \n")
            cat("\t\t\t test statistic: ", Name(test.stat), "\n")

            ## check if there is at least one nontrivial GO term !!!
            if(length(.sigTerms) > 0) {
              ## restrict the graph
              g <- subGraph(.sigTerms, graph(object))
              ## apply the algorithm
              algoRes <- .sigGroups.parentChild(g, test.stat, sigGenes(object))
            } else {
              algoRes <- numeric()
              warning("No enrichment can pe performed - there are no feasible terms!")
            }
                        
            aux <- rep(1, length(not.sigTerms))
            names(aux) <- not.sigTerms
            algoRes <- c(algoRes, aux)
  
            .whichAlgorithm <- "parentchild"
            attr(.whichAlgorithm, "testClass") <- as.character(class(test.stat))
            return(new("topONTresult",
                       description = paste(description(object), "\nOntology:", ontology(object), sep = " "),
                       score = algoRes, testName = Name(test.stat),
                       algorithm = .whichAlgorithm,
                       geneData = c(.getGeneData(object), SigTerms = length(.sigTerms))))
          })


setMethod("getSigGroups",
          signature(object = "topONTdata", test.stat = "parentChild"),
          function(object, test.stat, ...) { ## ... parameters for each algorithm
            
            ## first take aside nodes that don't have any sig members
            x <- termStat(object)
            index <- x$Significant == 0
            not.sigTerms <- rownames(x)[index]
            .sigTerms <- rownames(x)[!index]

            cat("\n\t\t\t -- Parent-Child Algorithm -- \n")
            cat(paste("\n\t\t the algorithm is scoring ", length(.sigTerms), " nontrivial nodes\n", sep =""))
            cat("\t\t parameters: \n")
            cat("\t\t\t test statistic: ", Name(test.stat), "\n")
            cat("\t\t\t join function: ", test.stat@joinFun, "\n")

            ## check if there is at least one nontrivial GO term !!!
            if(length(.sigTerms) > 0) {
              ## restrict the graph
              g <- subGraph(.sigTerms, graph(object))
              ## apply the algorithm
              algoRes <- .sigGroups.parentChild(g, test.stat, sigGenes(object))
            } else {
              algoRes <- numeric()
              warning("No enrichment can pe performed - there are no feasible terms!")
            }
            
            aux <- rep(1, length(not.sigTerms))
            names(aux) <- not.sigTerms
            algoRes <- c(algoRes, aux)

            .whichAlgorithm <- "parentchild"
            attr(.whichAlgorithm, "testClass") <- as.character(class(test.stat))
            return(new("topONTresult",
                       description = paste(description(object), "\nOntology:", ontology(object), sep = " "),
                       score = algoRes, testName = Name(test.stat),
                       algorithm = .whichAlgorithm,
                       geneData = c(.getGeneData(object), SigTerms = length(.sigTerms))))
          })




########################## LEA algorithm ##########################
## dispatch method for the LEA algorithm
## to set the test statistic
## test.stat <- new("leaCount", testStatistic = GOFisherTest, name = "Fisher test")
setMethod("getSigGroups",
          signature(object = "topONTdata", test.stat = "leaCount"),
          function(object, test.stat, ...) { ## ... parameters for each algorithm
            
            ## update the test.stat object
            allMembers(test.stat) <- genes(object)
            sigMembers(test.stat) <- sigGenes(object)
            
            ## first take aside nodes that don't have any sig members
            x <- termStat(object)
            index <- x$Significant == 0
            not.sigTerms <- rownames(x)[index]
            .sigTerms <- rownames(x)[!index]
            
            cat("\n\t\t\t -- LEA Algorithm -- \n")
            cat(paste("\n\t\t the algorithm is scoring ", length(.sigTerms), " nontrivial nodes\n", sep =""))
            cat("\t\t parameters: \n")
            cat("\t\t\t test statistic: ", Name(test.stat), "\n")
            cat("\t\t\t neighborhood depth: ", depth(test.stat), "\n")

            ## check if there is at least one nontrivial GO term !!!
            if(length(.sigTerms) > 0) {
              ## restrict the graph
              g <- subGraph(.sigTerms, graph(object))
              ## apply the algorithm
              algoRes <- .sigGroups.LEA(g, test.stat)
            } else {
              algoRes <- numeric()
              warning("No enrichment can pe performed - there are no feasible terms!")
            }
              
            aux <- rep(1, length(not.sigTerms))
            names(aux) <- not.sigTerms
            algoRes <- c(algoRes, aux)
  
            .whichAlgorithm <- "LEA"
            attr(.whichAlgorithm, "testClass") <- as.character(class(test.stat))
            return(new("topONTresult",
                       description = paste(description(object),
                         "\nOntology:", ontology(object),
                         "\nneighborhood depth:", depth(test.stat), sep = " "),
                       score = algoRes,
                       testName = Name(test.stat), 
                       algorithm = .whichAlgorithm,
                       geneData = c(.getGeneData(object), SigTerms = length(.sigTerms))))
          })


setMethod("getSigGroups",
          signature(object = "topONTdata", test.stat = "leaScore"),
          function(object, test.stat, ...) { ## ... parameters for each algorithm

            ## update the test.stat object if there is the case
            if(length(allScore(test.stat)) == 0) {
              allMembers(test.stat) <- genes(object)
              score(test.stat) <- geneScore(object, use.names = TRUE)
            }

            GOlist <- usedGO(object)
            cat("\n\t\t\t -- LEA Algorithm -- \n")
            cat(paste("\n\t\t the algorithm is scoring ", length(GOlist), " nontrivial nodes\n", sep =""))
            cat("\t\t parameters: \n")
            cat("\t\t\t test statistic: ", Name(test.stat), "\n")
            cat("\t\t\t neighborhood depth: ", depth(test.stat), "\n")
            cat("\t\t\t score order: ", ifelse(scoreOrder(test.stat), "decreasing", "increasing"), "\n")
            
            ## apply the algorithm
            algoRes <- .sigGroups.LEA(graph(object), test.stat)
            
            .whichAlgorithm <- "LEA"
            attr(.whichAlgorithm, "testClass") <- as.character(class(test.stat))
            return(new("topONTresult",
                       description = paste(description(object),
                         "\nOntology:", ontology(object),
                         "\nneighborhood depth:", depth(test.stat), sep = " "),
                       score = algoRes,
                       testName = Name(test.stat), 
                       algorithm = .whichAlgorithm,
                       geneData = c(.getGeneData(object), SigTerms = length(GOlist))))

          })


setMethod("getSigGroups",
          signature(object = "topONTdata", test.stat = "leaExpr"),
          function(object, test.stat, ...) { ## ... parameters for each algorithm
            
            ## update the test.stat object if there is the case
            if(length(allMembers(test.stat)) == 0) 
              stop("No expression data found")
            
            GOlist <- usedGO(object)
            cat("\n\t\t\t -- LEA Algorithm -- \n")
            cat(paste("\n\t\t the algorithm is scoring ", length(GOlist), " nontrivial nodes\n", sep =""))
            cat("\t\t parameters: \n")
            cat("\t\t\t test statistic: ", Name(test.stat), "\n")
            cat("\t\t\t neighborhood depth: ", depth(test.stat), "\n")
            
            ## apply the algorithm
            algoRes <- .sigGroups.LEA(graph(object), test.stat)
            
            .whichAlgorithm <- "LEA"
            attr(.whichAlgorithm, "testClass") <- as.character(class(test.stat))
            return(new("topONTresult",
                       description = paste(description(object),
                         "\nOntology:", ontology(object),
                         "\nneighborhood depth:", depth(test.stat), sep = " "),
                       score = algoRes,
                       testName = Name(test.stat), 
                       algorithm = .whichAlgorithm,
                       geneData = c(.getGeneData(object), SigTerms = length(GOlist))))
          })



#############################################################################################################
#############################################################################################################


########################## CLASSIC algorithm ##########################
.sigGroups.classic <- function(GOlist, test.stat) {
  
  sigList <- sapply(GOlist,
                    function(term) {
                      ## we can't geve names .......
                      ## Name(gi)
                      members(test.stat) <- term
                      return(runTest(test.stat))
                    })
  
  return(sigList)
}


########################## ELIM algorithm ##########################
.sigGroups.elim <- function(goDAG, test.stat) {

  goDAG.r2l <- reverseArch(goDAG)
  nodeLevel <- buildLevels(goDAG.r2l, leafs2root = FALSE)
  levelsLookUp <- nodeLevel$level2nodes
  
  ##adjs.cutOff <- cutOff(test.stat) / numNodes(goDAG)
  adjs.cutOff <- cutOff(test.stat)
  #cat(paste("\n\t\t Parameters:\t cutOff = ", adjs.cutOff, "\n", sep =""))
  
  ## we use a lookup table to search for nodes that have were significant
  sigNodes.LookUP <- new.env(hash = TRUE, parent = emptyenv())

  ## hash table for the genes that we eliminate for each node
  ## we store the genes that we want to eliminate
  elimGenes.LookUP <- new.env(hash = TRUE, parent = emptyenv())
  
  ## hash table to store the result
  sigList <- new.env(hash = TRUE, parent = emptyenv())


  for(i in nodeLevel$noOfLevels:1) {
    currNodes.names <- get(as.character(i), envir = levelsLookUp, mode = 'character')
    ##children.currNodes <- adj(goDAG.r2l, currNodes.names)
    currAnno <- .genesInNode(goDAG, currNodes.names)

    .num.elimGenes <- length(unique(unlist(as.list(elimGenes.LookUP))))
    cat(paste("\n\t Level ", i, ":\t", length(currNodes.names),
              " nodes to be scored\t(", .num.elimGenes, " eliminated genes)\n", sep =""))
    
    for(termID in currNodes.names) {
      ## just in case the test.stat is modified by some methods
      group.test <- updateGroup(test.stat, name = termID, members = currAnno[[termID]])
      
      ## remove the genes from the term, if there are
      if(!exists(termID, envir = elimGenes.LookUP, mode = "character"))
        elim(group.test) <- character(0)
      else
        elim(group.test) <- get(termID, envir = elimGenes.LookUP, mode = 'character')

      ## run the test and store the result (the p-value)
      termSig <- runTest(group.test)
      assign(termID, termSig, envir = sigList)
      
      ## if we have a significant GO term 
      if(termSig <= adjs.cutOff) {
        ## we mark it
        assign(termID, termSig, envir = sigNodes.LookUP)

        ## we set the genes that we want to eliminate from the all ancestors
        elimGenesID <- currAnno[[termID]]

        ## we look for the ancestors
        ancestors <- setdiff(nodesInInducedGraph(goDAG, termID), termID)
        
        oldElimGenesID <- mget(ancestors, envir = elimGenes.LookUP,
                               mode = 'character', ifnotfound = list(NA))

        ## add the new genesID to the ancestors
        newElimGenesID <- lapply(oldElimGenesID,
                                 function(termGenes) {
                                   aux <- union(termGenes, elimGenesID)
                                   return(aux[!is.na(aux)])
                                 })

        
        ## update the lookUp table
        if(length(newElimGenesID) > 0)
          multiassign(names(newElimGenesID), newElimGenesID, envir = elimGenes.LookUP)
      }
    }
  }
  
  return(unlist(as.list(sigList)))
}


########################## WEIGHT algorithm ##########################
.sigGroups.weight <- function(goDAG, test.stat) {

  ## SOME FUNCTIONS THAT WE NEED
  
  ## make a join between x and y unsing the names as key
  ## combFun should be a vectorial function
  ## we can use functions like: pmin, *, pmax, ...
  x.comb.y <- function(x, y, combFun = get('*')) {
    
    ## check if we weight the same set: always the case when we
    ## set the genes in downNodes.LookUP
    nx <- names(x)
    ny <- names(y)
    if((length(nx) == length(ny)) && all(nx == ny))
      return(combFun(x, y))
    
    allNames <- union(nx, ny)
    commonNames <- intersect(nx, ny)
    
    z <- numeric(length(allNames))
    names(z) <- allNames
    
    z[nx] <- x
    z[ny] <- y
    z[commonNames] <- combFun(x[commonNames], y[commonNames])
    
    return(z)
  }


  setGeneWeights <- function(termID, geneWeights, LookUP.table) {
    oldWeigths <- get(termID, envir = LookUP.table)
    assign(termID, x.comb.y(geneWeights, oldWeigths), envir = LookUP.table)
  }

  
  ## this is a recursive function in termChildren
  computeTermSig <- function(group.test, termChildren) {

    termID <- Name(group.test)
    ## look to see if there are some genes that must be weighted
    Weights(group.test) <- get(termID, envir = upNodes.LookUP)    

    ## run the test (the p-value)
    termSig <- runTest(group.test)

    ## if we came from some recursive call
    if(length(termChildren) == 0) {
      assign(termID, termSig, envir = sigList)
      return()
    }
    
    ## since we konw that length(termChildren) > 0
    w <- pW <- numeric(length(termChildren))
    names(w) <- names(pW) <- termChildren
    
    for(child in termChildren) {
      ## look for the child significance
      ## we should have the child significance in sigList
      if(!exists(child, envir = sigList, inherits = FALSE))
        stop('the child of node u, should had been processed')
      childSig <- get(child, envir = sigList)
      
      w[child] <- getSigRatio(group.test, a = childSig, b = termSig)
      pW[child] <- penalise(group.test, a = childSig, b = termSig)
    }
    
    ## if w[child] > 1 than that child is more significant
    sig.termChildren <- names(w[w > 1])
    
    ## CASE 1:  if we don't have significant children
    ##          in this case we down-weight the genes in each child
    if(length(sig.termChildren) == 0) {
      for(child in termChildren) {
        ## the genes in this child
        gene.child <- currAnno[[child]]
        
        ## weights for the genes in this child
        gene.childWeights <- rep(pW[child] * w[child], length(gene.child))
        names(gene.childWeights) <- gene.child

        ## put the gene weights in the downNodes.LookUP hash-table
        if(!exists(child, envir = downNodes.LookUP, inherits = FALSE))
          gCW <- gene.childWeights
        else {
          ## if for child exists some genes that are weighted
          ## they are 'combine' using "pmin" function
          oldWeigths <- get(child, envir = downNodes.LookUP)
          gCW <- x.comb.y(gene.childWeights, oldWeigths, combFun = pmin)
        }

        assign(child, gCW, envir = downNodes.LookUP)
      }
      ## since we have only less significant genes that means
      ## that we don't recompute the significance for node 'u'
      assign(termID, termSig, envir = sigList)
      return()
    }
    
    ## CASE 2:   'child' is more significant that 'u'
    .nn <- members(group.test)
    geneWeights <- numeric(length(.nn)) + 1
    names(geneWeights) <- .nn

    for(child in sig.termChildren) {
      ## the genes in this child
      gene.child <- currAnno[[child]]
      
      gene.childWeights <- rep(pW[child] / w[child], length(gene.child))
      names(gene.childWeights) <- gene.child

      geneWeights <- x.comb.y(gene.childWeights, geneWeights)
    }

    ## we set the new weights only for node 'termID'
    setGeneWeights(termID, geneWeights, upNodes.LookUP)

    if(is.null(.gene.weights))
      assign('.gene.weights', geneWeights, envir = .main.envir)
    else
      assign('.gene.weights', x.comb.y(.gene.weights, geneWeights), envir = .main.envir)
    
    computeTermSig(group.test, setdiff(termChildren, sig.termChildren))
  }
  
  ##----------------------------------------------------------------------

  goDAG.r2l <- reverseArch(goDAG)
  ## get the levels list 
  nodeLevel <- buildLevels(goDAG.r2l, leafs2root = FALSE)
  levelsLookUp <- nodeLevel$level2nodes

  ## the annotations of the genes to GO
  currAnno <- .genesInNode(goDAG, nodes(goDAG))

  ## we use a lookup table to search for nodes that we have weighted the genes 
  aux <- lapply(currAnno,
                function(x) {
                  gw <- numeric(length(x)) + 1
                  names(gw) <- x
                  return(gw)
                })
  upNodes.LookUP <- list2env(aux, new.env(hash = TRUE, parent = emptyenv()))
  downNodes.LookUP <-  new.env(hash = TRUE, parent = emptyenv())

  ## we also use a hash table to store the result
  sigList <- new.env(hash = TRUE, parent = emptyenv())
  
  ## we will use frequently the children
  allChildren <- adj(goDAG.r2l, nodes(goDAG))
  

  ## START
  .main.envir <- environment()
  
  for(i in nodeLevel$noOfLevels:1) {
    currNodes.names <- get(as.character(i), envir = levelsLookUp, mode = 'character')

    ## some messages
    cat(paste("\n\t Level ", i, ":\t", length(currNodes.names), " nodes to be scored.\n", sep =""))

    for(termID in currNodes.names) {
      ## take the IDs of the gene in the current node
      ## we store the termID in the "name" slot of the test.stat
      group.test <- updateGroup(test.stat, name = termID, members = currAnno[[termID]])

      .gene.weights <- NULL
      ## look at all children of node u and compute the significance
      computeTermSig(group.test, allChildren[[termID]])

      if(!is.null(.gene.weights)) {
        ## get the upper induced subgraph from 'u' (including 'u')
        nodesUpSubgraph <- nodesInInducedGraph(goDAG, termID)
        ## set the weights for all the nodes
        
        lapply(setdiff(nodesUpSubgraph, termID), setGeneWeights, .gene.weights, upNodes.LookUP)
      }
    }
  }

  
  ## recompute the significance of the nodes in the downNodes.LookUP
  for(termID in ls(downNodes.LookUP)) {
    newWeights <- x.comb.y(get(termID, envir = upNodes.LookUP),
                           get(termID, envir = downNodes.LookUP))
    
    termSig <- runTest(updateGroup(test.stat, name = termID,
                                   members = currAnno[[termID]], weights = newWeights))
    
    assign(termID, termSig, envir = sigList)
  }
  
  return(unlist(as.list(sigList)))
}






########################## WEIGHT algorithm ##########################
.sigGroups.weight01 <- function(goDAG, test.stat) {

  ## SOME FUNCTIONS THAT WE NEED
  
  removeGenes <- function(termID, genesID, LookUP.table) {
    if(!exists(termID, envir = LookUP.table, mode = "character"))
      assign(termID, genesID, envir = LookUP.table)
    else
      assign(termID, union(genesID, get(termID, envir = LookUP.table)), envir = LookUP.table)
  }
  
  
  ## this is a recursive function in termChildren
  computeTermSig <- function(group.test, termChildren) {

    termID <- Name(group.test)

    ## remove the genes from the term, if there are
    if(!exists(termID, envir = upNodes.LookUP, mode = "character"))
      elim(group.test) <- character(0)
    else
      elim(group.test) <- get(termID, envir = upNodes.LookUP, mode = 'character')

    ## run the test (the p-value)
    termSig <- runTest(group.test)

    ## if we came from some recursive call
    if(length(termChildren) == 0) {
      assign(termID, termSig, envir = sigList)
      return()
    }
    
    ## since we konw that length(termChildren) > 0
    w <- numeric(length(termChildren))
    names(w) <- termChildren
    
    for(child in termChildren) {
      ## look for the child significance
      if(!exists(child, envir = sigList, inherits = FALSE))
        stop('the child of node u, should had been processed')
      ## we should have the child significance in sigList
      childSig <- get(child, envir = sigList)
      
      w[child] <- .sigRatio.01(a = childSig, b = termSig)
    }
    
    ## if w[child] > 1 than that child is more significant
    sig.termChildren <- names(w[w > 1])
    
    ## CASE 1:  if we don't have significant children 
    if(length(sig.termChildren) == 0) {
      ## since we have only less significant nodes that means
      ## that we don't recompute the significance for node 'u'
      assign(termID, termSig, envir = sigList)
      return()
    }
    
    ## CASE 2:   'child' is more significant that 'u'
    geneToRemove <- unique(unlist(currAnno[sig.termChildren]))

    ## we remove the genes only for node 'termID'
    removeGenes(termID, geneToRemove, upNodes.LookUP)

    if(is.null(.genesToRemove))
      assign('.genesToRemove', geneToRemove, envir = .main.envir)
    else
      assign('.genesToRemove', union(.genesToRemove, geneToRemove), envir = .main.envir)
    
    computeTermSig(group.test, setdiff(termChildren, sig.termChildren))
  }
  
  ##----------------------------------------------------------------------

  goDAG.r2l <- reverseArch(goDAG)
  ## get the levels list 
  nodeLevel <- buildLevels(goDAG.r2l, leafs2root = FALSE)
  levelsLookUp <- nodeLevel$level2nodes

  ## the annotations of the genes to GO
  currAnno <- .genesInNode(goDAG, nodes(goDAG))

  ## hash table for the genes that we eliminate for each node
  ## we store the genes that we want to eliminate
  upNodes.LookUP <- new.env(hash = TRUE, parent = emptyenv())

  ## we also use a hash table to store the result
  sigList <- new.env(hash = TRUE, parent = emptyenv())
  
  ## we will use frequently the children
  allChildren <- adj(goDAG.r2l, nodes(goDAG))
  

  ## START
  .main.envir <- environment()
  
  for(i in nodeLevel$noOfLevels:1) {
    currNodes.names <- get(as.character(i), envir = levelsLookUp, mode = 'character')

    ## some messages
    .num.elimGenes <- length(unique(unlist(as.list(upNodes.LookUP))))
    cat(paste("\n\t Level ", i, ":\t", length(currNodes.names),
              " nodes to be scored\t(", .num.elimGenes, " eliminated genes)\n", sep =""))
    
    for(termID in currNodes.names) {
      ## take the IDs of the gene in the current node
      ## we store the termID in the "name" slot of the test.stat
      group.test <- updateGroup(test.stat, name = termID, members = currAnno[[termID]])

      .genesToRemove <- NULL
      ## compute the significance of node u by looking at all children 
      computeTermSig(group.test, allChildren[[termID]])

      if(!is.null(.genesToRemove)) {
        ## get the nodes in the upper induced subgraph from 'u' (including 'u')
        ## and eliminate the genes for all these nodes
        lapply(setdiff(nodesInInducedGraph(goDAG, termID), termID),
               removeGenes, .genesToRemove, upNodes.LookUP)
      }
    }
  }
  
  return(unlist(as.list(sigList)))
}





########################## Parent-Child algorithm ##########################
.sigGroups.parentChild <- function(goDAG, test.stat, sigGenes) {

  nodeLevel <- buildLevels(goDAG, leafs2root = TRUE)
  levelsLookUp <- nodeLevel$level2nodes
  
  ## hash table to store the result
  sigList <- new.env(hash = TRUE, parent = emptyenv())

  ## we don't score the root node 
  for(i in nodeLevel$noOfLevels:2) {
    currNodes.names <- get(as.character(i), envir = levelsLookUp, mode = 'character')
    parents.currNodes <- adj(goDAG, currNodes.names)
    currAnno <- .genesInNode(goDAG, currNodes.names)

    cat(paste("\n\t Level ", i, ":\t", length(currNodes.names),
              " nodes to be scored.\n", sep =""))
    
    for(termID in currNodes.names) {
      ## just in case the test.stat is modified by some methods
      parents <- .genesInNode(goDAG, parents.currNodes[[termID]])
      group.test <- updateGroup(test.stat, members = currAnno[[termID]],
                                parents = parents, sigMembers = sigGenes)
      
      ## run the test and store the result (the p-value)
      termSig <- runTest(group.test)
      assign(termID, termSig, envir = sigList)
    }
  }

  ## the root will have p-value 1
  assign(get("1", envir = levelsLookUp, mode = 'character'), 1, envir = sigList)
  
  return(unlist(as.list(sigList)))
}



########################## LEA algorithm ##########################
.sigGroups.LEA <- function(goDAG, test.stat, LEA.class.pref = "lea") {

  allAnno <- .genesInNode(goDAG, nodes(goDAG))

  ## just to be sure there are no random genes in the elim slot
  elim(test.stat) <- character(0)
  group.test <- test.stat
  
  ## get the classic significance
  cat("\n\t Computing classical p-values ...\n")
  class(test.stat) <- sub(LEA.class.pref, "classic", class(test.stat))
  classicPval <- sapply(allAnno,
                        function(term) {
                          members(test.stat) <- term
                          return(runTest(test.stat))
                        })
    
  ## newPval are the corrected p-values, where nedded
  newPval <- classicPval

  ## keep the nodes to be further explored in a queue,
  ## and initiallze the queue with the root node
  rootNode <- getGraphRoot(goDAG)
  bestCounter <- 0 # counts the number of nodes with a better score then its children
 
  ## make the edges point from the root to the leafs
  goDAG <- reverseArch(goDAG)
  ## save the adjacent nodes (the children) into a list since will be faster to access them
  usedNodes <- nodes(goDAG)
  chList <- adj(goDAG, usedNodes)
  usedNodes <- usedNodes[sapply(chList, length) != 0]
  
  cat("\t Computing local p-values ...\n")
  for(cNode in usedNodes) {

    ## get the descendents of degree k of the current node 
    ch <- chList[[cNode]]
    i <- 1
    while(length(ch) > 0 && i < depth(group.test)) {
      ch <- unique(c(ch, unlist(chList[ch], use.names = FALSE)))
      i <- i + 1
    }
      
    ## check if the current node is a leaf -- should not happen though ... 
    if(length(ch) == 0)
      next
    
    ## we know that both cNode and ch are non empty
    ## the only thing left to do is to compute the new score
    optimCh <- ch[classicPval[ch] <= classicPval[cNode]] # the children with a better score then cNode

    ## CASE I: cNode has a better score then all its children
    if(length(optimCh) == 0) {
      bestCounter <- bestCounter + 1
      next
    }

    ################################################################################
    ## CASE II: at least one child has a better score then cNode, namely all optimCh

    ## check if the node has a very bad score (p-value ~ 1)
    ## this will happen seldom, but might remove some computations ....
    if(classicPval[cNode] > .9)
      next
    
    ## from the test statistic
    ## get the genes annotated to the children and set them as genes to be removed from cNode
    test.stat <- updateGroup(group.test, name = cNode, members = allAnno[[cNode]])
    elim(test.stat) <- unique(unlist(allAnno[optimCh], use.names = FALSE))
    
    newPval[cNode] <- max(classicPval[cNode], runTest(test.stat))
    ##newPval[cNode] <- runTest(test.stat)
  }

  cat("\t\t", bestCounter, "local optimal nodes found\n\t\t", sum(classicPval != newPval), "corrected p-values \n")

  return(newPval)
}

########################## GSEA algorithm ##########################
setMethod("getSigGroups",
          signature(object = "topONTdata", test.stat = "classicGsea"),
          function(object, test.stat) {
            #browser()
            ## update the test.stat object if there is the case
            if(length(allScore(test.stat)) == 0) {
              allMembers(test.stat) <- genes(object)
              score(test.stat) <- geneScore(object, use.names = TRUE)
            }
            
            #GOlist <- genesInTerm(object)
            #here
            GOlist<-.genesInNode(graph(object),nodes(graph(object)),score = T)
            ##filter by size
            GOlist<-GOlist[which(test.stat@min.size<=sapply(GOlist,length) & sapply(GOlist,length)<=test.stat@max.size)]
            if(length(GOlist)==0){
              'no gene set found! Please change min.size and max.size.'
            }
            cat("\n\t\t\t -- Classic Algorithm -- \n")
            cat(paste("\n\t\t the algorithm is scoring ", length(GOlist), " nontrivial nodes\n", sep =""))
            cat("\t\t parameters: \n")
            cat("\t\t\t test statistic: ", Name(test.stat), "\n")
            cat("\t\t\t score order: ", ifelse(scoreOrder(test.stat), "decreasing", "increasing"), "\n")
            
            algoRes0 <- .sigGroups.classicGsea(GOlist, test.stat)
            
            report<-plot.result(A=as.matrix(object@exp),rl=algoRes0,O=test.stat@testStatPar$geneRanking,pty =object@pty,output.directory =test.stat@testStatPar$output.directory,
                                topgs=test.stat@testStatPar$topgs,nom.p.val.threshold=test.stat@cutOff)
            saveRDS(report,paste(test.stat@testStatPar$output.directory,'report.rds',sep = ''))
            #browser()
            #####use plot here or outside?
            algoRes <- unlist(sapply(algoRes0,function(x){x['p']},simplify = T))
            names(algoRes) <- names(algoRes0)
            
            ## STORE THE RESULTS
            .whichAlgorithm <- "classicGsea"
            attr(.whichAlgorithm, "testClass") <- as.character(class(test.stat))
            l=list()
            # browser()
            for(i in names(report[-1])){
              l[[i]] = lapply(report[-1][[i]],function(x){x$report})
            }
            # l[[names(report[-1])[1]]]=lapply(report[-1][[1]],function(x){x$report})
            # l[[names(report[-1])[2]]]=lapply(report[-1][[2]],function(x){x$report})
            return(new("topONTresultGSEA",
                       description = paste(description(object), "\nOntology:", ontology(object), sep = " "),
                       score = algoRes, testName = Name(test.stat),
                       algorithm = .whichAlgorithm,
                       geneData = c(.getGeneData(object), SigTerms = length(GOlist)),
                       global.report = report$report,
                       gs.report = l,
                       plots = lapply(report[-1][[i]],function(x){x$plot}),
                       cutOff = test.stat@cutOff
                   ))
          })

.sigGroups.classicGsea <- function(GOlist, test.stat) {
  #browser()
  #GOlist=GOlist[1:2]
  #GOlist<-GOlist['DOID:1441']
  total.gs<-length(GOlist)
  print(paste('total gene set: ',length(GOlist),sep = ''))
  sigList <- sapply(names(GOlist),
                    function(x) {
                      members(test.stat) <- as.character(GOlist[[x]])
                      test.stat@annotation.weight <- as.numeric(names(GOlist[[x]]))
                      names(test.stat@annotation.weight) <-members(test.stat)
                      test.stat@testStatPar$gsname<-x
                      print(paste(which(names(GOlist)==x),'/',total.gs,':',x,sep = ''))
                      return(runTest(test.stat))
                    },simplify = F)
  #browser()
  return(sigList)
}


setMethod("getSigGroups",
          signature(object = "topONTdata", test.stat = "elimGsea"),
          function(object, test.stat, ...) { ## ... parameters for each algorithm
            #browser()
            ## update the test.stat object if there is the case
            if(length(allScore(test.stat)) == 0) {
              allMembers(test.stat) <- genes(object)
              score(test.stat) <- geneScore(object, use.names = TRUE)
            }
            
            GOlist <- usedGO(object)
            
            cat("\n\t\t\t -- Elim Algorithm -- \n")
            cat(paste("\n\t\t the algorithm is scoring ", length(GOlist), " nontrivial nodes\n", sep =""))
            cat("\t\t parameters: \n")
            cat("\t\t\t test statistic: ", Name(test.stat), "\n")
            cat("\t\t\t cutOff: ", cutOff(test.stat), "\n")
            cat("\t\t\t score order: ", ifelse(scoreOrder(test.stat), "decreasing", "increasing"), "\n")
            
            ## apply the algorithm
            algoRes0 <- .sigGroups.gseaElim(graph(object), test.stat)
            #browser()
            ##remove those gs that has no gene in it after the elimination
            algoRes0<-algoRes0[!sapply(algoRes0,function(x){is.na(x$ES.obs)})]

            #browser()
            report<-plot.result(A=as.matrix(object@exp),rl=algoRes0,O=test.stat@testStatPar$geneRanking,pty =object@pty,output.directory =test.stat@testStatPar$output.directory,
                                topgs=test.stat@testStatPar$topgs,nom.p.val.threshold=test.stat@cutOff,doc.string=test.stat@testStatPar$doc.string)
            
            #####use plot here or outside?
            algoRes <- unlist(sapply(algoRes0,function(x){x['p']},simplify = T))
            names(algoRes) <- names(algoRes0)
            #browser()
            ## STORE THE RESULTS
            .whichAlgorithm <- "classicGsea"
            attr(.whichAlgorithm, "testClass") <- as.character(class(test.stat))
            return(new("topONTresultGSEA",
                       description = paste(description(object), "\nOntology:", ontology(object), sep = " "),
                       score = algoRes, testName = Name(test.stat),
                       algorithm = .whichAlgorithm,
                       geneData = c(.getGeneData(object), SigTerms = length(GOlist)),
                       global.report = report$report,
                       gs.report = lapply(report$ALL,function(x){x$report}),
                       plots = lapply(report[-1][[i]],function(x){x$plot}),
                       cutOff = test.stat@cutOff
            ))
            
          })


.sigGroups.gseaElim <- function(goDAG, test.stat) {
  
  goDAG.r2l <- reverseArch(goDAG)
  nodeLevel <- buildLevels(goDAG.r2l, leafs2root = FALSE)
  levelsLookUp <- nodeLevel$level2nodes
  
  ##adjs.cutOff <- cutOff(test.stat) / numNodes(goDAG)
  adjs.cutOff <- cutOff(test.stat)
  #cat(paste("\n\t\t Parameters:\t cutOff = ", adjs.cutOff, "\n", sep =""))
  
  ## we use a lookup table to search for nodes that have were significant
  sigNodes.LookUP <- new.env(hash = TRUE, parent = emptyenv())
  
  ## hash table for the genes that we eliminate for each node
  ## we store the genes that we want to eliminate
  elimGenes.LookUP <- new.env(hash = TRUE, parent = emptyenv())
  
  ## hash table to store the result
  sigList <- new.env(hash = TRUE, parent = emptyenv())
  result<-list()
  #browser()
  for(i in nodeLevel$noOfLevels:1) {
    currNodes.names <- get(as.character(i), envir = levelsLookUp, mode = 'character')
    ##children.currNodes <- adj(goDAG.r2l, currNodes.names)
    currAnno <- .genesInNode(goDAG, currNodes.names,score = T)
    
    .num.elimGenes <- length(unique(unlist(as.list(elimGenes.LookUP))))
    cat(paste("\n\t Level ", i, ":\t", length(currNodes.names),
              " nodes to be scored\t(", .num.elimGenes, " eliminated genes)\n", sep =""))
    
    for(termID in currNodes.names) {
      #browser()
      ## just in case the test.stat is modified by some methods
      test.stat@annotation.weight<-as.numeric(names(currAnno[[termID]]))
      names(test.stat@annotation.weight)<-currAnno[[termID]]
      group.test <- updateGroup(test.stat, name = termID, members = unname(currAnno[[termID]]))
      group.test@testStatPar$gsname<-termID
      #browser()
      ## remove the genes from the term, if there are
      if(!exists(termID, envir = elimGenes.LookUP, mode = "character"))
        elim(group.test) <- character(0)
      else{
        #browser()
        elim(group.test) <- get(termID, envir = elimGenes.LookUP, mode = 'character')
      }
      #browser()
      if(group.test@min.size<=length(group.test@members) & length(group.test@members) <= group.test@max.size){
      ## run the test and store the result (the p-value)
      termSig <- runTest(group.test)
      result[[group.test@name]]<-termSig
      assign(termID, termSig$p, envir = sigList)
      
      ## if we have a significant GO term 
      if(termSig$p < adjs.cutOff) {
        ## we mark it
        assign(termID, termSig$p, envir = sigNodes.LookUP)
        #browser()
        ## we set the genes that we want to eliminate from the all ancestors
        if(group.test@elim.gene.type=='core'){
          ## here we want to only eliminate the 'leading core' gene from gsea
          hits<-which(termSig$GSEA.results$indicator==1)
          
          if(termSig$GSEA.results$ES[1]>=0)
            core<-hits[hits<=termSig$GSEA.results$arg.ES[1]]
          else
            core<-hits[hits>=termSig$GSEA.results$arg.ES[1]]
          
          elimGenesID<-names(group.test@testStatPar$geneRanking$s2n.m[,1][group.test@testStatPar$geneRanking$o.m[,1][core]])
        }else{
          ##elim all gene 
          elimGenesID<-group.test@members
        }
        
        #we assign the annotation weight to the elim genes
        names(elimGenesID)<-group.test@annotation.weight[match(elimGenesID,group.test@members,nomatch = 0)]
        #browser()
        
        ## we look for the ancestors
        ## because we are aggrating the score, we only look for 
        ## the direct parent instrad of all the ancestors
        ancestors <- setdiff(nodesInInducedGraph(goDAG, termID), termID)
        #ancestors<-adj(goDAG, termID)
          
        
        
        oldElimGenesID <- mget(ancestors, envir = elimGenes.LookUP,
                               mode = 'character', ifnotfound = list(c()))
        
        ## add the new genesID to the ancestors
        .aggregateElimScore<-function(oldElim,newElim){
          for(i in 1:length(newElim)){
            if(newElim[[i]] %in% oldElim){
              old.W<-as.numeric(names(oldElim[which(oldElim==newElim[[i]])]))
              names(oldElim)[which(oldElim==newElim[[i]])]<-sum(old.W,as.numeric(names(newElim[i])))
            }else{
              oldElim<-c(oldElim,newElim[i])
            }
          }
          oldElim
        }   
        
        newElimGenesID<-lapply(oldElimGenesID,
                               function(termGenes) {
                                 aux<-.aggregateElimScore(termGenes,elimGenesID)
                                 return(aux[!is.na(aux)])
                               })
        
        # if(group.test@elim.type=='score'){
        # 
        # }else{
        #   newElimGenesID<-lapply(oldElimGenesID,
        #          function(termGenes) {
        #            aux<-union(termGenes, elimGenesID)
        #            return(aux[!is.na(aux)])
        #          })
        # }
        
        
        # 
        # newElimGenesID <- lapply(oldElimGenesID,
        #                          function(termGenes) {
        #                            aux <- union(termGenes, elimGenesID)
        #                            return(aux[!is.na(aux)])
        #                          })
        
        
        ## update the lookUp table
        if(length(newElimGenesID) > 0)
          multiassign(names(newElimGenesID), newElimGenesID, envir = elimGenes.LookUP)
      }
      }
    }
  }
  return(result)
  #return(unlist(as.list(sigList)))
}
hxin/topOnto documentation built on May 17, 2019, 9:15 p.m.