######################################################################
## file containing all the algorithms for scoring GO terms.
## High-level function for runing the GO algorithms
.algoComp <- rbind(c(1, 0, 1, 1, 1, 0, 1, 1, 0),
c(1, 0, 1, 1, 1, 0, 1, 1, 0),
c(1, 0, 0, 0, 0, 0, 0, 0, 0),
c(1, 0, 1, 1, 1, 0, 1, 1, 0),
c(1, 0, 1, 1, 1, 0, 1, 1, 0),
c(1, 0, 0, 0, 0, 0, 0, 0, 0),
c(0, 0, 0, 0, 0, 0, 0, 0, 1),
c(0, 0, 0, 0, 0, 0, 0, 0, 1))
rownames(.algoComp) <- c("classic", "elim", "weight", "weight01", "lea", "parentchild",'classicgsea','elimgsea')
colnames(.algoComp) <- c("fisher", "z", "ks", "t", "globaltest", "category", "sum", "ks.ties","ks.csw")
.testNames <- c("GOFisherTest" , "GOKSTest", "GOtTest", "GOglobalTest", "GOSumTest", "GOKSTiesTest","GOKSCSWTest")
names(.testNames) <- c("fisher", "ks", "t", "globaltest", "sum", "ks.ties","ks.csw")
.algoClass <- c("classic", "elim", "weight", "weight01", "lea", "parentchild","classicgsea","elimgsea")
names(.algoClass) <- c("classic", "elim", "weight", "weight01", "lea", "parentchild","classicgsea","elimgsea")
## functions to extract the information from the .algoComp
whichAlgorithms <- function() {
rownames(.algoComp)
}
whichTests <- function() {
colnames(.algoComp)[colSums(.algoComp) > 0]
}
## function runTest(topONTdata, "elim", "KS", ...)
## ... are parameters for the test statistic
#' @name getSigGroups
#' @rdname getSigGroups
#' @title Interfaces for running the enrichment tests
#' @aliases getSigGroups runTest getSigGroups-methods getSigGroups,topONTdata,classicCount-method getSigGroups,topONTdata,classicScore-method getSigGroups,topONTdata,classicExpr-method getSigGroups,topONTdata,leaCount-method getSigGroups,topONTdata,leaScore-method getSigGroups,topONTdata,leaExpr-method getSigGroups,topONTdata,elimCount-method getSigGroups,topONTdata,elimScore-method getSigGroups,topONTdata,elimExpr-method getSigGroups,topONTdata,weightCount-method getSigGroups,topONTdata,weight01Count-method getSigGroups,topONTdata,weight01Score-method getSigGroups,topONTdata,weight01Expr-method getSigGroups,topONTdata,parentChild-method getSigGroups,topONTdata,pC-method runTest,topONTdata,character,character-method runTest,topONTdata,missing,character-method whichAlgorithms whichTests
#' @description These function are used for dispatching the specific algorithm for a given \code{topONTdata} object and a test statistic.
#' @usage
#' getSigGroups(object, test.stat, ...)
#' runTest(object, algorithm, statistic, ...)
#' whichAlgorithms()
#' whichTests()
#'
#' @param object An object of class \code{topONTdata} This object contains all data necessary for runnig the test.
#' @param test.stat An object of class \code{groupStats}. This object defines the test statistic.
#' @param algorithm Character string specifing which algorithm to use.
#' @param statistic Character string specifing which test to use.
#' @param \dots Other parameters. In the case of \code{runTest} they are used for defining the test statistic
#'
#' @details
#' The \code{runTest} function can be used only with a predefined set of
#' test statistics and algorithms. The algorithms and the statistical
#' tests which are accessible via the \code{runTest} function are shown
#' by the \code{whichAlgorithms()} and \code{whichTests()} functions.
#'
#' The runTest function is a warping of the \code{getSigGroups} and the
#' initialisation of a \code{groupStats} object functions.
#'
#' ...
#'
#'
#' @return An object of class \code{topONTresult}
#' @author Adrian Alexa
#'
#' @seealso
#' \code{\link{topONTdata-class}},
#' \code{\link{groupStats-class}},
#' \code{\link{topONTresult-class}}
#'
#'
#'
#' @examples
#'
#' ## load a sample topONTdata object
#' data(ONTdata)
#' GOdata
#'
#' ##############################
#' ## getSigGroups interface
#' ##############################
#'
#' ## define a test statistic
#' test.stat <- new("classicCount", testStatistic = GOFisherTest, name = "Fisher test")
#' ## perform the test
#' resultFis <- getSigGroups(GOdata, test.stat)
#' resultFis
#'
#'
#'
#' ##############################
#' ## runTest interface
#' ##############################
#'
#' ## Enrichment analysis by using the "classic" method and Fisher's exact test
#' resultFis <- runTest(GOdata, algorithm = "classic", statistic = "fisher")
#' resultFis
#'
#' ## weight01 is the default algorithm
#' weight01.fisher <- runTest(GOdata, statistic = "fisher")
#' weight01.fisher
#'
#'
#' ## not all combinations are possible!
#' # weight.ks <- runTest(GOdata, algorithm = "weight", statistic = "t")
#'
#' @keywords methods
if(!isGeneric("runTest"))
setGeneric("runTest", function(object, algorithm, statistic, ...) standardGeneric("runTest"))
setMethod("runTest",
signature(object = "topONTdata", algorithm = "character", statistic = "character"),
function(object, algorithm, statistic, ...) { ## ... parameters for the test statistic
#browser()
statistic <- tolower(statistic)
algorithm <- tolower(algorithm)
## we check if the algorithm support the given test statistic
if(.algoComp[algorithm, statistic] == 0)
stop("The algorithm doesn't support the given test statistic")
algorithm <- .algoClass[algorithm]
## strong asumtions!
if(algorithm == "parentchild"){
testType <- "pC"
}else if(grepl('gsea',ignore.case = T,algorithm)){
testType <-paste(sub(pattern = 'gsea',replacement = '',ignore.case = T,algorithm),'Gsea',sep = '')
}else{
testType <- as.character(findMethodSignatures(.testNames[statistic]))
testType <- sub("classic", algorithm, testType)
}
if(!isClass(testType))
stop("Error in defining the test statistic Class")
test.stat <- new(testType, testStatistic = get(.testNames[statistic]), name = statistic, ...)
return(getSigGroups(object, test.stat))
})
##resultKS <- runTest(GOdata, algorithm = "classic", statistic = "KS")
## dispatch method for weight01 (or topGO) algorithm - the default algorithm
setMethod("runTest",
signature(object = "topONTdata", algorithm = "missing", statistic = "character"),
function(object, statistic, ...) { ## ... parameters for the test statistic
return(runTest(object, "weight01", statistic, ...))
})
######################################################################
######################################################################
## This function is use for dispatching each algorithm
## probably more checking could be done in the end!
if(!isGeneric("getSigGroups"))
setGeneric("getSigGroups", function(object, test.stat, ...) standardGeneric("getSigGroups"))
########################## CLASSIC algorithm ##########################
## dispatch method for classic algorithm
setMethod("getSigGroups",
signature(object = "topONTdata", test.stat = "classicCount"),
function(object, test.stat) { ## ... parameters for each algorithm
## update the test.stat object
allMembers(test.stat) <- genes(object)
sigMembers(test.stat) <- sigGenes(object)
## first take aside nodes that don't have any sig members
x <- termStat(object)
index <- x$Significant == 0
not.sigTerms <- rownames(x)[index]
.sigTerms <- rownames(x)[!index]
cat("\n\t\t\t -- Classic Algorithm -- \n")
cat(paste("\n\t\t the algorithm is scoring ", length(.sigTerms), " nontrivial nodes\n", sep =""))
cat("\t\t parameters: \n")
cat("\t\t\t test statistic: ", Name(test.stat), "\n")
## check if there is at least one nontrivial GO term !!!
if(length(.sigTerms) > 0) {
## apply the algorithm
algoRes <- .sigGroups.classic(genesInTerm(object, .sigTerms), test.stat)
} else {
algoRes <- numeric()
warning("No enrichment can pe performed - there are no feasible terms!")
}
aux <- rep(1, length(not.sigTerms))
names(aux) <- not.sigTerms
algoRes <- c(algoRes, aux)
## STORE THE RESULTS
.whichAlgorithm <- "classic"
attr(.whichAlgorithm, "testClass") <- as.character(class(test.stat))
return(new("topONTresult",
description = paste(description(object), "\nOntology:", ontology(object), sep = " "),
score = algoRes, testName = Name(test.stat),
algorithm = .whichAlgorithm,
geneData = c(.getGeneData(object), SigTerms = length(.sigTerms))))
})
setMethod("getSigGroups",
signature(object = "topONTdata", test.stat = "classicScore"),
function(object, test.stat) {
## update the test.stat object if there is the case
if(length(allScore(test.stat)) == 0) {
allMembers(test.stat) <- genes(object)
score(test.stat) <- geneScore(object, use.names = TRUE)
}
GOlist <- genesInTerm(object)
cat("\n\t\t\t -- Classic Algorithm -- \n")
cat(paste("\n\t\t the algorithm is scoring ", length(GOlist), " nontrivial nodes\n", sep =""))
cat("\t\t parameters: \n")
cat("\t\t\t test statistic: ", Name(test.stat), "\n")
cat("\t\t\t score order: ", ifelse(scoreOrder(test.stat), "decreasing", "increasing"), "\n")
algoRes <- .sigGroups.classic(GOlist, test.stat)
## STORE THE RESULTS
.whichAlgorithm <- "classic"
attr(.whichAlgorithm, "testClass") <- as.character(class(test.stat))
return(new("topONTresult",
description = paste(description(object), "\nOntology:", ontology(object), sep = " "),
score = algoRes, testName = Name(test.stat),
algorithm = .whichAlgorithm,
geneData = c(.getGeneData(object), SigTerms = length(GOlist))))
})
setMethod("getSigGroups",
signature(object = "topONTdata", test.stat = "classicExpr"),
function(object, test.stat) {
if(length(allMembers(test.stat)) == 0)
stop("In function getSigGroups: no expression data found")
GOlist <- genesInTerm(object)
cat("\n\t\t\t -- Classic Algorithm -- \n")
cat(paste("\n\t\t the algorithm is scoring ", length(GOlist), " nontrivial nodes\n", sep =""))
cat("\t\t parameters: \n")
cat("\t\t\t test statistic: ", Name(test.stat), "\n")
algoRes <- .sigGroups.classic(GOlist, test.stat)
.whichAlgorithm <- "classic"
attr(.whichAlgorithm, "testClass") <- as.character(class(test.stat))
return(new("topONTresult",
description = paste(description(object), "\nOntology:", ontology(object), sep = " "),
score = algoRes, testName = Name(test.stat),
algorithm = .whichAlgorithm,
geneData = c(.getGeneData(object), SigTerms = length(GOlist))))
})
########################## ELIM algorithm ##########################
## dispatch method for elim algorithm
## to set the test statistic
## test.stat <- new("elimCount", testStatistic = GOFisherTest, cutOff = 0.05, name = "Fisher test")
setMethod("getSigGroups",
signature(object = "topONTdata", test.stat = "elimCount"),
function(object, test.stat, ...) { ## ... parameters for each algorithm
## update the test.stat object
allMembers(test.stat) <- genes(object)
sigMembers(test.stat) <- sigGenes(object)
## first take aside nodes that don't have any sig members
x <- termStat(object)
index <- x$Significant == 0
not.sigTerms <- rownames(x)[index]
.sigTerms <- rownames(x)[!index]
cat("\n\t\t\t -- Elim Algorithm -- \n")
cat(paste("\n\t\t the algorithm is scoring ", length(.sigTerms), " nontrivial nodes\n", sep =""))
cat("\t\t parameters: \n")
cat("\t\t\t test statistic: ", Name(test.stat), "\n")
cat("\t\t\t cutOff: ", cutOff(test.stat), "\n")
## check if there is at least one nontrivial GO term !!!
if(length(.sigTerms) > 0) {
## restrict the graph
g <- subGraph(.sigTerms, graph(object))
## apply the algorithm
algoRes <- .sigGroups.elim(g, test.stat)
} else {
algoRes <- numeric()
warning("No enrichment can pe performed - there are no feasible terms!")
}
aux <- rep(1, length(not.sigTerms))
names(aux) <- not.sigTerms
algoRes <- c(algoRes, aux)
.whichAlgorithm <- "elim"
attr(.whichAlgorithm, "testClass") <- as.character(class(test.stat))
return(new("topONTresult",
description = paste(description(object), "\nOntology:", ontology(object), sep = " "),
score = algoRes,
testName = paste(Name(test.stat), cutOff(test.stat), sep = " : "),
algorithm = .whichAlgorithm,
geneData = c(.getGeneData(object), SigTerms = length(.sigTerms))))
})
setMethod("getSigGroups",
signature(object = "topONTdata", test.stat = "elimScore"),
function(object, test.stat, ...) { ## ... parameters for each algorithm
#browser()
## update the test.stat object if there is the case
if(length(allScore(test.stat)) == 0) {
allMembers(test.stat) <- genes(object)
score(test.stat) <- geneScore(object, use.names = TRUE)
}
GOlist <- usedGO(object)
cat("\n\t\t\t -- Elim Algorithm -- \n")
cat(paste("\n\t\t the algorithm is scoring ", length(GOlist), " nontrivial nodes\n", sep =""))
cat("\t\t parameters: \n")
cat("\t\t\t test statistic: ", Name(test.stat), "\n")
cat("\t\t\t cutOff: ", cutOff(test.stat), "\n")
cat("\t\t\t score order: ", ifelse(scoreOrder(test.stat), "decreasing", "increasing"), "\n")
## apply the algorithm
algoRes <- .sigGroups.elim(graph(object), test.stat)
.whichAlgorithm <- "elim"
attr(.whichAlgorithm, "testClass") <- as.character(class(test.stat))
return(new("topONTresult",
description = paste(description(object), "\nOntology:", ontology(object), sep = " "),
score = algoRes,
testName = paste(Name(test.stat), cutOff(test.stat), sep = " : "),
algorithm = .whichAlgorithm,
geneData = c(.getGeneData(object), SigTerms = length(GOlist))))
})
setMethod("getSigGroups",
signature(object = "topONTdata", test.stat = "elimExpr"),
function(object, test.stat, ...) { ## ... parameters for each algorithm
## update the test.stat object if there is the case
if(length(allMembers(test.stat)) == 0)
stop("No expression data found")
GOlist <- usedGO(object)
cat("\n\t\t\t -- Elim Algorithm -- \n")
cat(paste("\n\t\t the algorithm is scoring ", length(GOlist), " nontrivial nodes\n", sep =""))
cat("\t\t parameters: \n")
cat("\t\t\t test statistic: ", Name(test.stat), "\n")
cat("\t\t\t cutOff: ", cutOff(test.stat), "\n")
## apply the algorithm
algoRes <- .sigGroups.elim(graph(object), test.stat)
.whichAlgorithm <- "elim"
attr(.whichAlgorithm, "testClass") <- as.character(class(test.stat))
return(new("topONTresult",
description = paste(description(object), "\nOntology:", ontology(object), sep = " "),
score = algoRes,
testName = paste(Name(test.stat), cutOff(test.stat), sep = " : "),
algorithm = .whichAlgorithm,
geneData = c(.getGeneData(object), SigTerms = length(GOlist))))
})
########################## weight01(topGO) algorithm ##########################
## dispatch method for weight01 algorithm
## to set the test statistic
## test.stat <- new("weight01Count", testStatistic = GOFisherTest, name = "Fisher test")
setMethod("getSigGroups",
signature(object = "topONTdata", test.stat = "weight01Count"),
function(object, test.stat, ...) { ## ... parameters for each algorithm
## update the test.stat object
allMembers(test.stat) <- genes(object)
sigMembers(test.stat) <- sigGenes(object)
## first take aside nodes that don't have any sig members
x <- termStat(object)
index <- x$Significant == 0
not.sigTerms <- rownames(x)[index]
.sigTerms <- rownames(x)[!index]
cat("\n\t\t\t -- Weight01 Algorithm -- \n")
cat(paste("\n\t\t the algorithm is scoring ", length(.sigTerms), " nontrivial nodes\n", sep =""))
cat("\t\t parameters: \n")
cat("\t\t\t test statistic: ", Name(test.stat), "\n")
## check if there is at least one nontrivial GO term !!!
if(length(.sigTerms) > 0) {
## restrict the graph
g <- subGraph(.sigTerms, graph(object))
## apply the algorithm
algoRes <- .sigGroups.weight01(g, test.stat)
} else {
algoRes <- numeric()
warning("No enrichment can pe performed - there are no feasible terms!")
}
aux <- rep(1, length(not.sigTerms))
names(aux) <- not.sigTerms
algoRes <- c(algoRes, aux)
.whichAlgorithm <- "weight01"
attr(.whichAlgorithm, "testClass") <- as.character(class(test.stat))
return(new("topONTresult",
description = paste(description(object), "\nOntology:", ontology(object), sep = " "),
score = algoRes, testName = Name(test.stat),
algorithm = .whichAlgorithm,
geneData = c(.getGeneData(object), SigTerms = length(.sigTerms))))
})
setMethod("getSigGroups",
signature(object = "topONTdata", test.stat = "weight01Score"),
function(object, test.stat, ...) { ## ... parameters for each algorithm
## update the test.stat object if there is the case
if(length(allScore(test.stat)) == 0) {
allMembers(test.stat) <- genes(object)
score(test.stat) <- geneScore(object, use.names = TRUE)
}
GOlist <- usedGO(object)
cat("\n\t\t\t -- Weight01 Algorithm -- \n")
cat(paste("\n\t\t the algorithm is scoring ", length(GOlist), " nontrivial nodes\n", sep =""))
cat("\t\t parameters: \n")
cat("\t\t\t test statistic: ", Name(test.stat), "\n")
cat("\t\t\t score order: ", ifelse(scoreOrder(test.stat), "decreasing", "increasing"), "\n")
## apply the algorithm
algoRes <- .sigGroups.weight01(graph(object), test.stat)
.whichAlgorithm <- "weight01"
attr(.whichAlgorithm, "testClass") <- as.character(class(test.stat))
return(new("topONTresult",
description = paste(description(object), "\nOntology:", ontology(object), sep = " "),
score = algoRes, testName = Name(test.stat),
algorithm = .whichAlgorithm,
geneData = c(.getGeneData(object), SigTerms = length(GOlist))))
})
setMethod("getSigGroups",
signature(object = "topONTdata", test.stat = "weight01Expr"),
function(object, test.stat, ...) { ## ... parameters for each algorithm
## update the test.stat object if there is the case
if(length(allMembers(test.stat)) == 0)
stop("No expression data found")
GOlist <- usedGO(object)
cat("\n\t\t\t -- Weight01 Algorithm -- \n")
cat(paste("\n\t\t the algorithm is scoring ", length(GOlist), " nontrivial nodes\n", sep =""))
cat("\t\t parameters: \n")
cat("\t\t\t test statistic: ", Name(test.stat), "\n")
## apply the algorithm
algoRes <- .sigGroups.weight01(graph(object), test.stat)
.whichAlgorithm <- "weight01"
attr(.whichAlgorithm, "testClass") <- as.character(class(test.stat))
return(new("topONTresult",
description = paste(description(object), "\nOntology:", ontology(object), sep = " "),
score = algoRes, testName = Name(test.stat),
algorithm = .whichAlgorithm,
geneData = c(.getGeneData(object), SigTerms = length(GOlist))))
})
########################## WEIGHT algorithm ##########################
## dispatch method for the weight algorithm
## to set the test statistic
## test.stat <- new("weightCount", testStatistic = GOFisherTest, name = "Fisher test", ...)
setMethod("getSigGroups",
signature(object = "topONTdata", test.stat = "weightCount"),
function(object, test.stat, ...) { ## ... parameters for each algorithm
## update the test.stat object
allMembers(test.stat) <- genes(object)
sigMembers(test.stat) <- sigGenes(object)
## first take aside nodes that don't have any sig members
x <- termStat(object)
index <- x$Significant == 0
not.sigTerms <- rownames(x)[index]
.sigTerms <- rownames(x)[!index]
cat("\n\t\t\t -- Weight Algorithm -- \n")
cat(paste("\n\t\t The algorithm is scoring ", length(.sigTerms), " nontrivial nodes\n", sep =""))
cat("\t\t parameters: \n")
cat("\t\t\t test statistic: ", Name(test.stat), "\n")
## check if there is at least one nontrivial GO term !!!
if(length(.sigTerms) > 0) {
## restrict the graph
g <- subGraph(.sigTerms, graph(object))
## apply the algorithm
algoRes <- .sigGroups.weight(g, test.stat)
} else {
algoRes <- numeric()
warning("No enrichment can pe performed - there are no feasible terms!")
}
aux <- rep(1, length(not.sigTerms))
names(aux) <- not.sigTerms
algoRes <- c(algoRes, aux)
.whichAlgorithm <- "weight"
attr(.whichAlgorithm, "testClass") <- as.character(class(test.stat))
return(new("topONTresult",
description = paste(description(object), "\nOntology:", ontology(object), sep = " "),
score = algoRes, testName = Name(test.stat),
algorithm = .whichAlgorithm,
geneData = c(.getGeneData(object), SigTerms = length(.sigTerms))))
})
########################## Parent-Child algorithm ##########################
## dispatch method for parent-child algorithm
##
## to set the test statistic
## test.stat <- new("pC", testStatistic = GOFisherTest, joinFun = "intersect", name = "Fisher test")
setMethod("getSigGroups",
signature(object = "topONTdata", test.stat = "pC"),
function(object, test.stat, ...) { ## ... parameters for each algorithm
## first take aside nodes that don't have any sig members
x <- termStat(object)
index <- x$Significant == 0
not.sigTerms <- rownames(x)[index]
.sigTerms <- rownames(x)[!index]
cat("\n\t\t\t -- Parent-Child Algorithm -- \n")
cat(paste("\n\t\t the algorithm is scoring ", length(.sigTerms), " nontrivial nodes\n", sep =""))
cat("\t\t parameters: \n")
cat("\t\t\t test statistic: ", Name(test.stat), "\n")
## check if there is at least one nontrivial GO term !!!
if(length(.sigTerms) > 0) {
## restrict the graph
g <- subGraph(.sigTerms, graph(object))
## apply the algorithm
algoRes <- .sigGroups.parentChild(g, test.stat, sigGenes(object))
} else {
algoRes <- numeric()
warning("No enrichment can pe performed - there are no feasible terms!")
}
aux <- rep(1, length(not.sigTerms))
names(aux) <- not.sigTerms
algoRes <- c(algoRes, aux)
.whichAlgorithm <- "parentchild"
attr(.whichAlgorithm, "testClass") <- as.character(class(test.stat))
return(new("topONTresult",
description = paste(description(object), "\nOntology:", ontology(object), sep = " "),
score = algoRes, testName = Name(test.stat),
algorithm = .whichAlgorithm,
geneData = c(.getGeneData(object), SigTerms = length(.sigTerms))))
})
setMethod("getSigGroups",
signature(object = "topONTdata", test.stat = "parentChild"),
function(object, test.stat, ...) { ## ... parameters for each algorithm
## first take aside nodes that don't have any sig members
x <- termStat(object)
index <- x$Significant == 0
not.sigTerms <- rownames(x)[index]
.sigTerms <- rownames(x)[!index]
cat("\n\t\t\t -- Parent-Child Algorithm -- \n")
cat(paste("\n\t\t the algorithm is scoring ", length(.sigTerms), " nontrivial nodes\n", sep =""))
cat("\t\t parameters: \n")
cat("\t\t\t test statistic: ", Name(test.stat), "\n")
cat("\t\t\t join function: ", test.stat@joinFun, "\n")
## check if there is at least one nontrivial GO term !!!
if(length(.sigTerms) > 0) {
## restrict the graph
g <- subGraph(.sigTerms, graph(object))
## apply the algorithm
algoRes <- .sigGroups.parentChild(g, test.stat, sigGenes(object))
} else {
algoRes <- numeric()
warning("No enrichment can pe performed - there are no feasible terms!")
}
aux <- rep(1, length(not.sigTerms))
names(aux) <- not.sigTerms
algoRes <- c(algoRes, aux)
.whichAlgorithm <- "parentchild"
attr(.whichAlgorithm, "testClass") <- as.character(class(test.stat))
return(new("topONTresult",
description = paste(description(object), "\nOntology:", ontology(object), sep = " "),
score = algoRes, testName = Name(test.stat),
algorithm = .whichAlgorithm,
geneData = c(.getGeneData(object), SigTerms = length(.sigTerms))))
})
########################## LEA algorithm ##########################
## dispatch method for the LEA algorithm
## to set the test statistic
## test.stat <- new("leaCount", testStatistic = GOFisherTest, name = "Fisher test")
setMethod("getSigGroups",
signature(object = "topONTdata", test.stat = "leaCount"),
function(object, test.stat, ...) { ## ... parameters for each algorithm
## update the test.stat object
allMembers(test.stat) <- genes(object)
sigMembers(test.stat) <- sigGenes(object)
## first take aside nodes that don't have any sig members
x <- termStat(object)
index <- x$Significant == 0
not.sigTerms <- rownames(x)[index]
.sigTerms <- rownames(x)[!index]
cat("\n\t\t\t -- LEA Algorithm -- \n")
cat(paste("\n\t\t the algorithm is scoring ", length(.sigTerms), " nontrivial nodes\n", sep =""))
cat("\t\t parameters: \n")
cat("\t\t\t test statistic: ", Name(test.stat), "\n")
cat("\t\t\t neighborhood depth: ", depth(test.stat), "\n")
## check if there is at least one nontrivial GO term !!!
if(length(.sigTerms) > 0) {
## restrict the graph
g <- subGraph(.sigTerms, graph(object))
## apply the algorithm
algoRes <- .sigGroups.LEA(g, test.stat)
} else {
algoRes <- numeric()
warning("No enrichment can pe performed - there are no feasible terms!")
}
aux <- rep(1, length(not.sigTerms))
names(aux) <- not.sigTerms
algoRes <- c(algoRes, aux)
.whichAlgorithm <- "LEA"
attr(.whichAlgorithm, "testClass") <- as.character(class(test.stat))
return(new("topONTresult",
description = paste(description(object),
"\nOntology:", ontology(object),
"\nneighborhood depth:", depth(test.stat), sep = " "),
score = algoRes,
testName = Name(test.stat),
algorithm = .whichAlgorithm,
geneData = c(.getGeneData(object), SigTerms = length(.sigTerms))))
})
setMethod("getSigGroups",
signature(object = "topONTdata", test.stat = "leaScore"),
function(object, test.stat, ...) { ## ... parameters for each algorithm
## update the test.stat object if there is the case
if(length(allScore(test.stat)) == 0) {
allMembers(test.stat) <- genes(object)
score(test.stat) <- geneScore(object, use.names = TRUE)
}
GOlist <- usedGO(object)
cat("\n\t\t\t -- LEA Algorithm -- \n")
cat(paste("\n\t\t the algorithm is scoring ", length(GOlist), " nontrivial nodes\n", sep =""))
cat("\t\t parameters: \n")
cat("\t\t\t test statistic: ", Name(test.stat), "\n")
cat("\t\t\t neighborhood depth: ", depth(test.stat), "\n")
cat("\t\t\t score order: ", ifelse(scoreOrder(test.stat), "decreasing", "increasing"), "\n")
## apply the algorithm
algoRes <- .sigGroups.LEA(graph(object), test.stat)
.whichAlgorithm <- "LEA"
attr(.whichAlgorithm, "testClass") <- as.character(class(test.stat))
return(new("topONTresult",
description = paste(description(object),
"\nOntology:", ontology(object),
"\nneighborhood depth:", depth(test.stat), sep = " "),
score = algoRes,
testName = Name(test.stat),
algorithm = .whichAlgorithm,
geneData = c(.getGeneData(object), SigTerms = length(GOlist))))
})
setMethod("getSigGroups",
signature(object = "topONTdata", test.stat = "leaExpr"),
function(object, test.stat, ...) { ## ... parameters for each algorithm
## update the test.stat object if there is the case
if(length(allMembers(test.stat)) == 0)
stop("No expression data found")
GOlist <- usedGO(object)
cat("\n\t\t\t -- LEA Algorithm -- \n")
cat(paste("\n\t\t the algorithm is scoring ", length(GOlist), " nontrivial nodes\n", sep =""))
cat("\t\t parameters: \n")
cat("\t\t\t test statistic: ", Name(test.stat), "\n")
cat("\t\t\t neighborhood depth: ", depth(test.stat), "\n")
## apply the algorithm
algoRes <- .sigGroups.LEA(graph(object), test.stat)
.whichAlgorithm <- "LEA"
attr(.whichAlgorithm, "testClass") <- as.character(class(test.stat))
return(new("topONTresult",
description = paste(description(object),
"\nOntology:", ontology(object),
"\nneighborhood depth:", depth(test.stat), sep = " "),
score = algoRes,
testName = Name(test.stat),
algorithm = .whichAlgorithm,
geneData = c(.getGeneData(object), SigTerms = length(GOlist))))
})
#############################################################################################################
#############################################################################################################
########################## CLASSIC algorithm ##########################
.sigGroups.classic <- function(GOlist, test.stat) {
sigList <- sapply(GOlist,
function(term) {
## we can't geve names .......
## Name(gi)
members(test.stat) <- term
return(runTest(test.stat))
})
return(sigList)
}
########################## ELIM algorithm ##########################
.sigGroups.elim <- function(goDAG, test.stat) {
goDAG.r2l <- reverseArch(goDAG)
nodeLevel <- buildLevels(goDAG.r2l, leafs2root = FALSE)
levelsLookUp <- nodeLevel$level2nodes
##adjs.cutOff <- cutOff(test.stat) / numNodes(goDAG)
adjs.cutOff <- cutOff(test.stat)
#cat(paste("\n\t\t Parameters:\t cutOff = ", adjs.cutOff, "\n", sep =""))
## we use a lookup table to search for nodes that have were significant
sigNodes.LookUP <- new.env(hash = TRUE, parent = emptyenv())
## hash table for the genes that we eliminate for each node
## we store the genes that we want to eliminate
elimGenes.LookUP <- new.env(hash = TRUE, parent = emptyenv())
## hash table to store the result
sigList <- new.env(hash = TRUE, parent = emptyenv())
for(i in nodeLevel$noOfLevels:1) {
currNodes.names <- get(as.character(i), envir = levelsLookUp, mode = 'character')
##children.currNodes <- adj(goDAG.r2l, currNodes.names)
currAnno <- .genesInNode(goDAG, currNodes.names)
.num.elimGenes <- length(unique(unlist(as.list(elimGenes.LookUP))))
cat(paste("\n\t Level ", i, ":\t", length(currNodes.names),
" nodes to be scored\t(", .num.elimGenes, " eliminated genes)\n", sep =""))
for(termID in currNodes.names) {
## just in case the test.stat is modified by some methods
group.test <- updateGroup(test.stat, name = termID, members = currAnno[[termID]])
## remove the genes from the term, if there are
if(!exists(termID, envir = elimGenes.LookUP, mode = "character"))
elim(group.test) <- character(0)
else
elim(group.test) <- get(termID, envir = elimGenes.LookUP, mode = 'character')
## run the test and store the result (the p-value)
termSig <- runTest(group.test)
assign(termID, termSig, envir = sigList)
## if we have a significant GO term
if(termSig <= adjs.cutOff) {
## we mark it
assign(termID, termSig, envir = sigNodes.LookUP)
## we set the genes that we want to eliminate from the all ancestors
elimGenesID <- currAnno[[termID]]
## we look for the ancestors
ancestors <- setdiff(nodesInInducedGraph(goDAG, termID), termID)
oldElimGenesID <- mget(ancestors, envir = elimGenes.LookUP,
mode = 'character', ifnotfound = list(NA))
## add the new genesID to the ancestors
newElimGenesID <- lapply(oldElimGenesID,
function(termGenes) {
aux <- union(termGenes, elimGenesID)
return(aux[!is.na(aux)])
})
## update the lookUp table
if(length(newElimGenesID) > 0)
multiassign(names(newElimGenesID), newElimGenesID, envir = elimGenes.LookUP)
}
}
}
return(unlist(as.list(sigList)))
}
########################## WEIGHT algorithm ##########################
.sigGroups.weight <- function(goDAG, test.stat) {
## SOME FUNCTIONS THAT WE NEED
## make a join between x and y unsing the names as key
## combFun should be a vectorial function
## we can use functions like: pmin, *, pmax, ...
x.comb.y <- function(x, y, combFun = get('*')) {
## check if we weight the same set: always the case when we
## set the genes in downNodes.LookUP
nx <- names(x)
ny <- names(y)
if((length(nx) == length(ny)) && all(nx == ny))
return(combFun(x, y))
allNames <- union(nx, ny)
commonNames <- intersect(nx, ny)
z <- numeric(length(allNames))
names(z) <- allNames
z[nx] <- x
z[ny] <- y
z[commonNames] <- combFun(x[commonNames], y[commonNames])
return(z)
}
setGeneWeights <- function(termID, geneWeights, LookUP.table) {
oldWeigths <- get(termID, envir = LookUP.table)
assign(termID, x.comb.y(geneWeights, oldWeigths), envir = LookUP.table)
}
## this is a recursive function in termChildren
computeTermSig <- function(group.test, termChildren) {
termID <- Name(group.test)
## look to see if there are some genes that must be weighted
Weights(group.test) <- get(termID, envir = upNodes.LookUP)
## run the test (the p-value)
termSig <- runTest(group.test)
## if we came from some recursive call
if(length(termChildren) == 0) {
assign(termID, termSig, envir = sigList)
return()
}
## since we konw that length(termChildren) > 0
w <- pW <- numeric(length(termChildren))
names(w) <- names(pW) <- termChildren
for(child in termChildren) {
## look for the child significance
## we should have the child significance in sigList
if(!exists(child, envir = sigList, inherits = FALSE))
stop('the child of node u, should had been processed')
childSig <- get(child, envir = sigList)
w[child] <- getSigRatio(group.test, a = childSig, b = termSig)
pW[child] <- penalise(group.test, a = childSig, b = termSig)
}
## if w[child] > 1 than that child is more significant
sig.termChildren <- names(w[w > 1])
## CASE 1: if we don't have significant children
## in this case we down-weight the genes in each child
if(length(sig.termChildren) == 0) {
for(child in termChildren) {
## the genes in this child
gene.child <- currAnno[[child]]
## weights for the genes in this child
gene.childWeights <- rep(pW[child] * w[child], length(gene.child))
names(gene.childWeights) <- gene.child
## put the gene weights in the downNodes.LookUP hash-table
if(!exists(child, envir = downNodes.LookUP, inherits = FALSE))
gCW <- gene.childWeights
else {
## if for child exists some genes that are weighted
## they are 'combine' using "pmin" function
oldWeigths <- get(child, envir = downNodes.LookUP)
gCW <- x.comb.y(gene.childWeights, oldWeigths, combFun = pmin)
}
assign(child, gCW, envir = downNodes.LookUP)
}
## since we have only less significant genes that means
## that we don't recompute the significance for node 'u'
assign(termID, termSig, envir = sigList)
return()
}
## CASE 2: 'child' is more significant that 'u'
.nn <- members(group.test)
geneWeights <- numeric(length(.nn)) + 1
names(geneWeights) <- .nn
for(child in sig.termChildren) {
## the genes in this child
gene.child <- currAnno[[child]]
gene.childWeights <- rep(pW[child] / w[child], length(gene.child))
names(gene.childWeights) <- gene.child
geneWeights <- x.comb.y(gene.childWeights, geneWeights)
}
## we set the new weights only for node 'termID'
setGeneWeights(termID, geneWeights, upNodes.LookUP)
if(is.null(.gene.weights))
assign('.gene.weights', geneWeights, envir = .main.envir)
else
assign('.gene.weights', x.comb.y(.gene.weights, geneWeights), envir = .main.envir)
computeTermSig(group.test, setdiff(termChildren, sig.termChildren))
}
##----------------------------------------------------------------------
goDAG.r2l <- reverseArch(goDAG)
## get the levels list
nodeLevel <- buildLevels(goDAG.r2l, leafs2root = FALSE)
levelsLookUp <- nodeLevel$level2nodes
## the annotations of the genes to GO
currAnno <- .genesInNode(goDAG, nodes(goDAG))
## we use a lookup table to search for nodes that we have weighted the genes
aux <- lapply(currAnno,
function(x) {
gw <- numeric(length(x)) + 1
names(gw) <- x
return(gw)
})
upNodes.LookUP <- list2env(aux, new.env(hash = TRUE, parent = emptyenv()))
downNodes.LookUP <- new.env(hash = TRUE, parent = emptyenv())
## we also use a hash table to store the result
sigList <- new.env(hash = TRUE, parent = emptyenv())
## we will use frequently the children
allChildren <- adj(goDAG.r2l, nodes(goDAG))
## START
.main.envir <- environment()
for(i in nodeLevel$noOfLevels:1) {
currNodes.names <- get(as.character(i), envir = levelsLookUp, mode = 'character')
## some messages
cat(paste("\n\t Level ", i, ":\t", length(currNodes.names), " nodes to be scored.\n", sep =""))
for(termID in currNodes.names) {
## take the IDs of the gene in the current node
## we store the termID in the "name" slot of the test.stat
group.test <- updateGroup(test.stat, name = termID, members = currAnno[[termID]])
.gene.weights <- NULL
## look at all children of node u and compute the significance
computeTermSig(group.test, allChildren[[termID]])
if(!is.null(.gene.weights)) {
## get the upper induced subgraph from 'u' (including 'u')
nodesUpSubgraph <- nodesInInducedGraph(goDAG, termID)
## set the weights for all the nodes
lapply(setdiff(nodesUpSubgraph, termID), setGeneWeights, .gene.weights, upNodes.LookUP)
}
}
}
## recompute the significance of the nodes in the downNodes.LookUP
for(termID in ls(downNodes.LookUP)) {
newWeights <- x.comb.y(get(termID, envir = upNodes.LookUP),
get(termID, envir = downNodes.LookUP))
termSig <- runTest(updateGroup(test.stat, name = termID,
members = currAnno[[termID]], weights = newWeights))
assign(termID, termSig, envir = sigList)
}
return(unlist(as.list(sigList)))
}
########################## WEIGHT algorithm ##########################
.sigGroups.weight01 <- function(goDAG, test.stat) {
## SOME FUNCTIONS THAT WE NEED
removeGenes <- function(termID, genesID, LookUP.table) {
if(!exists(termID, envir = LookUP.table, mode = "character"))
assign(termID, genesID, envir = LookUP.table)
else
assign(termID, union(genesID, get(termID, envir = LookUP.table)), envir = LookUP.table)
}
## this is a recursive function in termChildren
computeTermSig <- function(group.test, termChildren) {
termID <- Name(group.test)
## remove the genes from the term, if there are
if(!exists(termID, envir = upNodes.LookUP, mode = "character"))
elim(group.test) <- character(0)
else
elim(group.test) <- get(termID, envir = upNodes.LookUP, mode = 'character')
## run the test (the p-value)
termSig <- runTest(group.test)
## if we came from some recursive call
if(length(termChildren) == 0) {
assign(termID, termSig, envir = sigList)
return()
}
## since we konw that length(termChildren) > 0
w <- numeric(length(termChildren))
names(w) <- termChildren
for(child in termChildren) {
## look for the child significance
if(!exists(child, envir = sigList, inherits = FALSE))
stop('the child of node u, should had been processed')
## we should have the child significance in sigList
childSig <- get(child, envir = sigList)
w[child] <- .sigRatio.01(a = childSig, b = termSig)
}
## if w[child] > 1 than that child is more significant
sig.termChildren <- names(w[w > 1])
## CASE 1: if we don't have significant children
if(length(sig.termChildren) == 0) {
## since we have only less significant nodes that means
## that we don't recompute the significance for node 'u'
assign(termID, termSig, envir = sigList)
return()
}
## CASE 2: 'child' is more significant that 'u'
geneToRemove <- unique(unlist(currAnno[sig.termChildren]))
## we remove the genes only for node 'termID'
removeGenes(termID, geneToRemove, upNodes.LookUP)
if(is.null(.genesToRemove))
assign('.genesToRemove', geneToRemove, envir = .main.envir)
else
assign('.genesToRemove', union(.genesToRemove, geneToRemove), envir = .main.envir)
computeTermSig(group.test, setdiff(termChildren, sig.termChildren))
}
##----------------------------------------------------------------------
goDAG.r2l <- reverseArch(goDAG)
## get the levels list
nodeLevel <- buildLevels(goDAG.r2l, leafs2root = FALSE)
levelsLookUp <- nodeLevel$level2nodes
## the annotations of the genes to GO
currAnno <- .genesInNode(goDAG, nodes(goDAG))
## hash table for the genes that we eliminate for each node
## we store the genes that we want to eliminate
upNodes.LookUP <- new.env(hash = TRUE, parent = emptyenv())
## we also use a hash table to store the result
sigList <- new.env(hash = TRUE, parent = emptyenv())
## we will use frequently the children
allChildren <- adj(goDAG.r2l, nodes(goDAG))
## START
.main.envir <- environment()
for(i in nodeLevel$noOfLevels:1) {
currNodes.names <- get(as.character(i), envir = levelsLookUp, mode = 'character')
## some messages
.num.elimGenes <- length(unique(unlist(as.list(upNodes.LookUP))))
cat(paste("\n\t Level ", i, ":\t", length(currNodes.names),
" nodes to be scored\t(", .num.elimGenes, " eliminated genes)\n", sep =""))
for(termID in currNodes.names) {
## take the IDs of the gene in the current node
## we store the termID in the "name" slot of the test.stat
group.test <- updateGroup(test.stat, name = termID, members = currAnno[[termID]])
.genesToRemove <- NULL
## compute the significance of node u by looking at all children
computeTermSig(group.test, allChildren[[termID]])
if(!is.null(.genesToRemove)) {
## get the nodes in the upper induced subgraph from 'u' (including 'u')
## and eliminate the genes for all these nodes
lapply(setdiff(nodesInInducedGraph(goDAG, termID), termID),
removeGenes, .genesToRemove, upNodes.LookUP)
}
}
}
return(unlist(as.list(sigList)))
}
########################## Parent-Child algorithm ##########################
.sigGroups.parentChild <- function(goDAG, test.stat, sigGenes) {
nodeLevel <- buildLevels(goDAG, leafs2root = TRUE)
levelsLookUp <- nodeLevel$level2nodes
## hash table to store the result
sigList <- new.env(hash = TRUE, parent = emptyenv())
## we don't score the root node
for(i in nodeLevel$noOfLevels:2) {
currNodes.names <- get(as.character(i), envir = levelsLookUp, mode = 'character')
parents.currNodes <- adj(goDAG, currNodes.names)
currAnno <- .genesInNode(goDAG, currNodes.names)
cat(paste("\n\t Level ", i, ":\t", length(currNodes.names),
" nodes to be scored.\n", sep =""))
for(termID in currNodes.names) {
## just in case the test.stat is modified by some methods
parents <- .genesInNode(goDAG, parents.currNodes[[termID]])
group.test <- updateGroup(test.stat, members = currAnno[[termID]],
parents = parents, sigMembers = sigGenes)
## run the test and store the result (the p-value)
termSig <- runTest(group.test)
assign(termID, termSig, envir = sigList)
}
}
## the root will have p-value 1
assign(get("1", envir = levelsLookUp, mode = 'character'), 1, envir = sigList)
return(unlist(as.list(sigList)))
}
########################## LEA algorithm ##########################
.sigGroups.LEA <- function(goDAG, test.stat, LEA.class.pref = "lea") {
allAnno <- .genesInNode(goDAG, nodes(goDAG))
## just to be sure there are no random genes in the elim slot
elim(test.stat) <- character(0)
group.test <- test.stat
## get the classic significance
cat("\n\t Computing classical p-values ...\n")
class(test.stat) <- sub(LEA.class.pref, "classic", class(test.stat))
classicPval <- sapply(allAnno,
function(term) {
members(test.stat) <- term
return(runTest(test.stat))
})
## newPval are the corrected p-values, where nedded
newPval <- classicPval
## keep the nodes to be further explored in a queue,
## and initiallze the queue with the root node
rootNode <- getGraphRoot(goDAG)
bestCounter <- 0 # counts the number of nodes with a better score then its children
## make the edges point from the root to the leafs
goDAG <- reverseArch(goDAG)
## save the adjacent nodes (the children) into a list since will be faster to access them
usedNodes <- nodes(goDAG)
chList <- adj(goDAG, usedNodes)
usedNodes <- usedNodes[sapply(chList, length) != 0]
cat("\t Computing local p-values ...\n")
for(cNode in usedNodes) {
## get the descendents of degree k of the current node
ch <- chList[[cNode]]
i <- 1
while(length(ch) > 0 && i < depth(group.test)) {
ch <- unique(c(ch, unlist(chList[ch], use.names = FALSE)))
i <- i + 1
}
## check if the current node is a leaf -- should not happen though ...
if(length(ch) == 0)
next
## we know that both cNode and ch are non empty
## the only thing left to do is to compute the new score
optimCh <- ch[classicPval[ch] <= classicPval[cNode]] # the children with a better score then cNode
## CASE I: cNode has a better score then all its children
if(length(optimCh) == 0) {
bestCounter <- bestCounter + 1
next
}
################################################################################
## CASE II: at least one child has a better score then cNode, namely all optimCh
## check if the node has a very bad score (p-value ~ 1)
## this will happen seldom, but might remove some computations ....
if(classicPval[cNode] > .9)
next
## from the test statistic
## get the genes annotated to the children and set them as genes to be removed from cNode
test.stat <- updateGroup(group.test, name = cNode, members = allAnno[[cNode]])
elim(test.stat) <- unique(unlist(allAnno[optimCh], use.names = FALSE))
newPval[cNode] <- max(classicPval[cNode], runTest(test.stat))
##newPval[cNode] <- runTest(test.stat)
}
cat("\t\t", bestCounter, "local optimal nodes found\n\t\t", sum(classicPval != newPval), "corrected p-values \n")
return(newPval)
}
########################## GSEA algorithm ##########################
setMethod("getSigGroups",
signature(object = "topONTdata", test.stat = "classicGsea"),
function(object, test.stat) {
#browser()
## update the test.stat object if there is the case
if(length(allScore(test.stat)) == 0) {
allMembers(test.stat) <- genes(object)
score(test.stat) <- geneScore(object, use.names = TRUE)
}
#GOlist <- genesInTerm(object)
#here
GOlist<-.genesInNode(graph(object),nodes(graph(object)),score = T)
##filter by size
GOlist<-GOlist[which(test.stat@min.size<=sapply(GOlist,length) & sapply(GOlist,length)<=test.stat@max.size)]
if(length(GOlist)==0){
'no gene set found! Please change min.size and max.size.'
}
cat("\n\t\t\t -- Classic Algorithm -- \n")
cat(paste("\n\t\t the algorithm is scoring ", length(GOlist), " nontrivial nodes\n", sep =""))
cat("\t\t parameters: \n")
cat("\t\t\t test statistic: ", Name(test.stat), "\n")
cat("\t\t\t score order: ", ifelse(scoreOrder(test.stat), "decreasing", "increasing"), "\n")
algoRes0 <- .sigGroups.classicGsea(GOlist, test.stat)
report<-plot.result(A=as.matrix(object@exp),rl=algoRes0,O=test.stat@testStatPar$geneRanking,pty =object@pty,output.directory =test.stat@testStatPar$output.directory,
topgs=test.stat@testStatPar$topgs,nom.p.val.threshold=test.stat@cutOff)
saveRDS(report,paste(test.stat@testStatPar$output.directory,'report.rds',sep = ''))
#browser()
#####use plot here or outside?
algoRes <- unlist(sapply(algoRes0,function(x){x['p']},simplify = T))
names(algoRes) <- names(algoRes0)
## STORE THE RESULTS
.whichAlgorithm <- "classicGsea"
attr(.whichAlgorithm, "testClass") <- as.character(class(test.stat))
l=list()
# browser()
for(i in names(report[-1])){
l[[i]] = lapply(report[-1][[i]],function(x){x$report})
}
# l[[names(report[-1])[1]]]=lapply(report[-1][[1]],function(x){x$report})
# l[[names(report[-1])[2]]]=lapply(report[-1][[2]],function(x){x$report})
return(new("topONTresultGSEA",
description = paste(description(object), "\nOntology:", ontology(object), sep = " "),
score = algoRes, testName = Name(test.stat),
algorithm = .whichAlgorithm,
geneData = c(.getGeneData(object), SigTerms = length(GOlist)),
global.report = report$report,
gs.report = l,
plots = lapply(report[-1][[i]],function(x){x$plot}),
cutOff = test.stat@cutOff
))
})
.sigGroups.classicGsea <- function(GOlist, test.stat) {
#browser()
#GOlist=GOlist[1:2]
#GOlist<-GOlist['DOID:1441']
total.gs<-length(GOlist)
print(paste('total gene set: ',length(GOlist),sep = ''))
sigList <- sapply(names(GOlist),
function(x) {
members(test.stat) <- as.character(GOlist[[x]])
test.stat@annotation.weight <- as.numeric(names(GOlist[[x]]))
names(test.stat@annotation.weight) <-members(test.stat)
test.stat@testStatPar$gsname<-x
print(paste(which(names(GOlist)==x),'/',total.gs,':',x,sep = ''))
return(runTest(test.stat))
},simplify = F)
#browser()
return(sigList)
}
setMethod("getSigGroups",
signature(object = "topONTdata", test.stat = "elimGsea"),
function(object, test.stat, ...) { ## ... parameters for each algorithm
#browser()
## update the test.stat object if there is the case
if(length(allScore(test.stat)) == 0) {
allMembers(test.stat) <- genes(object)
score(test.stat) <- geneScore(object, use.names = TRUE)
}
GOlist <- usedGO(object)
cat("\n\t\t\t -- Elim Algorithm -- \n")
cat(paste("\n\t\t the algorithm is scoring ", length(GOlist), " nontrivial nodes\n", sep =""))
cat("\t\t parameters: \n")
cat("\t\t\t test statistic: ", Name(test.stat), "\n")
cat("\t\t\t cutOff: ", cutOff(test.stat), "\n")
cat("\t\t\t score order: ", ifelse(scoreOrder(test.stat), "decreasing", "increasing"), "\n")
## apply the algorithm
algoRes0 <- .sigGroups.gseaElim(graph(object), test.stat)
#browser()
##remove those gs that has no gene in it after the elimination
algoRes0<-algoRes0[!sapply(algoRes0,function(x){is.na(x$ES.obs)})]
#browser()
report<-plot.result(A=as.matrix(object@exp),rl=algoRes0,O=test.stat@testStatPar$geneRanking,pty =object@pty,output.directory =test.stat@testStatPar$output.directory,
topgs=test.stat@testStatPar$topgs,nom.p.val.threshold=test.stat@cutOff,doc.string=test.stat@testStatPar$doc.string)
#####use plot here or outside?
algoRes <- unlist(sapply(algoRes0,function(x){x['p']},simplify = T))
names(algoRes) <- names(algoRes0)
#browser()
## STORE THE RESULTS
.whichAlgorithm <- "classicGsea"
attr(.whichAlgorithm, "testClass") <- as.character(class(test.stat))
return(new("topONTresultGSEA",
description = paste(description(object), "\nOntology:", ontology(object), sep = " "),
score = algoRes, testName = Name(test.stat),
algorithm = .whichAlgorithm,
geneData = c(.getGeneData(object), SigTerms = length(GOlist)),
global.report = report$report,
gs.report = lapply(report$ALL,function(x){x$report}),
plots = lapply(report[-1][[i]],function(x){x$plot}),
cutOff = test.stat@cutOff
))
})
.sigGroups.gseaElim <- function(goDAG, test.stat) {
goDAG.r2l <- reverseArch(goDAG)
nodeLevel <- buildLevels(goDAG.r2l, leafs2root = FALSE)
levelsLookUp <- nodeLevel$level2nodes
##adjs.cutOff <- cutOff(test.stat) / numNodes(goDAG)
adjs.cutOff <- cutOff(test.stat)
#cat(paste("\n\t\t Parameters:\t cutOff = ", adjs.cutOff, "\n", sep =""))
## we use a lookup table to search for nodes that have were significant
sigNodes.LookUP <- new.env(hash = TRUE, parent = emptyenv())
## hash table for the genes that we eliminate for each node
## we store the genes that we want to eliminate
elimGenes.LookUP <- new.env(hash = TRUE, parent = emptyenv())
## hash table to store the result
sigList <- new.env(hash = TRUE, parent = emptyenv())
result<-list()
#browser()
for(i in nodeLevel$noOfLevels:1) {
currNodes.names <- get(as.character(i), envir = levelsLookUp, mode = 'character')
##children.currNodes <- adj(goDAG.r2l, currNodes.names)
currAnno <- .genesInNode(goDAG, currNodes.names,score = T)
.num.elimGenes <- length(unique(unlist(as.list(elimGenes.LookUP))))
cat(paste("\n\t Level ", i, ":\t", length(currNodes.names),
" nodes to be scored\t(", .num.elimGenes, " eliminated genes)\n", sep =""))
for(termID in currNodes.names) {
#browser()
## just in case the test.stat is modified by some methods
test.stat@annotation.weight<-as.numeric(names(currAnno[[termID]]))
names(test.stat@annotation.weight)<-currAnno[[termID]]
group.test <- updateGroup(test.stat, name = termID, members = unname(currAnno[[termID]]))
group.test@testStatPar$gsname<-termID
#browser()
## remove the genes from the term, if there are
if(!exists(termID, envir = elimGenes.LookUP, mode = "character"))
elim(group.test) <- character(0)
else{
#browser()
elim(group.test) <- get(termID, envir = elimGenes.LookUP, mode = 'character')
}
#browser()
if(group.test@min.size<=length(group.test@members) & length(group.test@members) <= group.test@max.size){
## run the test and store the result (the p-value)
termSig <- runTest(group.test)
result[[group.test@name]]<-termSig
assign(termID, termSig$p, envir = sigList)
## if we have a significant GO term
if(termSig$p < adjs.cutOff) {
## we mark it
assign(termID, termSig$p, envir = sigNodes.LookUP)
#browser()
## we set the genes that we want to eliminate from the all ancestors
if(group.test@elim.gene.type=='core'){
## here we want to only eliminate the 'leading core' gene from gsea
hits<-which(termSig$GSEA.results$indicator==1)
if(termSig$GSEA.results$ES[1]>=0)
core<-hits[hits<=termSig$GSEA.results$arg.ES[1]]
else
core<-hits[hits>=termSig$GSEA.results$arg.ES[1]]
elimGenesID<-names(group.test@testStatPar$geneRanking$s2n.m[,1][group.test@testStatPar$geneRanking$o.m[,1][core]])
}else{
##elim all gene
elimGenesID<-group.test@members
}
#we assign the annotation weight to the elim genes
names(elimGenesID)<-group.test@annotation.weight[match(elimGenesID,group.test@members,nomatch = 0)]
#browser()
## we look for the ancestors
## because we are aggrating the score, we only look for
## the direct parent instrad of all the ancestors
ancestors <- setdiff(nodesInInducedGraph(goDAG, termID), termID)
#ancestors<-adj(goDAG, termID)
oldElimGenesID <- mget(ancestors, envir = elimGenes.LookUP,
mode = 'character', ifnotfound = list(c()))
## add the new genesID to the ancestors
.aggregateElimScore<-function(oldElim,newElim){
for(i in 1:length(newElim)){
if(newElim[[i]] %in% oldElim){
old.W<-as.numeric(names(oldElim[which(oldElim==newElim[[i]])]))
names(oldElim)[which(oldElim==newElim[[i]])]<-sum(old.W,as.numeric(names(newElim[i])))
}else{
oldElim<-c(oldElim,newElim[i])
}
}
oldElim
}
newElimGenesID<-lapply(oldElimGenesID,
function(termGenes) {
aux<-.aggregateElimScore(termGenes,elimGenesID)
return(aux[!is.na(aux)])
})
# if(group.test@elim.type=='score'){
#
# }else{
# newElimGenesID<-lapply(oldElimGenesID,
# function(termGenes) {
# aux<-union(termGenes, elimGenesID)
# return(aux[!is.na(aux)])
# })
# }
#
# newElimGenesID <- lapply(oldElimGenesID,
# function(termGenes) {
# aux <- union(termGenes, elimGenesID)
# return(aux[!is.na(aux)])
# })
## update the lookUp table
if(length(newElimGenesID) > 0)
multiassign(names(newElimGenesID), newElimGenesID, envir = elimGenes.LookUP)
}
}
}
}
return(result)
#return(unlist(as.list(sigList)))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.