## The following code has been extracted from the GOSim package by Holger Fröhlich et. al. and adapted for parallelization.
## Holger Froehlich, N. Speer, A. Poustka, Tim Beissbarth (2007). GOSim - An R-Package for Computation of Information Theoretic GO Similarities Between Terms and Gene Products. BMC Bioinformatics, 8:166
# req in setOntology
getAncestors<-function(){
ontology<-get("ontology",envir=GOSimEnv)
if(ontology == "BP")
res<-AnnotationDbi::as.list(GOBPANCESTOR)
else if(ontology == "MF")
res<-AnnotationDbi::as.list(GOMFANCESTOR)
else if(ontology == "CC")
res<-AnnotationDbi::as.list(GOCCANCESTOR)
else
stop(paste("ontology", ontology, "not known!"))
return(res)
}
# req in setOntology
getParents<-function(){
ontology<-get("ontology",envir=GOSimEnv)
if(ontology == "BP")
res<-AnnotationDbi::as.list(GOBPPARENTS)
else if(ontology == "MF")
res<-AnnotationDbi::as.list(GOMFPARENTS)
else if(ontology == "CC")
res<-AnnotationDbi::as.list(GOCCPARENTS)
else
stop(paste("ontology", ontology, "not known!"))
return(res)
}
# req in setOntology
getChildren<-function(){
ontology<-get("ontology",envir=GOSimEnv)
if(ontology == "BP")
res<-AnnotationDbi::as.list(GOBPCHILDREN)
else if(ontology == "MF")
res<-AnnotationDbi::as.list(GOMFCHILDREN)
else if(ontology == "CC")
res<-AnnotationDbi::as.list(GOCCCHILDREN)
else
stop(paste("ontology", ontology, "not known!"))
return(res)
}
# req in calcICs
getOffsprings<-function(){
ontology<-get("ontology",envir=GOSimEnv)
if(ontology == "BP")
res<-AnnotationDbi::as.list(GOBPOFFSPRING)
else if(ontology == "MF")
res<-AnnotationDbi::as.list(GOMFOFFSPRING)
else if(ontology == "CC")
res<-AnnotationDbi::as.list(GOCCOFFSPRING)
else
stop(paste("ontology", ontology, "not known!"))
return(res)
}
# req in setOntology
calcICs<-function(cores = 1){
# if(!require(annotate))
# stop("Package annotate is required for function calcICs")
evidences<-get("evidences", envir=GOSimEnv)
ontology<-get("ontology",envir=GOSimEnv)
organism = get("organism", envir=GOSimEnv)
message(paste("calculating information contents for ontology ", ontology, " using evidence codes '", paste(evidences,collapse=", "), "' (", organism,") ...",sep=""))
ids<- AnnotationDbi::toTable(GOTERM)
ids = unique(ids[ids[,"Ontology"] == ontology,"go_id"]) # these are all GO terms, which belong to the corrseponding category
offspring <- getOffsprings()
gomap <- get("gomap",envir=GOSimEnv)
goterms<-unlist(sapply(gomap, function(x) names(x)), use.names=FALSE) # all GO terms appearing in an annotation
goterms<-goterms[goterms %in% ids] # this is to ensure that we only get GO terms mapping to the given ontology
tab<-table(goterms)
na<-names(tab)
s<-setdiff(ids, na) #ensure that GO terms NOT appearing in the annotation have 0 frequency
m<-double(length(s))
names(m)<-s
tab<-as.vector(tab)
names(tab)<-na
tab<-c(tab,m)
if(cores <= 1) {
ta<-sapply(ids,function(x){ t=tab[unlist(offspring[x])]; tab[x]+sum(t[!is.na(t)])})
} else {
ta<-simplify2array(mclapply(ids,function(x){ t=tab[unlist(offspring[x])]; tab[x]+sum(t[!is.na(t)])}, mc.cores = cores))
}
names(ta)<-ids
IC <- -log(ta/sum(tab)) # ACHTUNG: hier muß tab und nicht ta stehen: Die Vorkommenshäufigkeit des Wurzelknotens ist die Summe der Vorkommenshäufigkeiten aller Knoten OHNE Aufsummieren der Kinder!
# # IC[IC == Inf] = 0 # WRONG: GO terms which are not annotated have Inf information content (NOT 0: They cannot be treated like root!!!)
message("done")
return(IC)
}
setEvidenceLevel<-function(evidences="all", organism=org.Hs.egORGANISM, gomap=org.Hs.egGO, cores = 1){
message(paste("-> retrieving GO information for all available genes for organism '", organism, "' in GO database", sep=""))
assign("evidences", evidences, envir=GOSimEnv)
# gomap<-as.list(GOENTREZID2GO)
if(is(gomap, "Bimap")){
mapped_genes <- mappedkeys(gomap)
gomap = AnnotationDbi::as.list(gomap[mapped_genes])
}
else if(!is(gomap, "list"))
stop("gomap argument should be a nested list (see manual pages)!")
message(paste("-> filtering GO terms according to evidence levels '", evidences, "'",sep=""))
if((length(evidences) > 1) || (evidences!="all")){
gomap<-sapply(gomap,function(x) sapply(x,function(y) c(y$Evidence %in% evidences, y$Ontology)))
gomap<-sapply(gomap, function(x) x[2,x[1,]=="TRUE"])
gomap<-gomap[sapply(gomap,length) >0]
}
assign("gomap", gomap, envir=GOSimEnv)
assign("organism", organism, envir=GOSimEnv)
}
setOntology<-function(ont="BP", loadIC=FALSE, cores = 1){
assign("ontology", ont, envir=GOSimEnv)
if(loadIC){
if(!exists("IC",envir=GOSimEnv))
stop('Argument loadIC is TRUE, but IC object does not exist in GOSimEnv')
IC<-get("IC",envir=GOSimEnv)
IC["all"]=0
} else {
IC <- calcICs(cores = cores)
assign("IC", IC, envir=GOSimEnv)
}
assign("ancestor", getAncestors(), envir=GOSimEnv)
assign("children", getChildren(), envir=GOSimEnv)
assign("parents", getParents(), envir=GOSimEnv)
children<-get("children",envir=GOSimEnv)
parents<-get("parents",envir=GOSimEnv)
assign("nchildren", sapply(children,length) , envir=GOSimEnv)
assign("nparents", sapply(parents,length), envir=GOSimEnv)
nchildren<-get("nchildren",envir=GOSimEnv)
nparents<-get("nparents",envir=GOSimEnv)
assign("Eavg", (sum(nchildren) + sum(nparents))/ length(union(names(nchildren), names(nparents))), envir<-GOSimEnv)
assign("alphaParam", 0.5, envir=GOSimEnv)
assign("betaParam", 0.5, envir=GOSimEnv)
}
initialize <- function(evidences = "all", organism = "human", ontology = "BP", gomap=org.Hs.egGO, cores = 1) {
if(!exists('GOSimEnv'))
stop('There is no environment "GOSimEnv" defined, cannot initialize modified GOSim code')
GOSimEnv$organism <- NULL
GOSimEnv$ontology <- NULL
require(GO.db)
}
# req by get gene sim
# filter out genes not mapping to the category in question
filterGO<-function(genelist){
cat("filtering out genes not mapping to the currently set GO category ...")
if(!exists("GOSimEnv")) initialize()
IC<-get("IC", envir=GOSimEnv)
ids<-names(IC[IC != Inf]) # only consider GO terms with some known annotation
gomap<-get("gomap",envir=GOSimEnv)
k<-1
allgenes<-list()
for(i in 1:length(genelist)){
annoi<-gomap[[match(as.character(genelist[i]), names(gomap))]]
annoi<-intersect(names(annoi), as.character(ids))
if(length(annoi) > 0){
allgenes[[k]]<-list(annotation=annoi,genename=as.character(genelist[i]))
k<-k + 1
}
}
cat(" ===> list of ", length(genelist), "genes reduced to ", length(allgenes), "\n")
allgenes
}
# precompute term similarities for all pairs of GO terms belonging to the annotated genelists x (and y)
precomputeTermSims<-function(x, y=NULL, similarityTerm="JiangConrath", verbose=FALSE){
if(verbose)
message("precomputing term similarities ...")
gotermsx<-as.vector(unique(unlist(sapply(x, function(xx) xx$annotation))))
if(!is.null(y)){
gotermsy<-as.vector(unique(unlist(sapply(y, function(xx) xx$annotation))))
if(similarityTerm %in% c("diffKernelgraphLapl", "diffKernelLLE", "diffKernelpower", "diffKernelexpm")){
STerm = getTermSim(c(gotermsx, gotermsy), method=similarityTerm)
STerm = STerm[gotermsx, gotermsy]
return(STerm)
}
STerm<-matrix(0, nrow=length(gotermsx), ncol=length(gotermsy))
rownames(STerm)=gotermsx
colnames(STerm)=gotermsy
for(i in 1:length(gotermsx)){
for(j in 1:length(gotermsy)){
STerm[i,j]<-calcTermSim(gotermsx[i],gotermsy[j], similarityTerm, verbose)
}
}
} else{
if(similarityTerm %in% c("diffKernelgraphLapl", "diffKernelLLE", "diffKernelpower", "diffKernelexpm")){
STerm = getTermSim(gotermsx, method=similarityTerm)
return(STerm)
}
STerm<-matrix(0, nrow=length(gotermsx), ncol=length(gotermsx))
rownames(STerm)<-gotermsx
colnames(STerm)<-gotermsx
for(i in 1:length(gotermsx)){
STerm[i,i]<-calcTermSim(gotermsx[i], gotermsx[i], similarityTerm, verbose)
if(i > 1){
for(j in 1:(i-1)){
STerm[i,j]<-calcTermSim(gotermsx[i], gotermsx[j], similarityTerm, verbose)
STerm[j,i]<-STerm[i,j]
}
}
}
}
STerm
}
# basic term similarity for index i and j in GOIDs ids
calcTermSim<-function(ids, i, j, method="JiangConrath"){
calcTermSim(ids[i],ids[j], method)
}
# basic term similarity between term1 and term2
calcTermSim<-function(term1, term2, method="JiangConrath", verbose=FALSE){
if(!exists("GOSimEnv")) initialize()
IC<-get("IC", envir=GOSimEnv)
if(verbose)
message(paste("Terms:",term1,",",term2,"( method:",method,")"))
if(method== "Resnik")
return(IC[getMinimumSubsumer(term1,term2)] / max(IC[IC != Inf]))
else if(method == "JiangConrath")
return(1 - min(1, -2*IC[getMinimumSubsumer(term1,term2)] + IC[term1] + IC[term2]) )
else if(method == "Lin"){
res = 2*IC[getMinimumSubsumer(term1,term2)]/(IC[term1]+IC[term2])
return(ifelse(is.na(res), 0, res))
}
else if(method== "CoutoEnriched")
return(getEnrichedSim(term1, term2))
else if(method == "CoutoResnik")
return(getDisjCommAncSim(term1,term2, "Resnik"))
else if(method == "CoutoJiangConrath")
return(getDisjCommAncSim(term1,term2, "JiangConrath"))
else if(method == "CoutoLin"){
res = getDisjCommAncSim(term1,term2, "Lin")
return(ifelse(is.na(res), 0, res))
}
else if(method == "simIC"){ # Li et al.
MICA = getMinimumSubsumer(term1,term2)
res = 2*IC[MICA]/(IC[term1]+IC[term2]) * (1 - 1/(1 + IC[MICA]))
return(ifelse(is.na(res), 0, res))
}
else if(method == "GIC") # graph information content
return(getGIC(term1, term2))
else if(method == "relevance"){ # Schlicker et al.
MICA = getMinimumSubsumer(term1,term2)
res = (2*IC[MICA]/(IC[term1]+IC[term2]))*(1 - exp(-IC[MICA]))
return(ifelse(is.na(res), 0, res))
}
else if(method == "diffKernel"){
K = mget("K", envir=GOSimEnv, ifnotfound=list(function(x) stop(paste("Diffusion kernel not loaded!\nPlease invoke load.diffusion.kernel().", sep = ""), call. = FALSE)))$K
return(K[term1, term2])
}
else
stop(paste("calcTermSim: Unknown term similarity",method))
}
getMinimumSubsumer<-function(term1, term2){
if(!exists("GOSimEnv")) initialize()
ancestor<-get("ancestor",envir=GOSimEnv)
if(term1 == term2){
ms<-term1
return(ms)
}
an1<-unlist(ancestor[names(ancestor) == term1])
an2<-unlist(ancestor[names(ancestor) == term2])
case1<-which(an2 == term1) # term1 is the ms of term2
case2<-which(an1 == term2) # term2 is the ms of term1
if(length(case1) > 0){
ms<-term1
} else if(length(case2) > 0) {
ms<-term2
} else {
# find common ancestor with maximal information content
anall<-intersect(an1, an2)
IC<-get("IC", envir=GOSimEnv)
ms<-anall[which.max(IC[anall])]
}
if(is.null(ms) | length(ms) == 0)
ms <- "NA"
ms
}
# compute gene similarity for a pair of genes having GO terms anno1 and anno2
getGSim<-function(anno1, anno2, similarity="funSimMax", similarityTerm="JiangConrath", STerm=NULL, avg=FALSE, verbose=FALSE){
if(length(anno1) <= length(anno2)){
a1<-anno1
a2<-anno2
swap<-FALSE
}
else{
a1<-anno2
a2<-anno1
swap<-TRUE
}
if(!is.null(STerm)){ # use precomputed similarity values
if(!swap)
ker<-STerm[a1,a2]
else
ker<-STerm[a2,a1]
if(length(a1) == 1)
ker<-t(as.matrix(ker))
if(is.null(ker) || is.null(nrow(ker))){
warning(paste("No GO information for",a1,a2,". Similarity set to NaN."))
return(NaN)
}
if(nrow(ker) > ncol(ker))
ker<-t(ker)
}
else{
if(similarity %in% c("dot"))
return(getWeightedDotSim(a1, a2))
else{
# calculate term similarity
ker<-matrix(0,nrow=length(a1),ncol=length(a2))
for(i in 1:length(a1)){
for(j in 1:length(a2))
ker[i,j]<-calcTermSim(a1[i],a2[j], similarityTerm, verbose)
}
}
}
if(length(a1)*length(a2) > 0){
if(similarity == "OA"){
res<-.C("OAWrapper", ker, nrow(ker), ncol(ker), as.integer(1), ret=double(1))$ret
if(avg)
res = res/length(a2)
return(res)
}
else if(similarity == "max"){
return(max(ker))
}
else if(similarity == "mean"){
return(mean(ker))
}
else if(similarity == "funSimAvg"){
rowMax = mean(apply(ker,1,max))
colMax = mean(apply(ker,2,max))
return(0.5*(rowMax + colMax))
}
else if(similarity == "funSimMax"){
rowMax = mean(apply(ker,1,max))
colMax = mean(apply(ker,2,max))
return(max(rowMax, colMax))
}
else if(similarity == "hausdorff"){
rowMax = min(apply(ker,1,max))
colMax = min(apply(ker,2,max))
return(min(rowMax, colMax))
}
else
stop(paste("getGSim: Unknown gene similarity",similarity,"!"))
}
else{
warning(paste("No GO information for",a1,a2,". Similarity set to NaN."))
return(NaN)
}
}
# req for getGeneSim
normalize.kernel = function(Ker, kerself1=NULL, kerself2=NULL, method="none"){
if(method != "none"){
if(is.null(kerself1) || is.null(kerself2)){
if(method == "sqrt"){ # result between -1 and 1
Kd<-sqrt(diag(Ker) + 1e-10)
Ker<-Ker/(Kd%*%t(Kd))
}
else if(method == "Lin"){ # result: diagonal = 1
Kd = diag(Ker)
Ker = 2*Ker / outer(Kd, Kd, "+")
}
else if(method == "Tanimoto"){
Kd = diag(Ker)
Ker = Ker / (outer(Kd, Kd, "+") - Ker)
}
# else if(method == "variance")
# Ker = Ker /(mean(diag(Ker)) - mean(Ker))
else
stop(paste("Unknown normalization method", method))
diag(Ker) = 1
}
else{
if(method == "sqrt")
return(Ker / sqrt(kerself1 * kerself2))
else if(method == "Lin")
return(2*Ker / (kerself1 + kerself2))
else if (method == "Tanimoto")
return(Ker / (kerself1 + kerself2 - Ker))
else
stop(paste("Unknown normalization method", method))
}
}
Ker
}
getGeneSim<-function(genelist1, genelist2=NULL, similarity="funSimMax", similarityTerm="relevance", normalization=TRUE, method="sqrt", avg=(similarity=="OA"), verbose=FALSE, cores = 1){
genelist1 <- unique(genelist1)
genelist2 <- unique(genelist2)
if(length(genelist1) < 2 && is.null(genelist2))
stop("Gene list should contain more than 2 elements!")
allgenes<-filterGO(genelist1)
if(length(allgenes) > 1 && is.null(genelist2)){
if(!(similarity %in% c("dot")))
STerm<-precomputeTermSims(x=allgenes, similarityTerm=similarityTerm, verbose=verbose) # precompute term similarities => speed up!
else
STerm = NULL
if(verbose)
message(paste("Calculating similarity matrix with similarity measure",similarity))
# computes the columns of the triangular part of matrix Ker and returns each row as a vector
# this can be done in parallel
tri.cols <- mclapply(
seq_along(allgenes),
function(i) {
tri.col <- numeric(length(allgenes)) # initialize new empty col
annoi <- allgenes[[i]]$annotation
for(j in 1:i) {
annoj <- allgenes[[j]]$annotation
tri.col[j] <- getGSim(annoi,annoj, similarity, similarityTerm, STerm=STerm, avg=avg, verbose)
}
return(tri.col)
},
mc.cores = cores
)
# populate Ker by setting rows and columns = rows (triangular matrix)
Ker <- matrix(unlist(tri.cols), nrow=length(allgenes), ncol=length(allgenes))
Ker[lower.tri(Ker)] <- t(Ker)[lower.tri(Ker)] # copies the upper triangle to the lower
colnames(Ker) <- sapply(allgenes, function(x) x$genename)
rownames(Ker) <- colnames(Ker)
if(normalization){
Ker = normalize.kernel(Ker, method=method)
if(any(Ker > 1, na.rm=T)) # this has been updated
warning("Similarity matrix contains values > 1! This may happen with simlarity='funSimMax', if one gene's GO annotation is a complete subset of another gene's GO annotation.")
Ker[Ker>1] = 1 # can happen with similarity funSimMax in cases where one GO annotation is subset of another one
}
}
else if(length(allgenes) > 0 && length(genelist2) > 0){
allgenes2<-setdiff(filterGO(genelist2), allgenes)
if(length(allgenes) < 1)
stop('The supplied gene sets seem to be indentical')
if(!(similarity %in% c("dot")))
STerm<-precomputeTermSims(x=allgenes, y=allgenes2, similarityTerm=similarityTerm, verbose=verbose) # precompute term similarities => speed up!
else
STerm = NULL
if(verbose)
message(paste("Calculating similarity matrix with similarity measure",similarity))
Ker.list <- mclapply(
seq_along(allgenes2),
function(i) {
tri.col <- numeric(length(allgenes)) # initialize new empty col
annoi <- allgenes2[[i]]$annotation
if(normalization)
kerselfi = getGSim(annoi, annoi, similarity, similarityTerm, STerm=NULL, avg=avg, verbose)
for(j in seq_along(allgenes)) {
annoj <- allgenes[[j]]$annotation
tri.col[j] <- getGSim(annoi,annoj, similarity, similarityTerm, STerm=STerm, avg=avg, verbose)
if(normalization){
kerselfj = getGSim(annoj, annoj, similarity, similarityTerm, STerm=NULL, avg=avg, verbose)
tri.col[j] = normalize.kernel(tri.col[j], kerselfi, kerselfj, method=method)
}
}
return(tri.col)
},
mc.cores = cores
)
Ker <- matrix(unlist(Ker.list), nrow=length(allgenes), ncol=length(allgenes2))
colnames(Ker)<-sapply(allgenes2,function(x) x$genename)
rownames(Ker)<-sapply(allgenes,function(x) x$genename)
if(any(Ker > 1, na.rm=T))
warning("Similarity matrix contains values > 1! This may happen with simlarity='funSimMax', if one gene's GO annotation is a complete subset of another gene's GO annotation.")
Ker[Ker>1] = 1 # can happen with similarity funSimMax in cases where one GO annotation is subset of another one
}
else{
if(length(allgenes) == 0)
stop("No gene has GO information!")
else if(length(allgenes) == 1)
stop(paste("Only gene",allgenes," has GO information!"))
}
Ker
}
# compute FuSSiMeg enriched term similarity
getEnrichedSim<-function(term1, term2){
#if(!require(RBGL))
# stop("Package RBGL is required for function getDepthFactor")
if(!exists("GOSimEnv")) initialize()
ms<-getMinimumSubsumer(term1,term2)
IC<-get("IC", envir=GOSimEnv)
if(term1 != term2){
G<-getGOGraph(c(term1,term2))
if(term1 != ms){
path1=sp.between(G,term1,ms)[[1]]$path # path to ms
len<-length(path1)
delta1<-sum(sapply(path1[2:len],getDepthFactor,G)*sapply(path1[2:len],getDensityFactor)*(-diff(IC[path1])))
}
else
delta1<-0
if(term2 != ms){
path2<-sp.between(G,term2,ms)[[1]]$path # path to ms
len<-length(path2)
delta2<-sum(sapply(path2[2:len],getDepthFactor,G)*sapply(path2[2:len],getDensityFactor)*(-diff(IC[path2])))
}
else
delta2<-0
delta<-delta1 + delta2
sim<-1 - min(1, delta)
}
else
sim<-1
sim<-sim * IC[term1] * IC[term2] # correction given in equation (11) of the FuSSiMeg paper
sim[is.na(sim)] = 0
names(sim)<-c()
sim
}
# get GraSM similarity of common disjunctive ancestors of two terms
getDisjCommAncSim<-function(term1, term2, method="JiangConrath"){
if(!exists("GOSimEnv")) initialize()
IC<-get("IC", envir=GOSimEnv)
djca<-getDisjCommAnc(term1, term2)
ICdjca<-IC[djca]
ICdjca<-ICdjca[!is.na(ICdjca)]
ICshare<-mean(ICdjca)
if(method == "JiangConrath")
return(1-min(1,-2*ICshare + IC[term1] + IC[term2]))
else if(method == "Resnik")
return(ICshare)
else if(method == "Lin")
return(2*ICshare/(IC[term1]+IC[term2]))
else
stop(paste("getDisjCommAnc: Unknown term similarity",method))
}
# graph information content similarity related to Tanimoto-Jacard index
getGIC = function(term1, term2){
if(!exists("GOSimEnv")) initialize()
if(term1 == term2){
return(1)
}
IC<-get("IC", envir=GOSimEnv)
ancestor<-get("ancestor",envir=GOSimEnv)
an1<-unlist(ancestor[names(ancestor) == term1])
an2<-unlist(ancestor[names(ancestor) == term2])
ancommon = intersect(an1, an2)
anunion = union(an1, an2)
return(sum(IC[ancommon]) / sum(IC[anunion]))
}
# graph information content similarity related to Tanimoto-Jacard index
getGIC = function(term1, term2){
if(!exists("GOSimEnv")) initialize()
if(term1 == term2){
return(1)
}
IC<-get("IC", envir=GOSimEnv)
ancestor<-get("ancestor",envir=GOSimEnv)
an1<-unlist(ancestor[names(ancestor) == term1])
an2<-unlist(ancestor[names(ancestor) == term2])
ancommon = intersect(an1, an2)
anunion = union(an1, an2)
return(sum(IC[ancommon]) / sum(IC[anunion]))
}
getWeightedDotSim <- function(anno1, anno2){
v1 = getGeneFeatures.internal(anno1)
v2 = getGeneFeatures.internal(anno2)
dot = crossprod(v1,v2)
}
# calculate term similarities for a list of GO terms
getTermSim<-function(termlist, method="relevance", verbose=FALSE){
S<-matrix(0,nrow=length(termlist),ncol=length(termlist))
colnames(S)<-termlist
rownames(S)<-termlist
if(method %in% c("diffKernel")){
K = mget("K", envir=GOSimEnv, ifnotfound=list(function(x) stop(paste("Diffusion kernel not loaded!\nPlease invoke load.diffusion.kernel().", sep = ""), call. = FALSE)))$K
Ktmp = matrix(NA, ncol=length(termlist), nrow=length(termlist))
diag(Ktmp) = 1
termlist = intersect(termlist, colnames(K))
K = K[termlist, termlist]
return(K)
}
for(i in 1:length(termlist)){
S[i,i] <- calcTermSim(termlist[i],termlist[i], method, verbose)
if(i > 1){
for(j in 1:(i-1)){
S[i,j]<- calcTermSim(termlist[i],termlist[j], method, verbose)
S[j,i]<-S[i,j]
}
}
}
S
}
# get GraSM common disjunctive ancestors of two terms
getDisjCommAnc<-function(term1, term2){
if(!exists("GOSimEnv")) initialize()
ancestor<-get("ancestor",envir=GOSimEnv)
IC<-get("IC", envir=GOSimEnv)
if(term1 == term2){
return(term1)
}
else{
an1<-unlist(ancestor[names(ancestor) == term1])
an2<-unlist(ancestor[names(ancestor) == term2])
case1<-which(an2 == term1) # term1 is an ancestor of term2
case2<-which(an1 == term2) # term2 is an ancestor of term1
if(length(case1) > 0){
ancommon<-an1
andisj<-getDisjAnc(term1, an1)
}
else if(length(case2) > 0){
ancommon<-an2
andisj<-getDisjAnc(term2, an2)
}
else{
ancommon<-intersect(an1,an2)
andisj<-getDisjAnc(c(term1,term2), ancommon) # we only need to calculate the disjunctives among the common ancestors!
#andisj = unique(rbind(getDisjAnc(term1, an1), getDisjAnc(term2, an2)))
}
djca<-c()
cond1<-sapply(ancommon, function(x) setdiff(ancommon[which(IC[ancommon] >= IC[x])],x)) # which common ancestors are more informative than a_i?
if(length(cond1) > 0){ # look for those that are disjunctive
names(cond1)<-ancommon
for(i in 1:length(cond1)){
res<-sapply(cond1[i][[1]],function(x) any(andisj[,1] == names(cond1[i]) & andisj[,2] == x) | any(andisj[,2] == names(cond1[i]) & andisj[,1] == x))
if(all(res))
djca<-c(djca, names(cond1[i]))
}
djca= unique(djca)
}
if(length(djca)==0)
djca<-ancommon[which.max(IC[ancommon])]# take minimum subsumer otherwise
return(djca)
}
}
getGOGraph<-function(term, prune=Inf){
#if(!require(graph))
# stop("Package graph is required for function getGOGraph")
if(!exists("GOSimEnv")) initialize()
ontology<-get("ontology",envir=GOSimEnv)
if(ontology == "BP")
G<-GOGraph(term,GOBPPARENTS)
else if(ontology == "MF")
G<-GOGraph(term,GOMFPARENTS)
else if(ontology == "CC")
G<-GOGraph(term,GOCCPARENTS)
else
stop(paste("ontology", ontology, "not known!"))
if(prune != Inf){
dis = johnson.all.pairs.sp(G)
inc = unique(unlist(sapply(term, function(t) names(dis[t,])[dis[t,] < prune])))
G = subGraph(nodes(G)[inc], G)
}
G
}
# get FuSsiMeg depth factor
getDepthFactor<-function(term,G){
#if(!require(RBGL))
# stop("Package RBGL is required for function getDepthFactor")
if(!exists("GOSimEnv")) initialize()
d<-sp.between(G,term,"all")[[1]]$length + 1 # start with depth = 1!
D<-((d+1)/d)^get("alphaParam",envir=GOSimEnv)
D
}
# get FuSSiMeg density factor
getDensityFactor<-function(term){
if(!exists("GOSimEnv")) initialize()
nchildren<-get("nchildren",envir=GOSimEnv)
nparents<-get("nparents",envir=GOSimEnv)
e<-nchildren[term] + nparents[term]
betaParam<-get("betaParam",envir=GOSimEnv)
E<-(1-betaParam)*get("Eavg",envir=GOSimEnv)/e + betaParam
E
}
getGeneFeatures.internal = function(anno){
ancestor<-get("ancestor",envir=GOSimEnv)
an<-unlist(ancestor[names(ancestor) %in% anno])
IC<-get("IC", envir=GOSimEnv)
v = double(length(IC))
names(v) = names(IC)
v[c(anno, an)] = IC[c(anno, an)]
v = v[!is.na(v)]
v
}
# get GraSM disjunctive ancestors of a set of terms with ancestors an
getDisjAnc<-function(term, an){
#if(!require(RBGL))
# stop("Package RBGL is required for function getDisjAnc")
G<-getGOGraph(term)
disan<-matrix(0,ncol=2,nrow=0)
for(n1 in 1:length(an)){
if(n1 > 1){
for(n2 in 1:(n1-1)){
if(!separates(term, an[n1], an[n2], G) && !separates(term, an[n2], an[n1], G))
disan<-rbind(disan,c(an[n1], an[n2]))
}
}
}
disan
}
GOGraph = function(term, env){
oldEdges <- vector("list", length = 0)
oldNodes <- vector("character", length = 0)
newN <- term
done <- FALSE
while (!done) {
newN <- newN[!(newN %in% oldNodes)]
if (length(newN) == 0)
done <- TRUE
else {
oldNodes <- c(oldNodes, newN)
numE <- length(newN)
nedges <- AnnotationDbi::mget(newN, env = env, ifnotfound = NA)
nedges <- nedges[!is.na(nedges)]
oldEdges <- c(oldEdges, nedges)
if (length(nedges) > 0)
newN <- sort(unique(unlist(nedges)))
else newN <- NULL
}
}
rE <- vector("list", length = length(oldNodes))
names(rE) <- oldNodes
rE[names(oldEdges)] <- oldEdges
rE <- lapply(rE, function(x) match(x, oldNodes))
names(oldNodes) = oldNodes
return(new("graphNEL", nodes = oldNodes, edgeL = lapply(rE,
function(x) list(edges = x)), edgemode = "directed"))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.