inst/Scripts/distance.R

##Copyright B. Ding, 2004, all rights reserved

##FIXME: I think some of Jianhua's new environments remove all need
##for these functions
## function to get all the parent nodes for "go" (including "go" itself)
##
##NAME of Beiying's function:
##go.all.parents <- function(go)

## function to get all the children nodes for "go" (including itself)
##go.all.children <- function(go)

## function to find all genes annotated at GO including its children nodes
## env1: mapping from LL to GO
## env2: mapping from GO to LL

##go2allgenes <- function(gos,env1=NULL,env2=NULL)
## calculate the distance between two GO nodes by the cardinality of their GO graph intersection divided by the cardinality of their graph union.
##go.dist <- function(go1,go2)


## calculate distance between GO graphs for geneslist1 and geneslist2
## using all GO nodes the genes can be mapped to
## env3: mapping from GO to ALL LL

geneslist.dist.allnode.ll <- function(geneslist1,geneslist2,ontology="BP",
env1=NULL, env2=NULL, env3=NULL)
  {
    geneslist1.graph <- NULL

    for (j in geneslist1)
      {
        geneslist1.gos <- unlist(mget(j,get(env1)))
        geneslist1.ontology <- geneslist1.gos[!is.na(mget(geneslist1.gos,get(paste("GO",ontology,"ID2TERM",sep=""))))]

    #  print(geneslist.ontology)
        if(length(geneslist1.ontology)==0)
          geneslist1.ontology <- NA

        geneslist1.graph <- union(geneslist1.graph,names(go.all.parents(geneslist1.ontology)))
      }

    geneslist2.graph <- NULL

    for (j in geneslist2)
      {
        geneslist2.gos <- unlist(mget(j,get(env1)))
        geneslist2.ontology <- geneslist2.gos[!is.na(mget(geneslist2.gos,get(paste("GO",ontology,"ID2TERM",sep=""))))]

    #  print(geneslist.ontology)
        if(length(geneslist2.ontology)==0)
          geneslist2.ontology <- NA

        geneslist2.graph <- union(geneslist2.graph,names(go.all.parents(geneslist2.ontology)))
      }

    geneslist1.geneslist2.intersect.graph <- intersect(geneslist1.graph,geneslist2.graph)
    geneslist1.geneslist2.intersect.graph <- geneslist1.geneslist2.intersect.graph[!(geneslist1.geneslist2.intersect.graph %in% c("GO:0008150","GO:0003674","GO:0005575","GO:0003673"))]
    #    print(gene.geneslist.intersect.graph)

    if(length(geneslist1.geneslist2.intersect.graph)>0)
      {

        geneslist1.geneslist2.intersect.graph.lls <- list()

        for (j in geneslist1.geneslist2.intersect.graph)
          {
              geneslist1.geneslist2.intersect.graph.lls[[j]] <-
              intersect(unlist(mget(j, get(env3), ifnotfound=NA)), geneslist2)
          }

        geneslist1.geneslist2.intersect.graph.lls <-
        geneslist1.geneslist2.intersect.graph.lls[sapply(
        geneslist1.geneslist2.intersect.graph.lls, function(x) length(x)>0)]

        score <- sum(sapply(geneslist1.geneslist2.intersect.graph.lls,
        function(x)
        length(x))/sapply(mget(names(geneslist1.geneslist2.intersect.graph.lls),
        get(env3), ifnotfound=NA),function(x)
                          length(x)))
    }else{
        score <- 0
    }
    return(score)
}

## significance test for gene.dist.allnode.ll where geneslist1 is the gene set of interest and geneslist2 is the candidate gene list
geneslist.dist.allnode.ll.sig <- function(geneslist1, geneslist2,
                                          geneslist2.pool=NULL,
                                          ontology="BP", n.samp=100,
                                          env1=NULL, env2=NULL,
                                          env3=NULL)
{
    score <- geneslist.dist.allnode.ll(geneslist1, geneslist2,
                                       ontology=ontology, env1, env2, env3)
                                        # print(score)
    score.null <- NULL

    for ( i in 1:n.samp) {
        print(i)
        geneslist2.null <- sample(geneslist2.pool, length(geneslist2),
                                  FALSE)
                                        # print(geneslist2.null)
        score.null[i] <- geneslist.dist.allnode.ll(geneslist1,
                      geneslist2.null, ontology=ontology, env1, env2, env3)
    }

    pval <- mean(score<=score.null)
    return(list(score=score, pval=pval))
  }

Try the GOstats package in your browser

Any scripts or data that you put into this service are public.

GOstats documentation built on Nov. 8, 2020, 8:06 p.m.