R/cluster.R
In msarc: Draw Diagrams (mis)Representing the Results of Mass Spec Experiments

Documented in msarc.filterTerms msarc.findGOterms msarc.getTerms msarc.loadTerms msarc.saveTerms

################################################################################

# brain-damaged way to create a list with n copies of uniprot, named from "gos"
# In: list of N GO terms, uniprot ID
# Out: list of N copies of uniprot ID, named with the incoming GO terms
augmentGOlist <- function(gos,uniprot) {
  z <- lapply(gos,function(x) uniprot)
  names(z) <- gos
  return(z)
}

# In: named list: uniprot IDs to lists of GO terms
# Out: named list: GO terms to lists of Uniprot IDs
invertGOlist <- function(gos) {
  pile <- mapply(augmentGOlist,gos,names(gos))
  flat <- list()
  for (rmap in pile) {
    for (n in names(rmap)) {
      flat[[n]] = c(rmap[[n]],flat[[n]])
    }
  }
  return(flat)
}

# In: a list of GO terms.
# Out: the list of those terms plus all ancestors.
addOneAncestorSet <- function(terms) {
  res <- unique(unlist(c(terms,lapply(terms,function(x) GO.db::GOMFANCESTOR[[x]]))))
  return(res)
}

# ASSUMPTION!!!!! Go terms do not have ancestors in other hierarchies, e.g.
# a term in the "molecular function" tree will not have an ancestor in
# the "biological process" tree.  Must check this sometime...
# In: a list of lists of GO terms
# Out: the same list, but the sub-lists contain all ancestors of the original
#      set
addGoAncestors <- function(golist) {
  result <- lapply(golist,addOneAncestorSet)
  return(result)
}

# filter list of GO terms on GO tree name ("MF", "BP", "CC")
# In: list of GO objects (from eg2GO
# Out: list of GO ids.
filterGo <- function(goset,gocat) {
  flag <- sapply(goset,function(x) x$Ontology == gocat)
  filt <- goset[flag]
  goid <- sapply(filt,function(x) x$GOID)
  return(unique(goid))
}

# given a vector of Uniprot IDs and a bimap, return the IDs that are found
# as keys in the bimap.
filterUniprotInXref <- function(uniprots,bimap) {
  found <- uniprots %in% AnnotationDbi::mappedkeys(bimap)
  nf <- uniprots[!found]
  if (length(nf) > 0) {
    warning("missing Uniprot IDs: ",paste(nf,sep=",",collapse=","),call.=F)
  }
  return(uniprots[found])
}

# In: a single Uniprot ID,plus the two maps
# Out: a list of GO ids associated with this Uniprot ID
oneUniprot2go <- function(id,uniprot2eg,eg2go) {
  egmap = eg2go[uniprot2eg[[id]]]
  egs = AnnotationDbi::mappedkeys(egmap)
  gos = lapply(egs,function(x) eg2go[[x]])
  fgos = unlist(gos,recursive=F)
  return(fgos)
}

# note that "map" is a table that maps entrez gene IDs to Uniprot ids, from
# the "org.Hs.eg.db" package, or equivalent from another organism's package
#
# "gocat" is "MF", "BP", "CC"
#
# "uniprot" is a column of uniprot IDs.
#
# "org.Hs.egGO" converts Entrez IDs to GO terms
uniprot2go <- function(uniprot,eg2uniprot,eg2go,gocat) {
  uniprot2eg <- AnnotationDbi::revmap(eg2uniprot)
  uniprotFilt <- filterUniprotInXref(uniprot,uniprot2eg)
  gos = lapply(uniprotFilt,function(x) oneUniprot2go(x,uniprot2eg,eg2go))
  gfilt <- lapply(gos,filterGo,gocat)
  names(gfilt) <- uniprotFilt
  return(gfilt)
}

# In: list of lists of Uniprot IDs, named with GO terms
# Out: data frame of GO terms, counts of each category, GO descriptions
go2table <- function(gos) {
  ids <- names(gos)
  sizes <- sapply(gos,length)
  terms <- sapply(ids,function(x) GO.db::GOTERM[[x]]@Term)
  df <- data.frame(sizes,ids,terms,stringsAsFactors=F)
  colnames(df) = c('count','id','description')
  return(df)
}

# return T if a GO term's ancestor is in the list of candidates, F otherwise
parentInList <- function(id,candidates) {
  if (is.na(id)) {
    warning("Got NA in parentInList")
    return(F)
  }
  anc <- GO.db::GOMFANCESTOR[[id]]
  found = candidates %in% anc
  return(sum(found) > 0)
}

# return list of go terms which don't have ancestors in this list
topLevel <- function(gos) {
  tl <- sapply(gos,function(x) !parentInList(x,gos))
  return(tl)
}

# assign kids to parents.  If there are multiple parents for a child, pick
# the one with the smallest count...
assignKidsToParents <- function(parents,kids,counts) {
}

# return largest element of the list
listmax <- function(lst) {
  m <- lst[[1]]
  for (n in lst) {
    if (n > m) {
      m <- n
    }
  }
  return(m)
}

# get smallest element of ns, based on counts in "counts"
getSmallest <- function(ns,counts) {
  if (length(ns) == 1) {
    return(ns[[1]])
  }
  curcount <- listmax(counts)+1
  curid <- "zork"
  for (n in ns) {
    if (counts[[n]] < curcount) {
      curcount <- counts[[n]]
      curid <- n
    }
  }
  return(curid)
}

# In: a) an msarc object
#     b) eg2uniprot -- an AnnotationDBI map from entrez IDs to Uniprot IDs
#     c) eg2go -- an AnnotationDBI map from entrez IDs to GO terms
#     d) gocat -- which GO category to use (MF)
# Out: named list of GO terms, each entry being a list of included uniprot IDs.
msarc.findGOterms <- function(msarc,eg2uniprot=org.Hs.eg.db::org.Hs.egUNIPROT,eg2go=org.Hs.eg.db::org.Hs.egGO,minCount=10) {
  if (requireNamespace("GO.db",quietly=TRUE)) {
    gocat <- "MF"
    gos <- uniprot2go(msarc$data$uniprot,eg2uniprot,eg2go,gocat)
    msarc$go2uniBase <- invertGOlist(gos)
    nonzeroflags <- sapply(gos,function(x) length(x) > 0)
    nonzeroterms <- names(gos)[nonzeroflags]
    missing = msarc$data$uniprot[!(msarc$data$uniprot %in% nonzeroterms)]
    # augment with ancestors
    gosAll <- addGoAncestors(gos)
    # invert the list: go terms to uniprot IDs
    go2uni <- invertGOlist(gosAll)
    go2uni$all <- c(go2uni$all,missing)
    msarc$go2uni <- go2uni
    msarc$go2uniAll <- go2uni
    gotbl <- go2table(msarc$go2uni)
    colnames(gotbl) <- c('count','id','description')
    rownames(gotbl) <- gotbl$id
    msarc$gotbl <- gotbl[gotbl$count >= minCount,]
    return(msarc)
  } else {
    stop("Failed to load required package 'GO.db'.")
  }
}

msarc.getTerms <- function(msarc) {
  return(msarc$gotbl)
}

msarc.saveTerms <- function(msarc,filename='go_terms.txt') {
  write.table(msarc$gotbl,file=filename,quote=F,row.names=F,col.names=F,sep='\t')
}

setCandidates <- function(msarc) {
  tbl <- msarc$gotbl
  candidates <- as.list(tbl$id)
  counts <- as.list(tbl$count)
  names(counts) <- candidates
  msarc$go2uni <- msarc$go2uni[tbl$id]
  msarc$candidates <- candidates
  msarc$counts <- counts
  return(msarc)
}

msarc.loadTerms <- function(msarc,filename='go_terms.txt') {
  tbl = read.table(filename,header=F,stringsAsFactors=F,sep='\t')
  colnames(tbl) <- c('count','id','description')
  rownames(tbl) <- tbl$id
  msarc$gotbl <- tbl
  msarc <- setCandidates(msarc)
  return(msarc)
}

msarc.filterTerms <- function(msarc,keepers) {
  if (class(keepers) == "data.frame") {
    klist <- keepers$id
  } else {
    klist <- keepers
  }
  msarc$gotbl <- msarc$gotbl[klist,]
  msarc$gotbl <- msarc$gotbl[!is.na(msarc$gotbl$id),]
  msarc <- setCandidates(msarc)
  return(msarc)
}

makeHierarchy <- function(terms,counts) {
  if (length(terms) == 0) {
    return(terms)
  }
  tl = topLevel(terms)
  parents = terms[tl]
  kids = terms[!tl]
  kidsets = list()
  for (p in parents) {
    kidsets[[p]] = list()
  }
  for (k in kids) {
    candidates = GO.db::GOMFANCESTOR[[k]]
    ps = parents[parents %in% candidates]
    p = getSmallest(ps,counts)
    kidsets[[p]] <- c(kidsets[[p]],k)
  }
  tree <- list()
  for (p in parents) {
    tree[[p]] = makeHierarchy(kidsets[[p]],counts)
  }
  return(tree)
}

Any scripts or data that you put into this service are public.

msarc documentation built on May 30, 2017, 12:53 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

msarc
Draw Diagrams (mis)Representing the Results of Mass Spec Experiments

R/cluster.R
In msarc: Draw Diagrams (mis)Representing the Results of Mass Spec Experiments

Documented in msarc.filterTerms msarc.findGOterms msarc.getTerms msarc.loadTerms msarc.saveTerms

Try the msarc package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

msarc Draw Diagrams (mis)Representing the Results of Mass Spec Experiments

R/cluster.R In msarc: Draw Diagrams (mis)Representing the Results of Mass Spec Experiments

Documented in msarc.filterTerms msarc.findGOterms msarc.getTerms msarc.loadTerms msarc.saveTerms

Try the msarc package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

msarc
Draw Diagrams (mis)Representing the Results of Mass Spec Experiments

R/cluster.R
In msarc: Draw Diagrams (mis)Representing the Results of Mass Spec Experiments