# File: R/utils_ontology_manipulation.R

# Defines functions: ListAncestors, ListSynonyms, ListObsoletes, ListKOs,
#   CreateDictionary, RemoveObsoleteAndAlternative, ObtainGeneGOancestors

# Small auxiliary functions for ontology manipulation
# (not exported to the package namespace)

# Builds one combined ancestor listing out of the three GO.db ancestor maps
# (GOBPANCESTOR, GOCCANCESTOR and GOMFANCESTOR).
# Meant to be computed once and handed to ObtainGeneGOancestors(), which looks
# IDs up in it by name.
# Takes no input.
#
# Returns a single list: the three maps, each converted with as.list(), are
# concatenated in the order BP, CC, MF.
ListAncestors <- function() {
  bpAncestors <- as.list(GO.db::GOBPANCESTOR)
  ccAncestors <- as.list(GO.db::GOCCANCESTOR)
  mfAncestors <- as.list(GO.db::GOMFANCESTOR)

  c(bpAncestors, ccAncestors, mfAncestors)
}


# Prepares a listing of synonyms for GO IDs that have alternative IDs.
# Meant to be computed once and handed to RemoveObsoleteAndAlternative(),
# which uses the names of this vector as the alternative IDs and its values
# as their replacements.
# Takes no input.
#
# Returns GO.db's GOSYNONYM mapping converted to a single char vector.
ListSynonyms <- function() {
  synonymMap <- GO.db::GOSYNONYM
  as.character(synonymMap)
}


# Prepares a listing of obsolete GO IDs.
# Meant to be computed once and handed to RemoveObsoleteAndAlternative().
# Takes no input.
#
# Returns GO.db's GOOBSOLETE mapping converted to a char vector, with the
# element names stripped.
ListObsoletes <- function() {
  obsoleteIDs <- as.character(GO.db::GOOBSOLETE)
  unname(obsoleteIDs)
}


# Prepares a listing of KOs and their annotation.
# Queries KEGG through KEGGREST::keggList("ko"), so it needs network access.
# Takes no input.
#
# Returns the vector produced by keggList("ko"), with every "ko:" substring
# removed from the element names.
ListKOs <- function() {
  koListing  <- KEGGREST::keggList("ko")
  cleanNames <- gsub("ko:", "", names(koListing), fixed = TRUE)
  names(koListing) <- cleanNames

  koListing
}



# Creates a dictionary of valid terms from the data;
# does not contain an annotation description.
# Input:
#   test.anno: (list) genomes in the test group, each with a data frame that
#                     maps each genomic element to its annotations.
#   back.anno: (list) genomes in the background group, each with a data frame
#                     that maps each genomic element to its annotations.
#                     Optional; when NULL only the test group is used.
#
# Returns a vector with the unique terms found, in order of first appearance
#   (test group first), where each element is named after itself.
CreateDictionary <- function(test.anno, back.anno = NULL) {
  # Flatten with lapply() rather than sapply(): sapply() simplifies genomes
  # with equally many terms into a matrix, which unlist() returns unchanged
  # and unique() then deduplicates by row, so repeated terms would survive
  # and the result would not be a plain vector.
  FlattenTerms <- function(anno) {
    unlist(lapply(anno, unlist, use.names = FALSE), use.names = FALSE)
  }

  dict <- unique(FlattenTerms(test.anno))

  if (!is.null(back.anno)) {
    dict <- unique(c(dict, FlattenTerms(back.anno)))
  }

  # TODO: Why give each element a name that is equal to the element itself?
  # Is this really necessary? (Kept as-is: callers may look terms up by name.)
  names(dict) <- dict

  dict
}


# Drops obsolete GO IDs and swaps alternative IDs for the IDs they map to.
# Input:
#   geneIDs: char vector of factors mapping gene IDs to GO IDs.
#   allObsolete: char vector of obsolete GO IDs,
#                 generated by ListObsoletes()
#   allSynonym: char vector whose names are the alternative IDs and whose
#                 values are their replacements,
#                 generated by ListSynonyms()
#
# Returns a vector of unique GO IDs: first the input IDs that are neither
#   obsolete nor alternative, then the replacements of the alternative ones.
RemoveObsoleteAndAlternative <- function(geneIDs, allObsolete, allSynonym) {
  # Obsolete terms are discarded before any synonym lookup takes place.
  currentIDs <- setdiff(geneIDs, allObsolete)

  # IDs that show up among the names of the synonym map are alternative IDs.
  secondaryIDs <- intersect(currentIDs, names(allSynonym))

  # Keep the remaining IDs and append the replacements of the alternative
  # ones; unique() collapses replacements that were already present.
  unique(c(setdiff(currentIDs, secondaryIDs), allSynonym[secondaryIDs]))
}


# Finds all GO ID ancestors for a given gene.
# Input:
#   geneIDs: char vector (or factor) with the GO IDs of the gene.
#   allAncestor: list containing all GOXXANCESTOR combined,
#                 generated by ListAncestors(); looked up by GO ID name.
#
# Returns a char vector with the unique GO IDs found for the gene: the input
#   IDs followed by their ancestors, with the "all" entry removed.
ObtainGeneGOancestors <- function(geneIDs, allAncestor) {
  # Coerce up front: a factor would index allAncestor by its integer codes
  # instead of its labels, and would not combine cleanly with the character
  # ancestor IDs below.
  geneIDs <- as.character(geneIDs)

  # IDs that are absent from allAncestor yield NULL entries, which unlist()
  # drops; the result is NULL when nothing was found.
  geneAncestors <- unlist(allAncestor[geneIDs], use.names = FALSE)

  # Discard the "all" entries; subsetting NULL simply gives NULL back.
  geneAncestors <- geneAncestors[!(geneAncestors %in% "all")]

  as.character(unique(c(geneIDs, geneAncestors)))
}
# fcampelo/KOMODO2-CRAN documentation built on March 7, 2020, 6:35 a.m.