#' Wrapper function to import Gene Ontology database
#'
#' This function includes collect_GO(), format_GO(), and compute_IC_GO()
#'
#' @param orgdb Database of organism.
#'
#' @param all_geneIDs List of genes ***Need clarification***.
#'
#' @return List.
#' @export
#'
#' @examples get_GO(orgdb = org.Hs.eg.db, all_geneIDs = all_geneIDs)
get_GO <- function(orgdb = org.Hs.eg.db, all_geneIDs = all_geneIDs){
temp_dict_GO <- collect_GO(orgdb, all_geneIDs = all_geneIDs)
temp_tidy_GO <- format_GO(dict = temp_dict_GO)
temp_IC_CO <- compute_IC_GO(tidy = temp_tidy_CO)
return(list(dict_GO = temp_dict_GO,
tidy_GO = temp_tidy_GO,
IC_GO = temp_IC_GO))
}
#' ***Please clarify***
#'
#' @param genes
#' @param orgdb
#' @param ont
#' @param level
#'
#' @return
#' @export
#'
#' @examples
do_groupGO <- function(genes, orgdb, ont, level){
res <- groupGO(
gene = genes,
OrgDb = orgdb,
keyType = "ENTREZID",
ont = ont,
level = level,
readable = FALSE
)
return(res)
}
#' Import Gene Ontology database
#'
#' @param orgdb
#' @param all_geneIDs
#'
#' @return
#' @export
#'
#' @examples
collect_GO <- function(orgdb, all_geneIDs){
#--------------------------------------------------
# Definitions
#--------------------------------------------------
category_names <- c("MF", "BP", "CC")
#--------------------------------------------------
# groupGO
#--------------------------------------------------
res <- list()
for(category in category_names){
tmp <- c()
level <- 1
while(1){
ggo <- try(
do_groupGO(gene = all_geneIDs, orgdb = orgdb, ont = category, level = level),
silent = TRUE
)
if(class(ggo) == "try-error"){
break
}else{
tmp <- c(tmp, list(ggo))
level <- level + 1
}
}
res[[category]] <- tmp
}
return(res)
}
#' Title
#'
#' @param dict
#'
#' @return
#' @export
#'
#' @examples
format_GO <- function(dict){
#--------------------------------------------------
# Definitions
#--------------------------------------------------
category_names <- names(dict)
#--------------------------------------------------
# Reformat
#--------------------------------------------------
res <- list()
for(category in category_names){
tmp <- c()
level_names <- length(dict[[category]])
for(level in 1:level_names){
tmp <- rbind(tmp, as.data.frame(dict[[category]][[level]]@result))
}
tmp <- unique(tmp) # Notice: the right hand side must be data.frame
df <- data.frame(
ID = tmp$ID,
Description = tmp$Description,
Count = tmp$Count,
Gene = NA,
GeneID = tmp$geneID
)
df$Count <- as.numeric(df$Count)
res[[category]] <- df
}
return(res)
}
#' Calculates information content (IC) from Gene Ontology data
#'
#' @param tidy
#'
#' @return
#' @export
#'
#' @examples
compute_IC_GO <- function(tidy){
#--------------------------------------------------
# Definitions
#--------------------------------------------------
category_names <- names(tidy)
#--------------------------------------------------
# Compute information contents (ICs)
#--------------------------------------------------
IC_GO <- list()
for(category in category_names){
#------------------------------
# Definitions
#------------------------------
df <- tidy[[category]]
all_geneIDs <- c()
for(i in 1:nrow(df))
all_geneIDs <- c(all_geneIDs, unlist(strsplit(df$GeneID[i], "/")))
all_geneIDs <- unique(all_geneIDs)
#--------------------------------------------------
# Prepare (count, child, parent)-table
#--------------------------------------------------
if(category == "MF") tmp <- toTable(GOMFOFFSPRING)
if(category == "BP") tmp <- toTable(GOBPOFFSPRING)
if(category == "CC") tmp <- toTable(GOCCOFFSPRING)
names(tmp) <- c("child", "parent")
tmp <- data.frame(num_annot_child = NA, child = tmp$child, parent = tmp$parent)
tmp <- tmp[which((tmp$child %in% df$ID) & (tmp$parent %in% df$ID)),]
rownames(tmp) <- 1:nrow(tmp)
#--------------------------------------------------
# Count the number of times a gene is annotated with the children
#--------------------------------------------------
for(i in 1:nrow(tmp)){
cnt <- df[which(df$ID == tmp$child[i]),]$Count
if(length(cnt) != 0)
tmp$num_annot_child[i] <- cnt
else
tmp$num_annot_child[i] <- 0
}
#--------------------------------------------------
# Add parents without child
#--------------------------------------------------
tmp <- rbind(tmp, data.frame(
num_annot_child = 0,
child = NA,
parent = setdiff(df$ID, unique(tmp$parent))
))
#--------------------------------------------------
# Compute the sum of `tmp$num_annot_child` for each parent
#--------------------------------------------------
ids <- unique(tmp$parent)
res <- data.frame(parent = ids, num_annot_parent = NA, sum_annot_child = NA)
for(i in 1:length(ids)){
cnt <- df[which(df$ID == ids[i]),]$Count
if(length(cnt) != 0)
res$num_annot_parent[i] <- cnt
else
res$num_annot_parent[i] <- 0
res$sum_annot_child[i] <- sum(tmp[which(tmp$parent == ids[i]),]$num_annot_child)
}
#--------------------------------------------------
# Compute IC for each parent
#--------------------------------------------------
res$Freq_parent <- res$num_annot_parent + res$sum_annot_child
if(category == "MF"){
ind <- which(res$parent == "GO:0003674")
if(length(ind) == 0)
stop("Error: tidy_GO must include root ontology term.")
res$Freq_root <- res[ind,]$Freq_parent
}else if(category == "BP"){
ind <- which(res$parent == "GO:0008150")
if(length(ind) == 0)
stop("Error: tidy_GO must include root ontology term.")
res$Freq_root <- res[ind,]$Freq_parent
}else if(category == "CC"){
ind <- which(res$parent == "GO:0005575")
if(length(ind) == 0)
stop("Error: tidy_GO must include root ontology term.")
res$Freq_root <- res[ind,]$Freq_parent
}
res$Prob <- res$Freq_parent / res$Freq_root
res$IC <- -log(res$Prob)
IC_GO[[category]] <- data.frame(ID = res$parent, IC = res$IC)
}
return(IC_GO)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.