#' Wrapper function to import Reactome database
#'
#' Runs collect_Reactome() and then format_Reactome() on the successfully
#' mapped entries, returning every intermediate result.
#'
#' @param orgdb String with the organism name, passed on as the `organism`
#'   argument of collect_Reactome() (e.g. "Homo sapiens").
#'
#' @param category_names Character vector of category names, passed on to
#'   collect_Reactome(). collect_Reactome() only handles "PATHID".
#'
#' @return List with three elements:
#'   \describe{
#'     \item{raw_dict_Reactome}{Output of collect_Reactome().}
#'     \item{dict_Reactome}{Named list holding the `success` table of each
#'       category; names are the lower-cased category names (e.g. `pathid`).}
#'     \item{tidy_Reactome}{Output of format_Reactome() on `dict_Reactome`.}
#'   }
#' @export
#'
#' @examples get_Reactome(orgdb = "Homo sapiens", category_names = c("PATHID"))
get_Reactome <- function(orgdb = "Homo sapiens", category_names = c("PATHID")) {
  raw_dict <- collect_Reactome(orgdb, category_names)

  # Extract the "success" table of every requested category. Indexing with
  # `[[category_names]]` directly would index recursively when more than one
  # category is given, so each category is looked up on its own.
  dict <- lapply(category_names, function(category) {
    raw_dict[[category]][["success"]]
  })
  names(dict) <- tolower(category_names)

  tidy <- format_Reactome(dict = dict)

  list(
    raw_dict_Reactome = raw_dict,
    dict_Reactome = dict,
    tidy_Reactome = tidy
  )
}
#' Import Reactome database
#'
#' Reads the pathway-name and pathway-to-gene tables (`reactomePATHNAME2ID`
#' and `reactomePATHID2EXTID`, via `toTable()`), keeps the pathways of one
#' organism and lists the gene IDs attached to each of them.
#'
#' @param organism Organism name as it appears in the Reactome pathway names
#'   (e.g. "Homo sapiens"). Pathways are kept when their name matches
#'   `"<organism>: "`; the value is used as a regular expression.
#' @param category_names Character vector of categories to import. Only
#'   "PATHID" is supported; any other value raises an error.
#'
#' @return Named list with one element per category. Each element is a list
#'   with two data frames:
#'   \describe{
#'     \item{success}{Columns `ID`, `Description` and `NCBI_geneID` (the
#'       `gene_id` values of `reactomePATHID2EXTID`), one row per
#'       pathway-gene pair.}
#'     \item{failure}{Column `Failed_DB_ID`, the pathway IDs that have no
#'       entry in `reactomePATHID2EXTID`.}
#'   }
#'   Progress messages are printed with `cat()` while the pathways are
#'   processed.
#' @export
#'
#' @examples collect_Reactome(organism = "Homo sapiens",
#' category_names = c("PATHID"))
collect_Reactome <- function(organism, category_names) {
  # NOTE(review): toTable(), reactomePATHNAME2ID and reactomePATHID2EXTID are
  # not defined in this file; presumably imported from AnnotationDbi and
  # reactome.db -- confirm in the package NAMESPACE.
  orgname <- paste0(organism, ": ")

  res <- list()
  for (category in category_names) {
    if (category != "PATHID") {
      stop("Unsupported category: ", category, call. = FALSE)
    }

    #--------------------------------------------------
    # Pathways of the requested organism
    #--------------------------------------------------
    tmp <- toTable(reactomePATHNAME2ID)
    tmp <- tmp[grep(orgname, iconv(tmp$path_name)), ]
    colnames(tmp) <- c("ID", "Description")

    #--------------------------------------------------
    # Gene IDs grouped by pathway ID (one pass instead of one scan per pathway)
    #--------------------------------------------------
    ids <- toTable(reactomePATHID2EXTID)
    genes_by_id <- split(ids$gene_id, ids$DB_ID)

    #--------------------------------------------------
    # Prepare map
    #--------------------------------------------------
    n_path <- nrow(tmp)
    rows <- vector("list", n_path)
    # Guard against a zero step (fewer than four pathways), which would make
    # the modulo below NaN and the `if` condition NA.
    step <- max(1, floor(0.25 * n_path))

    for (i in seq_len(n_path)) {
      #--------------------------------------------------
      # Print the current process
      #--------------------------------------------------
      if (i == 1 || (i %% step == 0 && i < 0.95 * n_path) || i == n_path) {
        cat(paste0("Now processing ", i, "/", n_path, " for ", category, "...\n"))
      }
      #------------------------------
      # Note: tmp and ids are not always comparable
      #------------------------------
      gene_ncbiids <- genes_by_id[[as.character(tmp$ID[i])]]
      if (length(gene_ncbiids) == 0) {
        next # left as NULL in `rows`; reported as a failure below
      }
      rows[[i]] <- data.frame(
        ID = tmp$ID[i],
        Description = tmp$Description[i],
        NCBI_geneID = gene_ncbiids
      )
    }

    found <- !vapply(rows, is.null, logical(1))
    map <- if (any(found)) {
      do.call(rbind, rows[found])
    } else {
      data.frame(
        ID = character(0),
        Description = character(0),
        NCBI_geneID = character(0)
      )
    }

    res[[category]] <- list(
      success = map,
      failure = data.frame(Failed_DB_ID = tmp$ID[!found])
    )
  }
  res
}
#' Format imported downloaded Reactome database to ASURAT-friendly format
#'
#' Collapses a long pathway-gene table (one row per pathway-gene pair) into
#' one row per pathway, with the unique gene IDs joined by "/".
#'
#' @param dict Named list of data frames, one per category. Each data frame
#'   must have the columns `ID`, `Description` and `NCBI_geneID`.
#'
#' @return Named list with the same names as `dict`. Each element is a data
#'   frame with one row per unique (`ID`, `Description`) pair and the columns
#'   `ID`, `Description`, `Count` (number of unique gene IDs of that `ID`),
#'   `Gene` (always `NA`; not filled in by this function) and `GeneID`
#'   (unique gene IDs joined by "/").
#' @export
#'
#' @examples dict_Reactome_raw <- collect_Reactome(organism = "Homo sapiens",
#' category_names = c("PATHID"))
#' dict_Reactome <- list(pathid = dict_Reactome_raw[["PATHID"]][["success"]])
#' tidy_Reactome <- format_Reactome(dict = dict_Reactome)
format_Reactome <- function(dict) {
  res <- list()
  for (category in names(dict)) {
    tmp <- dict[[category]]

    #--------------------------------------------------
    # One row per pathway
    #--------------------------------------------------
    map <- unique(data.frame(
      ID = tmp[["ID"]],
      Description = tmp[["Description"]]
    ))

    #--------------------------------------------------
    # Gene and Count
    #--------------------------------------------------
    # Group the gene IDs by pathway ID once, instead of rescanning the whole
    # table for every pathway.
    genes_by_id <- lapply(split(tmp[["NCBI_geneID"]], tmp[["ID"]]), unique)
    gene_sets <- lapply(
      as.character(map[["ID"]]),
      function(id) genes_by_id[[id]]
    )

    map$Count <- vapply(gene_sets, length, integer(1))
    # rep() rather than a bare NA so that a zero-row table is accepted too.
    map$Gene <- rep(NA, nrow(map))
    map$GeneID <- vapply(
      gene_sets,
      function(genes) paste(genes, collapse = "/"),
      character(1)
    )

    # unique() keeps the row names of the first occurrences; renumber them.
    rownames(map) <- seq_len(nrow(map))
    res[[category]] <- map
  }
  res
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.