#' Obtain reactome pathways
#'
#' @param species A string, for example "Homo sapiens" or "Mus musculus",
#' indicating the species to use.
#' @param overlap_limit (Optional) Any pathways that have an overlap
#' greater than overlap_limit are combined. Set to NULL to disable
#' this option.
#' @return A named list of vectors. Each vector corresponds to a Reactome pathway
#' and contains the entrezgene IDs of the genes in that pathway.
#' @export
get_reactome_pathways <- function(species, overlap_limit = 0.9) {
cat("Obtaining reactome pathway information for species:", species, "\n")
# Get list of reactome pathway names
# Map names to ID
# Map ID to entrez ID
# Save results as p by g matrix. (Optional; not performed.)
# library(reactome.db)
# Map reactome NAME to reactome ID. Subset on pathways for this species.
reactome_to_id <- as.list(reactome.db::reactomePATHNAME2ID)
available_species <- unique(
gsub("(: .*)", "", names(as.list(reactome.db::reactomePATHNAME2ID))))
index <- which(grepl(species,
names(as.list(reactome.db::reactomePATHNAME2ID))))
if(length(index) == 0) {
stop(species, "is not an available species. Use one of the following:\n\t",
paste(available_species, collapse = ", "), ".")
}
reactome_to_id <- reactome_to_id[index]
# Map reactome ID to entrez ID.
id_to_entrez <- as.list(reactome.db::reactomePATHID2EXTID)
# Subset on reactome IDs obtained in first step.
index <- which(names(id_to_entrez) %in% sapply(reactome_to_id, dplyr::first))
id_to_entrez <- id_to_entrez[index]
# Remove any pathways containing fewer than 5 genes.
sizes <- sapply(id_to_entrez, function(x) length(unique(x)))
id_to_entrez <- id_to_entrez[sizes >= 5]
# Map reactome ID back to reactome NAME.
id_to_reactome <- as.list(reactome.db::reactomePATHID2NAME)
# Subset on reactome IDs obtained in previous step.
index <- which(names(id_to_reactome) %in% names(id_to_entrez))
id_to_reactome <- id_to_reactome[index]
# Finally, map reactome NAME to entrez ID
pathway_list <- id_to_entrez
names(pathway_list) <- sapply(names(id_to_entrez), function(x) {
dplyr::first(id_to_reactome[[which(names(id_to_reactome) == x)[1]]])
})
names(pathway_list) <- gsub("^[ A-Za-z]*: ", "", names(pathway_list))
if(!is.null(overlap_limit)) {
cat("Combining pathways that have greater than", overlap_limit, "overlap.\n")
pathway_list <- combine_overlapping_pathways(pathway_list, overlap_limit)
}
return(pathway_list)
}
#' Modify a pathway list to combine overlapping pathways.
#'
#' @param pathway_list A list of pathways obtained from
#' \code{\link{get_reactome_pathways}}.
#' @param threshold A percentage between 0 and 1. If two pathways
#' overlap by more than this amount, they are combined into one pathway.
#' @return A modified list with overlapping pathways combined together.
#' @export
combine_overlapping_pathways <- function(pathway_list, overlap_limit = 0.9) {
# Determine which pathways are similar.
n <- length(pathway_list)
similar_to <- diag(FALSE, n)
for(i in 2:n) {
for(j in 1:(i - 1)) {
similar_to[i, j] <-
(length(intersect(pathway_list[[i]], pathway_list[[j]])) /
length(union(pathway_list[[i]], pathway_list[[j]]))) > overlap_limit
}
}
backup <- similar_to
similar_to <- backup
pathways_to_remove <- NULL
for(i in 1:ncol(similar_to)) {
if(sum(similar_to[, i]) > 1) {
index <- which(similar_to[, i])
names(pathway_list)[i] <- paste0(names(pathway_list)[i], " (See also: ",
paste0(names(pathway_list)[index],
collapse = "; "), ")")
pathways_to_remove <- c(pathways_to_remove, index)
}
}
pathways_to_remove <- unique(pathways_to_remove)
return(pathway_list[-pathways_to_remove])
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.