# File names of the pathway collections to import. Each one is read from
# inst/extdata/pathwayDB/data inside the project.
databaseFiles <- c(
  "BioCarta_2016.csv",
  "KEGG_2019_Human.csv",
  "Reactome_2016.csv",
  "WikiPathways_2019_Human.csv"
)

# Build one long table with the columns Pathways, Genes and DB.
# The files are read without a header row: the first column (V1) becomes the
# pathway name and every other column contributes one candidate gene per row.
pathwayDB <- lapply(databaseFiles, function(pathwayName) {
  cat("Processing: ", pathwayName, fill = TRUE)
  dat <- read.csv(
    here::here("inst", "extdata", "pathwayDB", "data", pathwayName),
    header = FALSE
  )
  # Empty strings are treated as missing so they can be dropped after the
  # reshape below.
  dat[dat == ""] <- NA
  dat %>%
    gather(Members, Genes, -V1) %>%
    filter(!is.na(Genes)) %>%
    rename(Pathways = V1) %>%
    dplyr::select(Pathways, Genes) %>%
    # Remove only a literal, trailing ".csv". The previous pattern ".csv" was
    # an unanchored regex in which "." matches any character, so it could
    # also strip text from the middle of a file name.
    mutate(DB = sub("\\.csv$", "", pathwayName))
}) %>%
  do.call(rbind, .)
## Processing: BioCarta_2016.csv
## Processing: KEGG_2019_Human.csv
## Processing: Reactome_2016.csv
## Processing: WikiPathways_2019_Human.csv
# Bar chart of the number of distinct pathways in each database, with the
# databases ordered from the largest count to the smallest.
pathwayDB %>%
  group_by(DB) %>%
  summarise(n = n_distinct(Pathways)) %>%
  ggplot(aes(x = reorder(DB, -n), y = n)) +
  geom_col() +
  labs(x = "DB", y = "Number of pathways per DB") +
  theme_classic()
Reactome has the most gene sets, whereas BioCarta has the fewest.
# Bar chart of the number of unique genes covered by each database, with the
# databases ordered from the largest count to the smallest.
pathwayDB %>%
  group_by(DB) %>%
  summarise(n = n_distinct(Genes)) %>%
  ggplot(aes(x = reorder(DB, -n), y = n)) +
  geom_col() +
  # n counts distinct genes, not gene sets, so the axis is labelled to match.
  labs(x = "DB", y = "Number of unique genes per DB") +
  theme_classic()
BioCarta captures the fewest unique genes, whereas the remaining three databases each capture >5K genes.
# Size of every gene set: one row per (DB, Pathways) pair, where n is the
# number of rows of pathwayDB that belong to that pair.
pathwayTally <- summarise(
  group_by(pathwayDB, DB, Pathways),
  n = n()
)
# Distribution of gene-set sizes, drawn as one histogram panel per database.
# Each panel gets its own axis ranges, and the counts are on a log10 scale.
ggplot(pathwayTally, aes(x = n)) +
  geom_histogram() +
  scale_y_log10() +
  facet_wrap(vars(DB), scales = "free") +
  labs(
    x = "Number of genes",
    y = "Frequency of genesets with a given number of genes"
  ) +
  theme_classic()
| DB                      | Pathways                                               |    n |
| :---------------------- | :----------------------------------------------------- | ---: |
| BioCarta_2016           | MAPKinase Signaling Pathway Homo sapiens h mapkPathway |   56 |
| KEGG_2019_Human         | Pathways in cancer                                     |  530 |
| Reactome_2016           | Signal Transduction Homo sapiens R-HSA-162582          | 2465 |
| WikiPathways_2019_Human | PI3K-Akt Signaling Pathway WP4172                      |  340 |
| DB                      | Pathways                                                                    |  n |
| :---------------------- | :-------------------------------------------------------------------------- | -: |
| BioCarta_2016           | Acetylation and Deacetylation of RelA in Nucleus Homo sapiens h RELAPathway |  5 |
| KEGG_2019_Human         | Caffeine metabolism                                                         |  5 |
| Reactome_2016           | Abacavir metabolism Homo sapiens R-HSA-2161541                              |  5 |
| WikiPathways_2019_Human | Catalytic cycle of mammalian Flavin-containing MonoOxygenases (FMOs) WP688  |  5 |
# Store pathwayDB as a package dataset; overwrite = TRUE replaces a
# previously saved copy instead of stopping.
usethis::use_data(pathwayDB, overwrite = TRUE)
## ✔ Setting active project to '/Users/asingh/Documents/omicsBioAnalytics'
## ✔ Saving 'pathwayDB' to 'data/pathwayDB.rda'
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.