Pathbank is a new (Oct 2019) resource from the people that made SMPD (The Small Molecule Pathway Database).
library(data.table) all_pathbank_DT <- fread("/mnt/c/pathbank/pathbank_all_proteins.csv") setnames(all_pathbank_DT, c("PathBank ID", "Pathway Name", "Pathway Subject", "Species", "Uniprot ID", "Protein Name", "HMDBP ID", "DrugBank ID", "GenBank ID", "Gene Name", "Locus"), c("pathbank", "pathway", "pathwayType", "species", "accession", "title", "HMDBP", "drugbank", "genbank", "geneSymbol", "locus")) hsPathBankProteinSets_DT <- all_pathbank_DT[species == "Homo sapiens" & accession != "Unknown", .(pathbank, pathway, pathwayType, accession, geneSymbol)] %>% unique
Include additional identifiers via accession
uniprot_hs_IDmap <- fread("zcat /mnt/c/Uniprot/HUMAN_9606_idmapping_selected.tab.gz") hsPathBankProteinSets_DT[uniprot_hs_IDmap[,c(1,2,3,19)], geneID := V3, on = .(accession == V1)] hsPathBankProteinSets_DT[uniprot_hs_IDmap[,c(1,2,3,19)], ENSG := V19, on = .(accession == V1)]
Save as a package dataset
save(hsPathBankProteinSets_DT, file = ".data/hsPathBankSets_DT.RData", compress = "xz")
Let's try it on erlotinib
erlotinib_geneIDexpress <- erlotinib_adrHCC827_diffexDT[!is.na(geneID), .SD[ER3_pValue == min(ER3_pValue, na.rm = T)][1] , by = geneID] susceptibleCellLine_BetaUniformModel <- fitBetaUniformMixtureDistribution(erlotinib_geneIDexpress$ER3_pValue) noiseFractionUpperBound(susceptibleCellLine_BetaUniformModel) plot(susceptibleCellLine_BetaUniformModel) erlotinib_geneIDexpress[, betaUnifScore_FDR0.05 := betaUniformScore(ER3_pValue, susceptibleCellLine_BetaUniformModel, FDR = 0.05)] #Group by GeneIDs hsPathBankGeneIDsets_DT <- hsPathBankProteinSets_DT[!is.na(geneID) & pathwayType != "Metabolic", .(geneID = as.integer(unique(unlist( strsplit(geneID, "; "))))), by = .(pathway,pathwayType)] pathwayPvals <- erlotinib_geneIDexpress %>% .[hsPathBankGeneIDsets_DT,,on = "geneID", allow.cartesian=TRUE] %>% .[!is.na(ER3_pValue), .(pValueSet = list(ER3_pValue), members = list(geneSymbol), scoreSum = sum(betaUnifScore_FDR0.05, na.rm = T)), by = .(pathway,pathwayType)] pathwayPvals[, geneSet := str_c(sort(members[[1]]), collapse = ","), by = pathway] pathwayPvals[, betaUniformMixtureP := betaUniformPvalueSumTest(pValueSet[[1]], susceptibleCellLine_BetaUniformModel), by = geneSet] pathwayPvals[, fishersP := fishersPvalueSumTest(pValueSet[[1]]), by = geneSet] pathwayPvals[!duplicated(geneSet), betaUniformMixtureQ := p.adjust(betaUniformMixtureP, "fdr")] pathwayPvals[,betaUniformMixtureQ := na.omit(betaUniformMixtureQ) , by = geneSet] pathwayPvals[!duplicated(geneSet), fishersQ := p.adjust(fishersP, "fdr")] pathwayPvals[,fishersQ := na.omit(fishersQ) , by = geneSet] pathwayPvals[betaUniformMixtureQ < 0.01][pathwayType == "Drug Action"][order(-scoreSum)] pathwayPvals[fishersQ < 0.01][pathwayType == "Drug Action"][order(-scoreSum)] pathwayPvals[betaUniformMixtureQ < 0.01][pathwayType == "Drug Action"][order(-scoreSum)] pathwayPvals[pathway %like% "inib"]
Some thoughts on this resource - there is an astonishing number of pathways in here, many of which are almost identical
library(fgsea) hallmark_pathways <- gmtPathways("/mnt/c/broad_genesets/h.all.v7.0.entrez.gmt") hallmark_pathways_DT <- map2_dfr(hallmark_pathways, names(hallmark_pathways), ~ data.table(name = .y, geneID = .x, type = "hallmark_pathways")) cannonical_pathways <- gmtPathways("/mnt/c/broad_genesets/c2.cp.v7.0.entrez.gmt") cannonical_pathways_DT <- map2_dfr(cannonical_pathways, names(cannonical_pathways), ~ data.table(name = .y, geneID = .x, type = "cannonical_pathways")) chemical_genetic_peturbations <- gmtPathways("/mnt/c/broad_genesets/c2.cgp.v7.0.entrez.gmt") CGP_geneSets_DT <- map2_dfr(chemical_genetic_peturbations, names(chemical_genetic_peturbations), ~ data.table(name = .y, geneID = .x, type = "chemical_genetic_peturbations")) erlotinibADRhcc827_betaUniformModel <- fitBetaUniformMixtureDistribution(erlotinib_adrHCC827_diffexDT$HCC827_pValue, nStarts = 20) erlotinib_adrHCC827_diffexDT[, betaUnifScore_FDR0.05 := betaUniformScore(HCC827_pValue, erlotinibADRhcc827_betaUniformModel, FDR = 0.05)] noiseFractionUpperBound(erlotinibADRhcc827_betaUniformModel) # Inspect ER3 and T15.2 ER3_betaUniformModel <- fitBetaUniformMixtureDistribution(erlotinib_adrHCC827_diffexDT$ER3_pValue, nStarts = 20) T15.2_betaUniformModel <- fitBetaUniformMixtureDistribution(erlotinib_adrHCC827_diffexDT$T15.2_pValue, nStarts = 20) plot(ER3_betaUniformModel) plot(T15.2_betaUniformModel) noiseFractionUpperBound(ER3_betaUniformModel) #HIGH! noiseFractionUpperBound(T15.2_betaUniformModel) #HIGH! erlotinib_adrHCC827_diffexDT[, betaUniformScoreFDR0.05_ER3 := betaUniformScore(ER3_pValue, ER3_betaUniformModel)] erlotinib_adrHCC827_diffexDT[, betaUniformScoreFDR0.05_T15.2 := betaUniformScore(T15.2_pValue, T15.2_betaUniformModel)] ### ER3_cannonicalSets_DT <- erlotinib_adrHCC827_diffexDT[ unique(cannonical_pathways_DT[, .(pathway = name, geneID = as.numeric(geneID))]), , on = "geneID", allow.cartesian=T][!is.na(ER3_pValue),.(pValueSet = list(ER3_pValue), geneSet = list(geneSymbol), scoreSum = sum(betaUniformScoreFDR0.05_ER3)), by = pathway] ER3_cannonicalSets_DT[, betaUniformMixtureP := betaUniformPvalueSumTest(pValueSet[[1]], ER3_betaUniformModel), by = pathway] ER3_cannonicalSets_DT[, fishersP := fishersPvalueSumTest(pValueSet[[1]]), by = pathway] ER3_REACT <- ER3_cannonicalSets_DT[pathway %like% "REACT"] ER3_REACT[,ER3_betaQ := p.adjust(betaUniformMixtureP, "fdr")] ER3_KEGG <- ER3_cannonicalSets_DT[pathway %like% "KEGG"] ER3_KEGG[,ER3_betaQ := p.adjust(betaUniformMixtureP, "fdr")] T15.2_cannonicalSets_DT <- erlotinib_adrHCC827_diffexDT[ unique(cannonical_pathways_DT[, .(pathway = name, geneID = as.numeric(geneID))]), , on = "geneID", allow.cartesian=T][!is.na(T15.2_pValue),.(pValueSet = list(T15.2_pValue), geneSet = list(geneSymbol), scoreSum = sum(betaUniformScoreFDR0.05_T15.2)), by = pathway] T15.2_cannonicalSets_DT[, betaUniformMixtureP := betaUniformPvalueSumTest(pValueSet[[1]], T15.2_betaUniformModel), by = pathway] T15.2_cannonicalSets_DT[, fishersP := fishersPvalueSumTest(pValueSet[[1]]), by = pathway] T15_REACT <- T15.2_cannonicalSets_DT[pathway %like% "REACT"] T15_REACT[,T15.2_betaQ := p.adjust(betaUniformMixtureP, "fdr")] T15_KEGG <- T15.2_cannonicalSets_DT[pathway %like% "KEGG"] T15_KEGG[,T15.2_betaQ := p.adjust(betaUniformMixtureP, "fdr")] combindedSet <- merge(ER3_REACT, T15_REACT, by = "pathway")[ (T15.2_betaQ <= 0.01) & (ER3_betaQ <= 0.01)] merge(ER3_KEGG, T15_KEGG, by = "pathway")[ (T15.2_betaQ <= 0.01) & (ER3_betaQ <= 0.01)]
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.