Pathbank is a new (Oct 2019) resource from the people that made SMPD (The Small Molecule Pathway Database).

library(data.table)

all_pathbank_DT <- fread("/mnt/c/pathbank/pathbank_all_proteins.csv")

setnames(all_pathbank_DT,
  c("PathBank ID", "Pathway Name", "Pathway Subject", "Species", "Uniprot ID", "Protein Name", "HMDBP ID", "DrugBank ID", "GenBank ID", "Gene Name", "Locus"),
  c("pathbank", "pathway", "pathwayType", "species", "accession", "title", "HMDBP", "drugbank", "genbank", "geneSymbol", "locus"))


hsPathBankProteinSets_DT <- all_pathbank_DT[species == "Homo sapiens" & accession != "Unknown", .(pathbank, pathway, pathwayType, accession, geneSymbol)] %>% unique

Include additional identifiers via accession

uniprot_hs_IDmap <- fread("zcat /mnt/c/Uniprot/HUMAN_9606_idmapping_selected.tab.gz")

hsPathBankProteinSets_DT[uniprot_hs_IDmap[,c(1,2,3,19)], geneID := V3, on = .(accession == V1)]
hsPathBankProteinSets_DT[uniprot_hs_IDmap[,c(1,2,3,19)], ENSG := V19, on = .(accession == V1)]

Save as a package dataset

save(hsPathBankProteinSets_DT, file = ".data/hsPathBankSets_DT.RData", compress = "xz")

Let's try it on erlotinib

erlotinib_geneIDexpress <- erlotinib_adrHCC827_diffexDT[!is.na(geneID), .SD[ER3_pValue == min(ER3_pValue, na.rm = T)][1] , by = geneID]


susceptibleCellLine_BetaUniformModel <- fitBetaUniformMixtureDistribution(erlotinib_geneIDexpress$ER3_pValue)

noiseFractionUpperBound(susceptibleCellLine_BetaUniformModel)

plot(susceptibleCellLine_BetaUniformModel)

erlotinib_geneIDexpress[, betaUnifScore_FDR0.05 := betaUniformScore(ER3_pValue, susceptibleCellLine_BetaUniformModel, FDR = 0.05)]



#Group by GeneIDs
hsPathBankGeneIDsets_DT <- hsPathBankProteinSets_DT[!is.na(geneID) & pathwayType != "Metabolic", .(geneID = as.integer(unique(unlist( strsplit(geneID, "; "))))), by = .(pathway,pathwayType)]

pathwayPvals <- erlotinib_geneIDexpress %>% 
  .[hsPathBankGeneIDsets_DT,,on = "geneID", allow.cartesian=TRUE] %>%
  .[!is.na(ER3_pValue),
    .(pValueSet = list(ER3_pValue), members = list(geneSymbol), scoreSum = sum(betaUnifScore_FDR0.05, na.rm = T)), by = .(pathway,pathwayType)]

pathwayPvals[, geneSet := str_c(sort(members[[1]]), collapse = ","), by = pathway]

pathwayPvals[, betaUniformMixtureP := betaUniformPvalueSumTest(pValueSet[[1]], susceptibleCellLine_BetaUniformModel),  by = geneSet]

pathwayPvals[, fishersP := fishersPvalueSumTest(pValueSet[[1]]),  by = geneSet]

pathwayPvals[!duplicated(geneSet), betaUniformMixtureQ := p.adjust(betaUniformMixtureP, "fdr")]
pathwayPvals[,betaUniformMixtureQ := na.omit(betaUniformMixtureQ) , by = geneSet]

pathwayPvals[!duplicated(geneSet), fishersQ := p.adjust(fishersP, "fdr")]
pathwayPvals[,fishersQ := na.omit(fishersQ) , by = geneSet]

pathwayPvals[betaUniformMixtureQ < 0.01][pathwayType == "Drug Action"][order(-scoreSum)]

pathwayPvals[fishersQ < 0.01][pathwayType == "Drug Action"][order(-scoreSum)]
pathwayPvals[betaUniformMixtureQ < 0.01][pathwayType == "Drug Action"][order(-scoreSum)]

pathwayPvals[pathway %like% "inib"]

Some thoughts on this resource - there is an astonishing number of pathways in here, many of which are almost identical

library(fgsea)

hallmark_pathways <- gmtPathways("/mnt/c/broad_genesets/h.all.v7.0.entrez.gmt")
hallmark_pathways_DT <- map2_dfr(hallmark_pathways, names(hallmark_pathways), ~ data.table(name = .y, geneID = .x, type = "hallmark_pathways"))


cannonical_pathways <- gmtPathways("/mnt/c/broad_genesets/c2.cp.v7.0.entrez.gmt")
cannonical_pathways_DT <- map2_dfr(cannonical_pathways, names(cannonical_pathways), ~ data.table(name = .y, geneID = .x, type = "cannonical_pathways"))

chemical_genetic_peturbations <- gmtPathways("/mnt/c/broad_genesets/c2.cgp.v7.0.entrez.gmt")
CGP_geneSets_DT <- map2_dfr(chemical_genetic_peturbations, names(chemical_genetic_peturbations), ~ data.table(name = .y, geneID = .x, type = "chemical_genetic_peturbations"))



erlotinibADRhcc827_betaUniformModel <- fitBetaUniformMixtureDistribution(erlotinib_adrHCC827_diffexDT$HCC827_pValue, nStarts = 20)

erlotinib_adrHCC827_diffexDT[, betaUnifScore_FDR0.05 := betaUniformScore(HCC827_pValue, erlotinibADRhcc827_betaUniformModel, FDR = 0.05)]

noiseFractionUpperBound(erlotinibADRhcc827_betaUniformModel)

# Inspect ER3 and T15.2

ER3_betaUniformModel <- fitBetaUniformMixtureDistribution(erlotinib_adrHCC827_diffexDT$ER3_pValue, nStarts = 20)
T15.2_betaUniformModel <- fitBetaUniformMixtureDistribution(erlotinib_adrHCC827_diffexDT$T15.2_pValue, nStarts = 20)

plot(ER3_betaUniformModel)
plot(T15.2_betaUniformModel)

noiseFractionUpperBound(ER3_betaUniformModel) #HIGH!
noiseFractionUpperBound(T15.2_betaUniformModel) #HIGH!

erlotinib_adrHCC827_diffexDT[, betaUniformScoreFDR0.05_ER3 := betaUniformScore(ER3_pValue, ER3_betaUniformModel)]
erlotinib_adrHCC827_diffexDT[, betaUniformScoreFDR0.05_T15.2 := betaUniformScore(T15.2_pValue, T15.2_betaUniformModel)]
###

ER3_cannonicalSets_DT <- erlotinib_adrHCC827_diffexDT[ unique(cannonical_pathways_DT[, .(pathway = name, geneID = as.numeric(geneID))]), , on = "geneID", allow.cartesian=T][!is.na(ER3_pValue),.(pValueSet = list(ER3_pValue), geneSet = list(geneSymbol), scoreSum = sum(betaUniformScoreFDR0.05_ER3)), by = pathway]

ER3_cannonicalSets_DT[, betaUniformMixtureP := betaUniformPvalueSumTest(pValueSet[[1]], ER3_betaUniformModel), by = pathway]
ER3_cannonicalSets_DT[, fishersP := fishersPvalueSumTest(pValueSet[[1]]), by = pathway]

ER3_REACT <- ER3_cannonicalSets_DT[pathway %like% "REACT"]
ER3_REACT[,ER3_betaQ := p.adjust(betaUniformMixtureP, "fdr")]

ER3_KEGG <- ER3_cannonicalSets_DT[pathway %like% "KEGG"]
ER3_KEGG[,ER3_betaQ := p.adjust(betaUniformMixtureP, "fdr")]


T15.2_cannonicalSets_DT <- erlotinib_adrHCC827_diffexDT[ unique(cannonical_pathways_DT[, .(pathway = name, geneID = as.numeric(geneID))]), , on = "geneID", allow.cartesian=T][!is.na(T15.2_pValue),.(pValueSet = list(T15.2_pValue), geneSet = list(geneSymbol), scoreSum = sum(betaUniformScoreFDR0.05_T15.2)), by = pathway]

T15.2_cannonicalSets_DT[, betaUniformMixtureP := betaUniformPvalueSumTest(pValueSet[[1]], T15.2_betaUniformModel), by = pathway]
T15.2_cannonicalSets_DT[, fishersP := fishersPvalueSumTest(pValueSet[[1]]), by = pathway]

T15_REACT <- T15.2_cannonicalSets_DT[pathway %like% "REACT"]
T15_REACT[,T15.2_betaQ := p.adjust(betaUniformMixtureP, "fdr")]

T15_KEGG <- T15.2_cannonicalSets_DT[pathway %like% "KEGG"]
T15_KEGG[,T15.2_betaQ := p.adjust(betaUniformMixtureP, "fdr")]


combindedSet <- merge(ER3_REACT, T15_REACT, by = "pathway")[ (T15.2_betaQ <= 0.01) & (ER3_betaQ <= 0.01)]


merge(ER3_KEGG, T15_KEGG, by = "pathway")[ (T15.2_betaQ <= 0.01) & (ER3_betaQ <= 0.01)]


adamsardar/metaDEGth documentation built on Sept. 28, 2022, 10:12 p.m.