The bioplanet resources looks like a fantastic source of curated gene sets, culling for redundancy across sources. We shall include them in the package:

library(data.table)
library(stringr)

pathways downloaded directly from BioPlanet

pathwayMembershipDT <- fread("C:/Users/ASardar/Downloads/pathway.csv")

pathwayStatusDT <- fread("C:/Users/ASardar/Downloads/pathway-category.csv")

pathwayMembershipDT[ pathwayStatusDT, CATEGORY_NAME := CATEGORY_NAME, on = .(PATHWAY_ID == PATHWAY_ID)]


bioplanetPathwaysDT <- pathwayMembershipDT[,.(BioPlanetName = PATHWAY_NAME,
                                              BioPlanetID = PATHWAY_ID,
                                              geneID = GENE_ID,
                                              geneSymbol = GENE_SYMBOL,
                                              category = CATEGORY_NAME)]


save(bioplanetPathwaysDT, file = "./data/bioplanetPathwaysDT.RData", compress = "xz")

Let's try it out on the data

system.time( ness <- metaDEG(siTAZdiffex_HFL1_diffexDT,
        bioplanetPathwaysDT ,
        pValAttr = "siTAZ1_pValue",
        geneSetNameAttr = "BioPlanetName",
        geneSetMembersAttr = "geneID") )


ness <- bioplanetPathwaysDT[,.(list(geneID)), by = BioPlanetName]$V1
names(ness) <- bioplanetPathwaysDT[,.(list(geneID)), by = BioPlanetName][, BioPlanetName]

metaDEG(siTAZdiffex_HFL1_diffexDT,
        head(ness),
        pValAttr = "siTAZ1_pValue",
        geneSetNameAttr = "BioPlanetName",
        geneSetMembersAttr = "geneID")



metaDEG(pValueSet = siTAZdiffex_HFL1_diffexDT[!duplicated(geneID), structure(siTAZ1_pValue, names = geneID)],
        head(ness),
        geneSetNameAttr = "BioPlanetName",
        geneSetMembersAttr = "geneID")

Let's try it on erlotinib

erlotinib_geneIDexpress <- erlotinib_adrHCC827_diffexDT[!is.na(geneID), .SD[ER3_pValue == min(ER3_pValue, na.rm = T)][1] , by = geneID]



susceptibleCellLine_BetaUniformModel <- fitBetaUniformMixtureDistribution(erlotinib_geneIDexpress$ER3_pValue)

noiseFractionUpperBound(susceptibleCellLine_BetaUniformModel)

plot(susceptibleCellLine_BetaUniformModel)

erlotinib_geneIDexpress[, betaUnifScore_FDR0.05 := betaUniformScore(ER3_pValue, susceptibleCellLine_BetaUniformModel, FDR = 0.05)]



#Group by GeneIDs
pathwayPvals <- erlotinib_geneIDexpress %>%
  .[bioplanetPathwaysDT,,on = 'geneID', allow.cartesian=TRUE] %>%
  .[, .SD[!duplicated(geneID)], by = BioPlanetName] %>%
  .[!is.na(ER3_pValue),
    .(pValueSet = list(ER3_pValue), members = list(geneSymbol), scoreSum = sum(betaUnifScore_FDR0.05, na.rm = T)), by = .(BioPlanetName,BioPlanetID)]


pathwayPvals[, geneSet := str_c(sort(members[[1]]), collapse = ","), by = BioPlanetName] #Ensure that we don't test a set of genes more than once and inflate the number of tests conducted
pathwayPvals[, betaUniformMixtureP := betaUniformPvalueSumTest(pValueSet[[1]], susceptibleCellLine_BetaUniformModel),  by = geneSet]




pathwayPvals[, fishersP := fishersPvalueSumTest(pValueSet[[1]]),  by = geneSet]

pathwayPvals[!duplicated(geneSet), betaUniformMixtureQ := p.adjust(betaUniformMixtureP, "fdr")]
pathwayPvals[,betaUniformMixtureQ := na.omit(betaUniformMixtureQ) , by = geneSet]

pathwayPvals[!duplicated(geneSet), fishersQ := p.adjust(fishersP, "fdr")]
pathwayPvals[,fishersQ := na.omit(fishersQ) , by = geneSet]


pathwayPvals[, nValues := length(pValueSet[[1]]), by = BioPlanetID]

pathwayPvals[betaUniformMixtureQ < 1E-3 & nValues <= 20][order(-scoreSum)]

pathwayPvals[fishersQ < 0.01][order(-scoreSum)]


erlotinibSignif <- pathwayPvals[betaUniformMixtureQ < 1E-2 & nValues <= 30][order(-scoreSum)]

I don't think that this is too bad!

siTAZ

The siTAZ (WWTR!) TF knockdown experiment is a clean pertubation of a cell line expression. If we can't see anything in the gene expression signature, we're sunk.

fbTAZ1 <- fitBetaUniformMixtureDistribution(siTAZdiffex_HFL1_diffexDT$siTAZ1_pValue)
fbTAZ2 <- fitBetaUniformMixtureDistribution(siTAZdiffex_HFL1_diffexDT$siTAZ2_pValue)

plot(fbTAZ1)
plot(fbTAZ2)


siTAZdiffex_HFL1_diffexDT[, betaUnifScore1_FDR0.05 := betaUniformScore(siTAZ1_pValue, fbTAZ1, FDR = 0.05)]
siTAZdiffex_HFL1_diffexDT[, betaUnifScore2_FDR0.05 := betaUniformScore(siTAZ2_pValue, fbTAZ2, FDR = 0.05)]


siTAZdiffex_HFL1_diffexDT[, geneID := as.integer(geneID)]


#Group by GeneIDs
pathwayPvals <- siTAZdiffex_HFL1_diffexDT %>%
    .[bioplanetPathwaysDT,,on = "geneID", allow.cartesian=TRUE] %>%
    .[, .SD[!duplicated(geneID)], by = BioPlanetName] %>%
    .[!is.na(siTAZ1_pValue) & !is.na(siTAZ2_pValue),
    .(pValueSet1 = list(siTAZ1_pValue), pValueSet2 = list(siTAZ2_pValue),
      members = list(geneSymbol),
      scoreSum1 = sum(betaUnifScore1_FDR0.05, na.rm = T),
      scoreSum2 = sum(betaUnifScore2_FDR0.05, na.rm = T)),
    by = .(BioPlanetName,BioPlanetID)]

pathwayPvals[,size := sapply(members, length)]

pathwayPvals[, geneSet := str_c(sort(members[[1]]), collapse = ","), by = BioPlanetName] #Ensure that we don't test a set of genes more than once and inflate the number of tests conducted


library(profvis)

profvis({
  pathwayPvals[, betaUniformMixtureP1 := betaUniformPvalueSumTest(pValueSet1[[1]], fbTAZ1),
               by = geneSet] })

pathwayPvals[, betaUniformMixtureP2 := betaUniformPvalueSumTest(pValueSet2[[1]], fbTAZ1),
             by = geneSet]

pathwayPvals[, betaUniformMixtureQ1 := p.adjust(betaUniformMixtureP1, method = "fdr")]
pathwayPvals[, betaUniformMixtureQ2 := p.adjust(betaUniformMixtureP2, method = "fdr")]

pathwayPvals[betaUniformMixtureQ1 < 0.01 & betaUniformMixtureQ2 <= 0.01 & size < 20]

bioplanetPathwaysDT[, geneID := as.character(geneID)]




metaDEG(siTAZdiffex_HFL1_diffexDT,
        bioplanetPathwaysDT,
        betaUniformFit = fbTAZ1,
        pValAttr = "siTAZ1_pValue",
        geneSetNameAttr = "BioPlanetName",
        geneSetMemberAttr = "geneID")



siTAZsignif <- pathwayPvals[betaUniformMixtureQ1 < 0.01 & betaUniformMixtureQ2 <= 0.01 & size < 30][order(-scoreSum1)]

merge(siTAZsignif, erlotinibSignif, by = "BioPlanetName")


adamsardar/metaDEGth documentation built on Sept. 28, 2022, 10:12 p.m.