devtools::load_all()
library(ggplot2)
library(data.table)
koOCT4_BetaUniformModel <- fitBetaUniformMixtureDistribution(cas9OCT4_hsZygote_diffexDT$koOCT4_pValue,
                                                             nStarts = 20)

# ~40% noise!!
noiseFractionUpperBound(koOCT4_BetaUniformModel)

print.BetaUniformPlots(plot(koOCT4_BetaUniformModel))

Transcription factors

OCT4_TFenrich <- metaDEG(cas9OCT4_hsZygote_diffexDT,
        TTRUST_TF2targets_DT,
        koOCT4_BetaUniformModel,
          pValAttr = "koOCT4_pValue",
          geneSetNameAttr = "TF",
          geneSetMembersAttr = "ENSG")


OCT4_TFenrich[,qplot(betaUniformMixtureP)]

merge(
  TTRUST_TF2targets_DT[TF == "POU5F1"],
  OCT4_TFenrich,
  by.x = "target",
  by.y = "TF")

Pathway

koOCT4_BioPlanet <-  metaDEG( cas9OCT4_hsZygote_diffexDT,
                              bioplanetPathwaysDT,
                              betaUniformFit = koOCT4_BetaUniformModel,
                              pValAttr = "koOCT4_pValue",
                              geneSetNameAttr = "BioPlanetName",
                              geneSetMembersAttr =  "geneID" )


koOCT4_BioPlanet[betaUniformMixtureQ <= 0.1]

koOCT4_BioPlanet[size < 100,qplot(betaUniformMixtureP)]
OCTcrisprByGeneID <- cas9OCT4_hsZygote_diffexDT[!is.na(geneID), .(geneID = as.integer(unlist(strsplit(geneID, "; ")))), by = .(ENSG, accession, koOCT4_pValue)]

cas9OCT4_BUMod <- fitBetaUniformMixtureDistribution(OCTcrisprByGeneID$koOCT4_pValue)

plot.BetaUniformModel(cas9OCT4_BUMod, outputFormula = FALSE, outputParameters=FALSE)

OCTcrisprByGeneID[, betaUnifScore_FDR0.05 := betaUniformScore(koOCT4_pValue, cas9OCT4_BUMod)]


OCTcrisprByGeneID[TTRUST_TF2targets_DT[TF == "POU5F1"], , on = "geneID"]

falsePositiveFraction(cas9OCT4_BUMod, pValueThreshold = 0.05)/(falsePositiveFraction( cas9OCT4_BUMod, pValueThreshold = 0.05) + truePositiveFraction(cas9OCT4_BUMod, pValueThreshold = 0.05))

noiseFractionUpperBound(cas9OCT4_BUMod)

TF_pvals <- OCTcrisprByGeneID[  unique(TTRUST_TF2targets_DT[!is.na(geneID), .(TF, geneID)]), , on = "geneID"] %>% 
                    .[!is.na(koOCT4_pValue),.(pValueSet = list(koOCT4_pValue), members = list(geneID), sumScore = sum(betaUnifScore_FDR0.05, na.rm=T)), by = TF]

for(i in 1:nrow(TF_pvals)){

 betaUniformPvalueSumTest( TF_pvals[1,  pValueSet[[1]]], cas9OCT4_BUMod)
}

TF_pvals[, betaUniformMixtureP := betaUniformPvalueSumTest(pValueSet[[1]], cas9OCT4_BUMod), by = TF]
TF_pvals[, fishersP := fishersPvalueSumTest(pValueSet[[1]]), by = TF]

TF_pvals %>%
  ggplot(aes(x = -log10( as.numeric(fishersP)), y = -log10(as.numeric(betaUniformMixtureP)))) +
  geom_point()


TF_pvals[, betaUniformMixtureQ := p.adjust(betaUniformMixtureP, "fdr")]
TF_pvals[, fishersQ := p.adjust(fishersP, "fdr")]

TF_pvals[order(betaUniformMixtureQ)]

TF_pvals[TF == "POU5F1"]

TF_pvals[betaUniformMixtureQ < 0.01]

TF_pvals[order(betaUniformMixtureQ)]

TF_pvals[fishersQ < 0.01]

This is another focused dataset where TTRUST fails to pick out signal (or it's metaDEGth - also possible).

FANTOM5_hs_regcirc <- fread("zcat /mnt/c/regulatory_circutis/Network_compendium/Tissue-specific_regulatory_networks_FANTOM5-v1/32_high-level_networks/20_gastrointestinal_system.txt.gz") # Arbitrary cut off

FANTOM5_hs_regcirc[V3 > 0.2, qplot(V3)] + scale_x_continuous(breaks = seq(0,1,0.1))
cas9OCT4_geneSymExpress <- cas9OCT4_hsZygote_diffexDT[!is.na(geneSymbol), .SD[koOCT4_pValue == min(koOCT4_pValue, na.rm = T)][1] , by = geneSymbol]

cas9OCT4_geneSymExpress[,qplot(koOCT4_pValue, bins = 120)]

OCT4knockout_betaUniformModel <- fitBetaUniformMixtureDistribution(cas9OCT4_geneSymExpress$koOCT4_pValue, nStarts = 20)
# LLH fluctates a bit as we are on the bounds of parameter space!

OCT4knockout_betaUniformModel

noiseFractionUpperBound(OCT4knockout_betaUniformModel)

# If we were to use a P-value cutoff of 0.01, what would the FP rate be?
TP <- truePositiveFraction(OCT4knockout_betaUniformModel, pValueThreshold = 0.01)
FP <- falsePositiveFraction(OCT4knockout_betaUniformModel, pValueThreshold = 0.01)
round(100*FP/(TP+FP), digits = 1) # Over 30% would be false positives!
# Perform meta-analysis using the TTRUST TF->target sets

cas9OCT4_geneSymExpress[, betaUnifScore_FDR0.05 := betaUniformScore(koOCT4_pValue, OCT4knockout_betaUniformModel, FDR = 0.05)]



regCirc_pvalues <- cas9OCT4_geneSymExpress[ unique(FANTOM5_hs_regcirc[V2 > 0.3, .(TF = V1, geneSymbol = V2)]), , on = "geneSymbol"][!is.na(koOCT4_pValue),.(pValueSet = list(koOCT4_pValue), geneSet = list(geneSymbol), scoreSum = sum(betaUnifScore_FDR0.05)), by = TF]


regCirc_pvalues[, betaUniformMixtureP := betaUniformPvalueSumTest(pValueSet[[1]], OCT4knockout_betaUniformModel), by = TF]
regCirc_pvalues[, fishersP := fishersPvalueSumTest(pValueSet[[1]]), by = TF]

regCirc_pvalues[p.adjust(betaUniformMixtureP) < 0.01] # No significant gene sets
regCirc_pvalues[p.adjust(fishersP) < 0.01] # Quite a few ... too many for so little signal?

TTRUST_TF_pvalues[scoreSum > 0]
library(fgsea)

hallmark_pathways <- gmtPathways("/mnt/c/broad_genesets/h.all.v7.0.entrez.gmt")
hallmark_pathways_DT <- map2_dfr(hallmark_pathways, names(hallmark_pathways), ~ data.table(name = .y, geneID = .x))


cannonical_pathways <- gmtPathways("/mnt/c/broad_genesets/c2.cp.v7.0.entrez.gmt")
cannonical_pathways_DT <- map2_dfr(cannonical_pathways, names(cannonical_pathways), ~ data.table(name = .y, geneID = .x))



chemical_genetic_peturbations <- gmtPathways("/mnt/c/broad_genesets/c2.cgp.v7.0.entrez.gmt")
CGP_geneSets_DT <- map2_dfr(chemical_genetic_peturbations, names(chemical_genetic_peturbations), ~ data.table(name = .y, geneID = .x))


cas9OCT4_geneIDexpress <- cas9OCT4_hsZygote_diffex_DT[!is.na(geneID), .SD[koOCT4_pValue == min(koOCT4_pValue, na.rm = T)][1] , by = geneID]

cas9OCT4_geneIDexpress[,qplot(koOCT4_pValue, bins = 120)]

OCT4knockout_betaUniformModel <- fitBetaUniformMixtureDistribution(cas9OCT4_geneIDexpress$koOCT4_pValue, nStarts = 20)

cas9OCT4_geneIDexpress[, betaUnifScore_FDR0.05 := betaUniformScore(koOCT4_pValue, OCT4knockout_betaUniformModel, FDR = 0.05)]



####


braodHallmarkSetsDT <- cas9OCT4_geneSymExpress[ unique(hallmark_pathways_DT[, .(pathway = name, geneID)]), , on = "geneID"][!is.na(koOCT4_pValue),.(pValueSet = list(koOCT4_pValue), geneSet = list(geneSymbol), scoreSum = sum(betaUnifScore_FDR0.05)), by = pathway]


braodHallmarkSetsDT[, betaUniformMixtureP := betaUniformPvalueSumTest(pValueSet[[1]], OCT4knockout_betaUniformModel), by = pathway]
braodHallmarkSetsDT[, fishersP := fishersPvalueSumTest(pValueSet[[1]]), by = pathway]

braodHallmarkSetsDT[p.adjust(betaUniformMixtureP, "fdr") < 0.01] 
braodHallmarkSetsDT[p.adjust(fishersP, "fdr") < 0.05]

braodHallmarkSetsDT[scoreSum > 0]


####

braodGCPsetsDT <- cas9OCT4_geneSymExpress[ unique(CGP_geneSets_DT[, .(pathway = name, geneID)]), , on = "geneID"][!is.na(koOCT4_pValue),.(pValueSet = list(koOCT4_pValue), geneSet = list(geneSymbol), scoreSum = sum(betaUnifScore_FDR0.05)), by = pathway]

braodGCPsetsDT[, betaUniformMixtureP := betaUniformPvalueSumTest(pValueSet[[1]], OCT4knockout_betaUniformModel), by = pathway]
braodGCPsetsDT[, fishersP := fishersPvalueSumTest(pValueSet[[1]]), by = pathway]

braodGCPsetsDT[p.adjust(betaUniformMixtureP) < 0.01] 
braodGCPsetsDT[p.adjust(fishersP) < 0.01]

braodGCPsetsDT[scoreSum > 0]


####

braodCanonocalSetsDT <- cas9OCT4_geneSymExpress[ unique(cannonical_pathways_DT[, .(pathway = name, geneID)]), , on = "geneID"][!is.na(koOCT4_pValue),.(pValueSet = list(koOCT4_pValue), geneSet = list(geneSymbol), scoreSum = sum(betaUnifScore_FDR0.05)), by = pathway]

braodCanonocalSetsDT[, betaUniformMixtureP := betaUniformPvalueSumTest(pValueSet[[1]], ATF3knockout_betaUniformModel), by = pathway]
braodCanonocalSetsDT[, fishersP := fishersPvalueSumTest(pValueSet[[1]]), by = pathway]

braodCanonocalSetsDT[p.adjust(betaUniformMixtureP) < 0.01] 
braodCanonocalSetsDT[p.adjust(fishersP) < 0.01]

braodGCPsetsDT[scoreSum > 0]


adamsardar/metaDEGth documentation built on Sept. 28, 2022, 10:12 p.m.