GSE37699

A pretty straighforward experiment; the authors prepare cell lines that are resistent to WZ4002 (a third generation TKI) and then compare the expression of the two cell lines. Prepared in triplicate and measured on Affymetrix Human Genome U133A 2.0 Arrays.

From the study blurb:

"Our study identifies ERK signaling as a mediator of resistance to irreversible pyrimidine EGFR inhibitors in EGFR T790M-bearing cancers. We further provide a therapeutic strategy to both treat and prevent the emergence of this resistance mechanism."

Should be a good reporter assay for testing different datasets.

library(data.table)
library(GEOquery)
library(purrr)
library(stringr)
library(magrittr)

parsedGSE37699SOFT <- getGEO(GEO = "GSE37699", GSEMatrix = FALSE, AnnotGPL = FALSE, getGPL = TRUE)

GPL571_table <- data.table(parsedGSE37699SOFT@gpls$GPL571@dataTable@table)

GSE37699_metadata <- map_dfr(parsedGSE37699SOFT@gsms,
    ~ as.data.table(.x@header) )

GSE37699_metadata[, treatment := str_remove(title, "_rep\\d")]
GSE37699_metadata[, filename := str_remove(supplementary_file, "ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM925nnn/GSM\\d+/suppl/")]

GSE37699_metadata[, study_name := str_remove(title, "NCI-")]
library(affy)

GSE37699_CELfiles <- list.files("~/GSE37699/CELfiles/", pattern = "CEL", full.names = TRUE)

setwd("~/GSE37699/CELfiles/")
GSE37699_affy <- ReadAffy()

GSE37699_normalised <- rma(GSE37699_affy)

GSE37699_exprs <- exprs(GSE37699_normalised) # I prefer to just work with matricies. It's really explicit and simple

setkey(GSE37699_metadata, filename)

colnames(GSE37699_exprs) <- GSE37699_metadata[colnames(GSE37699_exprs), study_name]

Build a design and contrast matrix

GSE37699_designMat <- model.matrix(study_name ~ 0 + treatment,
                                   GSE37699_metadata[filename %like% "CEL"])

colnames(GSE37699_designMat) %<>% str_remove("treatment") %>% str_remove("NCI-")
row.names(GSE37699_designMat) <- GSE37699_metadata[filename %like% "CEL", study_name]

GSE37699_contrastMat <- makeContrasts(
              contrasts = c("H1975_WZR6-H1975_parental"), #challenge - control
              levels = GSE37699_designMat)
library(limma)


GSE37699_lmFit <- lmFit(GSE37699_exprs,
                        design = GSE37699_designMat) %>%
                  contrasts.fit(contrasts = GSE37699_contrastMat) %>%
                  eBayes


WZ4002_adrH1975_diffexDT <- data.table(topTable(GSE37699_lmFit, number = Inf, adjust.method = "fdr",
                                                coef = "H1975_WZR6-H1975_parental"), keep.rownames = TRUE )

WZ4002_adrH1975_diffexDT %<>% 
  merge(GPL571_table[,.(ID, geneSymbol = `Gene Symbol`, geneID = ENTREZ_GENE_ID)], by.x = "rn", by.y = "ID")


setnames(WZ4002_adrH1975_diffexDT,
         c("rn","logFC","P.Value"),
         c("HG_U133A_2_ID", "H1975_logFC", "H1975_pValue"))


WZ4002_adrH1975_diffexDT <- WZ4002_adrH1975_diffexDT[,.SD, .SDcols = !c("AveExpr","t","B","adj.P.Val")]

setcolorder(WZ4002_adrH1975_diffexDT, c("HG_U133A_2_ID","geneSymbol","geneID"))
WZ4002_adrH1975_diffexDT %>%
 ggplot(aes(x = H1975_logFC, y = -log10(H1975_pValue))) +
     geom_point(aes(colour = p.adjust(H1975_pValue) < 0.01 )) +
     scale_colour_manual(values = c("darkgrey","dodgerblue")) +
     theme_bw() 

setorder(WZ4002_adrH1975_diffexDT, H1975_pValue)

save(WZ4002_adrH1975_diffexDT, file = "./data/WZ4002_adrH1975_diffexDT.RData", compress = "xz")
library(fgsea)

hallmark_pathways <- gmtPathways("/mnt/c/broad_genesets/h.all.v7.0.entrez.gmt")
hallmark_pathways_DT <- map2_dfr(hallmark_pathways, names(hallmark_pathways), ~ data.table(name = .y, geneID = .x))


cannonical_pathways <- gmtPathways("/mnt/c/broad_genesets/c2.cp.v7.0.entrez.gmt")
cannonical_pathways_DT <- map2_dfr(cannonical_pathways, names(cannonical_pathways), ~ data.table(name = .y, geneID = .x))

chemical_genetic_peturbations <- gmtPathways("/mnt/c/broad_genesets/c2.cgp.v7.0.entrez.gmt")
CGP_geneSets_DT <- map2_dfr(chemical_genetic_peturbations, names(chemical_genetic_peturbations), ~ data.table(name = .y, geneID = .x))



WZ4002_geneIDexpress <- WZ4002_adrH1975_diffexDT[!is.na(geneID), .SD[H1975_pValue == min(H1975_pValue, na.rm = T)][1] , by = geneID]


WZ4002_betaUniformModel <- fitBetaUniformMixtureDistribution(WZ4002_geneIDexpress$H1975_pValue, nStarts = 20)

WZ4002_geneIDexpress[, betaUnifScore_FDR0.05 := betaUniformScore(H1975_pValue, WZ4002_betaUniformModel, FDR = 0.05)]

noiseFractionUpperBound(WZ4002_betaUniformModel)

####


cgpSetsDT <- WZ4002_geneIDexpress[ unique(cannonical_pathways_DT[, .(pathway = name, geneID)]), , on = "geneID"][!is.na(H1975_pValue),.(pValueSet = list(H1975_pValue), geneSet = list(geneSymbol), scoreSum = sum(betaUnifScore_FDR0.05)), by = pathway]

cgpSetsDT[, betaUniformMixtureP := betaUniformPvalueSumTest(pValueSet[[1]], WZ4002_betaUniformModel), by = pathway]
cgpSetsDT[, fishersP := fishersPvalueSumTest(pValueSet[[1]]), by = pathway]


cgpSetsDT[pathway %like% "BIOCARTA"][p.adjust(betaUniformMixtureP, "fdr") < 0.01] 
cgpSetsDT[p.adjust(fishersP, "fdr") < 0.01]

cgpSetsDT[scoreSum > 0]


adamsardar/metaDEGth documentation built on Jan. 28, 2020, 12:17 a.m.