library("BloodCancerMultiOmics2017")
library("DESeq2")
library("reshape2")
library("dplyr")
library("tibble")
library("Biobase")
library("SummarizedExperiment")
library("genefilter")
library("piano") # loadGSC
library("ggplot2")
library("gtable")
library("grid")
plotDir = ifelse(exists(".standalone"), "", "part07/")
if(plotDir!="") if(!file.exists(plotDir)) dir.create(plotDir)
options(stringsAsFactors=FALSE)

Gene set enrichment analysis on BTK, mTOR, MEK groups

Based on the classification of drug response phenotypes we divided CLL samples into distinct groups driven by the increased sensitivity towards BTK, mTOR and MEK inhibition. Here we perform gene set enrichment analysis to find the causes of distinctive drug response phenotypes in the gene expression data.

Load objects.

data(list=c("dds", "lpdAll"))

gmts = list(H=system.file("extdata","h.all.v5.1.symbols.gmt",
                          package="BloodCancerMultiOmics2017"),
            C6=system.file("extdata","c6.all.v5.1.symbols.gmt",
                           package="BloodCancerMultiOmics2017"))

Divide patients into response groups.

patGroup = defineResponseGroups(lpd=lpdAll)

Preprocessing RNAseq data

Subsetting RNAseq data to include the CLL patients for which the drug screen was performed.

lpdCLL <- lpdAll[fData(lpdAll)$type=="viab",
                 pData(lpdAll)$Diagnosis %in% c("CLL")]

ddsCLL <- dds[,colData(dds)$PatID %in% colnames(lpdCLL)]

Read in group and add annotations to the RNAseq data set.

ddsCLL <- ddsCLL[,colData(ddsCLL)$PatID %in% rownames(patGroup)]

#remove genes without gene symbol annotations
ddsCLL <- ddsCLL[!is.na(rowData(ddsCLL)$symbol),]
ddsCLL <- ddsCLL[rowData(ddsCLL)$symbol != "",]

#add drug sensitivity annotations to coldata
colData(ddsCLL)$group <- factor(patGroup[colData(ddsCLL)$PatID, "group"])

Remove rows that contain too few counts.

#only keep genes that have counts higher than 10 in any sample
keep <- apply(counts(ddsCLL), 1, function(x) any(x >= 10)) 
ddsCLL <- ddsCLL[keep,]
dim(ddsCLL)

Remove transcripts which do not show variance across samples.

ddsCLL <- estimateSizeFactors(ddsCLL)
sds <- rowSds(counts(ddsCLL, normalized = TRUE))
sh <- shorth(sds)
ddsCLL <- ddsCLL[sds >= sh,]

Variance stabilizing transformation

ddsCLL.norm <- varianceStabilizingTransformation(ddsCLL)

Differential gene expression

Perform differential gene expression using DESeq2.

DEres <- list()
design(ddsCLL) <- ~ group

rnaRaw <- DESeq(ddsCLL, betaPrior = FALSE)

#extract results for different comparisons
# responders versus weak-responders
DEres[["BTKnone"]] <- results(rnaRaw, contrast = c("group","BTK","none"))
DEres[["MEKnone"]] <- results(rnaRaw, contrast = c("group","MEK","none"))
DEres[["mTORnone"]] <- results(rnaRaw, contrast = c("group","mTOR","none"))

Gene set enrichment analysis

The gene set enrichment analysis will be performed by using the MSigDB gene set collections C6 and Hallmark (http://software.broadinstitute.org/gsea/msigdb/ ). For each collection we will show the top five enriched gene sets and respective differentially expressed genes. Gene set enrichment analysis will be performed on the ranked gene lists using the Parametric Analysis of Gene Set Enrichment (PAGE).

Functions for enrichment analysis and plots

Define cut-off.

pCut = 0.05

Function to run GSEA or PAGE in R.

runGSEA <- function(inputTab, gmtFile, GSAmethod="gsea", nPerm=1000){
    inGMT <- loadGSC(gmtFile,type="gmt")
     #re-rank by score
    rankTab <- inputTab[order(inputTab[,1],decreasing = TRUE),,drop=FALSE] 
    if (GSAmethod == "gsea"){
        #readin geneset database
        #GSEA analysis
        res <- runGSA(geneLevelStats = rankTab,
                      geneSetStat = GSAmethod,
                      adjMethod = "fdr", gsc=inGMT,
                      signifMethod = 'geneSampling', nPerm = nPerm)
        GSAsummaryTable(res)
    } else if (GSAmethod == "page"){
        res <- runGSA(geneLevelStats = rankTab,
                      geneSetStat = GSAmethod,
                      adjMethod = "fdr", gsc=inGMT,
                      signifMethod = 'nullDist')
        GSAsummaryTable(res)
    }
}

Function which run the GSE for each response group.

runGSE = function(gmt) {

  Res <- list()
  for (i in names(DEres)) {
    dataTab <- data.frame(DEres[[i]])
    dataTab$ID <- rownames(dataTab)
    #filter using pvalues
    dataTab <- filter(dataTab, pvalue <= pCut) %>%
                  arrange(pvalue) %>% 
                  mutate(Symbol = rowData(ddsCLL[ID,])$symbol)
    dataTab <- dataTab[!duplicated(dataTab$Symbol),]
    statTab <- data.frame(row.names = dataTab$Symbol, stat = dataTab$stat)
    resTab <- runGSEA(inputTab=statTab, gmtFile=gmt, GSAmethod="page")
    Res[[i]] <- arrange(resTab,desc(`Stat (dist.dir)`))
  }
  Res
}

Function to get the list of genes enriched in a set.

getGenes <- function(inputTab, gmtFile){
  geneList <- loadGSC(gmtFile,type="gmt")$gsc
  enrichedUp <- lapply(geneList, function(x) 
    intersect(rownames(inputTab[inputTab[,1] >0,,drop=FALSE]),x))
  enrichedDown <- lapply(geneList, function(x)
    intersect(rownames(inputTab[inputTab[,1] <0,,drop=FALSE]),x))
  return(list(up=enrichedUp, down=enrichedDown))
}

A function to plot the heat map of intersection of genes in different gene sets.

plotSetHeatmap <- 
  function(geneTab, enrichTab, topN, gmtFile, tittle="",
           asterixList = NULL, anno=FALSE) {

    if (nrow(enrichTab) < topN) topN <- nrow(enrichTab)
    enrichTab <- enrichTab[seq(1,topN),]

    geneList <- getGenes(geneTab,gmtFile)

    geneList$up <- geneList$up[enrichTab[,1]]
    geneList$down <- geneList$down[enrichTab[,1]]

    #form a table 
    allGenes <- unique(c(unlist(geneList$up),unlist(geneList$down)))
    allSets <- unique(c(names(geneList$up),names(geneList$down)))
    plotTable <- matrix(data=NA,ncol = length(allSets),
                        nrow = length(allGenes),
                        dimnames = list(allGenes,allSets))
    for (setName in names(geneList$up)) {
      plotTable[geneList$up[[setName]],setName] <- 1
    }
    for (setName in names(geneList$down)) {
      plotTable[geneList$down[[setName]],setName] <- -1
    }

    if(is.null(asterixList)) {
      #if no correlation table specified, order by the number of
      # significant gene
      geneOrder <- rev(
        rownames(plotTable[order(rowSums(plotTable, na.rm = TRUE),
                                 decreasing =FALSE),]))
    } else {
      #otherwise, order by the p value of correlation
      asterixList <- arrange(asterixList, p)
      geneOrder <- filter(
        asterixList, symbol %in% rownames(plotTable))$symbol
      geneOrder <- c(
        geneOrder, rownames(plotTable)[! rownames(plotTable) %in% geneOrder])
    }

    plotTable <- melt(plotTable)
    colnames(plotTable) <- c("gene","set","value")
    plotTable$gene <- as.character(plotTable$gene)

    if(!is.null(asterixList)) {
      #add + if gene is positivily correlated with sensitivity, else add "-"
      plotTable$ifSig <- asterixList[
        match(plotTable$gene, asterixList$symbol),]$coef
      plotTable <- mutate(plotTable, ifSig =
                            ifelse(is.na(ifSig) | is.na(value), "",
                                   ifelse(ifSig > 0, "-", "+")))
    }
    plotTable$value <- replace(plotTable$value,
                               plotTable$value %in% c(1), "Up")
    plotTable$value <- replace(plotTable$value,
                               plotTable$value %in%  c(-1), "Down")

    allSymbols <- plotTable$gene

    geneSymbol <- geneOrder

    if (anno) { #if add functional annotations in addition to gene names
      annoTab <- tibble(symbol = rowData(ddsCLL)$symbol, 
                        anno = sapply(rowData(ddsCLL)$description,
                                      function(x) unlist(strsplit(x,"[[]"))[1]))
      annoTab <- annoTab[!duplicated(annoTab$symbol),]
      annoTab$combine <- sprintf("%s (%s)",annoTab$symbol, annoTab$anno)
      plotTable$gene <- annoTab[match(plotTable$gene,annoTab$symbol),]$combine
      geneOrder <- annoTab[match(geneOrder,annoTab$symbol),]$combine
      geneOrder <- rev(geneOrder)
    }

    plotTable$gene <- factor(plotTable$gene, levels =geneOrder)
    plotTable$set <- factor(plotTable$set, levels = enrichTab[,1])


    g <- ggplot(plotTable, aes(x=set, y = gene)) +
      geom_tile(aes(fill=value), color = "black") +
      scale_fill_manual(values = c("Up"="red","Down"="blue")) +
      xlab("") + ylab("") + theme_classic() +
      theme(axis.text.x=element_text(size=7, angle = 60, hjust = 0),
            axis.text.y=element_text(size=7),
            axis.ticks = element_line(color="white"),
            axis.line = element_line(color="white"),
            legend.position = "none") +
      scale_x_discrete(position = "top") +
      scale_y_discrete(position = "right")

    if(!is.null(asterixList)) {
      g <- g + geom_text(aes(label = ifSig), vjust =0.40)
    }

    # construct the gtable
    wdths = c(0.05, 0.25*length(levels(plotTable$set)), 5)
    hghts = c(2.8, 0.1*length(levels(plotTable$gene)), 0.05)
    gt = gtable(widths=unit(wdths, "in"), heights=unit(hghts, "in"))
    ## make grobs
    ggr = ggplotGrob(g)
    ## fill in the gtable
    gt = gtable_add_grob(gt, gtable_filter(ggr, "panel"), 2, 2)
    gt = gtable_add_grob(gt, ggr$grobs[[5]], 1, 2) # top axis
    gt = gtable_add_grob(gt, ggr$grobs[[9]], 2, 3) # right axis

    return(list(list(plot=gt,
                     width=sum(wdths),
                     height=sum(hghts),
                     genes=geneSymbol)))
}    

Prepare stats per gene for plotting.

statTab = setNames(lapply(c("mTORnone","BTKnone","MEKnone"), function(gr) {
  dataTab <- data.frame(DEres[[gr]])
  dataTab$ID <- rownames(dataTab)
  #filter using pvalues
  dataTab <- filter(dataTab, pvalue <= pCut) %>%
    arrange(pvalue) %>%
    mutate(Symbol = rowData(ddsCLL[ID,])$symbol) %>%
    filter(log2FoldChange > 0)
  dataTab <- dataTab[!duplicated(dataTab$Symbol),]
  data.frame(row.names = dataTab$Symbol, stat = dataTab$stat)
}), nm=c("mTORnone","BTKnone","MEKnone"))

Geneset enrichment based on Hallmark set (H)

Perform enrichment analysis using PAGE method.

hallmarkRes = runGSE(gmt=gmts[["H"]])

Geneset enrichment based on oncogenic signature set (C6)

Perform enrichment analysis using PAGE method.

c6Res = runGSE(gmt=gmts[["C6"]])

Everolimus response VS gene expression (within mTOR group)

To further investiage the association between expression and drug sensitivity group at gene level, correlation test was performed to identify genes whose expressions are correlated with the sensitivity to the mTOR inhibitor (everolimus) sensitivity within the mTOR group.

Correlation test

Prepare drug sensitivity vector and gene expression matrix

ddsCLL.mTOR <- ddsCLL.norm[,ddsCLL.norm$group %in% "mTOR"]
viabMTOR <- Biobase::exprs(lpdCLL["D_063_4:5", ddsCLL.mTOR$PatID])[1,]
stopifnot(all(ddsCLL.mTOR$PatID == colnames(viabMTOR)))  

Filtering and applying variance stabilizing transformation on RNAseq data

#only keep genes that have counts higher than 10 in any sample
keep <- apply(assay(ddsCLL.mTOR), 1, function(x) any(x >= 10)) 
ddsCLL.mTOR <- ddsCLL.mTOR[keep,]
dim(ddsCLL.mTOR)

Association test using Pearson correlation

tmp = do.call(rbind, lapply(1:nrow(ddsCLL.mTOR), function(i) {
  res = cor.test(viabMTOR, assay(ddsCLL.mTOR[i,])[1,], method = "pearson")
  data.frame(coef=unname(res$estimate), p=res$p.value)
}))

corResult <- tibble(ID = rownames(ddsCLL.mTOR), 
                    symbol = rowData(ddsCLL.mTOR)$symbol,
                    coef = tmp$coef,
                    p = tmp$p)

corResult <- arrange(corResult, p) %>% mutate(p.adj = p.adjust(p, method="BH"))

Enrichment heatmaps for mTOR group with overlapped genes indicated

The genes that are positively correlated with everolimus sensitivity are labeled as "+" in the heatmap and the negatively correlated genes are labeled as "-".

Plot for C6 gene sets

pCut = 0.05
corResult.sig <- filter(corResult, p <= pCut)
c6Plot <- plotSetHeatmap(geneTab=statTab[["mTORnone"]],
                enrichTab=c6Res[["mTORnone"]],
                topN=5, gmtFile=gmts[["C6"]],
                #add asterix in front of the overlapped genes
                asterixList = corResult.sig, 
                anno=TRUE, i)
#FIG# S13 B left
grid.draw(c6Plot[[1]]$plot)

Plot for Hallmark gene sets

hallmarkPlot <- plotSetHeatmap(geneTab=statTab[["mTORnone"]],
                enrichTab=hallmarkRes[["mTORnone"]],
                topN=5, gmtFile=gmts[["H"]],
                asterixList = corResult.sig,
                anno=TRUE, i)
#FIG# S13 B right
grid.draw(hallmarkPlot[[1]]$plot)

Ibrutinib response VS gene expression (within BTK group)

Correlation test was performed to identify genes whose expressions are correlated with the sensitivity to the BTK inhibitor (ibrutinib) sensitivity within the BTK group.

Correlation test

Prepare drug sensitivity vector and gene expression matrix

ddsCLL.BTK <- ddsCLL.norm[,ddsCLL.norm$group %in% "BTK"]
viabBTK <- Biobase::exprs(lpdCLL["D_002_4:5", ddsCLL.BTK$PatID])[1,]
stopifnot(all(ddsCLL.BTK$PatID == colnames(viabBTK)))  

Filtering and applying variance stabilizing transformation on RNAseq data

#only keep genes that have counts higher than 10 in any sample
keep <- apply(assay(ddsCLL.BTK), 1, function(x) any(x >= 10)) 
ddsCLL.BTK <- ddsCLL.BTK[keep,]
dim(ddsCLL.BTK)

Association test using Pearson correlation

tmp = do.call(rbind, lapply(1:nrow(ddsCLL.BTK), function(i) {
  res = cor.test(viabBTK, assay(ddsCLL.BTK[i,])[1,], method = "pearson")
  data.frame(coef=unname(res$estimate), p=res$p.value)
}))

corResult <- tibble(ID = rownames(ddsCLL.BTK), 
                    symbol = rowData(ddsCLL.BTK)$symbol,
                    coef = tmp$coef,
                    p = tmp$p)

corResult <- arrange(corResult, p) %>% mutate(p.adj = p.adjust(p, method="BH"))

Enrichment heatmaps for BTK group with overlapped genes indicated

Plot for C6 gene sets

pCut = 0.05
corResult.sig <- filter(corResult, p <= pCut)
c6Plot <- plotSetHeatmap(geneTab=statTab[["BTKnone"]],
                enrichTab=c6Res[["BTKnone"]],
                topN=5, gmtFile=gmts[["C6"]],
                #add asterix in front of the overlapped genes
                asterixList = corResult.sig, 
                anno=TRUE, i)
#FIG# S12 left
grid.draw(c6Plot[[1]]$plot)

Plot for Hallmark gene sets

hallmarkPlot <- plotSetHeatmap(geneTab=statTab[["BTKnone"]],
                enrichTab=hallmarkRes[["BTKnone"]],
                topN=5, gmtFile=gmts[["H"]],
                asterixList = corResult.sig,
                anno=TRUE, i)
#FIG# S12 right
grid.draw(hallmarkPlot[[1]]$plot)

Selumetinib response VS gene expression (within MEK group)

Correlation test was performed to identify genes whose expressions are correlated with the sensitivity to the MEK inhibitor (selumetinib) sensitivity within the MEK group.

Correlation test

Prepare drug sensitivity vector and gene expression matrix

ddsCLL.MEK <- ddsCLL.norm[,ddsCLL.norm$group %in% "MEK"]
viabMEK <- Biobase::exprs(lpdCLL["D_012_4:5", ddsCLL.MEK$PatID])[1,]
stopifnot(all(ddsCLL.MEK$PatID == colnames(viabMEK)))  

Filtering and applying variance stabilizing transformation on RNAseq data

#only keep genes that have counts higher than 10 in any sample
keep <- apply(assay(ddsCLL.MEK), 1, function(x) any(x >= 10)) 
ddsCLL.MEK <- ddsCLL.MEK[keep,]
dim(ddsCLL.MEK)

Association test using Pearson correlation

tmp = do.call(rbind, lapply(1:nrow(ddsCLL.MEK), function(i) {
  res = cor.test(viabMEK, assay(ddsCLL.MEK[i,])[1,], method = "pearson")
  data.frame(coef=unname(res$estimate), p=res$p.value)
}))

corResult <- tibble(ID = rownames(ddsCLL.MEK), 
                    symbol = rowData(ddsCLL.MEK)$symbol,
                    coef = tmp$coef,
                    p = tmp$p)

corResult <- arrange(corResult, p) %>% mutate(p.adj = p.adjust(p, method="BH"))

Within MEK group, no gene expression was correlated with Selumetinib response

Enrichment heatmaps for MEK group

Plot for C6 gene sets

pCut = 0.05
corResult.sig <- filter(corResult, p <= pCut)
c6Plot <- plotSetHeatmap(geneTab=statTab[["MEKnone"]],
                enrichTab=c6Res[["MEKnone"]],
                topN=5, gmtFile=gmts[["C6"]],
                anno=TRUE, i)
#FIG# S13 A left
grid.draw(c6Plot[[1]]$plot)

Plot for Hallmark gene sets

hallmarkPlot <- plotSetHeatmap(geneTab=statTab[["MEKnone"]],
                enrichTab=hallmarkRes[["MEKnone"]],
                topN=5, gmtFile=gmts[["H"]],
                asterixList = corResult.sig,
                anno=TRUE, i)
#FIG# S13 A right
grid.draw(hallmarkPlot[[1]]$plot)
sessionInfo()
rm(list=ls())


MalgorzataOles/BloodCancerMultiOmics2017 documentation built on March 29, 2024, 2:29 p.m.