knitr::opts_chunk$set(cache = TRUE)
library("BloodCancerMultiOmics2017") library("DESeq2") library("piano") library("pheatmap") library("genefilter") library("grid") library("gridExtra") library("RColorBrewer") library("cowplot") library("dplyr") library("ggplot2") library("tibble")
plotDir = ifelse(exists(".standalone"), "", "part13/") if(plotDir!="") if(!file.exists(plotDir)) dir.create(plotDir)
Load and prepare expression data set.
data(list=c("dds", "patmeta", "mutCOM")) #load genesets gmts = list( H=system.file("extdata","h.all.v5.1.symbols.gmt", package="BloodCancerMultiOmics2017"), C6=system.file("extdata","c6.all.v5.1.symbols.gmt", package="BloodCancerMultiOmics2017"), KEGG=system.file("extdata","c2.cp.kegg.v5.1.symbols.gmt", package="BloodCancerMultiOmics2017"))
Choose CLL samples with trisomy 12 annotation from the gene expression data set.
#only choose CLL samples colData(dds)$Diagnosis <- patmeta[match(dds$PatID,rownames(patmeta)),]$Diagnosis ddsCLL <- dds[,dds$Diagnosis %in% "CLL"] #add trisomy 12 and IGHV information colData(ddsCLL)$trisomy12 <- factor(assayData(mutCOM[ddsCLL$PatID,])$binary[,"trisomy12"]) colData(ddsCLL)$IGHV <- factor(patmeta[ddsCLL$PatID,]$IGHV) #remove samples that do not have trisomy 12 information ddsCLL <- ddsCLL[,!is.na(ddsCLL$trisomy12)] #how many genes and samples we have? dim(ddsCLL)
Remove transcripts that do not have gene symbol annotations, show low counts or do not show variance across samples.
#remove genes without gene symbol annotations ddsCLL <- ddsCLL[!is.na(rowData(ddsCLL)$symbol),] ddsCLL <- ddsCLL[rowData(ddsCLL)$symbol != "",] #only keep genes that have counts higher than 10 in any sample keep <- apply(counts(ddsCLL), 1, function(x) any(x >= 10)) ddsCLL <- ddsCLL[keep,] #Remove transcripts do not show variance across samples ddsCLL <- estimateSizeFactors(ddsCLL) sds <- rowSds(counts(ddsCLL, normalized = TRUE)) sh <- shorth(sds) ddsCLL <- ddsCLL[sds >= sh,] #variance stabilization ddsCLL.norm <- varianceStabilizingTransformation(ddsCLL, blind=TRUE) #how many genes left dim(ddsCLL)
DESeq2 was used to identify genes that are differentially expressed between wild-type CLL samples and samples with trisomy 12.
Run DESeq2
design(ddsCLL) <- ~ trisomy12 ddsCLL <- DESeq(ddsCLL, betaPrior = FALSE) DEres <- results(ddsCLL) DEres.shr <- lfcShrink(ddsCLL, type="normal", contrast = c("trisomy12","1","0"), res = DEres)
Plot gene dosage effect.
#FIG# S23 A plotTab <- as.data.frame(DEres) plotTab$onChr12 <- rowData(ddsCLL)$chromosome == 12 dosePlot <- ggplot(plotTab) + geom_density(aes(x=log2FoldChange, col=onChr12, fill=onChr12), alpha=0.4) + xlim( -3, 3 ) dosePlot
The distributions of the logarithmic (base 2) fold change between samples with and without trisomy 12 are shown separately for the genes on chromosome 12 (green) and on other chromosomes (red). The two distributions are shifted with respected to each by an amount that is consistent with log2(3/2) ~ 0.58 and thus with gene dosage effects.
A heat map plot was used to show the normalized expression value (Z-score) of the differentially expressed genes in samples with and without trisomy 12.
Prepare matrix for heat map plot.
#filter genes fdrCut <- 0.1 fcCut <- 1.5 allDE <- data.frame(DEres.shr) %>% rownames_to_column(var = "ID") %>% mutate(Symbol = rowData(ddsCLL[ID,])$symbol, Chr = rowData(ddsCLL[ID,])$chromosome) %>% filter(padj <= fdrCut & abs(log2FoldChange) > fcCut) %>% arrange(pvalue) %>% filter(!duplicated(Symbol)) %>% mutate(Chr12 = ifelse(Chr == 12, "yes", "no")) #get the expression matrix plotMat <- assay(ddsCLL.norm[allDE$ID,]) colnames(plotMat) <- ddsCLL.norm$PatID rownames(plotMat) <- allDE$Symbol #sort columns of plot matrix based on trisomy 12 status plotMat <- plotMat[,order(ddsCLL.norm$trisomy12)] #calculate z-score and scale plotMat <- t(scale(t(plotMat))) plotMat[plotMat >= 4] <- 4 plotMat[plotMat <= -4] <- -4
Plot the heat map.
#FIG# S23 B #prepare colums and row annotations annoCol <- data.frame(row.names=ddsCLL.norm$PatID, Tris12=ddsCLL.norm$trisomy12) levels(annoCol$Tris12) <- list(wt = 0, mut =1) annoRow <- data.frame(row.names = allDE$Symbol, Chr12 = allDE$Chr12) annoColor <- list(Tris12 = c(wt = "grey80", mut = "black"), Chr12 = c(yes="red", no = "grey80")) pheatmap(plotMat, color=colorRampPalette(rev(brewer.pal(n=7, name="RdBu")))(100), cluster_cols = FALSE, annotation_row = annoRow, annotation_col = annoCol, show_colnames = FALSE, fontsize_row = 3, breaks = seq(-4,4, length.out = 101), annotation_colors = annoColor, border_color = NA)
According to the gene expression heat map, the samples with trisomy 12 show distinct expression pattern. 84 genes are significantly up-regulated in trisomy 12 samples and 37 genes are down-regulated in trisomy 12 samples (FDR =0.1 and log2FoldChange > 1.5). Among the 84 up-regulated genes, only 12 genes are from chromosome 12, suggested the distinct expression pattern of trisomy 12 samples can not be merely explained by gene dosage effect.
Gene set enrichment analysis using PAGE (Parametric Analysis of Gene Set Enrichment) was used to unravel the pathway activity changes underlying trisomy 12.
Function to run PAGE in R.
runGSEA <- function(inputTab, gmtFile, GSAmethod="gsea", nPerm=1000){ inGMT <- loadGSC(gmtFile,type="gmt") #re-rank by score rankTab <- inputTab[order(inputTab[,1],decreasing = TRUE),,drop=FALSE] if (GSAmethod == "gsea"){ #readin geneset database #GSEA analysis res <- runGSA(geneLevelStats = rankTab, geneSetStat = GSAmethod, adjMethod = "fdr", gsc=inGMT, signifMethod = 'geneSampling', nPerm = nPerm) GSAsummaryTable(res) } else if (GSAmethod == "page"){ res <- runGSA(geneLevelStats = rankTab, geneSetStat = GSAmethod, adjMethod = "fdr", gsc=inGMT, signifMethod = 'nullDist') GSAsummaryTable(res) } }
Function for plotting enrichment bar.
plotEnrichmentBar <- function(resTab, pCut=0.05, ifFDR=FALSE, setName="Signatures") { pList <- list() rowNum <- c() for (i in names(resTab)) { plotTab <- resTab[[i]] if (ifFDR) { plotTab <- dplyr::filter( plotTab, `p adj (dist.dir.up)` <= pCut | `p adj (dist.dir.dn)` <= pCut) } else { plotTab <- dplyr::filter( plotTab, `p (dist.dir.up)` <= pCut | `p (dist.dir.dn)` <= pCut) } if (nrow(plotTab) == 0) { print("No sets passed the criteria") next } else { #firstly, process the result table plotTab <- apply(plotTab, 1, function(x) { statSign <- as.numeric(x[3]) data.frame(Name = x[1], p = as.numeric(ifelse(statSign >= 0, x[4], x[6])), geneNum = ifelse(statSign >= 0, x[8], x[9]), Direction = ifelse(statSign > 0, "Up", "Down"), stringsAsFactors = FALSE) }) %>% do.call(rbind,.) plotTab$Name <- sprintf("%s (%s)",plotTab$Name,plotTab$geneNum) plotTab <- plotTab[with(plotTab,order(Direction, p, decreasing=TRUE)),] plotTab$Direction <- factor(plotTab$Direction, levels = c("Down","Up")) plotTab$Name <- factor(plotTab$Name, levels = plotTab$Name) #plot the barplot pList[[i]] <- ggplot(data=plotTab, aes(x=Name, y= -log10(p), fill=Direction)) + geom_bar(position="dodge",stat="identity", width = 0.5) + scale_fill_manual(values=c(Up = "blue", Down = "red")) + coord_fixed(ratio = 0.5) + coord_flip() + xlab(setName) + ggtitle(i) + theme_bw() + theme( plot.title = element_text(face = "bold", hjust =0.5), axis.title = element_text(size=15)) rowNum <-c(rowNum,nrow(plotTab)) } } if (length(pList) == 0) { print("Nothing to plot") } else { rowNum <- rowNum grobList <- lapply(pList, ggplotGrob) grobList <- do.call(rbind,c(grobList,size="max")) panels <- grobList$layout$t[grep("panel", grobList$layout$name)] grobList$heights[panels] <- unit(rowNum, "null") } return(grobList) }
Prepare input table for gene set enrichment analysis. A cut-off of raw p value < 0.05 was used to select genes for the analysis.
pCut <- 0.05 dataTab <- data.frame(DEres) dataTab$ID <- rownames(dataTab) #filter using raw pvalues dataTab <- filter(dataTab, pvalue <= pCut) %>% arrange(pvalue) %>% mutate(Symbol = rowData(ddsCLL[ID,])$symbol) dataTab <- dataTab[!duplicated(dataTab$Symbol),] statTab <- data.frame(row.names = dataTab$Symbol, stat = dataTab$stat)
Gene set enrichment analysis using Hallmarks gene set from MsigDB.
hallmarkRes <- list() #run PAGE resTab <- runGSEA(statTab, gmts$H ,GSAmethod = "page") #remove the HALLMARK_ resTab$Name <- gsub("HALLMARK_","",resTab$Name) hallmarkRes[["Gene set enrichment analysis"]] <- arrange(resTab,desc(`Stat (dist.dir)`)) hallBar <- plotEnrichmentBar(hallmarkRes, pCut = 0.01, ifFDR = TRUE, setName = "Hallmark gene sets")
Gene set enrichment analysis using kegg gene set from MsigDB.
keggRes <- list() resTab <- runGSEA(statTab,gmts$KEGG,GSAmethod = "page") #remove the KEGG_ resTab$Name <- gsub("KEGG_","",resTab$Name) keggRes[["Gene set enrichment analysis"]] <- resTab keggBar <- plotEnrichmentBar(keggRes, pCut = 0.01, ifFDR = TRUE, setName = "KEGG gene sets")
Heatmap plots were used to show the expression values of differentially expressed genes from KEGG_CHEMOKINE_SIGNALING_PATHWAY gene set
Prepare the matrix for heatmap plot.
#select differentially expressed genes fdrCut <- 0.05 cytoDE <- data.frame(DEres) %>% rownames_to_column(var = "ID") %>% mutate(Symbol = rowData(ddsCLL[ID,])$symbol, Chr=rowData(ddsCLL[ID,])$chromosome) %>% filter(padj <= fdrCut, log2FoldChange > 0) %>% arrange(pvalue) %>% filter(!duplicated(Symbol)) %>% mutate(Chr12 = ifelse(Chr == 12, "yes", "no")) #get the expression matrix plotMat <- assay(ddsCLL.norm[cytoDE$ID,]) colnames(plotMat) <- ddsCLL.norm$PatID rownames(plotMat) <- cytoDE$Symbol #sort columns of plot matrix based on trisomy 12 status plotMat <- plotMat[,order(ddsCLL.norm$trisomy12)] #calculate z-score and sensor plotMat <- t(scale(t(plotMat))) plotMat[plotMat >= 4] <- 4 plotMat[plotMat <= -4] <- -4 annoCol <- data.frame(row.names = ddsCLL.norm$PatID, Tris12 = ddsCLL.norm$trisomy12) levels(annoCol$Tris12) <- list(wt = 0, mut =1) annoRow <- data.frame(row.names = cytoDE$Symbol, Chr12 = cytoDE$Chr12)
Heatmap for genes from KEGG_CHEMOKINE_SIGNALING_PATHWAY geneset.
gsc <- loadGSC(gmts$KEGG) geneList <- gsc$gsc$KEGG_CHEMOKINE_SIGNALING_PATHWAY plotMat.chemo <- plotMat[rownames(plotMat) %in% geneList,] keggHeatmap <- pheatmap(plotMat.chemo, color = colorRampPalette( rev(brewer.pal(n=7, name="RdBu")))(100), cluster_cols = FALSE, clustering_method = "ward.D2", annotation_row = annoRow, annotation_col = annoCol, show_colnames = FALSE, fontsize_row = 8, breaks = seq(-4,4, length.out = 101), treeheight_row = 0, annotation_colors = annoColor, border_color = NA, main = "CHEMOKINE_SIGNALING_PATHWAY",silent = TRUE)$gtable
Combine enrichment plot and heatmap plot.
#FIG# S24 ABC ggdraw() + draw_plot(hallBar, 0, 0.7, 0.5, 0.3) + draw_plot(keggBar, 0.5, 0.7, 0.5, 0.3) + draw_plot(keggHeatmap, 0.1, 0, 0.8, 0.65) + draw_plot_label(c("A","B","C"), c(0, 0.5, 0.1), c(1, 1, 0.7), fontface = "plain", size=20)
Based on the gene set enrichment analysis results, genes from PI3K_ATK_MTOR pathway are significantly up-regulated in the samples with trisomy 12, which partially explained the increased sensitivity of trisomy 12 samples to PI3K and MTOR inhibitors. In addition, genes that are up-regulated in trisomy 12 are enrichment in chemokine signaling pathway.
sessionInfo()
rm(list=ls())
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.