In lujunyan1118/mofaCLL: Multi-omics reveals clinically relevant proliferative drive associated with mTOR-MYC-OXPHOS activity in chronic lymphocytic leukemia

plotDir = ifelse(exists(".standalone"), "", "part5/")
if(plotDir!="") if(!file.exists(plotDir)) dir.create(plotDir)
knitr::opts_chunk$set(fig.path=plotDir, dev=c("png", "pdf"))
knitr::opts_chunk$set(warning = FALSE, message = FALSE)

#Libraries
library(mofaCLL)
library(gridExtra)
library(ggrepel)
library(genefilter)
library(limma)
library(ggbeeswarm)
library(GEOquery)
library(fgsea)
library(pheatmap)
library(tidygraph)
library(igraph)
library(ggraph)
library(ComplexHeatmap)
library(grid)
library(cowplot)
library(DESeq2)
library(tidyverse)

Gene expression signatures for CLL-PD (F4)

Load datasets

data("mofaOut","gene","drug","validateScreen","rna","seahorse","protein","mito")

gmts = list(H = system.file("externalData/h.all.v6.2.symbols.gmt", package = "mofaCLL"))

setMap <- read_tsv(system.file("externalData/setToPathway.txt", package = "mofaCLL"), col_types = "cc")

mitoProtList <- readxl::read_xls(system.file("externalData/Human.MitoCarta3.0.xls", package = "mofaCLL"), 
                                 sheet = 2)$Symbol

Process MOFA model

library(MOFA)
#get factor values
facTab <- getFactors(
  MOFAobject, 
  factors = "LF4",
  as.data.frame = TRUE
) %>% as_tibble()

#get all factors
facTab.all <- getFactors(
  MOFAobject,
  factors = "all",
  as.data.frame = TRUE
) %>% as_tibble()

#get weights
weightTab <- getWeights(
  MOFAobject,
  factors = "all",
  as.data.frame = TRUE
) %>% as_tibble() %>%
  mutate(feature = ifelse(feature == "IGHV.status", "IGHV", feature))
#get training data
trainData <- getTrainData(MOFAobject)

detach("package:MOFA", unload = TRUE) #detach MOFA because it's "predict" function marks the "predict" function in glmnet

Pre-processing data

Processing RNAseq data

dds<-estimateSizeFactors(rna)
dds$factor <- facTab[match(dds$PatID, facTab$sample),]$value
dds$IGHV <- gene[match(dds$PatID, rownames(gene)),]$IGHV
ddsSub <- dds[,!is.na(dds$factor)]

#vst
ddsSub.vst <- varianceStabilizingTransformation(ddsSub)

#seperate M-CLL and U-CLL
ddsM.vst <- ddsSub.vst[,ddsSub.vst$IGHV %in% 1]
ddsU.vst <- ddsSub.vst[,ddsSub.vst$IGHV %in% 0]

Identify genes associated with CLL-PD

Correlation test using DESeq2

factorMatrix <- facTab.all %>% spread(key = factor, value = value) %>%
  data.frame() %>% column_to_rownames("sample") 
factorMatrix <- factorMatrix[colnames(ddsSub),]

designMat <- model.matrix(~1 +.,factorMatrix)
deRes <- DESeq(ddsSub, full = designMat, betaPrior = FALSE)


corRes.rna <- results(deRes, name = "LF4", tidy = TRUE) %>%
  mutate(symbol = rowData(ddsSub)[row,]$symbol) %>%
  dplyr::rename(logFC = log2FoldChange, t = stat, P.Value = pvalue, adj.P.Val = padj,id=row)

P value histogram

pHis <- ggplot(corRes.rna, aes(x=P.Value)) + geom_histogram(col="black",fill = colList[2],alpha=0.5) +
  xlab("P value") + ylab("Count") + ggtitle("P value histogram") +
  theme_full
pHis

Heatmap of significantly correlated genes (1% FDR)

corRes.sig <- filter(corRes.rna, adj.P.Val < 0.01)
exprMat <- assay(ddsSub.vst)
plotMat <- exprMat[corRes.sig$id,]
plotMat <- plotMat[order(corRes.sig$logFC, decreasing = TRUE),order(designMat[,"LF4"])]

colAnno <- data.frame(row.names = colnames(exprMat),
                      LF4 = designMat[,"LF4"])
colnames(colAnno) <- c("CLL-PD")
plotMat <- mscale(plotMat, censor = 4)

breaks <- seq(-4,4,length.out = 100)

pheatmap(plotMat, scale = "none", cluster_cols = FALSE, cluster_rows = FALSE,
         annotation_col = colAnno,
         color = colorRampPalette(c(colList[2],"white",colList[1]))(length(breaks)),
         breaks = breaks,
         show_rownames = FALSE, show_colnames = FALSE)

How many genes show significant correlation?

nrow(corRes.sig)

#percentage
nrow(corRes.sig)/nrow(ddsSub)

How many genes show up-regulation?

nrow(filter(corRes.sig, t>0))

Gene enrichment analysis

Enrichment using Hallmark genesets for all CLLs

enrichRes <- runCamera(exprMat, designMat, gmts$H, 
                       id = rowData(ddsSub.vst[rownames(exprMat),])$symbol, contrast = 5, #LF4
                       method = "camera", pCut = 0.05, ifFDR = TRUE, removePrefix = "HALLMARK_",
                       plotTitle = "Pathway enrichment on RNA level (5% FDR)", insideLegend = TRUE, setMap = setMap)
rnaEnrichHallmark <- enrichRes$enrichPlot + theme(legend.position = c(0.8,0.2))
rnaEnrichHallmark

Enrichment using Hallmark genesets for U-CLLs

exprMat.U <- assay(ddsU.vst)
designMat.U <- designMat[colnames(exprMat.U),]
enrichRes.U <- runCamera(exprMat.U, designMat.U, gmts$H, 
                       id = rowData(ddsSub.vst[rownames(exprMat.U),])$symbol, contrast = 5, #LF4
                       method = "camera", pCut = 0.05, ifFDR = TRUE, removePrefix = "HALLMARK_",
                       plotTitle = "Hallmarks (U-CLL, 5% FDR)", setMap = setMap)
plotEnrichHallmark.U <- enrichRes.U$enrichPlot
plotEnrichHallmark.U

Enrichment using Hallmark genesets for M-CLLs

exprMat.M <- assay(ddsM.vst)
designMat.M <- designMat[colnames(exprMat.M),]
enrichRes.M <- runCamera(exprMat.M, designMat.M, gmts$H, 
                       id = rowData(ddsSub.vst[rownames(exprMat.M),])$symbol, contrast = 5, #LF4
                       method = "camera", pCut = 0.05, ifFDR = TRUE, removePrefix = "HALLMARK_",
                       plotTitle = "Hallmarks (M-CLL, 5% FDR)", setMap = setMap)
plotEnrichHallmark.M <- enrichRes.M$enrichPlot
plotEnrichHallmark.M

Heatmap for enriched genesets

Get mitochondrial genes from mito carta

highLightList <- filter(corRes.sig, symbol %in% mitoProtList)$id

MTORC1_SIGNALING

corRes.sig <- dplyr::rename(corRes.rna, coef = "logFC") %>% dplyr::filter(adj.P.Val < 0.01, coef > 0)

colAnno <- data.frame(row.names = colnames(exprMat),
                      LF4 = designMat[,"LF4"])
col_fun = circlize::colorRamp2(c(min(colAnno$LF4),max(colAnno$LF4)), c("white",colList[4]))
annoColor <- list(`CLL-PD` = col_fun)
colnames(colAnno) <- c("CLL-PD")

plotSetHeatmapComplex(geneSigTab = corRes.sig, geneSet =gmts$H, setName = "HALLMARK_MTORC1_SIGNALING", exprMat, colAnno, plotTitle = "Hallmark mTORC1 signaling",scale = TRUE,annoCol = annoColor, highLight =  highLightList)

OXIDATIVE_PHOSPHORYLATION

plotSetHeatmapComplex(geneSigTab = corRes.sig, geneSet =gmts$H, setName = "HALLMARK_OXIDATIVE_PHOSPHORYLATION", exprMat, colAnno, plotTitle = "Hallmark Oxidative phosphorylation", scale = TRUE, annoCol = annoColor, highLight =  highLightList)

#get a percentage of mitochondiral proteins in this set
allGeneInList <- intersect(corRes.sig$symbol, loadGSC(gmts$H)$gsc$HALLMARK_OXIDATIVE_PHOSPHORYLATION)

How many genes are in the geneset of OXIDATIVE PHOSPHORYLAITON?

length(allGeneInList)

How many genes are also on mitochondria?

sum(allGeneInList %in% mitoProtList)

MYC_TARGETS_V1

#pvalue for MYC
filter(corRes.sig, symbol == "MYC")


plotSetHeatmapComplex(geneSigTab = corRes.sig, geneSet =gmts$H, setName = "HALLMARK_MYC_TARGETS_V1", exprMat, colAnno, scale = TRUE, plotTitle = "Hallmark MYC targets v1", annoCol = annoColor, highLight =  highLightList)

A network plot showing the overlap of gene enriched in those three pathways and annotate mitochondrial genes

corRes.sigUp <- filter(corRes.rna, adj.P.Val < 0.01, logFC >0)

setList <- c("HALLMARK_MYC_TARGETS_V1", "HALLMARK_OXIDATIVE_PHOSPHORYLATION", "HALLMARK_MTORC1_SIGNALING")
nodeTab <- lapply(setList, function(eachSet) {
  geneList <- intersect(corRes.sig$symbol, loadGSC(gmts$H)$gsc[[eachSet]])
  tibble(symbol = geneList, setName = eachSet)
}) %>% bind_rows() %>%
  mutate(setName = str_remove(setName,"HALLMARK_")) %>%
  mutate(setName = setMap[match(setName, setMap$setName),]$pathwayName) %>%
  mutate(mitoGene = ifelse(symbol %in% mitoProtList, "yes", "no"))

#get node list
allNodes <- union(nodeTab$symbol, nodeTab$setName) 

nodeList <- data.frame(id = seq(length(allNodes))-1, name = allNodes, stringsAsFactors = FALSE) %>%
  mutate(type = ifelse(name %in% mitoProtList, 
                          "yes","no")) %>%
  mutate(type = ifelse(name %in% nodeTab$setName,"set", type)) %>%
  mutate(nodeText = ifelse(type == "no","",name)) %>%
  mutate(pathText = ifelse(type == "set", name, "")) %>%
  mutate(textSize = ifelse(type == "set",3,0)) %>%
  mutate(textPosAdjust = ifelse(name %in%"Oxidative phosphorylation",0.2,0.5))


#get edge list
edgeList <- nodeTab %>%
  dplyr::rename(Source = setName, Target = symbol) %>%
  mutate(Source = nodeList[match(Source,nodeList$name),]$id,
         Target = nodeList[match(Target, nodeList$name),]$id) %>%
  data.frame(stringsAsFactors = FALSE)

net <- graph_from_data_frame(vertices = nodeList, d=edgeList, directed = FALSE)

set.seed(2)
tidyNet <- as_tbl_graph(net)
enrichNet <- ggraph(tidyNet, layout = "igraph", algorithm = "nicely") + 
  geom_edge_link(color = "grey90", width=1) + 
  geom_node_point(aes(color =type), size=3, alpha=0.7) + 
  geom_node_text(aes(label = pathText, col = type, hjust = textPosAdjust), repel = FALSE, fontface= "bold", size=5, nudge_y = -0.5) +
  #scale_size_manual(values =c(set = 4, yes = 5, no =4))+
  scale_color_manual(values = c(yes = colList[5],no = "grey60", set = colList[1])) + 
  #scale_edge_color_brewer(palette = "Set2") +
  theme_graph(base_family = "sans") + theme(legend.position = "none", plot.margin =margin(-5,-5,-5,-5))

prepare legend

set.seed(2)

legTab <- tibble(x=c(1,1), y=c(1,2), group = c("mitochondrial protein coding genes",
                                               "other genes"))

legP <- ggplot(legTab, aes(x=x, y=y, col = group)) +
  geom_point(size=4) + theme_bw() +
  scale_color_manual(values = c("mitochondrial protein coding genes" = colList[5], "other genes" = "grey60"), name = "") +
  theme(legend.text = element_text(size=12),
        legend.background = element_blank())
legP <- get_legend(legP)

comNet <- ggdraw(enrichNet) + draw_plot(legP, x = 0.22, y=-0.35)
comNet

Compare CLL-PD expression signature with other stimulation signatures

Expression profile of U-CLL cells treated with CPG (GSE30105)

Query and pre-processing microarray data

CLL cells were purified by negative selection using anti-CD3, anti-CD14 and anti-CD16 mouse monoclonal antibodies and Dynabeads coated with a pan anti-mouse IgG antibody. The purity of the CLL cells after negative selection was monitored by flow-cytometry and the percentage of CD5+/CD19+ cells exceeded 98% for each sample. CLL cells were resuspended in RPMI complete medium at a density of 2 × 105 and stimulated with 7.5 μg/ml complete phosphorothioate CpG ODN oligonucleotide 2006 (5′-TCGTCGTTTTGTCGTTTTGTCGTT-3′) or left unstimulated for 18 h.

Sys.setenv("VROOM_CONNECTION_SIZE" = 131072*1000)
gse30105 <- getGEO("GSE30105", GSEMatrix = TRUE)

##GSE30105
gse <- gse30105[[1]]
gse$treatment <- factor(ifelse(grepl("Unstimulated",gse$title), "untreated","CPG"),
                        levels = c("untreated","CPG"))
gse.vst <- gse
exprs(gse.vst) <- normalizeVSN(exprs(gse))
gse.vst <- gse.vst[!fData(gse.vst)$`GENE_SYMBOL` %in% c(NA, ""),]
#invariant filtering
sds <- genefilter::rowSds(exprs(gse.vst))
gse.vst <- gse.vst[sds > genefilter::shorth(sds)]

Differential expression and enrichment analysis

Differential expression using Limma

mm <- model.matrix( ~ 1 + treatment, pData(gse.vst))
fit <- lmFit(gse.vst, mm)
fit <- eBayes(fit)
resTab <- limma::topTable(fit, coef= "treatmentCPG", number = "all")
hist(resTab$P.Value)

sigList <- list()
sigListP <- list() #for upset plot, only cut-off is adj.P.Val < 0.01
sigList[["Up-regulated by CpG ODN"]] <- unique(filter(resTab, adj.P.Val < 0.01, logFC > 1.5)$GENE_SYMBOL)
sigListP[["CpG ODN"]] <- unique(filter(resTab, adj.P.Val < 0.01, logFC > 0)$GENE_SYMBOL)

Enrichment

highSet <- c("MYC targets v1", "MYC targets v2", "mTORC1 signaling","Oxidative phosphorylation")
exprMat <- exprs(gse.vst)
designMat <- model.matrix( ~ 1 + treatment, pData(gse.vst))

enrichRes <- runCamera(exprMat, designMat, gmts$H, id = fData(gse.vst)$GENE_SYMBOL,
                       method = "camera", pCut = 0.01, ifFDR = TRUE, removePrefix = "HALLMARK_",
                       plotTitle = "CpG ODN treatment", insideLegend = TRUE, setToHighlight = highSet, setMap = setMap)
plotEnrich.CPG <- enrichRes$enrichPlot + theme(plot.title = element_text(size=18, face = "bold"))
plotEnrich.CPG

Signature of IL21 + CD40L

Pre-processing microarray data

GSE50572 <- getGEO("GSE50572", GSEMatrix = TRUE)

gse <- GSE50572[[1]]

#table(gse$`treatment:ch1`)
condi <- structure(c("none","Tcell","IL21_CD40L"),
                   names = c("none","cocultured with activated T cells",
                             "IL-21 + cocultured with CD40L- cells"))
gse <- gse[,gse$`treatment:ch1` %in% names(condi)]
gse$treatment <- condi[gse$`treatment:ch1`]
gse$treatment <- factor(gse$treatment, levels =c("none","IL21_CD40L","Tcell"))

gse.all <- gse
exprs(gse.all) <- normalizeVSN(exprs(gse))
gse.all <- gse.all[!fData(gse.all)$`Gene Symbol` %in% c(NA, ""),]

#invariant filtering
sds <- genefilter::rowSds(exprs(gse.all))
gse.all <- gse.all[sds > genefilter::shorth(sds)]

Differential expression using Limma

mm <- model.matrix( ~ 1 + treatment, pData(gse.all))
fit <- lmFit(gse.all, mm)
fit <- eBayes(fit)

Differential expression and enrichment analysis

Differential expression using Limma

resTab <- limma::topTable(fit, coef= "treatmentIL21_CD40L", number = "all")
hist(resTab$P.Value)

Creat gene sets

sigList[["Up-regulated by IL21+CD40L"]] <- unique(filter(resTab, adj.P.Val < 0.01, logFC > 1.5)$Gene.Symbol)
sigListP[["IL21+CD40L"]] <- unique(filter(resTab, adj.P.Val < 0.01,logFC>0)$Gene.Symbol)

Enrichment analysis

exprMat <- exprs(gse.all)
designMat <- mm

enrichRes <- runCamera(exprMat, designMat, gmts$H, id = fData(gse.all)$`Gene Symbol`,
                       contrast = "treatmentIL21_CD40L",
                       method = "camera", pCut = 0.01, ifFDR = TRUE, removePrefix = "HALLMARK_",
                       plotTitle = "IL21 + CD40L treatment", insideLegend = TRUE, setToHighlight = highSet, setMap = setMap)

plotEnrich.IL21CD40L <- enrichRes$enrichPlot + theme(plot.title = element_text(size=18, face = "bold"))
plotEnrich.IL21CD40L

T cell co-cultrue signatures

Differential expression and enrichment analysis

Differential expression using Limma

resTab <- limma::topTable(fit, coef= "treatmentTcell", number = "all")
hist(resTab$P.Value)

sigList[["Up-regulated by activated T cells"]] <- unique(filter(resTab, adj.P.Val < 0.01, logFC > 1.5)$Gene.Symbol)
sigListP[["activated T cells"]] <- unique(filter(resTab, adj.P.Val < 0.01, logFC >0)$Gene.Symbol)

Enrichment

enrichRes <- runCamera(exprMat, mm, gmts$H, id = fData(gse.all)$`Gene Symbol`, contrast = "treatmentTcell",
                       method = "camera", pCut = 0.01, ifFDR = TRUE, removePrefix = "HALLMARK_",
                       plotTitle = "Co-culture with activated T cells", insideLegend = TRUE, setToHighlight = highSet, setMap=setMap)
plotEnrich.Tcell <- enrichRes$enrichPlot+ theme(plot.title = element_text(size=18, face = "bold"))
plotEnrich.Tcell

BCR-triggering signature by anti-IgM stimulation for 390 min (GSE3941)

gse39411 <- getGEO("GSE39411", GSEMatrix = TRUE)

gse <- gse39411[[1]]

#subset for only B-cell and CLL without transfection
gse <- gse[,gse$`transfected with:ch1` == "none" &
             gse$`time point (min):ch1` == "T390" &
             gse$`cell type:ch1` == "chronic lymphocytic leukemia B-cell"]

#vst
gse.vst <- gse
exprs(gse.vst) <- limma::normalizeVSN(gse.vst)

patAnno <- pData(gse.vst) %>% rownames_to_column("sampleID") %>%
  select(sampleID, description) %>%
  separate(description, into = c("patID","stimulation","timePoint"),sep = "_") %>%
  mutate(cellType = substr(patID,1,nchar(patID)-1)) %>%
  mutate(cellType = ifelse(cellType == "N", "B-cell",cellType)) %>%
  mutate(timePoint = abs(as.integer(gsub("T","",timePoint)))) %>%
  mutate(stimulation = factor(stimulation, levels = c("US","S")))

pData(gse.vst) <- patAnno[match(colnames(gse.vst), patAnno$sampleID),]

Differential expression (CLL cells for 390 min)

gse.test <- gse.vst

Differential expression using Limma

mm <- model.matrix( ~ 1  + patID + stimulation, pData(gse.test))
fit <- lmFit(gse.test, mm)
fit <- eBayes(fit)
resTab <- limma::topTable(fit, coef= "stimulationS", number = "all")
hist(resTab$P.Value)

sigList[["Up-regulated by anti-IgM"]] <- unique(filter(resTab, adj.P.Val < 0.01, logFC > 1.5)$Gene.Symbol)
sigListP[["anti-IgM"]] <- unique(filter(resTab, adj.P.Val < 0.01, logFC > 0)$Gene.Symbol)

Enrichment

exprMat <- exprs(gse.test)
designMat <- model.matrix( ~ 1  + patID  + stimulation, pData(gse.test))

enrichRes <- runCamera(exprMat, designMat, gmts$H, id = fData(gse.test)$`Gene Symbol`,
                       method = "camera", pCut = 0.01, ifFDR = TRUE, removePrefix = "HALLMARK_",
                       plotTitle = "anti-IgM treatment",insideLegend = TRUE, setToHighlight = highSet, setMap = setMap)
plotEnrich.IgM <- enrichRes$enrichPlot+ theme(plot.title = element_text(size=18, face = "bold"))
plotEnrich.IgM

Enrichment analysis using all above stimulation signatures

fgseaOut <- runFGSEA(corRes.rna, sigList, ifFDR = FALSE)
pList <- fgseaOut$plots

stiEnrich <- pList[[1]]
stiEnrich

pList[[2]] <- pList[[2]] + ylab("")
pList[[4]] <- pList[[4]] + ylab("")
stiEnrichOther <- plot_grid(plotlist = pList, ncol=2, align = "hv", axis = "tblr")
fgseaOut$table
stiEnrichOther

Upset plot to show gene overlaps

geneOverList <- c(list(`CLL-PD` = filter(corRes.rna, logFC>0, adj.P.Val <0.01)$symbol), sigListP)
UpSetR::upset(UpSetR::fromList(geneOverList))

Proteomic signatures for CLL-PD

Detect proteins correlated with CLL-PD (LF4)

Process datasets

Process proteomics data

protCLL <- protein
protCLL$CLLPD <- facTab[match(colnames(protCLL), facTab$sample),]$value
protCLL <- protCLL[,!is.na(protCLL$CLLPD)]
protMat <- assays(protCLL)[["QRILC"]]

**How many samples have CLL-PD value?

ncol(protCLL)

Correlate protein expression with CLL-PD using proDA

Fit the probailistic dropout model

CLLPD <- protCLL$CLLPD
fit <- proDA::proDA(protMat, design = ~ CLLPD)

Test for differentially expressed proteins

corRes.prot <- proDA::test_diff(fit, "CLLPD") %>%
  dplyr::rename(id = name, logFC = diff, t=t_statistic,
                P.Value = pval, adj.P.Val = adj_pval) %>% 
  mutate(symbol = rowData(protCLL[id,])$hgnc_symbol,
         coef = logFC) %>%
  select(symbol, id, coef, t, P.Value, adj.P.Val, n_obs) %>%  
  arrange(P.Value) %>% 
  as_tibble()

P-value histogram

ggplot(corRes.prot, aes(x=P.Value)) + geom_histogram(fill = "lightblue",col="grey50") + xlim(0,1) +
  theme_full

List of significantly correlated proteins (10% FDR)

corRes.sig.prot <- filter(corRes.prot, adj.P.Val <= 0.1) %>% arrange(desc(t))
corRes.sig.prot %>% mutate_if(is.numeric, formatC, digits =2, format="e") %>% 
  mutate(n_obs = as.integer(n_obs)) %>%
  DT::datatable()

How many proteins in total?

nrow(corRes.sig.prot)

How many up-regulated proteins?

nrow(filter(corRes.sig.prot, t>0))

Enrichment using Hallmark genesets for all CLLs

designMat <- model.matrix(~CLLPD)
enrichRes <- runCamera(protMat, designMat, gmts$H, 
                       id = rowData(protCLL[rownames(protMat),])$hgnc_symbol, #LF4
                       method = "camera", pCut = 0.05, ifFDR = TRUE, removePrefix = "HALLMARK_",
                       plotTitle = "Pathway enrichment on protein level (5% FDR)", insideLegend = TRUE, direction = "Up", setMap = setMap)
proteinEnrichHallmark <- enrichRes$enrichPlot + theme(legend.position = c(0.8,0.2))
proteinEnrichHallmark

Heatmap of enrichmed MYC targets

mycTar <- unique(unlist(loadGSC(gmts$H)$gsc[c("HALLMARK_MYC_TARGETS_V2","HALLMARK_MYC_TARGETS_V1")]))

corRes.sigUp <- corRes.prot %>% dplyr::filter(P.Value < 0.05, t > 0)

colAnno <- tibble(patID = colnames(protMat), 
                 F4 = protCLL$CLLPD) %>%
  arrange(F4) %>% data.frame() %>% column_to_rownames("patID")

col_fun = circlize::colorRamp2(c(min(colAnno$F4),max(colAnno$F4)), c("white",colList[4]))
annoColor <- list(`CLL-PD` = col_fun)
colnames(colAnno) <- c("CLL-PD")

plotProtMYC <- plotSetHeatmapComplex(geneSigTab = corRes.sigUp, geneSet =mycTar, setName = "HALLMARK_MYC_TARGETS", plotTitle = "Hallmark MYC targets",
                                     exprMat = protMat,colAnno =colAnno, scale = TRUE, annoCol = annoColor)

plotProtMYC

Scatter plot of selected proteins

MYC tagets

seleProt <- c("MCM4","NME1","PAICS")
tTab <- filter(corRes.prot, symbol %in% seleProt)
scatterTab <- protMat[tTab$id,] %>% data.frame() %>%
  rownames_to_column("id") %>% gather(key="patID", value = "value",-id) %>%
  filter(!is.na(value)) %>%
  mutate(LF4 = facTab[match(patID, facTab$sample),]$value,
         symbol = rowData(protCLL)[id,]$hgnc_symbol)


plotProtScatter <- lapply(seleProt, function(n) {
  eachTab <- filter(scatterTab, symbol == n) 
  res <- cor.test(eachTab$value, eachTab$LF4)
  annoCoef <- paste("coefficient =",format(res$estimate,digits = 2))
  annoP <- paste("italic(P)~'=",formatNum(res$p.value, digits = 1, format = "e"),"'")
  ggplot(eachTab, aes(x=LF4, y=value)) +
    geom_point(fill = colList[6],size=2,shape=21) + geom_smooth(method="lm",se=FALSE, linetype = "dashed", color = "grey50") +
    annotate("text", x = -1.5, y = Inf, label = annoCoef,
           hjust=0, vjust =1, size = 5, col= colList[1]) +
    annotate("text", x = -1.5, y = Inf, label = annoP,
           hjust=0, vjust =3, size = 5, col= colList[1], parse= TRUE) +
    ggtitle(unique(eachTab$symbol)) + ylab("Protein expression") + xlab("CLL-PD") +
    #ylim(c(0.4,1.2)) +
    theme_half 
})
grid.arrange(grobs= plotProtScatter, ncol=3)

Differentially expressed proteins on mitochondria

Mitochondrial proteins

Read a table of proteins on mitochondria from mito

corRes.mito <- corRes.sig.prot %>% filter(symbol %in% mitoProtList, t>0)

How many proteins are on mitochodnria?

nrow(corRes.mito)

Heatmap of significantly associated proteins (10% FDR), with mitochondrial proteins highlighted

colAnno <- tibble(patID = colnames(protMat), 
                  CLL_PD = protCLL$CLLPD) %>%
  arrange(CLL_PD) %>% data.frame() %>% column_to_rownames("patID")

col_fun = circlize::colorRamp2(c(min(colAnno$CLL_PD),max(colAnno$CLL_PD)), c("white",colList[4]))
annoColor <- list(CLL_PD = col_fun)
plotMat <- assays(protCLL[corRes.sig.prot$id,rownames(colAnno)])[["QRILC"]]
plotMat <- mscale(plotMat, censor = 6)

protHeatmap <- plotSetHeatmapComplex(geneSigTab = corRes.sig.prot, geneSet = corRes.sig.prot$symbol,
               exprMat = protMat, colAnno = colAnno, 
               setName = "Proteins associated with CLL-PD (10%FDR)", annoCol = annoColor,
               highLight = corRes.mito$id)
grobHeatmap = grid.grabExpr(draw(protHeatmap))
plot_grid(grobHeatmap)

How many up-regulated RNAs are mitochodnrial protiens?

corRes.rna.mito <- filter(corRes.rna, t>0, adj.P.Val < 0.01, symbol %in% mitoProtList) 
nrow(corRes.rna.mito)
head(arrange(corRes.rna.mito, desc(t)))

seleProt <- c("VDAC1","HSPD1")
tTab <- filter(corRes.prot, symbol %in% seleProt)
scatterTab <- protMat[tTab$id,] %>% data.frame() %>%
  rownames_to_column("id") %>% gather(key="patID", value = "value",-id) %>%
  filter(!is.na(value)) %>%
  mutate(LF4 = facTab[match(patID, facTab$sample),]$value,
         symbol = rowData(protCLL)[id,]$hgnc_symbol)


plotMitoProScatter <- lapply(seleProt, function(n) {
  eachTab <- filter(scatterTab, symbol == n) 
  res <- cor.test(eachTab$value, eachTab$LF4)
  annoCoef <- paste("coefficient =",format(res$estimate,digits = 2))
  annoP <- paste("italic(P)~'=",formatNum(res$p.value, digits = 1, format = "e"),"'")
  ggplot(eachTab, aes(x=LF4, y=value)) +
    geom_point(fill = colList[6],size=3,shape=21) + geom_smooth(method="lm",se=FALSE, linetype = "dashed", color = "grey50") +
    annotate("text", x = -1.5, y = Inf, label = annoCoef,
           hjust=0, vjust =1, size = 5, col= colList[1]) +
    annotate("text", x = -1.5, y = Inf, label = annoP,
           hjust=0, vjust =3, size = 5, col= colList[1], parse= TRUE) +
    ggtitle(unique(eachTab$symbol)) + ylab("Protein expression") + xlab("CLL-PD") +
    #ylim(c(0.4,1.2)) +
    theme_half 
})
corMitoMarer <- grid.arrange(grobs= plotMitoProScatter, ncol=2)
corMitoMarer

FACS validation using mitoTracker

mitoTab <- mito %>% mutate(CLLPD = facTab[match(patientID, facTab$sample),]$value) %>%
  pivot_longer(-c(patientID, CLLPD), names_to = "feature", values_to = "MFI") %>%
  mutate(logMFI = log10(MFI)) %>%
  filter(feature == "MitoTracker")


eachTab <- mitoTab
res <- cor.test(eachTab$logMFI, eachTab$CLLPD)
annoCoef <- paste("coefficient =",format(res$estimate,digits = 2))
annoP <- paste("italic(P)~'=",formatNum(res$p.value, digits = 2, format = "e"),"'")
plotMitoScatter <- ggplot(eachTab, aes(x=CLLPD, y=logMFI)) +
  geom_point(fill = colList[3],size=5,shape=21) + geom_smooth(method="lm",se=FALSE, linetype = "dashed", color = "grey50") +
  annotate("text", x = -1.5, y = Inf, label = annoCoef,
         hjust=0, vjust =1, size = 5, col= colList[1]) +
  annotate("text", x = -1.5, y = Inf, label = annoP,
         hjust=0, vjust =3, size = 5, col= colList[1], parse= TRUE) +
  ggtitle(unique(eachTab$feature)) + ylab(expression(log[10]~(MFI))) + xlab("CLL-PD") +
  theme_half 


plotMitoScatter

Association between CLL-PD and energy metabolism

Correlation test using linear model

seaMat <- SummarizedExperiment::assay(seahorse)
factorMatrix <- facTab.all %>% spread(key = factor, value = value) %>%
  data.frame() %>% column_to_rownames("sample") 
overSample <- intersect(colnames(seaMat), rownames(factorMatrix))
factorMatrix <- factorMatrix[overSample,]
factorMatrix <- factorMatrix[,complete.cases(t(factorMatrix))]
designMat <- model.matrix(~1 +.,factorMatrix)
seaMat <- seaMat[,rownames(designMat)]

fit <- lmFit(seaMat, designMat)
fit2 <- eBayes(fit)

corRes.LF1 <- limma::topTable(fit2, number ="all", coef = "LF1", adjust.method = "BH") %>%
  rownames_to_column("id") %>% as_tibble() %>% mutate(factor = "LF1")
corRes.LF4 <- limma::topTable(fit2, number ="all", coef = "LF4", adjust.method = "BH") %>%
  rownames_to_column("id") %>% as_tibble() %>% mutate(factor = "LF4")


dim(seaMat)

Plot P values for associations, straitified by F1 and F4

plotTab <- bind_rows(corRes.LF1[,c("id","P.Value","factor")], corRes.LF4[,c("id","P.Value","factor")]) %>%
  mutate(P.adj = p.adjust(P.Value, method = "BH")) %>%
  mutate(id = str_replace_all(id, "[.]", " "))

orderTab <- plotTab %>% select(id, P.Value, factor) %>% spread(key = factor, value = P.Value) %>%
  arrange(LF1/LF4)

plotTab <- plotTab %>% mutate(id = factor(id, levels = orderTab$id))

fdrCut <- -log10((filter(plotTab,P.adj <= 0.05) %>% arrange(desc(P.Value)))$P.Value[1])

plotFeature <- unique(filter(plotTab, P.adj <=0.05)$id)
plotTab <- filter(plotTab, id %in% plotFeature)# %>%
 # mutate(factor = ifelse(factor == "LF1", "F1", "F4 (CLL-PD)"))
plotSeahorse <- ggplot(plotTab, aes(x=id, y=-log10(P.Value), col = factor, dodge = factor)) + 
  geom_point(position = position_dodge(width=0.5),size=3) + 
  geom_linerange(aes(ymin = 0, ymax=-log10(P.Value)),position = position_dodge2(width=0.5), size=1) + 
  scale_color_manual(values = c(LF1=colList[3],LF4=colList[4]),labels = 
                      c("F1","F4 (CLL-PD)"), name = "") +
  geom_hline(yintercept = fdrCut, linetype = "dashed", color = "grey50") +
  coord_flip() + xlab("") + ylab(bquote("-log"[10]*"("*italic("P")~"value)")) + 
  annotate("text", x = Inf, y = fdrCut, label = paste0("5% FDR"),
             size = 3, vjust =1.5, hjust=-0.1) +
  theme_full + theme(legend.position = "bottom", 
                     legend.text = element_text(size=13)) 
plotSeahorse

Scatter plot for showing assocations

plotFeature <- c("maximal.respiration", "spare.respiratory.capacity", "OCR")
scatterTab <- seaMat[plotFeature,] %>% data.frame() %>%
  rownames_to_column("feature") %>% gather(key="patID", value = "value",-feature) %>%
  filter(!is.na(value)) %>%
  mutate(LF4 = facTab[match(patID, facTab$sample),]$value)

pList <- lapply(plotFeature, function(n) {
  eachTab <- filter(scatterTab, feature == n) %>%
   mutate(feature = str_replace_all(feature, "[.]", " "))
  corRes = cor.test(eachTab$value, eachTab$LF4)
  annoCoef <- paste("'coefficient ='~",format(corRes$estimate,digits = 2))
  annoP <- paste("italic(P)~'=",formatNum(corRes$p.value, digits = 1, format = "e"),"'")
  ggplot(eachTab, aes(x=LF4, y=value)) +
    geom_point(fill = colList[6],size=2, shape =21) + geom_smooth(method="lm",se=FALSE, linetype = "dashed", color = "grey50") +
    annotate("text", x = Inf, y = Inf, label = annoCoef,
           hjust=1, vjust =1, size = 5, parse = TRUE, col= colList[1]) +
    annotate("text", x = Inf, y = Inf, label = annoP,
           hjust=1, vjust =3, size = 5, parse = TRUE, col= colList[1]) +
    ggtitle(unique(eachTab$feature)) + ylab("OCR (pMol/min)") + xlab("CLL-PD") +
    theme_half +
    theme(plot.margin = margin(5,20,5,20),
          plot.title = element_text(size = 14, hjust =0.5, face="bold"))
})
grid.arrange(grobs= pList, ncol=3)
plotSeahorseScatter <- pList

Associations between CLL-PD(F4) and ex-vivo drug response phenotyps

Correlation test using limma

Association test

viabSD <- group_by(drug, Drug, Concentration) %>%
  summarise(sdViab = sd(normVal), meanViab = mean(normVal)) %>%
  filter(sdViab > genefilter::shorth(sdViab), meanViab < 0.9)

viabMat <- filter(drug, paste0(Drug, Concentration) %in% paste0(viabSD$Drug, viabSD$Concentration)) %>%
  distinct(patientID, Drug, auc) %>%
  filter(patientID %in% facTab$sample) %>%
  spread(key = patientID, value = auc) %>%
  data.frame() %>% column_to_rownames("Drug")

factorMatrix <- facTab.all %>% spread(key = factor, value = value) %>%
  data.frame() %>% column_to_rownames("sample") 
factorMatrix <- factorMatrix[colnames(viabMat),complete.cases(t(factorMatrix))]

designMat <- model.matrix(~1 +.,factorMatrix)
fit <- lmFit(viabMat, designMat)
fit2 <- eBayes(fit)

corRes <- limma::topTable(fit2, number ="all", coef = "LF4", adjust.method = "BH") %>%
  rownames_to_column("name") %>% as_tibble() %>% mutate(factor = "LF4")
corRes.LF1 <- limma::topTable(fit2, number ="all", coef = "LF1", adjust.method = "BH") %>%
  rownames_to_column("name") %>% as_tibble() %>% mutate(factor = "LF1")
corRes.LF2 <- limma::topTable(fit2, number ="all", coef = "LF2", adjust.method = "BH") %>%
  rownames_to_column("name") %>% as_tibble() %>% mutate(factor = "LF2")

Compare the strength of associations to F1 (IGHV), F2 (trisomy12) and F4 (CLL-PD)

plotTab <- bind_rows(corRes, corRes.LF1, corRes.LF2) %>% 
  mutate(P.adj = p.adjust(P.Value, method = "BH")) %>% arrange(P.Value)

drugShow <- filter(plotTab, P.adj <= 0.05) %>%
  arrange(P.Value) %>% distinct(name)

plotTab <- filter(plotTab, name %in% drugShow$name) %>%
  mutate(name = factor(name, levels = drugShow$name))

fdrCut <- -log10((filter(plotTab,P.adj <= 0.05) %>% arrange(desc(P.Value)))$P.Value[1])

plotDrugPval <- ggplot(plotTab, aes(x=name, y=-log10(P.Value), col = factor)) + 
  geom_point(size=3) + 
  scale_color_manual(values = c(LF1=colList[3],LF2 = colList[5], LF4=colList[4]), labels = c("F1","F2","F4 (CLL-PD)")) +
  geom_hline(yintercept = fdrCut, linetype = "dashed", color = "grey50") +
  xlab("") + ylab(bquote("-log"[10]*"("*italic("P")~"value)")) + 
  annotate("text", x = Inf, y = fdrCut, label = paste0("5% FDR"),
             size = 3, vjust =2, hjust=-0.1) +
  theme_full +
  theme(panel.grid.major = element_line(),
        panel.grid.minor = element_line(),
        axis.text.x = element_text(angle = 90, hjust=1, vjust=0.5))
plotDrugPval

Scatter plots significant assocations (5% FDR)

plotFeature <- as.character(filter(plotTab, factor == "LF4", P.adj < 0.05)$name)

scatterTab <- viabMat[plotFeature,] %>% data.frame() %>%
  rownames_to_column("feature") %>% gather(key="patID", value = "value",-feature) %>%
  filter(!is.na(value)) %>%
  mutate(LF4 = facTab[match(patID, facTab$sample),]$value)


plotDrugScatter <- lapply(plotFeature, function(n) {
  eachTab <- filter(scatterTab, feature == n) 
  res <- cor.test(eachTab$value, eachTab$LF4)
  annoCoef <- paste("coefficient =",format(res$estimate,digits = 2))
  annoP <- paste("italic(P)~'=",formatNum(res$p.value, digits = 1, format = "e"),"'")
  ggplot(eachTab, aes(x=LF4, y=value)) +
    geom_point(fill = colList[6],size=2,shape=21) + geom_smooth(method="lm",se=FALSE, linetype = "dashed", color = "grey50") +
    annotate("text", x = Inf, y = Inf, label = annoCoef,
           hjust=1, vjust =1, size = 5, col= colList[1]) +
    annotate("text", x = Inf, y = Inf, label = annoP,
           hjust=1, vjust =3, size = 5, col= colList[1], parse= TRUE) +
    ggtitle(unique(eachTab$feature)) + ylab("Viability") + xlab("CLL-PD") +
    #ylim(c(0.4,1.2)) +
    theme_half 
})
grid.arrange(grobs= plotDrugScatter, ncol=3)

A different arrangement

grid.arrange(grobs= plotDrugScatter, ncol=2)

Organizing figures for Section 4 in the manuscript

Main Figure 4

set.seed(2)
title = ggdraw() + draw_figure_label("Figure 4", fontface = "bold", position = "top.left",size=22)

leftGrid <- plot_grid(rnaEnrichHallmark + theme(plot.margin = margin(0,1,1,0,"cm")),
                      proteinEnrichHallmark + theme(plot.margin = margin(1,1,0,0,"cm")), 
                      ncol=1, 
                      rel_heights = c(1.2,1),labels = c("a","b"), label_size = 22,
                      vjust = c(1.5,1.5))

rightGrid <- plot_grid(plotSeahorse, comNet,
                       ncol=1,rel_heights = c(0.58, 0.6),
                       labels = c("c","d"), label_size = 22)

pout <- plot_grid(leftGrid, rightGrid,rel_widths = c(1,1), ncol =2)
plot_grid(title, pout, rel_heights = c(0.05,0.95), ncol = 1)

Supplementary figures

Enrichment for M-CLL and U-CLL separately

plot_grid(plotEnrichHallmark.U + theme(legend.position = "none"), NULL, plotEnrichHallmark.M, rel_widths = c(0.45,0.08, 0.5), ncol = 3, align = "v")

Asseble supplementary figures to show enrichment barplot

plot_grid(plotEnrich.CPG, plotEnrich.IL21CD40L, plotEnrich.Tcell, plotEnrich.IgM,
          ncol=2, align = "hv",axis = "tblr")

Supplementary figure of Pvalue and variance explained plot

plot_grid(plotDrugPval,
          plot_grid(plotlist = plotDrugScatter, ncol = 3),
          align = "v", nrow =2, labels = c(" "," "), 
          rel_heights = c(0.4,0.6),label_size = 22,ncol=1)

Supplementary Figure for mitochondrial biogenesis

plot_grid(grobHeatmap,
          plot_grid(plotMitoScatter, corMitoMarer, nrow=1, rel_widths = c(0.5,1), labels = c(" "," "), label_size = 22),
          ncol=1, rel_heights = c(3,1), labels = c(" ",""), label_size = 22)

lujunyan1118/mofaCLL documentation built on Dec. 21, 2021, 12:42 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

lujunyan1118/mofaCLL Multi-omics reveals clinically relevant proliferative drive associated with mTOR-MYC-OXPHOS activity in chronic lymphocytic leukemia

In lujunyan1118/mofaCLL: Multi-omics reveals clinically relevant proliferative drive associated with mTOR-MYC-OXPHOS activity in chronic lymphocytic leukemia

Gene expression signatures for CLL-PD (F4)

Pre-processing data

Identify genes associated with CLL-PD

Gene enrichment analysis

Enrichment using Hallmark genesets for all CLLs

Enrichment using Hallmark genesets for U-CLLs

Enrichment using Hallmark genesets for M-CLLs

Heatmap for enriched genesets

A network plot showing the overlap of gene enriched in those three pathways and annotate mitochondrial genes

Compare CLL-PD expression signature with other stimulation signatures

Expression profile of U-CLL cells treated with CPG (GSE30105)

Query and pre-processing microarray data

Differential expression and enrichment analysis

Signature of IL21 + CD40L

Pre-processing microarray data

Differential expression and enrichment analysis

T cell co-cultrue signatures

Differential expression and enrichment analysis

BCR-triggering signature by anti-IgM stimulation for 390 min (GSE3941)

Enrichment analysis using all above stimulation signatures

Proteomic signatures for CLL-PD

Detect proteins correlated with CLL-PD (LF4)

Process datasets

Correlate protein expression with CLL-PD using proDA

P-value histogram

List of significantly correlated proteins (10% FDR)

Enrichment using Hallmark genesets for all CLLs

Heatmap of enrichmed MYC targets

Scatter plot of selected proteins

MYC tagets

Differentially expressed proteins on mitochondria

Mitochondrial proteins

FACS validation using mitoTracker

Association between CLL-PD and energy metabolism

Correlation test using linear model

Plot P values for associations, straitified by F1 and F4

Scatter plot for showing assocations

Associations between CLL-PD(F4) and ex-vivo drug response phenotyps

Correlation test using limma

Compare the strength of associations to F1 (IGHV), F2 (trisomy12) and F4 (CLL-PD)

Scatter plots significant assocations (5% FDR)

Organizing figures for Section 4 in the manuscript

Main Figure 4

Supplementary figures

Enrichment for M-CLL and U-CLL separately

Asseble supplementary figures to show enrichment barplot

Supplementary figure of Pvalue and variance explained plot

Supplementary Figure for mitochondrial biogenesis

R Package Documentation

Browse R Packages

We want your feedback!

lujunyan1118/mofaCLL
Multi-omics reveals clinically relevant proliferative drive associated with mTOR-MYC-OXPHOS activity in chronic lymphocytic leukemia