In lujunyan1118/mofaCLL: Multi-omics reveals clinically relevant proliferative drive associated with mTOR-MYC-OXPHOS activity in chronic lymphocytic leukemia

Overview of MOFA output

plotDir = ifelse(exists(".standalone"), "", "part2/")
if(plotDir!="") if(!file.exists(plotDir)) dir.create(plotDir)
knitr::opts_chunk$set(fig.path=plotDir, dev=c("png", "pdf"),message = FALSE, warning = FALSE)
library(mofaCLL)
library(survival)
library(survminer)
library(DESeq2)
library(maxstat)
library(gridExtra)
library(car)
library(MOFA)
library(cowplot)
library(egg)
library(tidyverse)

Load datasets

data("mofaOut", "survival", "gene", "demographic", "doublingTime", "reactomeGS", "rna")

Variance explained by MOFA for each omic data

Plot variance explained

# Calculate the variance explained (R2) per factor in each view 
r2 <- calculateVarianceExplained(MOFAobject)
r2$R2Total


# Plot it
varExpPlot <- plotVarianceExplained.m(MOFAobject, censor = 0.25) 
varExpPlot

The first two latent factors represents IGHV and trisomy12

Get weights and factors

allWeights <- getWeights(MOFAobject,
                         views = "all",
                         factors = "all",
                         as.data.frame = TRUE) %>% as_tibble() %>%
  mutate(feature = ifelse(feature == "IGHV.status","IGHV",feature),
         factor = gsub("LF","F",factor))

allFactors <- getFactors(
  MOFAobject, 
  factors = "all",
  as.data.frame = TRUE
) %>% as.tibble() %>%
  mutate(factor = gsub("LF","F", factor))

patAnno <- gene[,c("IGHV", "trisomy12")] %>% data.frame() %>%
  rownames_to_column("sample") %>%
  filter(!is.na(IGHV),!is.na(trisomy12)) %>%
  mutate(IGHV = ifelse(IGHV==1, "M","U"),
         trisomy12 = ifelse(trisomy12 ==1, "yes","no"))

allFactors <- left_join(allFactors, 
                        patAnno,
                        by = "sample")

Plot the separation of the samples by the first two factors

plotTab <- filter(allFactors, factor %in% c("F1","F2")) %>%
  spread(key =factor, value = value) %>% mutate(trisomy12 = factor(trisomy12)) %>%
  filter(!is.na(IGHV), !is.na(trisomy12))

pcaLF1 <- ggplot(plotTab, aes(x=F1, y=F2, color = trisomy12, 
                         shape = IGHV, label = sample)) + 
  geom_point(size=3) +
  scale_shape_manual(values = c(M = 16, U =1)) +
  scale_color_manual(values = c(no = colList[1], yes = colList[2])) +
  theme_full
pcaLF1

Loadings of genomic variations on the first two factors

loadLF1 <- plotTopWeights.m(allWeights, "Mutations", "F1", nfeatures = 5) + 
  theme(axis.text = element_text(size=10),
        axis.title = element_text(size=15))
loadLF2 <- plotTopWeights.m(allWeights, "Mutations", "F2", nfeatures = 5) +
  theme(axis.text = element_text(size=10),
        axis.title = element_text(size=15))

Factor 1

loadLF1

Factor 2

loadLF2

Associations between Factor 1 and epigenetic subtypes

plotTab <- filter(allFactors, factor %in% c("F1","F2")) %>%
  mutate(Epitype = methCluster[sample]) %>%
  spread(key = factor ,value = value)

violinLF1_Meth <- ggplot(filter(plotTab, !is.na(Epitype)), aes(x=Epitype, y=F1, fill = Epitype)) +
  geom_violin() + geom_point() + scale_fill_manual(values = colList[4:length(colList)]) +
  theme_full + theme(legend.position = "none", axis.title.y = element_text(vjust=-2)) +
  xlab("") + ggtitle("Epigenetic subtypes")
violinLF1_Meth

Associations of latent factors to clinical behaviors

Univariate Cox regression

testTab <- left_join(allFactors, survival, by = c(sample = "patientID"))

#for OS
resOS <- filter(testTab, !is.na(OS)) %>%
  group_by(factor) %>%
  do(comSurv(.$value, .$OS, .$died, TRUE)) %>% ungroup() %>%
  arrange(p) %>% mutate(p.adj = p.adjust(p, method = "bonferroni")) %>%
  mutate(Endpoint = "OS")


#for TTT
resTTT <- filter(testTab, !is.na(TTT)) %>%
  group_by(factor) %>%
  do(comSurv(.$value, .$TTT, .$treatedAfter, TRUE)) %>% ungroup() %>%
  arrange(p) %>% mutate(p.adj = p.adjust(p, method = "bonferroni")) %>%
  mutate(Endpoint = "TTT")

Overall survival (OS)

resOS

Time to treatment (TTT)

resTTT

Plot p values and hazard ratios

plotTab <- bind_rows(resOS, resTTT) %>%
 filter(factor %in% c("F1","F2","F4"))

haPlot <- ggplot(plotTab, aes(x=factor, y = HR, col = Endpoint, dodge = Endpoint)) + 
  geom_hline(yintercept = 1, linetype = "dotted") +
  geom_point(position = position_dodge(width=0.8)) +
  geom_errorbar(position = position_dodge(width =0.8), 
                aes(ymin = lower, ymax = higher), width = 0.3, size=1) + 
  geom_text(position = position_dodge2(width = 0.8),
            aes(x=as.numeric(as.factor(factor))+0.15,
                label = sprintf("italic(P)~'='~'%s'",
                                formatNum(p))),
            color = "black",size =5, parse = TRUE) +
  xlab("Factor") + ylab("Hazard ratio") +
  scale_y_log10(limits = c(0.5,4)) +
  coord_flip() + theme_full + theme(legend.title = element_blank(),
                                    legend.position = c(0.2,0.1),
                                    legend.background = element_blank(),
                                    legend.key.size = unit(0.5,"cm"),
                                    legend.key.width = unit(0.6,"cm"),
                                    legend.text = element_text(size=rel(1.2))) +
  scale_color_manual(values = c(OS = colList[3], TTT = colList[5])) 

haPlot

Kaplan-Meiler plots

KM plot for overall survival (OS)

facList <- sort(filter(resOS, p.adj <=0.05)$factor)
osList <- lapply(facList, function(x) {
  eachTab <- filter(testTab, factor == x) %>%
    select(value, OS, died) %>% filter(!is.na(OS))
  pval <- filter(resOS, factor == x)$p
  km(eachTab$value, eachTab$OS, eachTab$died, sprintf("%s VS Overall survival time", x),
     stat = "maxstat", pval = pval, showTable = TRUE)
})

grid.arrange(grobs = osList, ncol = 2)

KM plot for time to treatment (TTT)

facList <- sort(filter(resTTT, p.adj <=0.01)$factor)
tttList <- lapply(facList, function(x) {
  eachTab <- filter(testTab, factor == x) %>%
    select(value, TTT, treatedAfter) %>% filter(!is.na(TTT))
    pval <- filter(resTTT, factor == x)$p
  km(eachTab$value, eachTab$TTT, eachTab$treatedAfter, sprintf("%s VS Time to treatment", x), stat = "maxstat",
     maxTime = 7, pval = pval, showTable = TRUE)
})

grid.arrange(grobs = tttList, ncol = 2)

KM plot for subgroup defined by IGHV status and median latent factor values

groupTab <- filter(testTab, factor == "F4", !is.na(value), !is.na(IGHV)) %>%
  mutate(subgroup = ifelse(value > median(value), paste0(IGHV,"_highF4"), paste0(IGHV,"_lowF4"))) 

plotList <- list()
# TTT
plotList[["TTT"]] <- km(groupTab$subgroup, groupTab$TTT, groupTab$treatedAfter, "Time to treatment", stat = "binary", maxTime = 7, showP = TRUE, showTable = TRUE, yLabelAdjust = -10)

# OS
plotList[["OS"]] <- km(groupTab$subgroup, groupTab$OS, groupTab$died, "Overall survival", stat = "binary", maxTime = 7, showP = TRUE, showTable = TRUE, yLabelAdjust = -10)

grid.arrange(grobs = plotList, ncol = 2)

KM plot for subgroup defined by median latent factor values of F1 and F4 (Supplementary Figure)

groupTab <- filter(testTab, factor %in% c("F1","F4"), !is.na(value)) %>%
  spread(key = factor, value = value) %>%
  mutate(LF1group = ifelse(F1 > median(F1), "highF1", "lowF1"),
         LF4group = ifelse(F4 > median(F4), "highF4","lowF4")) %>%
  mutate(subgroup = paste0(LF1group, "_",LF4group)) 

plotList1 <- list()
# TTT
plotList1[["TTT"]] <- km(groupTab$subgroup, groupTab$TTT, groupTab$treatedAfter, "Time to treatment", stat = "binary", maxTime = 7, showP = TRUE, showTable = TRUE, yLabelAdjust = -15)

# OS
plotList1[["OS"]] <- km(groupTab$subgroup, groupTab$OS, groupTab$died, "Overall survival", stat = "binary", maxTime = 7, showP = TRUE, showTable = TRUE, yLabelAdjust = -15)

grid.arrange(grobs = plotList1, ncol = 2)

Multi-variate Cox regression for Factor 4 (F4)

Prepare data for multi-variate Cox regression

survTab <- survival
facTab <- filter(allFactors, factor == "F4")
riskTab <- gene[,c("IGHV","TP53","NOTCH1","del17p","SF3B1")] %>% 
  data.frame() %>% rownames_to_column("patientID") %>%
  mutate(`TP53.del17p` = as.numeric(TP53 | del17p)) %>%
  select(-TP53, -del17p) %>%
  mutate_if(is.numeric, as.factor) %>%
  left_join(select(demographic, patientID, age, sex), by = "patientID") %>%
  mutate(age = age/10) %>%
  mutate(F4 = facTab[match(patientID, facTab$sample),]$value,
         IGHV = factor(IGHV, levels = c(0,1))) %>%
  dplyr::rename(IGHV_mutated = IGHV, Sex_male = sex, Age = age) %>%
  filter(!is.na(F4))

Time to treatment

resTTT <- runCox(survTab, riskTab, "TTT", "treatedAfter")

#summary(surv1)
haTTT <- plotHazard(resTTT, title = "Time to treatment") +
   scale_y_log10(limits=c(0.2,5))
haTTT

Overall survival

resOS <- runCox(survTab, riskTab, "OS", "died")

#summary(surv1)
haOS <- plotHazard(resOS,"Overall survival") + scale_y_log10(limits=c(0.05,5))
haOS

Correlation between F4 and demographics

Age

plotTab <- left_join(demographic, facTab, by = c(patientID = "sample")) %>%
  filter(!is.na(value)) %>%
  mutate(pretreat = ifelse(is.na(pretreat),NA,
                           ifelse(pretreat ==1 , "yes","no")),
         sex = ifelse(is.na(sex),NA,
                           ifelse(sex =="f" , "Female","Male")))

corRes <- cor.test(plotTab$age, plotTab$value)
pval <- formatNum(corRes$p.value, digits = 1)
annoN <- sprintf("n = %s", nrow(filter(plotTab,!is.na(age))))
annoP <- bquote(italic("P")~"="~.(pval))
annoCoef <- sprintf("coefficient = %1.2f",corRes$estimate)

plotAge <- ggplot(plotTab, aes(x = age, y = value)) + 
  geom_point(fill =colList[3], shape =21, size=3) + 
  geom_smooth(method = "lm", se=FALSE, color = "grey50", linetype ="dashed" ) + 
  annotate("text", x = max(plotTab$age), y = Inf, label = annoN,
           hjust=1, vjust =1.5, size = 5, parse = FALSE, col= colList[1]) +
  annotate("text", x = max(plotTab$age), y = Inf, label = annoP,
           hjust=1, vjust =3.5, size = 5, parse = FALSE, col= colList[1]) +
  annotate("text", x = max(plotTab$age), y = Inf, label = annoCoef,
           hjust=1, vjust =5.5, size = 5, parse = FALSE, col= colList[1]) +
  ylab("F4") + xlab("Age (years)") +
  theme_full
plotAge

Sex

corRes <- t.test(value ~ sex, plotTab)
pval <- formatNum(corRes$p.value, digits = 1)
annoP <- bquote(italic("P")~"="~.(pval))

plotTab <- group_by(plotTab, sex) %>% mutate(n=n()) %>% ungroup() %>%
  mutate(sex = sprintf("%s\n(n=%s)",sex,n))

plotSex <- ggplot(plotTab, aes(x = sex, y = value)) + 
  geom_violin(aes(fill = sex)) +
  geom_point() + 
  annotate("text", x = Inf, y = Inf, label = annoP,
           hjust=1.2, vjust =2, size = 5, parse = FALSE, col= colList[1]) +
  scale_fill_manual(values = colList) +
  ylab("F4") + xlab("Sex") +
  theme_full + theme(legend.position = "none")
plotSex

Pretreatment

corRes <- t.test(value ~ pretreat, plotTab)
annoP <- paste("italic(P)~'='~",formatNum(corRes$p.value, digits = 1, format = "e"))

plotTab <- filter(plotTab, !is.na(pretreat)) %>%
  group_by(pretreat) %>% mutate(n=n()) %>% ungroup() %>%
  mutate(pretreat = sprintf("%s\n(n=%s)",pretreat,n))

plotTreat <- ggplot(filter(plotTab,!is.na(pretreat)), aes(x = pretreat, y = value)) + 
  geom_violin(aes(fill = pretreat)) +
  geom_point() + 
  annotate("text", x = -Inf, y = Inf, label = annoP,
           hjust=-0.2, vjust =2, size = 5, parse = TRUE, col= colList[1]) +
  scale_fill_manual(values = colList[3:length(colList)]) +
  ylab("F4") + xlab("Pretreatment") +
  theme_full + theme(legend.position = "none")
plotTreat

Association between F4 and outcomes in treatment-naive patients

Univariate Cox regression

survival.untreated <- survival %>% mutate(pretreat = demographic[match(patientID, demographic$patientID),]$pretreat) %>%
  filter(pretreat ==0)
testTab <- left_join(allFactors, survival.untreated, by = c(sample = "patientID"))

#for OS
resOS <- filter(testTab, !is.na(OS), factor == "F4") %>%
  group_by(factor) %>%
  do(comSurv(.$value, .$OS, .$died, TRUE)) %>% ungroup() %>%
  arrange(p) %>% mutate(p.adj = p.adjust(p, method = "BH")) %>%
  mutate(Endpoint = "OS")

resOS

#for TTT
resTTT <- filter(testTab, !is.na(TTT), factor == "F4") %>%
  group_by(factor) %>%
  do(comSurv(.$value, .$TTT, .$treatedAfter, TRUE)) %>% ungroup() %>%
  arrange(p) %>% mutate(p.adj = p.adjust(p, method = "BH")) %>%
  mutate(Endpoint = "TTT")

resTTT

Kaplan-Meiler plots

KM plot for overall survival (OS)

eachTab <- filter(testTab, factor == "F4") %>%
  select(value, OS, died) %>% filter(!is.na(OS))
pval <- filter(resOS, factor == "F4")$p
kmOS.untreat <- km(eachTab$value, eachTab$OS, eachTab$died, "F4 VS Overall survival time",
     stat = "maxstat", pval = pval, showTable = TRUE)

kmOS.untreat

KM plot for time to treatment (TTT)

eachTab <- filter(testTab, factor == "F4") %>%
  select(value, TTT, treatedAfter) %>% filter(!is.na(TTT))
pval <- filter(resTTT, factor == "F4")$p
kmTTT.untreat <- km(eachTab$value, eachTab$TTT, eachTab$treatedAfter, "F4 VS Time to treatment",
     stat = "maxstat", pval = pval, showTable = TRUE)

kmTTT.untreat

Multi-variate Cox regression

Prepare data for multi-variate Cox regression

survTab <- survival.untreated
facTab <- filter(allFactors, factor == "F4")
riskTab <- gene[,c("IGHV","TP53","del17p","SF3B1")] %>% 
  data.frame() %>% rownames_to_column("patientID") %>%
  mutate(`TP53.del17p` = as.numeric(TP53 | del17p)) %>%
  select(-TP53, -del17p) %>%
  mutate_if(is.numeric, as.factor) %>%
  left_join(select(demographic, patientID, age, sex), by = "patientID") %>%
  mutate(age = age/10) %>%
  mutate(F4 = facTab[match(patientID, facTab$sample),]$value,
         IGHV = factor(IGHV, levels = c(0,1))) %>%
  dplyr::rename(IGHV_mutated = IGHV, Sex_male = sex, Age = age) %>%
  filter(!is.na(F4))

Time to treatment

resTTT <- runCox(survTab, riskTab, "TTT", "treatedAfter")

#summary(surv1)
haTTT.untreat <- plotHazard(resTTT, title = "Time to treatment")
haTTT.untreat

Overall survival

resOS <- runCox(survTab, riskTab, "OS", "died")

#summary(surv1)
haOS.untreat <- plotHazard(resOS,"Overall survival")
haOS.untreat

Correlation between F4 and Lymphocyte doubling time

Univariate test

Pearson's correlation test

LDT <- doublingTime %>% mutate(F4 = facTab[match(patID, facTab$sample),]$value) %>%
  filter(!is.na(F4)) %>%
  mutate(IGHV = as.factor(facTab[match(patID, facTab$sample),]$IGHV))
corRes <- cor.test(log10(LDT$doubling.time), LDT$F4)

Scatter plot of correlations

pval <- formatNum(corRes$p.value, digits = 1, format = "e")
annoN <- sprintf("n = %s", nrow(LDT))
annoP <- bquote(italic("P")~"="~.(pval))
annoCoef <- sprintf("coefficient = %1.2f",corRes$estimate)

corPlot <- ggplot(LDT, aes(x = F4, y = doubling.time/30)) + 
  geom_point(fill =colList[5], shape =21, size=3) + 
  geom_smooth(method = "lm", se=FALSE, color = "grey50", linetype ="dashed" ) +
  annotate("text", x = max(LDT$F4), y = Inf, label = annoN,
           hjust=1, vjust =1.5, size = 5, parse = FALSE, col= colList[1]) +
  annotate("text", x = max(LDT$F4), y = Inf, label = annoP,
           hjust=1, vjust =3.5, size = 5, parse = FALSE, col= colList[1]) +
  annotate("text", x = max(LDT$F4), y = Inf, label = annoCoef,
           hjust=1, vjust =5.5, size = 5, parse = FALSE, col= colList[1]) +
  ylab(bquote("doubling time (months)")) + ggtitle("Lymphocyte doubling time") +
  scale_y_log10() +
  theme_full


corPlot

Consider IGHV status

IGHV as a covariate

ANOVA test

LDT <- doublingTime %>% mutate(F4 = facTab[match(patID, facTab$sample),]$value,
                               IGHV = as.factor(facTab[match(patID, facTab$sample),]$IGHV)) %>%
  filter(!is.na(F4),!is.na(IGHV))

corRes <- car::Anova(lm(log10(doubling.time) ~ F4 + IGHV, data = LDT))
corRes

Only M-CLL

LDT.M <- filter(LDT, IGHV == "M") 
corRes.M <- cor.test(log2(LDT.M$doubling.time), LDT.M$F4)
corRes.M

Only U-CLL

LDT.U <- filter(LDT, IGHV == "U") 
corRes.U <- cor.test(log2(LDT.U$doubling.time), LDT.U$F4)
corRes.U

Scatter plot of correlations, stratified by IGHV

annoM <- sprintf("'M-CLL: n = %s, coefficient = %1.2f,'~italic(P)~'= %s'",nrow(LDT.M), corRes.M$estimate, formatNum(corRes.M$p.value, digits = 1, format = "e"))
annoU <- sprintf("'U-CLL: n = %s, coefficient = %1.2f,'~italic(P)~'= %s'", nrow(LDT.U), corRes.U$estimate,formatNum(corRes.U$p.value, digits = 1, format = "e"))

corPlot.IGHV <- ggplot(LDT, aes(x = F4, y = doubling.time/30, fill = IGHV, col = IGHV)) + 
  geom_point(shape=21, size=3, col = "black") + 
  geom_smooth(method = "lm", se=FALSE, linetype ="dashed" ) + 
  annotate("text", x = Inf, y = Inf, label = annoM,
           hjust=1.05, vjust =1.2, size = 4, parse = TRUE, color = colList[1]) +
  annotate("text", x = Inf, y = Inf, label = annoU,
           hjust=1.05, vjust =2.5, size = 4, parse = TRUE, color = colList[2]) +
  ylab("doubling time (months)") + ggtitle("Lymphocyte doubling time") +
  scale_fill_manual(values = c(M = colList[1],U=colList[2])) +
  scale_color_manual(values = c(M = colList[1],U=colList[2])) +
  scale_y_log10() +
  theme_full + theme(legend.position = "none")

corPlot.IGHV

Correlations in untreated patients only

LDT.untreat <- doublingTime %>% mutate(F4 = facTab[match(patID, facTab$sample),]$value, 
                               pretreat = demographic[match(patID, demographic$patientID),]$pretreat) %>%
  filter(!is.na(F4),!is.na(pretreat)) %>% filter(pretreat == 0)

corRes <- cor.test(LDT.untreat$doubling.time, LDT.untreat$F4)

annoText <- sprintf("'coefficient = %1.2f, '~italic(P)~'= %s'",corRes$estimate,formatNum(corRes$p.value, digits = 1, format = "e"))
corPlot.untreat <- ggplot(LDT.untreat, aes(x = F4, y = doubling.time/30)) + 
  geom_point(fill =colList[5], shape=21, size=3) + 
  geom_smooth(method = "lm", se=FALSE, color = "grey50", linetype ="dashed" ) + 
  annotate("text", x = 2.2, y = Inf, label = annoText,
           hjust=1, vjust =2, size = 4, parse = TRUE, col= colList[1]) +
  ylab("doubling time (months)") + ggtitle(sprintf("Lymphocyte doubling time\n(untreated patients only, n=%s)",nrow(LDT.untreat))) +
  scale_y_log10() +
  theme_full

corPlot.untreat

Variance expalined for lymphocyte doubling time

onlyIGHV <- summary(lm(log2(doubling.time) ~ IGHV, data = LDT))
onlyF4 <- summary(lm(log2(doubling.time) ~ F4, data = LDT))
combined <- summary(lm(log2(doubling.time) ~ F4 + IGHV, data = LDT))
plotTab <- tibble(model = c("IGHV only","F4 only", "IGHV + F4"),
                  R2 = c(onlyIGHV$adj.r.squared, onlyF4$r.squared, combined$adj.r.squared)) %>%
  mutate(model = factor(model, levels = model))
explainedDT <- ggplot(plotTab, aes(x=model, y = R2)) + geom_bar(stat = "identity", aes(fill = model), width = 0.8) +
  coord_flip(expand = FALSE, xlim = c(0.5,3.5)) + theme_half + scale_fill_manual(values = colList) +
  geom_text(aes(x = model, y =0.01, label = model), hjust =0, fontface = "bold", size =5) +
  theme(axis.ticks.y = element_blank(), axis.text.y = element_blank(), legend.position = "none",
        axis.title.y = element_text( size =13)) +
  xlab("Predictors") + ylab("Variance explained") 
explainedDT

Annotation for other four factor that only explain RNA expression variations

Factor 3

Assocations with RNAseq batch

batchTab <- filter(allFactors , factor == "F3") %>%
  mutate(batch = rna[,match(sample,colnames(rna))]$batch) %>% 
  filter(!is.na(batch)) %>%
  mutate(batch = paste0("batch", batch+1))

pval <- car::Anova(lm(value ~ factor(batch), batchTab))$`Pr(>F)`[1]
pval <- formatNum(pval, digits = 2)
pAnno <- bquote(italic("P")~"="~.(pval))
colListNew <- colList[-4]
pL3 <- ggplot(batchTab, aes(x=batch, y = value, col = batch)) +
  geom_boxplot() + 
  ggbeeswarm::geom_beeswarm() + 
  scale_color_manual(values = colListNew) +
  annotate("text", x=Inf, y=Inf, label=pAnno, hjust=1.5, vjust=1.5)+
  theme_full +
  theme(legend.position = "none") +
  ylab("F3 value") + xlab("") + ggtitle("F3 ~ RNAseq batch")
pL3

Factor 5

Assocations with CD4, CD8 expression

rna <- estimateSizeFactors(rna)
exprTab <- counts(rna[rowData(rna)$symbol %in% c("CD4","CD8A"),],normalized = TRUE) %>%
  t() %>% as_tibble(rownames = "sample") %>% 
  pivot_longer(-sample, names_to="id", values_to = "count") %>%
  mutate(symbol = rowData(rna)[id,]$symbol)


facTab <- filter(allFactors , factor == "F5")
plotTab <- left_join(exprTab, facTab, by = "sample")


pAnno <- bquote(italic("P")~"<"~.(10e-13))
pL5 <- ggplot(plotTab, aes(y=log2(count), x= value, col = symbol)) +
  geom_point() + geom_smooth(method = "lm", se = FALSE) +
  theme_half +
  scale_color_manual(values = colList) +
  annotate("text", x=-Inf, y=Inf, label=pAnno, hjust=-0.5, vjust=5, size=5)+
  xlab("F5 value") + ylab(bquote("log"[2]*"(RNAseq count)")) +
  ggtitle("T cell marker expressions ~ F5") +
  theme(legend.position = c(0.8,0.15),
        legend.text = element_text(size=15),
        legend.title = element_text(size=15)) +
  ylim(0,13)

Factor 6

fsea.results <- MOFA::runEnrichmentAnalysis(MOFAobject,view = "mRNA", factor =6 , feature.sets = reactomeGS)
enL6 <- MOFA::plotEnrichment(MOFAobject, fsea.results, factor = 6, max.pathways = 5) +
  ylab(bquote("-log"[10]*"(adjusted "*italic("P")~"value)")) +
  ggtitle("Pathways enriched for F6") + theme_half

#source table
outTab <- tibble(pathway = rownames(fsea.results$pval),
                 PValue = fsea.results$pval[,1],
                 adj.PValue = fsea.results$pval.adj[,1]) %>%
  arrange(PValue)

exprTab <- counts(rna[rowData(rna)$symbol %in% c("SOD1","GPX4"),],normalized = TRUE) %>%
  t() %>% as_tibble(rownames = "sample") %>% 
  pivot_longer(-sample, names_to="id", values_to = "count") %>%
  mutate(symbol = rowData(rna)[id,]$symbol)


facTab <- filter(allFactors , factor == "F6")
plotTab <- left_join(exprTab, facTab, by = "sample")

p1 <- cor.test(~ value + log2(count), filter(plotTab, symbol == "SOD1"))
p2 <- cor.test(~ value + log2(count), filter(plotTab, symbol == "GPX4"))

pAnno <- bquote(italic("P")~"<"~.(10e-13))
corL6 <- ggplot(plotTab, aes(y=log2(count), x= value, col = symbol)) +
  geom_point() + geom_smooth(method = "lm", se = FALSE) +
  theme_half +
  scale_color_manual(values = colList) +
  annotate("text", x=-Inf, y=Inf, label=pAnno, hjust=-0.5, vjust=5, size=5)+
  xlab("F6 value") + ylab(bquote("log"[2]*"(RNAseq count)")) +
  ggtitle("SOD1 and GPX4 ~ F6") +
  theme(legend.position = c(0.8,0.15),
        legend.text = element_text(size=15),
        legend.title  = element_text(size=15))




#source data

Factor 7

fsea.results <- MOFA::runEnrichmentAnalysis(MOFAobject,view = "mRNA", factor =7 , feature.sets = reactomeGS)
enL7 <- MOFA::plotEnrichment(MOFAobject, fsea.results, factor = 7, max.pathways = 5) +
  ylab(bquote("-log"[10]*"(adjusted "*italic("P")~"value)")) +
  ggtitle("Pathways enriched for F7") + theme_half

Arrange plots for Section 1 in the manuscript

Figure 1 in main text

plotTitle <- ggdraw() +draw_figure_label("Figure 1", fontface = "bold", position = "top.left",size=20)

topGrid <- plot_grid(varExpPlot, haPlot, haTTT, haOS, nrow =1, 
                     rel_widths = c(1,0.9,1,1), labels = c("a","b","e","f"), label_size = 20,
                     align = "h", axis = "l")
bottomGrid <- plot_grid(plotList[["TTT"]],plotList[["OS"]],NULL,
                        plot_grid(corPlot,explainedDT, rel_heights = c(0.70,0.30), ncol =1, labels = c("g","h"),label_size = 20, align = "v", axis = "l"),
                        nrow =1, rel_widths = c(1,1,0.02,0.85), labels = c("c","d"), label_size = 20)

plot_grid(plotTitle, topGrid, NULL, bottomGrid, rel_heights = c(0.02,0.45,0.02,0.5), ncol = 1)

Forest plot showing values of variance explained for each view

R2list <- calculateVarianceExplained(MOFAobject)
plotTab <- R2list$R2PerFactor %>%
  as_tibble(rownames = "factor") %>%
  pivot_longer(-factor, names_to = "view", values_to = "R2") %>%
  mutate(factor = str_replace(factor,"LF","F"))


p <- ggplot(plotTab, aes_string(x = "view", y = "R2")) +
  geom_point(size = 2) + 
  geom_segment(aes_string(xend = "view"), size = 0.75,yend = 0) + 
  expand_limits(y=0) +
  coord_flip() +
  facet_wrap(~factor, scale = "free_x") +
  xlab("") + ylab(bquote('Variance explained ('~R^2~')')) + 
  theme_full +
  theme(strip.text = element_text(size =15))
p

Annotation of LF1 for Supplementary figure

plot_grid(plot_grid(loadLF1, loadLF2, violinLF1_Meth, 
                    nrow =3, align = "hv", axis = "l", labels = c(" "," "," "), label_size = 20),
          NULL,
          plot_grid(pcaLF1,NULL, nrow=2, rel_heights  = c(0.8,0.2)),
          nrow = 1, rel_widths = c(0.4,.02, 0.6), labels = c("",""," "), label_size = 20)

KM plots for Supplementary Figure

grid.arrange(grobs = c(tttList, osList), ncol =2)

Lymphocyte doubling time for supplementary figure

plot_grid(corPlot.untreat, NULL,corPlot.IGHV, nrow =1, rel_widths = c(1,0.1,1))

Associations with demographics

plot_grid(plotAge, NULL,plotSex, NULL, plotTreat,ncol=1, rel_heights  = c(1,0.1,1,0.1,1))

Associations with outcomes in treatment-naive patients

plot_grid(plot_grid(kmTTT.untreat, haTTT.untreat, nrow =2, align = "h", axis = "l", rel_heights = c(0.55,0.45)), NULL,
          plot_grid(kmOS.untreat, haOS.untreat, nrow =2, align = "h", axis = "l", rel_heights = c(0.55,0.45)), nrow=1, rel_widths = c(1,0.1,1) )

Annotatons of other factors

p <- plot_grid(plot_grid(pL3, pL5, corL6, nrow=1, rel_widths = c(1,0.8,0.8), labels = c(" "," "," "), label_size = 20),
          plot_grid(enL6, enL7, nrow=1, labels = c(" "," "), label_size = 20, rel_widths = c(0.5,0.6)),nrow=2)
p

lujunyan1118/mofaCLL documentation built on Dec. 21, 2021, 12:42 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

lujunyan1118/mofaCLL Multi-omics reveals clinically relevant proliferative drive associated with mTOR-MYC-OXPHOS activity in chronic lymphocytic leukemia

In lujunyan1118/mofaCLL: Multi-omics reveals clinically relevant proliferative drive associated with mTOR-MYC-OXPHOS activity in chronic lymphocytic leukemia

Overview of MOFA output

Variance explained by MOFA for each omic data

The first two latent factors represents IGHV and trisomy12

Plot the separation of the samples by the first two factors

Loadings of genomic variations on the first two factors

Associations between Factor 1 and epigenetic subtypes

Associations of latent factors to clinical behaviors

Univariate Cox regression

Kaplan-Meiler plots

KM plot for overall survival (OS)

KM plot for time to treatment (TTT)

KM plot for subgroup defined by IGHV status and median latent factor values

KM plot for subgroup defined by median latent factor values of F1 and F4 (Supplementary Figure)

Multi-variate Cox regression for Factor 4 (F4)

Correlation between F4 and demographics

Association between F4 and outcomes in treatment-naive patients

Univariate Cox regression

Kaplan-Meiler plots

Multi-variate Cox regression

Correlation between F4 and Lymphocyte doubling time

Univariate test

Consider IGHV status

IGHV as a covariate

Only M-CLL

Only U-CLL

Variance expalined for lymphocyte doubling time

Annotation for other four factor that only explain RNA expression variations

Factor 3

Factor 5

Factor 6

Factor 7

Arrange plots for Section 1 in the manuscript

Figure 1 in main text

Forest plot showing values of variance explained for each view

Annotation of LF1 for Supplementary figure

KM plots for Supplementary Figure

Lymphocyte doubling time for supplementary figure

Associations with demographics

Associations with outcomes in treatment-naive patients

Annotatons of other factors

R Package Documentation

Browse R Packages

We want your feedback!

lujunyan1118/mofaCLL
Multi-omics reveals clinically relevant proliferative drive associated with mTOR-MYC-OXPHOS activity in chronic lymphocytic leukemia