Impact of genetic heterogeneity on energy metabolism of CLL

library(seahorseCLL)
library(BloodCancerMultiOmics2017)
library(SummarizedExperiment)
library(ggbeeswarm)
library(cowplot)
library(ggrepel)
library(reshape2)
library(RColorBrewer)
library(gridExtra)
library(xtable)
library(tidyverse)
plotDir = ifelse(exists(".standalone"), "", "section02/")
if(plotDir!="") if(!file.exists(plotDir)) dir.create(plotDir)
options(stringsAsFactors=FALSE)
#Global aesthetic options for ggplots
myTheme <- theme_bw() + theme(axis.title = element_text(size = 18), 
                              axis.text = element_text(size = 18),
                              legend.text = element_text(size =15), 
                              legend.title = element_text(size = 15),
                              panel.grid =  element_blank())

Data pre-processing

Load data

data("lpdAll", "seaOri","seaCombat", "patmeta", "mutCOM")

Subsetting

#overlap between the patient samples in seahorse dataset and main screen dataset
lpdCLL <- lpdAll[,pData(lpdAll)$Diagnosis %in% "CLL"]
seaMain <- intersect(colnames(seaOri),colnames(lpdCLL))
length(seaMain)

#CLL seahorse data
seaSub <- t(assays(seaOri[,seaMain])$seaMedian)

Get patient genetic background

#extract genetic background information
genBack <- Biobase::exprs(lpdCLL)[fData(lpdCLL)$type %in%
                          c("gen","Methylation_Cluster","IGHV"), seaMain]  

genBack <- genBack[! rownames(genBack) %in% 
                     c("del13q14_bi","del13q14_mono"),]

genBack <- data.frame(t(genBack))
colnames(genBack) <- replace(colnames(genBack), colnames(genBack) == "del13q14_any", "del13q14")

Hypothesis testing: genetic variants vs bioenergetic features

Pre-processing data for testing

#get genetic background information
geneSub <- t(genBack[seaMain,])

seaTest <- t(seaSub)

geneTab <- data.frame(geneSub) %>% rownames_to_column("Variant") %>%
  gather(key = "patID",value = "status", -Variant)
seaTab <- data.frame(seaSub) %>% rownames_to_column("patID") %>%
  gather(key = "Measurement", value = "value", -patID)

testTab <- left_join(seaTab, geneTab, by = "patID")

Perform ANOVA-test, including batch as a co-variate.

aovTest <- function(value, status, batch) {
  eachTab <- tibble(value, status, batch) %>%
    filter(!is.na(value), !is.na(status))
  if (nrow(eachTab) >=10 & sum(eachTab$status != 0) >= 5) {
     res <- summary(aov(value ~ factor(status) + factor(batch), data=eachTab))
     mdTab <- group_by(eachTab, status) %>%
       summarise(mm = mean(value)) %>% arrange(status)
     data.frame(P.raw = res[[1]][5][1,], dm = mdTab$mm[nrow(mdTab)] - mdTab$mm[1])
  } else {
     data.frame(P.raw = NA, dm = NA)
  }
}

testTab <- mutate(testTab, batch = factor(colData(seaOri)[patID,]$dateMST))

p.tab <- group_by(testTab, Measurement, Variant) %>%
  do(aovTest(.$value, .$status, .$batch)) %>%
  ungroup() %>% filter(!is.na(P.raw)) %>%
  mutate(P.adj = p.adjust(P.raw, method = "BH"))

hist(p.tab$P.raw, breaks=20, col= "lightgreen", 
     main = "Seahorse VS genetics", xlab = "raw P values")

Export a table of all tested associations

#pCut <- 0.01

tabOut <- filter(p.tab, P.adj <= 1) %>% mutate(Measurement = formatSea(Measurement)) %>%
  mutate(Variant = ifelse(Variant == "IGHV.Uppsala.U.M", "IGHV status", Variant)) %>%
  select(Measurement, Variant, P.raw, dm, P.adj)

colnames(tabOut) <- c("Seahorse measurment", "Genetic variant", "p value", "Difference of mean", "adjusted p value")

write(print(xtable(tabOut, digits = 3, 
             caption = "ANOVA test results (adjusted for batch effect) of energy metabolic measurements related to different genetic variants"), 
      include.rownames=FALSE,
      caption.placement = "top"), file = paste0(plotDir,"tTest_SeahorseVSgene.tex"))

Scatter plot of p values

#prepare table for plot
plotTab <- p.tab %>% mutate(type = ifelse(P.adj > 0.05, "not significant", 
                                          ifelse(dm >0, "higher","lower"))) %>%
  mutate(Variant = ifelse(Variant == "IGHV.Uppsala.U.M", "IGHV status", Variant),
         Measurement = formatSea(Measurement)) %>%
  mutate(varName = ifelse(type == "not significant","",Variant))

#define color
#colList <- c("#c0508a", "#79c858", "#7e45b9", "#c0ad52",
#             "#8c8bbd", "#c35c41", "#86bca8", "#4b3e3a","grey80")
colList <- c(`not significant` = "grey80", higher = "firebrick", lower = "darkblue")
#fdr cut-off
fdrCut <- max(filter(plotTab, type != "not significant")$P.raw)
pos = position_jitter(width = 0.15, seed = 10)
p <- ggplot(data=plotTab, aes(x= Measurement, y=-log10(P.raw),
                              col=type, label = varName))+ 
  geom_text_repel(position = pos, color = "black", size= 4, force = 3) +
  geom_hline(yintercept = -log10(fdrCut), linetype="dotted", color = "grey20") + 
  geom_point(size=3, position = pos) + 
  ylab(expression(-log[10]*'('*p~value*')')) + xlab("Seahore measurements") +
  theme_bw() + scale_color_manual(values = colList) +
  theme(axis.text.x = element_text(vjust=0.5, hjust = 1,size=15),
        axis.text.y = element_text(size=15),
        axis.title = element_text(size =18),
        legend.position = "none") +
  annotate(geom = "text", x = 0.6, y = -log10(fdrCut) + 0.5, label = "5% FDR") +
  coord_flip()
plot(p)

Plot all significant associations as beeswarm plot

p.tab.sig <- p.tab[p.tab$P.adj < 0.05,]

#batch effect corrected values should be used for plot
seaPlot <- assays(seaCombat)$seaMedian
seaPlot <- seaPlot[rownames(seaTest), colnames(seaTest)]

gList <- lapply(seq(1,nrow(p.tab.sig)), function(i) {
  seaName <- p.tab.sig[i,]$Measurement
  geneName <- p.tab.sig[i,]$Variant
  pval <- p.tab.sig[i,]$P.raw

  plotTab <- tibble(Measurement = seaPlot[seaName,], Mutation = geneSub[geneName,]) %>% 
    filter(!is.na(Measurement), !is.na(Mutation))
  #for IGHV
  if (geneName == "IGHV.Uppsala.U.M") {
    geneName <- "IGHV status"
    plotTab <- mutate(plotTab, 
                      Status = ifelse(Mutation == 0, 
                                      sprintf("unmutated\n(n=%s)", sum(Mutation == 0)), 
                                      sprintf("mutated\n(n=%s)", sum(Mutation == 1))))
  } else if (geneName == "Methylation_Cluster") {
    plotTab <- mutate(plotTab,
                      Status = ifelse(Mutation == 0,sprintf("LP\n(n=%s)", sum(Mutation == 0)),
                                      ifelse(Mutation == 1, sprintf("IP\n(n=%s)", sum(Mutation == 1)),
                                             sprintf("HP\n(n=%s)", sum(Mutation == 2)))))
  } else plotTab <- mutate(plotTab, 
                           Status = ifelse(Mutation == 0,
                                          sprintf("wild type\n(n=%s)", sum(Mutation == 0)), 
                                          sprintf("mutated\n(n=%s)", sum(Mutation == 1))))

  #reverse label factor (wildtype always on the rigt)
  plotTab <- mutate(plotTab, Status = factor(Status)) %>% 
    mutate(Status = factor(Status, levels = rev(levels(Status))))

  # color scheme, black for wildtype, red for mutated
  if (length(levels(plotTab$Status)) == 2) {
    colorList <- c("black","red")
    names(colorList) <- levels(plotTab$Status)
  } else {
    colorList <- c("lightblue","blue", "darkblue")
    names(colorList) <- levels(plotTab$Status)
  }

  #set y label
  if (seaName %in% c("ECAR.OCR.ration")) {
    yLab = "ECAR/OCR"
  } else if (seaName %in% c("maximal.respiration","spare.respiratory.capacity","basal.respiration",
                            "ATP.production","OCR","proton.leak")) {
    yLab = "OCR (pMol/min)" } else yLab = "ECAR (pMol/min)"

  #replace the "." in the mearement name with space
  seaName <- formatSea(seaName)

  #plot title
  plotTitle <- paste(sprintf("'%s ~ %s (p = '~",seaName,geneName),
                     sciPretty(pval, digits = 2),"*')'")

  ggplot(plotTab, aes(x=Status, y = Measurement)) + 
      stat_boxplot(geom = "errorbar", width = 0.3) +
      geom_boxplot(outlier.shape = NA, col="black", width=0.4) + 
      geom_beeswarm(cex=2, size =1, aes(col = Status)) + theme_classic() +
      xlab("") + ylab(yLab) + ggtitle(parse(text=plotTitle)) + 
      scale_color_manual(values = colorList) +
      theme(axis.line.x = element_blank(), axis.ticks.x = element_blank(),
             axis.title = element_text(size=18, face="bold"),
             axis.text = element_text(size=18),
             plot.title = element_text(hjust=0.5,size=13),
             legend.position = "none",
             axis.title.x = element_text(face="bold"))

})
names(gList) <- paste0(p.tab.sig$Measurement, "_",p.tab.sig$Variant)

Combine p value scatter plot and beeswarm plots (Figure 2)

title = ggdraw() + draw_figure_label("Figure 2", fontface = "bold", position = "top.left",size=22)
pout <- ggdraw() +
  draw_plot(p, 0, 0, 0.58, 1) +
  draw_plot(gList$glycolysis_IGHV.Uppsala.U.M, 0.6, 0.5 , .4, .48) +
  draw_plot(gList$glycolysis_Methylation_Cluster, 0.6, 0 , .4, .48) +
  draw_plot_label(c("A", "B", "C"), 
                  c(0, 0.6, 0.6), c(1, 1, 0.5), size = 20)
plot_grid(title, pout, rel_heights = c(0.05,0.95), ncol = 1)

Plot the rest in a separate pdf file (For supplementary figure)

gList$glycolysis_IGHV.Uppsala.U.M <- NULL
gList$glycolysis_Methylation_Cluster <- NULL
grid.arrange(grobs = gList, ncol=2)

Considering pretreatment status in a multi-variate model

data("pretreat")
testTab.pretreat <- testTab %>% mutate(treat = pretreat[patID,]) %>%
  filter(paste0(Measurement,Variant) %in% paste0(p.tab$Measurement, p.tab$Variant))

Perform ANOVA-test, including both batch and pretreatment status as a co-variate.

aovTest.pretreat <- function(value, status, batch, treat) {
  eachTab <- tibble(value, status, batch,treat) %>%
    filter(!is.na(value), !is.na(status),!is.na(treat))
  if (nrow(eachTab) >=10 & sum(eachTab$status != 0) >= 5) {
     res <- summary(aov(value ~ factor(status) * factor(treat) + factor(batch)  , data=eachTab))
     mdTab <- group_by(eachTab, status) %>%
       summarise(mm = mean(value)) %>% arrange(status)
     data.frame(P.raw = res[[1]][5][1,], dm = mdTab$mm[nrow(mdTab)] - mdTab$mm[1], 
                P.inter = res[[1]][5][4,])
  } else {
     data.frame(P.raw = NA, dm = NA)
  }
}

testTab <- mutate(testTab, batch = factor(colData(seaOri)[patID,]$dateMST))

p.tab.pretreat <- group_by(testTab.pretreat, Measurement, Variant) %>%
  do(aovTest.pretreat(.$value, .$status, .$batch,.$treat)) %>%
  ungroup() %>% filter(!is.na(P.raw)) %>%
  mutate(P.adj = p.adjust(P.raw, method = "BH"),
         P.inter.adj = p.adjust(P.inter, method = "BH"))

hist(p.tab.pretreat$P.raw, breaks=20, col= "lightgreen", 
     main = "Seahorse VS genetics", xlab = "raw P values")

Plot of p value comparison

pTab.compare <- left_join(select(p.tab, Measurement, Variant, P.raw, P.adj) %>% dplyr::rename(p.all = P.raw, p.adj.all = P.adj),
                          select(p.tab.pretreat, Measurement, Variant, P.raw, P.adj) %>% dplyr::rename(p.naive = P.raw,
                                                                                                p.adj.naive = P.adj),
                          by = c("Measurement","Variant")) %>%
  mutate(sigGroup = ifelse( p.adj.all > 0.05 & p.adj.naive > 0.05, "Below 5% FDR in both models",
                            ifelse(p.adj.all <= 0.05 & p.adj.naive > 0.05, "Significant without pretreatment in the model", 
                            ifelse(p.adj.all > 0.05 & p.adj.naive <= 0.05, "Significant with pretreatment accounted",
                                                                                                                                   "Significant in both models"))))

colorList <- c(`Below 5% FDR in both models` = "grey70",
               `Significant in both models` = "#E41A1C",
               `Significant without pretreatment in the model` = "#377EB8",
               `Significant with pretreatment accounted` = "#984EA3")
ggplot(pTab.compare, aes(x=-log10(p.all), y = -log10(p.naive), color = sigGroup)) + 
  geom_point() +
  geom_abline(intercept = 0, slope = 1, color = "red", linetype = "dotted") +
  theme_bw() + ylab(expression('-log'[10]*'P, accounting for pretreatment')) +
  xlab(expression('-log'[10]*'P, pretreatment not considered')) + 
  ggtitle("Bioenergetic features ~ genetic variants") +
  scale_color_manual(values = colorList, name = "Statistical significance") +
  theme(plot.title = element_text(face = "bold", hjust =0.5,size=15), 
        legend.position = c(0.65,0.15),
        legend.background = element_rect(colour = "black"),
        legend.text = element_text(size =10),
        legend.title = element_text(size = 12),
        axis.text = element_text(size =13),
        axis.title = element_text(size =14)) 


lujunyan1118/seahorseCLL documentation built on May 12, 2019, 6:16 p.m.