library(seahorseCLL) library(BloodCancerMultiOmics2017) library(SummarizedExperiment) library(ggbeeswarm) library(cowplot) library(ggrepel) library(reshape2) library(RColorBrewer) library(gridExtra) library(xtable) library(tidyverse)
plotDir = ifelse(exists(".standalone"), "", "section02/") if(plotDir!="") if(!file.exists(plotDir)) dir.create(plotDir) options(stringsAsFactors=FALSE)
#Global aesthetic options for ggplots myTheme <- theme_bw() + theme(axis.title = element_text(size = 18), axis.text = element_text(size = 18), legend.text = element_text(size =15), legend.title = element_text(size = 15), panel.grid = element_blank())
Load data
data("lpdAll", "seaOri","seaCombat", "patmeta", "mutCOM")
Subsetting
#overlap between the patient samples in seahorse dataset and main screen dataset lpdCLL <- lpdAll[,pData(lpdAll)$Diagnosis %in% "CLL"] seaMain <- intersect(colnames(seaOri),colnames(lpdCLL)) length(seaMain) #CLL seahorse data seaSub <- t(assays(seaOri[,seaMain])$seaMedian)
Get patient genetic background
#extract genetic background information genBack <- Biobase::exprs(lpdCLL)[fData(lpdCLL)$type %in% c("gen","Methylation_Cluster","IGHV"), seaMain] genBack <- genBack[! rownames(genBack) %in% c("del13q14_bi","del13q14_mono"),] genBack <- data.frame(t(genBack)) colnames(genBack) <- replace(colnames(genBack), colnames(genBack) == "del13q14_any", "del13q14")
Pre-processing data for testing
#get genetic background information geneSub <- t(genBack[seaMain,]) seaTest <- t(seaSub) geneTab <- data.frame(geneSub) %>% rownames_to_column("Variant") %>% gather(key = "patID",value = "status", -Variant) seaTab <- data.frame(seaSub) %>% rownames_to_column("patID") %>% gather(key = "Measurement", value = "value", -patID) testTab <- left_join(seaTab, geneTab, by = "patID")
Perform ANOVA-test, including batch as a co-variate.
aovTest <- function(value, status, batch) { eachTab <- tibble(value, status, batch) %>% filter(!is.na(value), !is.na(status)) if (nrow(eachTab) >=10 & sum(eachTab$status != 0) >= 5) { res <- summary(aov(value ~ factor(status) + factor(batch), data=eachTab)) mdTab <- group_by(eachTab, status) %>% summarise(mm = mean(value)) %>% arrange(status) data.frame(P.raw = res[[1]][5][1,], dm = mdTab$mm[nrow(mdTab)] - mdTab$mm[1]) } else { data.frame(P.raw = NA, dm = NA) } } testTab <- mutate(testTab, batch = factor(colData(seaOri)[patID,]$dateMST)) p.tab <- group_by(testTab, Measurement, Variant) %>% do(aovTest(.$value, .$status, .$batch)) %>% ungroup() %>% filter(!is.na(P.raw)) %>% mutate(P.adj = p.adjust(P.raw, method = "BH")) hist(p.tab$P.raw, breaks=20, col= "lightgreen", main = "Seahorse VS genetics", xlab = "raw P values")
Export a table of all tested associations
#pCut <- 0.01 tabOut <- filter(p.tab, P.adj <= 1) %>% mutate(Measurement = formatSea(Measurement)) %>% mutate(Variant = ifelse(Variant == "IGHV.Uppsala.U.M", "IGHV status", Variant)) %>% select(Measurement, Variant, P.raw, dm, P.adj) colnames(tabOut) <- c("Seahorse measurment", "Genetic variant", "p value", "Difference of mean", "adjusted p value") write(print(xtable(tabOut, digits = 3, caption = "ANOVA test results (adjusted for batch effect) of energy metabolic measurements related to different genetic variants"), include.rownames=FALSE, caption.placement = "top"), file = paste0(plotDir,"tTest_SeahorseVSgene.tex"))
Scatter plot of p values
#prepare table for plot plotTab <- p.tab %>% mutate(type = ifelse(P.adj > 0.05, "not significant", ifelse(dm >0, "higher","lower"))) %>% mutate(Variant = ifelse(Variant == "IGHV.Uppsala.U.M", "IGHV status", Variant), Measurement = formatSea(Measurement)) %>% mutate(varName = ifelse(type == "not significant","",Variant)) #define color #colList <- c("#c0508a", "#79c858", "#7e45b9", "#c0ad52", # "#8c8bbd", "#c35c41", "#86bca8", "#4b3e3a","grey80") colList <- c(`not significant` = "grey80", higher = "firebrick", lower = "darkblue") #fdr cut-off fdrCut <- max(filter(plotTab, type != "not significant")$P.raw) pos = position_jitter(width = 0.15, seed = 10) p <- ggplot(data=plotTab, aes(x= Measurement, y=-log10(P.raw), col=type, label = varName))+ geom_text_repel(position = pos, color = "black", size= 4, force = 3) + geom_hline(yintercept = -log10(fdrCut), linetype="dotted", color = "grey20") + geom_point(size=3, position = pos) + ylab(expression(-log[10]*'('*p~value*')')) + xlab("Seahore measurements") + theme_bw() + scale_color_manual(values = colList) + theme(axis.text.x = element_text(vjust=0.5, hjust = 1,size=15), axis.text.y = element_text(size=15), axis.title = element_text(size =18), legend.position = "none") + annotate(geom = "text", x = 0.6, y = -log10(fdrCut) + 0.5, label = "5% FDR") + coord_flip() plot(p)
Plot all significant associations as beeswarm plot
p.tab.sig <- p.tab[p.tab$P.adj < 0.05,] #batch effect corrected values should be used for plot seaPlot <- assays(seaCombat)$seaMedian seaPlot <- seaPlot[rownames(seaTest), colnames(seaTest)] gList <- lapply(seq(1,nrow(p.tab.sig)), function(i) { seaName <- p.tab.sig[i,]$Measurement geneName <- p.tab.sig[i,]$Variant pval <- p.tab.sig[i,]$P.raw plotTab <- tibble(Measurement = seaPlot[seaName,], Mutation = geneSub[geneName,]) %>% filter(!is.na(Measurement), !is.na(Mutation)) #for IGHV if (geneName == "IGHV.Uppsala.U.M") { geneName <- "IGHV status" plotTab <- mutate(plotTab, Status = ifelse(Mutation == 0, sprintf("unmutated\n(n=%s)", sum(Mutation == 0)), sprintf("mutated\n(n=%s)", sum(Mutation == 1)))) } else if (geneName == "Methylation_Cluster") { plotTab <- mutate(plotTab, Status = ifelse(Mutation == 0,sprintf("LP\n(n=%s)", sum(Mutation == 0)), ifelse(Mutation == 1, sprintf("IP\n(n=%s)", sum(Mutation == 1)), sprintf("HP\n(n=%s)", sum(Mutation == 2))))) } else plotTab <- mutate(plotTab, Status = ifelse(Mutation == 0, sprintf("wild type\n(n=%s)", sum(Mutation == 0)), sprintf("mutated\n(n=%s)", sum(Mutation == 1)))) #reverse label factor (wildtype always on the rigt) plotTab <- mutate(plotTab, Status = factor(Status)) %>% mutate(Status = factor(Status, levels = rev(levels(Status)))) # color scheme, black for wildtype, red for mutated if (length(levels(plotTab$Status)) == 2) { colorList <- c("black","red") names(colorList) <- levels(plotTab$Status) } else { colorList <- c("lightblue","blue", "darkblue") names(colorList) <- levels(plotTab$Status) } #set y label if (seaName %in% c("ECAR.OCR.ration")) { yLab = "ECAR/OCR" } else if (seaName %in% c("maximal.respiration","spare.respiratory.capacity","basal.respiration", "ATP.production","OCR","proton.leak")) { yLab = "OCR (pMol/min)" } else yLab = "ECAR (pMol/min)" #replace the "." in the mearement name with space seaName <- formatSea(seaName) #plot title plotTitle <- paste(sprintf("'%s ~ %s (p = '~",seaName,geneName), sciPretty(pval, digits = 2),"*')'") ggplot(plotTab, aes(x=Status, y = Measurement)) + stat_boxplot(geom = "errorbar", width = 0.3) + geom_boxplot(outlier.shape = NA, col="black", width=0.4) + geom_beeswarm(cex=2, size =1, aes(col = Status)) + theme_classic() + xlab("") + ylab(yLab) + ggtitle(parse(text=plotTitle)) + scale_color_manual(values = colorList) + theme(axis.line.x = element_blank(), axis.ticks.x = element_blank(), axis.title = element_text(size=18, face="bold"), axis.text = element_text(size=18), plot.title = element_text(hjust=0.5,size=13), legend.position = "none", axis.title.x = element_text(face="bold")) }) names(gList) <- paste0(p.tab.sig$Measurement, "_",p.tab.sig$Variant)
Combine p value scatter plot and beeswarm plots (Figure 2)
title = ggdraw() + draw_figure_label("Figure 2", fontface = "bold", position = "top.left",size=22) pout <- ggdraw() + draw_plot(p, 0, 0, 0.58, 1) + draw_plot(gList$glycolysis_IGHV.Uppsala.U.M, 0.6, 0.5 , .4, .48) + draw_plot(gList$glycolysis_Methylation_Cluster, 0.6, 0 , .4, .48) + draw_plot_label(c("A", "B", "C"), c(0, 0.6, 0.6), c(1, 1, 0.5), size = 20) plot_grid(title, pout, rel_heights = c(0.05,0.95), ncol = 1)
Plot the rest in a separate pdf file (For supplementary figure)
gList$glycolysis_IGHV.Uppsala.U.M <- NULL gList$glycolysis_Methylation_Cluster <- NULL grid.arrange(grobs = gList, ncol=2)
data("pretreat") testTab.pretreat <- testTab %>% mutate(treat = pretreat[patID,]) %>% filter(paste0(Measurement,Variant) %in% paste0(p.tab$Measurement, p.tab$Variant))
Perform ANOVA-test, including both batch and pretreatment status as a co-variate.
aovTest.pretreat <- function(value, status, batch, treat) { eachTab <- tibble(value, status, batch,treat) %>% filter(!is.na(value), !is.na(status),!is.na(treat)) if (nrow(eachTab) >=10 & sum(eachTab$status != 0) >= 5) { res <- summary(aov(value ~ factor(status) * factor(treat) + factor(batch) , data=eachTab)) mdTab <- group_by(eachTab, status) %>% summarise(mm = mean(value)) %>% arrange(status) data.frame(P.raw = res[[1]][5][1,], dm = mdTab$mm[nrow(mdTab)] - mdTab$mm[1], P.inter = res[[1]][5][4,]) } else { data.frame(P.raw = NA, dm = NA) } } testTab <- mutate(testTab, batch = factor(colData(seaOri)[patID,]$dateMST)) p.tab.pretreat <- group_by(testTab.pretreat, Measurement, Variant) %>% do(aovTest.pretreat(.$value, .$status, .$batch,.$treat)) %>% ungroup() %>% filter(!is.na(P.raw)) %>% mutate(P.adj = p.adjust(P.raw, method = "BH"), P.inter.adj = p.adjust(P.inter, method = "BH")) hist(p.tab.pretreat$P.raw, breaks=20, col= "lightgreen", main = "Seahorse VS genetics", xlab = "raw P values")
pTab.compare <- left_join(select(p.tab, Measurement, Variant, P.raw, P.adj) %>% dplyr::rename(p.all = P.raw, p.adj.all = P.adj), select(p.tab.pretreat, Measurement, Variant, P.raw, P.adj) %>% dplyr::rename(p.naive = P.raw, p.adj.naive = P.adj), by = c("Measurement","Variant")) %>% mutate(sigGroup = ifelse( p.adj.all > 0.05 & p.adj.naive > 0.05, "Below 5% FDR in both models", ifelse(p.adj.all <= 0.05 & p.adj.naive > 0.05, "Significant without pretreatment in the model", ifelse(p.adj.all > 0.05 & p.adj.naive <= 0.05, "Significant with pretreatment accounted", "Significant in both models")))) colorList <- c(`Below 5% FDR in both models` = "grey70", `Significant in both models` = "#E41A1C", `Significant without pretreatment in the model` = "#377EB8", `Significant with pretreatment accounted` = "#984EA3") ggplot(pTab.compare, aes(x=-log10(p.all), y = -log10(p.naive), color = sigGroup)) + geom_point() + geom_abline(intercept = 0, slope = 1, color = "red", linetype = "dotted") + theme_bw() + ylab(expression('-log'[10]*'P, accounting for pretreatment')) + xlab(expression('-log'[10]*'P, pretreatment not considered')) + ggtitle("Bioenergetic features ~ genetic variants") + scale_color_manual(values = colorList, name = "Statistical significance") + theme(plot.title = element_text(face = "bold", hjust =0.5,size=15), legend.position = c(0.65,0.15), legend.background = element_rect(colour = "black"), legend.text = element_text(size =10), legend.title = element_text(size = 12), axis.text = element_text(size =13), axis.title = element_text(size =14))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.