Associations between drug response phenotype and energy metabolism of CLL

library(seahorseCLL)
library(BloodCancerMultiOmics2017)
library(SummarizedExperiment)
library(RColorBrewer)
library(grid)
library(gridExtra)
library(ggrepel)
library(cowplot)
library(xtable)
library(robustbase)
library(tidyverse)
library(Biobase)
plotDir = ifelse(exists(".standalone"), "", "section03/")
if(plotDir!="") if(!file.exists(plotDir)) dir.create(plotDir)
options(stringsAsFactors=FALSE)

Data pre-processing

Load data set

data("lpdAll","drugs", "patmeta", "seaCombat", "conctab")

Sample subsetting

#get drug response data for CLL samples only
lpdCLL <- lpdAll[fData(lpdAll)$type == "viab",pData(lpdAll)$Diagnosis == "CLL"]

#get overlapped samples
sampleOverlap <- intersect(colnames(lpdCLL), colnames(seaCombat))
seaSub <- seaCombat[,sampleOverlap]
lpdCLL <- lpdCLL[,sampleOverlap]

How many overlapped samples?

length(sampleOverlap)

Remove bad drugs. Bortezomib lost its activity during storage. The data for this drug and NSC 74859 were discarded from further analysis.

badrugs = c("D_008", "D_025") 
lpdCLL <- lpdCLL[!fData(lpdCLL)$id %in% badrugs,]

Get drug response data

# get drug responsee data
get.drugresp <- function(lpd) {
  drugresp = t(Biobase::exprs(lpd[fData(lpd)$type == 'viab'])) %>%
    tbl_df %>% dplyr::select(-ends_with(":5")) %>%
    dplyr::mutate(ID = colnames(lpd)) %>%
    tidyr::gather(drugconc, viab, -ID) %>%
    dplyr::mutate(drug = drugs[substring(drugconc, 1, 5), "name"],
           conc = sub("^D_([0-9]+_)", "", drugconc)) %>%
    dplyr::mutate(conc = as.integer(gsub("D_CHK_", "", conc)))

  drugresp
}
drugresp <- get.drugresp(lpdCLL)

Use median polish to summarise drug response of the five concentrations

get.medp <- function(drugresp) {
  tab = drugresp %>% group_by(drug, conc) %>% 
    do(data.frame(v = .$viab, ID = .$ID)) %>% spread(ID, v)

  med.p = lapply(unique(tab$drug), function(n) {
    tb = filter(tab, drug == n) %>% ungroup() %>% select(-(drug:conc)) %>% 
      as.matrix %>% `rownames<-`(1:5)
    mdp = stats::medpolish(tb, trace.iter = FALSE)
    df = as.data.frame(mdp$col) + mdp$overall
    colnames(df) <- n
    df
  }) %>% do.call(cbind,.)

  medp.viab = tbl_df(med.p) %>% mutate(ID = rownames(med.p)) %>%
    gather(drug, viab, -ID) 
  medp.viab
}
drugresp.mp <- get.medp(drugresp)

Association test between drug response and seahorse measurements

Function to calculate correlation given a drug table and seahorse table

corTest <- function(patID, viab, seaTable, ighv = NULL, pretreat = NULL) {
  viab <- setNames(viab, patID)

  corTab <- lapply(seq(1,nrow(seaTable)), function(i) {
    seaName <- rownames(seaTable)[i]

    #remove NA samples in Seahorse entry
    seaVal <- seaTable[seaName,]
    seaVal <- seaVal[!is.na(seaVal)]


    #get useable sample list
    if (!is.null(ighv)) {
      patList <- intersect(names(ighv), intersect(patID, names(seaVal)))
      ighvVal <- ighv[patList]
    } else {
      patList <- intersect(patID, names(seaVal))
    }

    if (!is.null(pretreat)) {
      patList <- intersect(names(pretreat), patList)
      pretreat <- pretreat[patList]
    }

    #subset drug value
    drugVal <- viab[patList]
    seaVal <- seaVal[patList]

    #correlation test, block for IGHV
    if (!is.null(ighv)) {
      res <- summary(lm(seaVal ~ drugVal + ighvVal))
      data.frame(seahorse = seaName, 
               p = res$coefficients[2,4], 
               coef =  sqrt(res$r.squared) * sign(res$coefficients[2,3]),
               stringsAsFactors = FALSE)
    } else if (!is.null(pretreat)) {
      res <- summary(lm(seaVal ~ drugVal + pretreat))
      data.frame(seahorse = seaName, 
               p = res$coefficients[2,4], 
               coef =  sqrt(res$r.squared) * sign(res$coefficients[2,3]),
               stringsAsFactors = FALSE)
    } else {
      res <- cor.test(seaVal, drugVal, method = "pearson")
      data.frame(seahorse = seaName, 
         p = res$p.value, 
         coef =  res$estimate[[1]],
         stringsAsFactors = FALSE)

    }
  }) %>% do.call(rbind,.)
}

Correlation test without blocking for IGHV

Calculate correlation coefficient and p values

seaTest <- assays(seaSub)$seaMedian
resTab.noBlock <- group_by(drugresp.mp, drug) %>% do(corTest(.$ID, .$viab, seaTest, ighv = NULL)) %>% ungroup() %>%
  mutate(p.adj = p.adjust(p, method = "BH"))

How many significant associations at 10% FDR?

resTab.noBlock %>% filter(p.adj <= 0.1) %>% nrow()

How many drugs show at least one significant assocations?

resTab.noBlock %>% filter(p.adj <= 0.1) %>% filter(!duplicated(drug)) %>% nrow()

Correlation test with blocking for IGHV

Calculate correlation coefficient and p values

#get IGHV stauts
ighv <- Biobase::exprs(lpdAll)["IGHV Uppsala U/M",]
ighv <-ighv[!is.na(ighv)]


resTab <- group_by(drugresp.mp, drug) %>% do(corTest(.$ID, .$viab, seaTest, ighv = ighv)) %>% ungroup() %>%
  mutate(p.adj = p.adjust(p, method = "BH"))

How many significant associations at 10% FDR?

resTab %>% filter(p.adj <= 0.1) %>% nrow()

How many drugs show at least one significant assocations?

resTab %>% filter(p.adj <= 0.1) %>% filter(!duplicated(drug)) %>% nrow()

Correlation test with blocking for pretreatment status

Calculate correlation coefficient and p values

#get IGHV stauts
ighv <- Biobase::exprs(lpdAll)["IGHV Uppsala U/M",]
ighv <-ighv[!is.na(ighv)]

#get pretreatment status
data("pretreat")
pretreat <- structure(pretreat$pretreat, names = rownames(pretreat))

resTab.pretreat <- group_by(drugresp.mp, drug) %>% do(corTest(.$ID, .$viab, seaTest, ighv = NULL, pretreat = pretreat)) %>% ungroup() %>%
  mutate(p.adj = p.adjust(p, method = "BH"))

How many significant associations at 10% FDR?

resTab.pretreat %>% filter(p.adj <= 0.1) %>% nrow()

How many drugs show at least one significant assocations?

resTab.pretreat %>% filter(p.adj <= 0.1) %>% filter(!duplicated(drug)) %>% nrow()

Compare p values

compareTab <- left_join(resTab.noBlock, resTab.pretreat, by = c("drug","seahorse")) %>%
    mutate(sigGroup = ifelse( p.adj.x > 0.05 & p.adj.y > 0.05, "Below 5% FDR in both models",
                            ifelse(p.adj.x <= 0.05 & p.adj.y > 0.05, "Significant without pretreatment in the model", 
                            ifelse(p.adj.x > 0.05 & p.adj.y <= 0.05, "Significant with pretreatment accounted",
                                                                                                                                   "Significant in both models"))))

colorList <- c(`Below 5% FDR in both models` = "grey70",
               `Significant in both models` = "#E41A1C",
               `Significant without pretreatment in the model` = "#377EB8",
               `Significant with pretreatment accounted` = "#984EA3")

ggplot(compareTab, aes(x=-log10(p.x), y = -log10(p.y), color = sigGroup)) + 
  geom_point() +
  geom_abline(intercept = 0, slope = 1, color = "red", linetype = "dotted") +
  theme_bw() + ylab(expression('-log'[10]*'P, accounting for pretreatment')) +
  xlab(expression('-log'[10]*'P, pretreatment not considered')) + 
  ggtitle("Bioenergetic features ~ drug responses") +
  scale_color_manual(values = colorList, name = "Statistical significance") +
  theme(plot.title = element_text(face = "bold", hjust =0.5,size=15), 
        legend.position = c(0.65,0.15),
        legend.background = element_rect(colour = "black"),
        legend.text = element_text(size =8),
        legend.title = element_text(size = 10),
        axis.text = element_text(size =13),
        axis.title = element_text(size =14)) 

Create a table showing the associations that are significant only without pretreatment in the model

tabOut <- filter(compareTab, sigGroup == "Significant without pretreatment in the model") %>% 
  arrange(drug) %>% mutate(seahorse = formatSea(seahorse)) %>% select(-sigGroup,-coef.x, -coef.y) %>%
  rename(`Drug name` = drug, `Seahorse measurement` = seahorse,
         `p value (pretreatment not considered)` = p.x,
         `adjusted p value (pretreatment not considered)` = p.adj.x,
         `p value (accounted for pretreatment)` = p.y,
         `adjusted p value (accounted for pretreatment)` = p.adj.y)

write(print(xtable(tabOut, digits = 3, 
             caption = "Associations between bioenergetic features and drug responses that are only significant with pretreatment in the model"), 
      include.rownames=FALSE,
      caption.placement = "top"), file = paste0(plotDir,"seahorseVSdrug_onlyWithoutPretreat.tex"))

P-value scatter plot

Correlation test with blocking for IGHV status (Supplementary Figure)

Preocess table for plotting

atLeastOne <- group_by(resTab, drug) %>% summarise(sigNum = sum(p.adj <= 0.1)) %>% filter(sigNum > 0)
plotTab <- filter(resTab, drug %in% atLeastOne$drug) %>% 
  mutate(seahorse = ifelse(p.adj > 0.1, "not significant", seahorse))

#change mearement name 
plotTab$seahorse <- sapply(plotTab$seahorse, function(x) {gsub("\\."," ",x)})

#define the group of seahorse measurement
measureType <- tibble(measure = rownames(seaCombat), type = rowData(seaCombat)$type) %>% 
  mutate(type = ifelse(type %in% c("ECAR.OCR","GST","ECAR"), "glycolysis", "respiration"))

#generate color list separately for each group
glyList <- setNames(tail(brewer.pal(9,"OrRd"),nrow(filter(measureType, type =="glycolysis"))),
                    filter(measureType, type =="glycolysis")$measure)
resList <- setNames(tail(brewer.pal(9,"GnBu"),nrow(filter(measureType, type =="respiration"))),
                    filter(measureType, type =="respiration")$measure)
nosig <- c("not significant" = "grey80")
colList <- c(glyList, resList, nosig)
names(colList) <- sapply(names(colList), function(x) {gsub("\\."," ",x)})

#order the factor for seahorse measurment
plotTab$seahorse <- factor(plotTab$seahorse, levels = names(colList))

#add the direction of correlation
plotTab <- mutate(plotTab, Direction = ifelse(coef > 0, "positive", "negative"))

#get the cutoff value
fdrCut <- max(filter(plotTab, seahorse != "not significant")$p)

Plot

p <- ggplot(data = plotTab, aes(x = drug,y=-log10(p), fill = seahorse, color = seahorse, shape = Direction)) + 
  geom_point(size=4) + scale_fill_manual(values = colList) + scale_color_manual(values = colList) +
  geom_hline(yintercept = -log10(fdrCut), linetype="dotted") + 
  ylab(expression(-log[10]*'('*p~value*')')) + xlab("") + theme_bw() + 
  scale_shape_manual(values = c(positive = 24, negative = 25)) +
  theme(axis.text.x = element_text(angle = 90, vjust=0.5, hjust = 1,size=15),
        axis.text.y = element_text(size=15),
        axis.title = element_text(size =15, face = "bold")) +
  guides(fill="none", color = guide_legend(title = "Measurement"))

plot(p)

Correlation test without blocking for IGHV status ( Figure 4)

Preocess table for plotting

atLeastOne <- group_by(resTab.noBlock, drug) %>% summarise(sigNum = sum(p.adj <= 0.1)) %>% filter(sigNum > 0)
plotTab <- filter(resTab.noBlock, drug %in% atLeastOne$drug) %>% 
  mutate(seahorse = ifelse(p.adj > 0.1, "not significant", seahorse))

#change mearement name 
plotTab$seahorse <- sapply(plotTab$seahorse, function(x) {gsub("\\."," ",x)})

#define the group of seahorse measurement
measureType <- tibble(measure = rownames(seaCombat), type = rowData(seaCombat)$type) %>% 
  mutate(type = ifelse(type %in% c("ECAR.OCR","GST","ECAR"), "glycolysis", "respiration"))

#generate color list separately for each group
glyList <- setNames(tail(brewer.pal(9,"OrRd"),nrow(filter(measureType, type =="glycolysis"))),
                    filter(measureType, type =="glycolysis")$measure)
resList <- setNames(tail(brewer.pal(9,"GnBu"),nrow(filter(measureType, type =="respiration"))),
                    filter(measureType, type =="respiration")$measure)

nosig <- c("not significant" = "grey80")
colList <- c(glyList, resList, nosig)
names(colList) <- sapply(names(colList), function(x) {gsub("\\."," ",x)})

#order the factor for seahorse measurment
plotTab$seahorse <- factor(plotTab$seahorse, levels = names(colList))

#add direction iformation
plotTab <- mutate(plotTab, Direction = ifelse(coef > 0, "positive", "negative"))

#get the cutoff value
fdrCut <- max(filter(plotTab, seahorse != "not significant")$p)
drugManhattan <- ggplot(data = plotTab, aes(x = drug,y=-log10(p), fill = seahorse, color = seahorse, shape = Direction)) + 
  geom_point(size=4) + scale_fill_manual(values = colList) + scale_color_manual(values = colList) +
  geom_hline(yintercept = -log10(fdrCut), linetype="dotted") + 
  ylab(expression(-log[10]*'('*p~value*')')) + xlab("") + theme_bw() + 
  scale_shape_manual(values = c(positive = 24, negative = 25)) +
  theme(axis.text.x = element_text(angle = 90, vjust=0.5, hjust = 1,size=15),
        axis.text.y = element_text(size=15),
        axis.title = element_text(size =15, face = "bold"),
        plot.title = element_text(size=20, face = "bold")) +
  guides(fill="none", color = guide_legend(title = "Measurement")) 


plot(drugManhattan)

Correlation test with blocking for pretreatment status (Supplementary Figure)

Preocess table for plotting

atLeastOne <- group_by(resTab.pretreat, drug) %>% summarise(sigNum = sum(p.adj <= 0.1)) %>% filter(sigNum > 0)
plotTab <- filter(resTab.pretreat, drug %in% atLeastOne$drug) %>% 
  mutate(seahorse = ifelse(p.adj > 0.1, "not significant", seahorse))

#change mearement name 
plotTab$seahorse <- sapply(plotTab$seahorse, function(x) {gsub("\\."," ",x)})

#define the group of seahorse measurement
measureType <- tibble(measure = rownames(seaCombat), type = rowData(seaCombat)$type) %>% 
  mutate(type = ifelse(type %in% c("ECAR.OCR","GST","ECAR"), "glycolysis", "respiration"))

#generate color list separately for each group
glyList <- setNames(tail(brewer.pal(9,"OrRd"),nrow(filter(measureType, type =="glycolysis"))),
                    filter(measureType, type =="glycolysis")$measure)
resList <- setNames(tail(brewer.pal(9,"GnBu"),nrow(filter(measureType, type =="respiration"))),
                    filter(measureType, type =="respiration")$measure)
nosig <- c("not significant" = "grey80")
colList <- c(glyList, resList, nosig)
names(colList) <- sapply(names(colList), function(x) {gsub("\\."," ",x)})

#order the factor for seahorse measurment
plotTab$seahorse <- factor(plotTab$seahorse, levels = names(colList))

#add the direction of correlation
plotTab <- mutate(plotTab, Direction = ifelse(coef > 0, "positive", "negative"))

#get the cutoff value
fdrCut <- max(filter(plotTab, seahorse != "not significant")$p)

Plot

p <- ggplot(data = plotTab, aes(x = drug,y=-log10(p), fill = seahorse, color = seahorse, shape = Direction)) + 
  geom_point(size=4) + scale_fill_manual(values = colList) + scale_color_manual(values = colList) +
  geom_hline(yintercept = -log10(fdrCut), linetype="dotted") + 
  ylab(expression(-log[10]*'('*p~value*')')) + xlab("") + theme_bw() + 
  scale_shape_manual(values = c(positive = 24, negative = 25)) +
  theme(axis.text.x = element_text(angle = 90, vjust=0.5, hjust = 1,size=15),
        axis.text.y = element_text(size=15),
        axis.title = element_text(size =15, face = "bold")) +
  guides(fill="none", color = guide_legend(title = "Measurement"))

plot(p)

Scatter plot of associations

Scatter plot for all significant pairs

resTab.sig <- filter(resTab, p.adj <= 0.1)

scatterList <- lapply(seq(1,nrow(resTab.sig)), function(i){
  seaName <- resTab.sig[i,]$seahorse
  p <- resTab.sig[i,]$p
  coef <- format(resTab.sig[i,]$coef,digits = 2)
  drugName <- resTab.sig[i,]$drug

  #remove NA samples in Seahorse entry
  seaVal <- seaTest[seaName,]
  seaVal <- seaVal[!is.na(seaVal)]

  #get useable sample list
  patList <- intersect(filter(drugresp.mp, drug == drugName)$ID, names(seaVal))

  #set y label
  if (seaName %in% c("ECAR.OCR.ration")) {
    xLab = "ECAR/OCR"
  } else if (seaName %in% c("maximal.respiration","spare.respiratory.capacity","basal.respiration",
                            "ATP.production")) {
    xLab = "OCR (pMol/min)" } else xLab = "ECAR (pMol/min)"

  #format seahorse measurement name
  seaName.new <- ifelse(seaName == "ECAR.OCR.ratio", "ECAR/OCR", gsub("\\."," ",seaName))

  #prepare title
  plotTitle <- sprintf("%s ~ %s", drugName, seaName.new)

  #prepare plot table
  plotTab <- filter(drugresp.mp, drug == drugName, ID %in% patList) %>%
    mutate(sea=seaVal[ID])

  #prepare correlation test annotations
  annoText <- paste("'coefficient ='~",coef,"*","', p ='~",sciPretty(p,digits=2))
  limX <- max(plotTab$sea) + 2
  midX <- max(plotTab$sea)/2
  ggplot(plotTab, aes(x=sea,y=100*viab)) + geom_point(size=1) + 
    geom_smooth(method = "lmrob", se= FALSE) +
    xlab(xLab) + ylab("% viability after drug treatment") +
    theme_bw() + ggtitle(plotTitle) + coord_cartesian(xlim = c(-2,limX)) +
    annotate("text", x = midX, y = Inf, label = annoText, 
             vjust=1, hjust=0.5, size = 5, parse = TRUE, col= "darkred") +
    theme(panel.grid = element_blank(),
          axis.title = element_text(size=13, face="bold"),
          axis.text = element_text(size=12),
          plot.title = element_text(face="bold", hjust=0.5, size = 15 ),
          legend.position = "none",
          axis.title.x = element_text(face="bold"),
          plot.margin = unit(c(0.5,0.5,0.5,0.5), "cm"))
})

names(scatterList) <- paste0(resTab.sig$seahorse, "_", resTab.sig$drug)

A figure of selected drugs (Supplementary Figure)

plot_grid(scatterList$glycolysis_rotenone, 
          scatterList$glycolysis_venetoclax,
          scatterList$glycolytic.capacity_orlistat,
          scatterList$`ECAR_KX2-391`,ncol=2)

Association test for individual concentrations

Association tests for each concentration

corRes_conc <- group_by(drugresp, drug, conc) %>% do(corTest(.$ID, .$viab, seaTest, ighv = NULL)) %>% ungroup() %>%
  mutate(p.adj = p.adjust(p, method = "BH"))

Prepare plot tab

drugList <- unique(filter(corRes_conc, p.adj < 0.1)$drug)
seaList <- unique(filter(corRes_conc, p.adj < 0.1)$seahorse)

plotTab <- filter(corRes_conc, drug %in% drugList,
                  seahorse %in% seaList) %>% mutate(concIndex = paste0("c",conc)) %>%
  mutate(coef = ifelse(p.adj < 0.1, coef, 0),
         seahorse = ifelse(seahorse != "ECAR.OCR.ratio", seahorse,
                              "ECAR/OCR")) %>%
  mutate(seahorse = gsub("[.]","\n",seahorse)) %>%
  mutate(seahorse = factor(seahorse))

#plot similar seahorse measurment together
plotTab$seahorse <- factor(plotTab$seahorse, 
                              levels = levels(plotTab$seahorse)[c(3,4,5,6,7,
                                                                     9,1,2,8,11,10)])

Heatmap plot for p values (Supplementary Figure)

ggplot(plotTab, aes(x=concIndex, y = drug, fill = coef)) + geom_tile(size = 0.3, color = "white") + facet_wrap(~ seahorse, nrow = 1) + 
  scale_fill_gradient2(low = "red", mid = "white", high = "blue", midpoint = 0,
                       limits =c(-0.6,0.6), labels = seq(-0.8,0.8, by = 0.2),
                       breaks = seq(-0.8,0.8, by = 0.2),
                       name = "Coefficient") +
  theme_bw() + theme(strip.text = element_text(face = "bold"),
                     axis.text.y = element_text(size =12)) +
  ylab("Drug name") + xlab("Concentration Index")

Multi-variate models for predicting drug responses

Data pre-processing

For genetic data

#mutations and copy number variations
mutCOMbinary<-channel(mutCOM, "binary")
mutCOMbinary<-mutCOMbinary[featureNames(mutCOMbinary) %in% colnames(seaCombat),]
genData<-Biobase::exprs(mutCOMbinary)
idx <- which(colnames(genData) %in% c("del13q14_bi", "del13q14_mono"))
genData <- genData[,-idx]
colnames(genData)[which(colnames(genData)=="del13q14_any")] = "del13q14"
genData <- data.frame(genData)

#add IGHV
translation <- c(`U` = 0, `M` = 1)
stopifnot(all(patmeta$IGHV %in% c("U","M", NA)))
IGHVData <- data.frame(row.names = rownames(patmeta), translation[patmeta$IGHV])
genData$IGHV <- IGHVData[rownames(genData),]

#add methylation
# Methylation cluster
translation <- c(`HP` = 2, `IP` = 1, `LP` = 0)
Mcluster <- data.frame(row.names =rownames(patmeta), translation[patmeta$ConsClust])
genData$Methylation_Cluster <- Mcluster[rownames(genData),]           

genData <- as.matrix(genData)

genData <- genData[,colSums(!is.na(genData)) >= 10 & colSums(genData,na.rm = TRUE) >= 5]

#fill the missing value with majority
genData <- apply(genData, 2, function(x) {
  xVec <- x
  popVal <- names(which.max(table(x)))
  xVec[is.na(xVec)] <- as.integer(popVal)
  xVec
})

For drug response data

viabData <- spread(drugresp.mp, key = "ID", value = "viab") %>%
  data.frame() %>% column_to_rownames("drug") %>%
  as.matrix() %>% t()

Prepare seahorse meaurement

sea <- t(assays(seaCombat)$seaMedian)

#use complete cases and subset
sea <- sea[complete.cases(sea),]

Function to Generate the explanatory dataset for each drug responses

#function to generate response vector and explainatory variable for each seahorse measurement

generateData.drug <- function(inclSet, onlyCombine = FALSE, censor = NULL, robust = FALSE) {

    dataScale <- function(x, censor = NULL, robust = FALSE) {
        #function to scale different variables
        if (length(unique(na.omit(x))) == 2){
          #a binary variable, change to -0.5 and 0.5 for 1 and 2
          x - 0.5
        } else if (length(unique(na.omit(x))) == 3) {
          #catagorical varialbe with 3 levels, methylation_cluster, change to -0.5,0,0.5
          (x - 1)/2
        } else {
          if (robust) {
          #continuous variable, centered by median and divied by 2*mad
          mScore <- (x-median(x,na.rm=TRUE))/(1.4826*mad(x,na.rm=TRUE))
            if (!is.null(censor)) {
              mScore[mScore > censor] <- censor
              mScore[mScore < -censor] <- -censor
            }
          mScore/2
          } else {
            mScore <- (x-mean(x,na.rm=TRUE))/(sd(x,na.rm=TRUE))
              if (!is.null(censor)) {
                mScore[mScore > censor] <- censor
                mScore[mScore < -censor] <- -censor
              }
          mScore/2
          }
        }
      }



    allResponse <- list()
    allExplain <- list()

    for (name in colnames(inclSet$drugs)) {
      y <- inclSet$drugs[,name]
      y <- y[!is.na(y)]

      #get overlapped samples for each dataset 
      overSample <- names(y)

      for (eachSet in inclSet) {
        overSample <- intersect(overSample,rownames(eachSet))
      }

      y <- dataScale(y[overSample], censor = censor, robust = robust)
      allResponse[[name]] <- y
    }

  #generate explainatory variable
     if ("seahorse" %in% names(inclSet)) {
      seaTab <- inclSet$seahorse[overSample,]
      vecName <- sprintf("metabolism(%s)",ncol(seaTab))
      allExplain[[vecName]] <- apply(seaTab,2,dataScale,censor = censor, robust = robust)
    }


     if ("gen" %in% names(inclSet)) {
      geneTab <- inclSet$gen[overSample,]
      vecName <- sprintf("genetic(%s)", ncol(geneTab))
      allExplain[[vecName]] <- apply(geneTab,2,dataScale)
    }


    comboTab <- c()
    for (eachSet in names(allExplain)){
      comboTab <- cbind(comboTab, allExplain[[eachSet]])
    }

    vecName <- sprintf("all(%s)", ncol(comboTab))
    allExplain[[vecName]] <- comboTab

  return(list(allResponse=allResponse, allExplain=allExplain))

}

Calulate drug responses explained by multi-omics data set

Training models

Clean and integrate multi-omics data

inclSet<-list(gen=genData, drugs=viabData, seahorse = sea)
cleanData <- generateData.drug(inclSet, censor = 4)

Function for multi-variate regression without penalization (lm version)

runLM <- function(X, y) {
  res <- summary(lm(y~ 0 + X))

  coefTab <- res$coefficients[,c(1,4)]
  rownames(coefTab) <- gsub("X","", rownames(coefTab))
  colnames(coefTab) <- c("coef","p")
  R2 <- res$adj.r.squared



  list(model = res,  varExplain = R2, coef = coefTab)
}

Perform regression

lmResults <- list()
for (eachMeasure in names(cleanData$allResponse)) {
  dataResult <- list()
  for (eachDataset in names(cleanData$allExplain)) {
    y <- cleanData$allResponse[[eachMeasure]]
    X <- cleanData$allExplain[[eachDataset]]

    glmRes <- runLM(X, y)
    dataResult[[eachDataset]] <- glmRes 
  }
  lmResults[[eachMeasure]] <- dataResult

}

Plot the comparison of R2

compareR2 <- lapply(names(lmResults), function(name) {
  tibble(drug = name, 
         genetics = lmResults[[name]]$`genetic(20)`$varExplain,
         metabolism = lmResults[[name]]$`metabolism(11)`$varExplain,
         both = lmResults[[name]]$`all(31)`$varExplain)
}) %>% bind_rows() %>% mutate(diff = both - genetics) %>% arrange(desc(diff))
plotTab <- compareR2 %>% 
  mutate(colLab = ifelse(diff > 0.1, "darkred",
                         ifelse(genetics > 0.4,"darkblue","black"))) %>%
  mutate(labText = ifelse(colLab != "black", drug,""))
plotR2 <- ggplot(plotTab, aes(x=genetics, y = both, label = labText)) + 
  geom_point(aes(col = colLab),size=2) + 
  scale_color_manual(values = c(darkred = "firebrick",darkblue="darkblue",black= "black"))+
  ggrepel::geom_text_repel(size=4) + geom_abline(intercept = 0, slope = 1, color = "red", linetype ="dotted") +
  coord_cartesian(xlim=c(-0.1,0.8),ylim = c(-0.1,0.8)) + theme_bw() +
  theme(axis.text.x = element_text(size=12),
        axis.text.y = element_text(size=12),
        axis.title = element_text(size =12, face = "bold"),
        plot.title = element_text(size=16, face = "bold"),
        legend.position = "none") +
  ylab("Variance explained by bioenergetic and genetic features") +
  xlab("Variance explained by genetic features alone")

Plot selected features for each drug

plotDrugs <- filter(compareR2, diff > 0.1) %>% pull(drug)

#function to plot coefficient for selected drugs
plotCoef <- function(drugList, lmResults, maxy = 3.5) {
  rowNum <- c()
  pList <- list()
  for (name in drugList) {
    eachTab <- lmResults[[name]]$`all(31)`$coef %>% data.frame() %>%
      rownames_to_column("feature") %>% filter(p <= 0.05) %>%
      mutate(feature = formatSea(feature), logP = -log10(p)) %>%
      mutate(direction = ifelse(coef >0, "pos","neg")) %>%
      arrange(logP) %>% mutate(feature = factor(feature, levels=feature))


    pList[[name]] <- ggplot(eachTab, aes(x=feature, y = logP, fill = direction)) + 
      geom_bar(stat = "identity", width = 0.7) +
      coord_flip(ylim =c(0,maxy)) + 
      xlab(name) + ylab(expression(-log[10]*'('*p*')')) +
      scale_fill_manual(values = c(pos = "blue",neg = "red"), guide = FALSE) + 
      theme(axis.line.y = element_blank(),
            axis.text.x = element_text(size=10),
            axis.text.y = element_text(size=10),
            axis.title.y = element_text(size=10, face= "bold"),
            axis.title.x = element_text(size =12, face = "bold"),
            plot.title = element_text(size=20, face = "bold"),
            plot.margin = margin(0.1,0,0.1,0, unit= "cm"))
    rowNum <- c(rowNum, nrow(eachTab))
  }

  grobList <- lapply(pList, ggplotGrob)
  grobList <- do.call(gridExtra::gtable_rbind, 
                      c(grobList, size = "max"))
  panels <- grobList$layout$t[grep("panel", grobList$layout$name)]
  grobList$heights[panels] <- unit(rowNum, "null")
  return(grobList)       
}

seaCoefs <- plotCoef(plotDrugs, lmResults)

Plot selected features for drug with high vairance explained values

plotDrugs <- arrange(compareR2, desc(genetics)) %>% 
  filter(genetics >0.4) %>% pull(drug)

highCoefs <- plotCoef(plotDrugs, lmResults, maxy = 5)
grid.draw(highCoefs)

Generate a combined plot for figure 4

title = ggdraw() + draw_figure_label("Figure 4", fontface = "bold", position = "top.left",size=20)
pout <- ggdraw() +
  draw_plot(drugManhattan, 0, 0.56, 1, 0.42) +
  draw_plot(plotR2, 0, 0.05 , .5, .4) +
  draw_plot(seaCoefs, 0.55, 0 , .38, 0.53) +
  draw_plot_label(c("A", "B", "C"), 
                  c(0, 0, 0.50), c(1, 0.55, 0.55), size = 20)
plot_grid(title, pout, rel_heights = c(0.05,0.95), ncol = 1)


lujunyan1118/seahorseCLL documentation built on May 12, 2019, 6:16 p.m.