Global overview of the drug response landscape

options(error = recover)
knitr::opts_chunk$set(tidy = FALSE, # cache = TRUE, autodep = TRUE,
                      message = FALSE, error = FALSE, warning = TRUE)

library("BloodCancerMultiOmics2017")
library("Biobase")
library("dplyr")
library("RColorBrewer")
library("dendsort")
library("nat")  # provides nlapply
library("grid")
library("magrittr")
plotDir = ifelse(exists(".standalone"), "", "part03/")
if(plotDir!="") if(!file.exists(plotDir)) dir.create(plotDir)

Load data

data("conctab", "lpdAll", "drugs", "patmeta")
lpdCLL <- lpdAll[, lpdAll$Diagnosis=="CLL"]
lpdAll = lpdAll[, lpdAll$Diagnosis!="hMNC"]

Setup

Additional functions

someMatch <- function(...) {
  rv <- match(...)
  if (all(is.na(rv)))
    stop(sprintf("`match` failed to match any of the following: %s",
                 paste(x[is.na(rv)], collapse=", ")))
  rv  
}

Preprocess IGHV gene usage

Find the largest categories, which we will represent by colours, and merge the others in a group other.

colnames(pData(lpdAll))
gu    <- pData(lpdAll)$`IGHV Uppsala gene usage`
tabgu <- sort(table(gu), decreasing = TRUE)
biggestGroups <- names(tabgu)[1:5]
gu[ is.na(match(gu, biggestGroups)) & !is.na(gu) ] <- "other"
pData(lpdAll)$`IGHV gene usage` <- factor(gu, levels = c(biggestGroups, "other"))

Some drugs that we are particularly interested in

stopifnot(is.null(drugs$id))
drugs$id <- rownames(drugs)

targetedDrugNames <- c("ibrutinib", "idelalisib",  "PRT062607 HCl", 
   "duvelisib", "spebrutinib", "selumetinib", "MK-2206",  
   "everolimus", "encorafenib")
id1 <- safeMatch(targetedDrugNames, drugs$name)
targetedDrugs <- paste( rep(drugs[id1, "id"], each = 2), 4:5, sep="_" )

chemoDrugNames <- c("fludarabine", "doxorubicine",  "nutlin-3")
id2 <- safeMatch(chemoDrugNames, drugs$name)
chemoDrugs <- paste( rep(drugs[id2, "id"], each = 5), 3:5, sep="_" )

tzselDrugNames <- c("ibrutinib", "idelalisib", "duvelisib", "selumetinib", 
    "AZD7762", "MK-2206", "everolimus", "venetoclax", "thapsigargin", 
    "AT13387", "YM155", "encorafenib", "tamatinib", "ruxolitinib", 
    "PF 477736", "fludarabine", "nutlin-3")
id3 <- safeMatch(tzselDrugNames, drugs$name)
tzselDrugs <- unlist(lapply(seq(along = tzselDrugNames), function(i)
  paste(drugs[id3[i], "id"], 
      if (tzselDrugNames[i] %in% c("fludarabine", "nutlin-3")) 2:3 else 4:5, 
    sep = "_" )))

Feature weighting and score for dendrogram reordering

The weights are used for weighting the similarity metric used in heatmap clustering. weights$hclust affects the clustering of the patients. weights$score affects the dendrogram reordering of the drugs.

bcrDrugs <- c("ibrutinib", "idelalisib", "PRT062607 HCl", "spebrutinib")
everolID <- drugs$id[ safeMatch("everolimus",  drugs$name)]
bcrID    <- drugs$id[ safeMatch(bcrDrugs, drugs$name)]

is_BCR  <- 
  (fData(lpdAll)$id %in% bcrID)    & (fData(lpdAll)$subtype %in% paste(4:5))
is_mTOR <- 
  (fData(lpdAll)$id %in% everolID) & (fData(lpdAll)$subtype %in% paste(4:5))

myin <- function(x, y) as.numeric( (x %in% y) & !is.na(x) )

weights1 <- data.frame(
  hclust = rep(1, nrow(lpdAll)) + 1.75 * is_mTOR, 
  score  = as.numeric( is_BCR ), 
  row.names = rownames(lpdAll))

weights2 <- data.frame(
  row.names = tzselDrugs,
  hclust = myin(drugs$target_category[id3], "B-cell receptor") * 0.3 + 0.7,
  score  = rep(1, length(tzselDrugs))) 

Remove drugs that failed quality control: NSC 74859, bortezomib.

badDrugs <- c(bortezomib = "D_008", `NSC 74859` = "D_025")
stopifnot(identical(drugs[ badDrugs, "name"], names(badDrugs)))

candDrugs <- rownames(lpdAll)[  
  fData(lpdAll)$type=="viab" & !(fData(lpdAll)$id %in% badDrugs) &
  fData(lpdAll)$subtype %in% paste(2:5)
]

Threshold parameters: drugs are accepted that for at least effectNum samples have a viability effect less than or equal to effectVal. On the other hand, the mean viability effect should not be below viab.

thresh <- list(effectVal = 0.7, effectNum = 4, viab = 0.6, maxval = 1.1)
overallMean  <- rowMeans(Biobase::exprs(lpdAll)[candDrugs, ])
nthStrongest <- apply(Biobase::exprs(lpdAll)[candDrugs, ], 1,
                      function(x) sort(x)[thresh$effectNum])
par(mfrow = c(1, 3))
hist(overallMean,  breaks = 30, col = "pink")
abline(v = thresh$viab,      col="blue")
hist(nthStrongest, breaks = 30, col = "pink")
abline(v = thresh$effectVal, col="blue")
plot(overallMean, nthStrongest)
abline(h = thresh$effectVal, v = thresh$viab, col = "blue")

seldrugs1 and d1: as in the version of Figure 3A we had in the first submission to JCI. d2: two concentrations for each drug in tzselDrugNames.

seldrugs1 <- candDrugs[ overallMean >= thresh$viab &
                        nthStrongest <= thresh$effectVal ] %>%
            union(targetedDrugs) %>% 
            union(chemoDrugs)

d1 <- Biobase::exprs(lpdAll[seldrugs1,, drop = FALSE ]) %>%
  deckel(lower = 0, upper = thresh$maxval)

d2 <- Biobase::exprs(lpdAll[tzselDrugs,, drop = FALSE ]) %>%
  deckel(lower = 0, upper = thresh$maxval)

We are going to scale the data. But was is the best measure of scale (or spread)? Let's explore different measures of spread. We'll see that it does not seem to matter too much which one we use. We'll use median centering and scaling by mad.

spreads <- sapply(list(mad = mad, `Q95-Q05` = function(x)
  diff(quantile(x, probs = c(0.05, 0.95)))), function(s) apply(d1, 1, s))
plot( spreads )

jj <- names(which( spreads[, "mad"] < 0.15 & spreads[, "Q95-Q05"] > 0.7))
jj
drugs[ stripConc(jj), "name" ]
medianCenter_MadScale <- function(x) {
  s <- median(x)
  (x - s) / deckel(mad(x, center = s), lower = 0.05, upper = 0.2)
}

scaleDrugResp  <- function(x) t(apply(x, 1, medianCenter_MadScale)) 

scd1 <- scaleDrugResp(d1)
scd2 <- scaleDrugResp(d2)

Define disease groups

sort(table(lpdAll$Diagnosis), decreasing = TRUE)

diseaseGroups <- list(
  `CLL` = c("CLL"), 
  `MCL` = c("MCL"),
  `HCL` = c("HCL", "HCL-V"),
  `other B-cell` = c("B-PLL", "MZL", "LPL", "FL"), 
  `T-cell` = c("T-PLL", "Sezary", "PTCL-NOS"),
  `myeloid` = c("AML"))
stopifnot(setequal(unlist(diseaseGroups), unique(lpdAll$Diagnosis)))

fdg <- factor(rep(NA, ncol(lpdAll)), levels = names(diseaseGroups))
for (i in names(diseaseGroups))
  fdg[ lpdAll$Diagnosis %in% diseaseGroups[[i]] ] <- i
lpdAll$`Disease Group` <- fdg

Code for heatmap

Matrix row / column clustering

The helper function matClust clusters a matrix x, whose columns represent samples and whose rows represent drugs. Its arguments control how the columns are clustered:

matClust <- function(x, 
                     rowweights,
                     colgroups   = factor(rep("all", ncol(x))),
                     reorderrows = FALSE) {

  stopifnot(is.data.frame(rowweights),
            c("hclust", "score") %in% colnames(rowweights),
            !is.null(rownames(rowweights)),
            !is.null(rownames(x)), 
            all(rownames(x) %in% rownames(rowweights)),
            is.factor(colgroups), 
            !any(is.na(colgroups)), 
            length(colgroups) == ncol(x))

  wgt <- rowweights[ rownames(x), ]

  columnsClust <- function(xk) {
    score   <- -svd(xk * wgt[, "score"])$v[, 1]
    cmns    <- colSums(xk * wgt[, "score"])
    ## make sure that high score = high sensitivity  
    if (cor(score, cmns) > 0) score <- (-score)

    ddraw  <- as.dendrogram(hclust(dist(t(xk * wgt[, "hclust"]), 
                                        method = "euclidean"), 
                                   method = "complete"))
    dd  <- reorder(ddraw, wts = -score, agglo.FUN = mean)
    ord <- order.dendrogram(dd)
    list(dd = dd, ord = ord, score = score)
  }

  sp <- split(seq(along = colgroups), colgroups)
  cc <- lapply(sp, function(k) columnsClust(x[, k, drop=FALSE]))
  cidx <- unlist(lapply(seq(along = cc), function (i)
    sp[[i]][ cc[[i]]$ord ]))
  csc  <- unlist(lapply(seq(along = cc), function (i)
    cc[[i]]$score[ cc[[i]]$ord ]))

  rddraw <- as.dendrogram(hclust(dist(x, method = "euclidean"),
                                   method = "complete"))
  ridx  <- if (reorderrows) {
    ww <- (colgroups == "CLL")
    stopifnot(!any(is.na(ww)), any(ww))
    rowscore <- svd(t(x) * ww)$v[, 1]
    dd <- reorder(rddraw, wts = rowscore, agglo.FUN = mean)
    order.dendrogram(dd)
  } else {
    rev(order.dendrogram(dendsort(rddraw)))
  }

  res <- x[ridx, cidx]

  stopifnot(identical(dim(res), dim(x)))
  attr(res, "colgap") <- cumsum(sapply(cc, function(x) length(x$score)))
  res
}

Prepare sample annotations

I.e. the right hand side color bar. IGHV Uppsala U/M is implied by IGHV Uppsala % SHM (see sensi2.Rmd). cut(..., right=FALSE) will use intervals that are closed on the left and open on the right.

translation = list(IGHV=c(U=0, M=1),
                   Methylation_Cluster=c(`LP-CLL`=0, `IP-CLL`=1, `HP-CLL`=2))
make_pd <- function(cn, ...) {
  df <- function(...) data.frame(..., check.names = FALSE)

  x <- lpdAll[, cn]
  pd <- df(
    t(Biobase::exprs(x)[    c("del17p13", "TP53", "trisomy12"), , drop = FALSE]) %>% 
      `colnames<-`(c("del 17p13", "TP53", "trisomy 12")))

  # pd <- df(pd,
  #    t(Biobase::exprs(x)[  c("SF3B1", "del11q22.3", "del13q14_any"),, drop = FALSE]) %>% 
  #    `colnames<-`(c("SF3B1", "del11q22.3", "del13q14"))) 

  pd <- df(pd,
      cbind(as.integer(Biobase::exprs(x)["KRAS",] | Biobase::exprs(x)["NRAS",])) %>% 
      `colnames<-`("KRAS | NRAS")) 

  pd <- df(pd,
    # IGHV = Biobase::exprs(x)["IGHV Uppsala U/M", ],
    `IGHV (%)` = cut(x[["IGHV Uppsala % SHM"]],
                     breaks = c(0, seq(92, 100, by = 2), Inf), right = FALSE),
    `Meth. Cluster` = names(translation$Methylation_Cluster)[
          someMatch(paste(Biobase::exprs(x)["Methylation_Cluster", ]),
                    translation$Methylation_Cluster)],
    `Gene usage` = x$`IGHV gene usage`)

  if(length(unique(x$Diagnosis)) > 1)  
    pd <- df(pd, Diagnosis = x$Diagnosis)

  pd <- df(pd, 
    pretreated  = ifelse(patmeta[colnames(x),"IC50beforeTreatment"],"no","yes"), 
    alive       = ifelse(patmeta[colnames(x),"died"]>0, "no", "yes"),
    sex         = factor(x$Gender))

  rownames(pd) <- colnames(Biobase::exprs(x))

  for (i in setdiff(colnames(pd), "BCR score")) {
    if (!is.factor(pd[[i]]))
      pd[[i]] <- factor(pd[[i]])
    if (any(is.na(pd[[i]]))) {
      levels(pd[[i]]) <- c(levels(pd[[i]]), "n.d.")
      pd[[i]][ is.na(pd[[i]]) ] <- "n.d."   
    }
  }

  pd
}

Define the annotation colors

gucol <- rev(brewer.pal(nlevels(lpdAll$`IGHV gene usage`), "Set3")) %>% 
             `names<-`(sort(levels(lpdAll$`IGHV gene usage`)))
gucol["IGHV3-21"] <- "#E41A1C"

make_ann_colors <- function(pd) {
  bw <- c(`TRUE` = "darkblue", `FALSE` = "#ffffff")
  res <- list(
  Btk = bw, Syk = bw, PI3K = bw, MEK = bw)

  if ("exptbatch" %in% colnames(pd))
    res$exptbatch <- brewer.pal(nlevels(pd$exptbatch), "Set2") %>%
    `names<-`(levels(pd$exptbatch))

  if ("IGHV (%)" %in% colnames(pd))
    res$`IGHV (%)` <- 
       c(rev(colorRampPalette(
         brewer.pal(9, "Blues"))(nlevels(pd$`IGHV (%)`)-1)), "white") %>% 
                        `names<-`(levels(pd$`IGHV (%)`))

  if ("CD38" %in% colnames(pd))
    res$CD38 <- colorRampPalette(
      c("blue", "yellow"))(nlevels(pd$CD38)) %>% `names<-`(levels(pd$CD38))

  if("Gene usage" %in% colnames(pd)) 
    res$`Gene usage` <- gucol 

  if("Meth. Cluster" %in% colnames(pd)) 
    res$`Meth. Cluster` <- brewer.pal(9, "Blues")[c(1, 5, 9)] %>% 
        `names<-`(names(translation$Methylation_Cluster))

  res <- c(res, BloodCancerMultiOmics2017:::sampleColors)   # from addons.R  

  if("Diagnosis" %in% colnames(pd)) 
    res$Diagnosis <- BloodCancerMultiOmics2017:::colDiagS[
      names(BloodCancerMultiOmics2017:::colDiagS) %in% levels(pd$Diagnosis) ] %>% 
         (function(x) x[order(names(x))])

  for(i in names(res)) {
    whnd <- which(names(res[[i]]) == "n.d.")
    if(length(whnd)==1)
      res[[i]][whnd] <- "#e0e0e0" else 
        res[[i]] <- c(res[[i]], `n.d.` = "#e0e0e0")
    stopifnot(all(pd[[i]] %in% names(res[[i]])))
  }

  res
}

Heatmap drawing function

theatmap <- function(x, cellwidth = 7, cellheight = 11) {

  stopifnot(is.matrix(x))
  patDat <- make_pd(colnames(x))

  bpp <- brewer.pal(9, "Set1")
  breaks <- 2.3 * c(seq(-1, 1, length.out = 101)) %>% `names<-`(
      colorRampPalette(c(rev(brewer.pal(7, "YlOrRd")), 
                         "white", "white", "white", 
                         brewer.pal(7, "Blues")))(101))

  if (!is.null(attr(x, "colgap")))
    stopifnot(last(attr(x, "colgap")) == ncol(x))

  pheatmapwh(deckel(x, lower = first(breaks), upper = last(breaks)), 
         cluster_rows = FALSE,  
         cluster_cols = FALSE,
         gaps_col = attr(x, "colgap"),
         gaps_row = attr(x, "rowgap"),
         scale = "none",
         annotation_col = patDat,
         annotation_colors = make_ann_colors(patDat),
         color = names(breaks),
         breaks = breaks, 
         show_rownames = TRUE, 
         show_colnames = !TRUE, 
         cellwidth = cellwidth, cellheight = cellheight, 
         fontsize = 10, fontsize_row = 11, fontsize_col = 8,
         annotation_legend = TRUE, drop_levels = TRUE)
  }

Draw the heatmaps

Things we see in the plot:

clscd1/2: clustered and scaled drug matrix

clscd1 <- matClust(scd1, rowweights = weights1, 
                  colgroups = lpdAll$`Disease Group`)
clscd2 <- matClust(scd2, rowweights = weights2, 
                  colgroups = lpdAll$`Disease Group`, reorderrows = TRUE)

Identify places where gaps between the rows should be

setGapPositions <- function(x, gapAt) {
  rg <- if (missing(gapAt)) c(0) else {
    s <- strsplit(gapAt, split = "--")
    stopifnot(all(listLen(s) == 2L))
    s <- strsplit(unlist(s), split = ":")
    spname <- drugs[safeMatch(sapply(s, `[`, 1), drugs$name), "id"]
    spconc <- as.numeric(sapply(s, `[`, 2))
    spi <- mapply(function(d, cc) {
      i <- which(cc == conctab[d, ])
      stopifnot(length(i) == 1)
      i
     }, spname, spconc)
    spdrug <- paste(spname, spi, sep = "_")
    mt <- safeMatch(spdrug, rownames(x))
    igp <- seq(1, length(mt), by = 2L)
    stopifnot(all( mt[igp] - mt[igp + 1] == 1))
    #stopifnot(all( mt[igp] - mt[igp + 1] == 1))
    c(mt[igp + 1], 0)
  }
  attr(x, "rowgap") <- rg
  x
}

clscd1 %<>% setGapPositions(gapAt = c(
  "PF 477736:10--idelalisib:10",
  "spebrutinib:2.5--PF 477736:2.5",
  "PRT062607 HCl:10--selumetinib:2.5",
  "selumetinib:10--MK-2206:2.5",
  "MK-2206:0.156--tipifarnib:10",
  "AT13387:0.039--encorafenib:10",
  "encorafenib:2.5--SD07:1.111",
  "doxorubicine:0.016--encorafenib:0.625",
  "encorafenib:0.156--rotenone:2.5",
  "SCH 900776:0.625--everolimus:0.625",
  "everolimus:10--afatinib:1.667",
  "arsenic trioxide:1--thapsigargin:5",
  "thapsigargin:0.313--fludarabine:0.156"
))

clscd2 %<>% setGapPositions(gapAt = c(
  "AT13387:0.039--everolimus:0.156",
  "everolimus:0.625--nutlin-3:10", 
  "fludarabine:10--thapsigargin:0.078",
  "thapsigargin:0.313--encorafenib:0.625", 
  "encorafenib:0.156--ruxolitinib:0.156"
))
#FIG# S8
rownames(clscd1) <- with(fData(lpdAll)[ rownames(clscd1),, drop = FALSE], 
  paste0(drugs[id, "name"], " ", conctab[cbind(id, paste0("c", subtype))], "uM")) 
rownames(clscd1)
theatmap(clscd1)
#FIG# 3A
rownames(clscd2) <- with(fData(lpdAll)[ rownames(clscd2),, drop = FALSE], 
  paste0(drugs[id, "name"], " ", conctab[cbind(id, paste0("c", subtype))], "uM")) 

rownames(clscd2)
theatmap(clscd2)
devtools::session_info()
rm(list=ls())


MalgorzataOles/BloodCancerMultiOmics2017 documentation built on March 29, 2024, 2:29 p.m.