options(error = recover) knitr::opts_chunk$set(tidy = FALSE, # cache = TRUE, autodep = TRUE, message = FALSE, error = FALSE, warning = TRUE) library("BloodCancerMultiOmics2017") library("Biobase") library("dplyr") library("RColorBrewer") library("dendsort") library("nat") # provides nlapply library("grid") library("magrittr")
plotDir = ifelse(exists(".standalone"), "", "part03/") if(plotDir!="") if(!file.exists(plotDir)) dir.create(plotDir)
Load data
data("conctab", "lpdAll", "drugs", "patmeta") lpdCLL <- lpdAll[, lpdAll$Diagnosis=="CLL"] lpdAll = lpdAll[, lpdAll$Diagnosis!="hMNC"]
someMatch <- function(...) { rv <- match(...) if (all(is.na(rv))) stop(sprintf("`match` failed to match any of the following: %s", paste(x[is.na(rv)], collapse=", "))) rv }
IGHV gene usage
Find the largest categories, which we will represent by colours,
and merge the others in a group other
.
colnames(pData(lpdAll)) gu <- pData(lpdAll)$`IGHV Uppsala gene usage` tabgu <- sort(table(gu), decreasing = TRUE) biggestGroups <- names(tabgu)[1:5] gu[ is.na(match(gu, biggestGroups)) & !is.na(gu) ] <- "other" pData(lpdAll)$`IGHV gene usage` <- factor(gu, levels = c(biggestGroups, "other"))
stopifnot(is.null(drugs$id)) drugs$id <- rownames(drugs) targetedDrugNames <- c("ibrutinib", "idelalisib", "PRT062607 HCl", "duvelisib", "spebrutinib", "selumetinib", "MK-2206", "everolimus", "encorafenib") id1 <- safeMatch(targetedDrugNames, drugs$name) targetedDrugs <- paste( rep(drugs[id1, "id"], each = 2), 4:5, sep="_" ) chemoDrugNames <- c("fludarabine", "doxorubicine", "nutlin-3") id2 <- safeMatch(chemoDrugNames, drugs$name) chemoDrugs <- paste( rep(drugs[id2, "id"], each = 5), 3:5, sep="_" ) tzselDrugNames <- c("ibrutinib", "idelalisib", "duvelisib", "selumetinib", "AZD7762", "MK-2206", "everolimus", "venetoclax", "thapsigargin", "AT13387", "YM155", "encorafenib", "tamatinib", "ruxolitinib", "PF 477736", "fludarabine", "nutlin-3") id3 <- safeMatch(tzselDrugNames, drugs$name) tzselDrugs <- unlist(lapply(seq(along = tzselDrugNames), function(i) paste(drugs[id3[i], "id"], if (tzselDrugNames[i] %in% c("fludarabine", "nutlin-3")) 2:3 else 4:5, sep = "_" )))
The weights
are used for weighting the similarity metric used in heatmap clustering.
weights$hclust
affects the clustering of the patients.
weights$score
affects the dendrogram reordering of the drugs.
bcrDrugs <- c("ibrutinib", "idelalisib", "PRT062607 HCl", "spebrutinib") everolID <- drugs$id[ safeMatch("everolimus", drugs$name)] bcrID <- drugs$id[ safeMatch(bcrDrugs, drugs$name)] is_BCR <- (fData(lpdAll)$id %in% bcrID) & (fData(lpdAll)$subtype %in% paste(4:5)) is_mTOR <- (fData(lpdAll)$id %in% everolID) & (fData(lpdAll)$subtype %in% paste(4:5)) myin <- function(x, y) as.numeric( (x %in% y) & !is.na(x) ) weights1 <- data.frame( hclust = rep(1, nrow(lpdAll)) + 1.75 * is_mTOR, score = as.numeric( is_BCR ), row.names = rownames(lpdAll)) weights2 <- data.frame( row.names = tzselDrugs, hclust = myin(drugs$target_category[id3], "B-cell receptor") * 0.3 + 0.7, score = rep(1, length(tzselDrugs)))
Remove drugs that failed quality control: NSC 74859, bortezomib.
badDrugs <- c(bortezomib = "D_008", `NSC 74859` = "D_025") stopifnot(identical(drugs[ badDrugs, "name"], names(badDrugs))) candDrugs <- rownames(lpdAll)[ fData(lpdAll)$type=="viab" & !(fData(lpdAll)$id %in% badDrugs) & fData(lpdAll)$subtype %in% paste(2:5) ]
Threshold parameters: drugs are accepted that for at least effectNum
samples
have a viability effect less than or equal to effectVal
. On the other hand, the
mean viability effect should not be below viab
.
thresh <- list(effectVal = 0.7, effectNum = 4, viab = 0.6, maxval = 1.1) overallMean <- rowMeans(Biobase::exprs(lpdAll)[candDrugs, ]) nthStrongest <- apply(Biobase::exprs(lpdAll)[candDrugs, ], 1, function(x) sort(x)[thresh$effectNum])
par(mfrow = c(1, 3)) hist(overallMean, breaks = 30, col = "pink") abline(v = thresh$viab, col="blue") hist(nthStrongest, breaks = 30, col = "pink") abline(v = thresh$effectVal, col="blue") plot(overallMean, nthStrongest) abline(h = thresh$effectVal, v = thresh$viab, col = "blue")
seldrugs1
and d1
: as in the version of
Figure 3A we had in the first submission to JCI. d2
: two concentrations for each drug in tzselDrugNames
.
seldrugs1 <- candDrugs[ overallMean >= thresh$viab & nthStrongest <= thresh$effectVal ] %>% union(targetedDrugs) %>% union(chemoDrugs) d1 <- Biobase::exprs(lpdAll[seldrugs1,, drop = FALSE ]) %>% deckel(lower = 0, upper = thresh$maxval) d2 <- Biobase::exprs(lpdAll[tzselDrugs,, drop = FALSE ]) %>% deckel(lower = 0, upper = thresh$maxval)
We are going to scale the data. But was is the best measure of scale (or spread)? Let's explore different measures of spread. We'll see that it does not seem to matter too much which one we use. We'll use median centering and scaling by mad.
spreads <- sapply(list(mad = mad, `Q95-Q05` = function(x) diff(quantile(x, probs = c(0.05, 0.95)))), function(s) apply(d1, 1, s)) plot( spreads ) jj <- names(which( spreads[, "mad"] < 0.15 & spreads[, "Q95-Q05"] > 0.7)) jj drugs[ stripConc(jj), "name" ]
medianCenter_MadScale <- function(x) { s <- median(x) (x - s) / deckel(mad(x, center = s), lower = 0.05, upper = 0.2) } scaleDrugResp <- function(x) t(apply(x, 1, medianCenter_MadScale)) scd1 <- scaleDrugResp(d1) scd2 <- scaleDrugResp(d2)
sort(table(lpdAll$Diagnosis), decreasing = TRUE) diseaseGroups <- list( `CLL` = c("CLL"), `MCL` = c("MCL"), `HCL` = c("HCL", "HCL-V"), `other B-cell` = c("B-PLL", "MZL", "LPL", "FL"), `T-cell` = c("T-PLL", "Sezary", "PTCL-NOS"), `myeloid` = c("AML")) stopifnot(setequal(unlist(diseaseGroups), unique(lpdAll$Diagnosis))) fdg <- factor(rep(NA, ncol(lpdAll)), levels = names(diseaseGroups)) for (i in names(diseaseGroups)) fdg[ lpdAll$Diagnosis %in% diseaseGroups[[i]] ] <- i lpdAll$`Disease Group` <- fdg
The helper function matClust
clusters a matrix x
,
whose columns represent samples and whose rows represent drugs.
Its arguments control how the columns are clustered:
weights
: a data.frame
with a weight for each row of x
. The weights are used in the computation of distances
between columns and thus for column sorting. The data.frame
's column hclust
contains the weights
for hclust(dist())
. The column score
contains the weights for computing the score used for dendrogram reordering.
See weights1
and weights2
defined above.colgroups
: a factor
by which to first split the columns before clusteringreorderrows
: logical. FALSE
for previous behaviour (old Fig. 3A), TRUE
for reordering the row dendrogram, too.matClust <- function(x, rowweights, colgroups = factor(rep("all", ncol(x))), reorderrows = FALSE) { stopifnot(is.data.frame(rowweights), c("hclust", "score") %in% colnames(rowweights), !is.null(rownames(rowweights)), !is.null(rownames(x)), all(rownames(x) %in% rownames(rowweights)), is.factor(colgroups), !any(is.na(colgroups)), length(colgroups) == ncol(x)) wgt <- rowweights[ rownames(x), ] columnsClust <- function(xk) { score <- -svd(xk * wgt[, "score"])$v[, 1] cmns <- colSums(xk * wgt[, "score"]) ## make sure that high score = high sensitivity if (cor(score, cmns) > 0) score <- (-score) ddraw <- as.dendrogram(hclust(dist(t(xk * wgt[, "hclust"]), method = "euclidean"), method = "complete")) dd <- reorder(ddraw, wts = -score, agglo.FUN = mean) ord <- order.dendrogram(dd) list(dd = dd, ord = ord, score = score) } sp <- split(seq(along = colgroups), colgroups) cc <- lapply(sp, function(k) columnsClust(x[, k, drop=FALSE])) cidx <- unlist(lapply(seq(along = cc), function (i) sp[[i]][ cc[[i]]$ord ])) csc <- unlist(lapply(seq(along = cc), function (i) cc[[i]]$score[ cc[[i]]$ord ])) rddraw <- as.dendrogram(hclust(dist(x, method = "euclidean"), method = "complete")) ridx <- if (reorderrows) { ww <- (colgroups == "CLL") stopifnot(!any(is.na(ww)), any(ww)) rowscore <- svd(t(x) * ww)$v[, 1] dd <- reorder(rddraw, wts = rowscore, agglo.FUN = mean) order.dendrogram(dd) } else { rev(order.dendrogram(dendsort(rddraw))) } res <- x[ridx, cidx] stopifnot(identical(dim(res), dim(x))) attr(res, "colgap") <- cumsum(sapply(cc, function(x) length(x$score))) res }
I.e. the right hand side color bar. IGHV Uppsala U/M
is implied by
IGHV Uppsala % SHM
(see sensi2.Rmd).
cut(..., right=FALSE)
will use intervals that are closed on the left
and open on the right.
translation = list(IGHV=c(U=0, M=1), Methylation_Cluster=c(`LP-CLL`=0, `IP-CLL`=1, `HP-CLL`=2))
make_pd <- function(cn, ...) { df <- function(...) data.frame(..., check.names = FALSE) x <- lpdAll[, cn] pd <- df( t(Biobase::exprs(x)[ c("del17p13", "TP53", "trisomy12"), , drop = FALSE]) %>% `colnames<-`(c("del 17p13", "TP53", "trisomy 12"))) # pd <- df(pd, # t(Biobase::exprs(x)[ c("SF3B1", "del11q22.3", "del13q14_any"),, drop = FALSE]) %>% # `colnames<-`(c("SF3B1", "del11q22.3", "del13q14"))) pd <- df(pd, cbind(as.integer(Biobase::exprs(x)["KRAS",] | Biobase::exprs(x)["NRAS",])) %>% `colnames<-`("KRAS | NRAS")) pd <- df(pd, # IGHV = Biobase::exprs(x)["IGHV Uppsala U/M", ], `IGHV (%)` = cut(x[["IGHV Uppsala % SHM"]], breaks = c(0, seq(92, 100, by = 2), Inf), right = FALSE), `Meth. Cluster` = names(translation$Methylation_Cluster)[ someMatch(paste(Biobase::exprs(x)["Methylation_Cluster", ]), translation$Methylation_Cluster)], `Gene usage` = x$`IGHV gene usage`) if(length(unique(x$Diagnosis)) > 1) pd <- df(pd, Diagnosis = x$Diagnosis) pd <- df(pd, pretreated = ifelse(patmeta[colnames(x),"IC50beforeTreatment"],"no","yes"), alive = ifelse(patmeta[colnames(x),"died"]>0, "no", "yes"), sex = factor(x$Gender)) rownames(pd) <- colnames(Biobase::exprs(x)) for (i in setdiff(colnames(pd), "BCR score")) { if (!is.factor(pd[[i]])) pd[[i]] <- factor(pd[[i]]) if (any(is.na(pd[[i]]))) { levels(pd[[i]]) <- c(levels(pd[[i]]), "n.d.") pd[[i]][ is.na(pd[[i]]) ] <- "n.d." } } pd }
gucol <- rev(brewer.pal(nlevels(lpdAll$`IGHV gene usage`), "Set3")) %>% `names<-`(sort(levels(lpdAll$`IGHV gene usage`))) gucol["IGHV3-21"] <- "#E41A1C" make_ann_colors <- function(pd) { bw <- c(`TRUE` = "darkblue", `FALSE` = "#ffffff") res <- list( Btk = bw, Syk = bw, PI3K = bw, MEK = bw) if ("exptbatch" %in% colnames(pd)) res$exptbatch <- brewer.pal(nlevels(pd$exptbatch), "Set2") %>% `names<-`(levels(pd$exptbatch)) if ("IGHV (%)" %in% colnames(pd)) res$`IGHV (%)` <- c(rev(colorRampPalette( brewer.pal(9, "Blues"))(nlevels(pd$`IGHV (%)`)-1)), "white") %>% `names<-`(levels(pd$`IGHV (%)`)) if ("CD38" %in% colnames(pd)) res$CD38 <- colorRampPalette( c("blue", "yellow"))(nlevels(pd$CD38)) %>% `names<-`(levels(pd$CD38)) if("Gene usage" %in% colnames(pd)) res$`Gene usage` <- gucol if("Meth. Cluster" %in% colnames(pd)) res$`Meth. Cluster` <- brewer.pal(9, "Blues")[c(1, 5, 9)] %>% `names<-`(names(translation$Methylation_Cluster)) res <- c(res, BloodCancerMultiOmics2017:::sampleColors) # from addons.R if("Diagnosis" %in% colnames(pd)) res$Diagnosis <- BloodCancerMultiOmics2017:::colDiagS[ names(BloodCancerMultiOmics2017:::colDiagS) %in% levels(pd$Diagnosis) ] %>% (function(x) x[order(names(x))]) for(i in names(res)) { whnd <- which(names(res[[i]]) == "n.d.") if(length(whnd)==1) res[[i]][whnd] <- "#e0e0e0" else res[[i]] <- c(res[[i]], `n.d.` = "#e0e0e0") stopifnot(all(pd[[i]] %in% names(res[[i]]))) } res }
theatmap <- function(x, cellwidth = 7, cellheight = 11) { stopifnot(is.matrix(x)) patDat <- make_pd(colnames(x)) bpp <- brewer.pal(9, "Set1") breaks <- 2.3 * c(seq(-1, 1, length.out = 101)) %>% `names<-`( colorRampPalette(c(rev(brewer.pal(7, "YlOrRd")), "white", "white", "white", brewer.pal(7, "Blues")))(101)) if (!is.null(attr(x, "colgap"))) stopifnot(last(attr(x, "colgap")) == ncol(x)) pheatmapwh(deckel(x, lower = first(breaks), upper = last(breaks)), cluster_rows = FALSE, cluster_cols = FALSE, gaps_col = attr(x, "colgap"), gaps_row = attr(x, "rowgap"), scale = "none", annotation_col = patDat, annotation_colors = make_ann_colors(patDat), color = names(breaks), breaks = breaks, show_rownames = TRUE, show_colnames = !TRUE, cellwidth = cellwidth, cellheight = cellheight, fontsize = 10, fontsize_row = 11, fontsize_col = 8, annotation_legend = TRUE, drop_levels = TRUE) }
Things we see in the plot:
clscd1/2: clustered and scaled drug matrix
clscd1 <- matClust(scd1, rowweights = weights1, colgroups = lpdAll$`Disease Group`) clscd2 <- matClust(scd2, rowweights = weights2, colgroups = lpdAll$`Disease Group`, reorderrows = TRUE)
setGapPositions <- function(x, gapAt) { rg <- if (missing(gapAt)) c(0) else { s <- strsplit(gapAt, split = "--") stopifnot(all(listLen(s) == 2L)) s <- strsplit(unlist(s), split = ":") spname <- drugs[safeMatch(sapply(s, `[`, 1), drugs$name), "id"] spconc <- as.numeric(sapply(s, `[`, 2)) spi <- mapply(function(d, cc) { i <- which(cc == conctab[d, ]) stopifnot(length(i) == 1) i }, spname, spconc) spdrug <- paste(spname, spi, sep = "_") mt <- safeMatch(spdrug, rownames(x)) igp <- seq(1, length(mt), by = 2L) stopifnot(all( mt[igp] - mt[igp + 1] == 1)) #stopifnot(all( mt[igp] - mt[igp + 1] == 1)) c(mt[igp + 1], 0) } attr(x, "rowgap") <- rg x } clscd1 %<>% setGapPositions(gapAt = c( "PF 477736:10--idelalisib:10", "spebrutinib:2.5--PF 477736:2.5", "PRT062607 HCl:10--selumetinib:2.5", "selumetinib:10--MK-2206:2.5", "MK-2206:0.156--tipifarnib:10", "AT13387:0.039--encorafenib:10", "encorafenib:2.5--SD07:1.111", "doxorubicine:0.016--encorafenib:0.625", "encorafenib:0.156--rotenone:2.5", "SCH 900776:0.625--everolimus:0.625", "everolimus:10--afatinib:1.667", "arsenic trioxide:1--thapsigargin:5", "thapsigargin:0.313--fludarabine:0.156" )) clscd2 %<>% setGapPositions(gapAt = c( "AT13387:0.039--everolimus:0.156", "everolimus:0.625--nutlin-3:10", "fludarabine:10--thapsigargin:0.078", "thapsigargin:0.313--encorafenib:0.625", "encorafenib:0.156--ruxolitinib:0.156" ))
#FIG# S8 rownames(clscd1) <- with(fData(lpdAll)[ rownames(clscd1),, drop = FALSE], paste0(drugs[id, "name"], " ", conctab[cbind(id, paste0("c", subtype))], "uM")) rownames(clscd1) theatmap(clscd1)
#FIG# 3A rownames(clscd2) <- with(fData(lpdAll)[ rownames(clscd2),, drop = FALSE], paste0(drugs[id, "name"], " ", conctab[cbind(id, paste0("c", subtype))], "uM")) rownames(clscd2) theatmap(clscd2)
devtools::session_info()
rm(list=ls())
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.