require("knitr")
opts_knit$set(root.dir="..")
opts_chunk$set(fig.align="center", fig.width=6, fig.height=6, dpi=300)

Purpose

Set Up

library(rcellminer)
library(rcellminerElasticNet)
library(stringr)
library(RColorBrewer)
library(pheatmap)
library(vioplot)
library(cluster)

# Parameters are set at top in YAML section
commonFlag <- params$commonFlag
partialFlag <- params$partialFlag
dataDir <- params$dataDir
outputDir <- params$outputDir

commonFlag
partialFlag
dataDir
outputDir

#DEBUG 
#commonFlag <- TRUE
#partialFlag <- TRUE

enDataDir <- file.path(dataDir, allDataDir)
outputDir <- outputDir
pdfFile <- "en_cvresults_heatmap.pdf"
vioplotFile <- "en_cvresults_vioplot.pdf"

rdsFile <- "en_cvresults_cvCorMat.rds"
namedTmpCvCorMatFile <- "en_cvresults_heatmap.txt"

if(commonFlag) {
  enDataDir <- file.path(dataDir, commonDataDir)
  pdfFile <- paste0("common_", pdfFile)
  vioplotFile <- paste0("common_", vioplotFile)

  rdsFile <- paste0("common_", rdsFile)
  namedTmpCvCorMatFile <- paste0("common_", namedTmpCvCorMatFile)
}

if(partialFlag) {
  pdfFile <- paste0("partial_", pdfFile)
  namedTmpCvCorMatFile <- paste0("partial_", namedTmpCvCorMatFile)
}

dataFiles <- list.files(path=enDataDir, pattern = "*.Rdata")

colors <- brewer.pal(12, "Set3")

nscSet <- NULL
for (fName in dataFiles){
  tmp <- str_split(string = fName, pattern = "NSC_")[[1]][2]
  nsc <- str_split(string = tmp, pattern = "\\.")[[1]][1]
  if (!(nsc %in% nscSet)){
    nscSet <- c(nscSet, nsc)
  }
}

dataTypes <- c("exp", "mut", "swa", "exp_mut", "exp_swa", "mut_swa", "exp_mut_swa")

# Initialize cvCorMat
cvCorMat <- matrix(NA, nrow=length(nscSet), ncol=length(dataTypes))
rownames(cvCorMat) <- nscSet
colnames(cvCorMat) <- dataTypes

Fill Correlation Matrix (Full Elastic CV-Predicted vs Actual Correlations)

# FILL CORRELATION MATRIX (FULL ELASTIC CV-PREDICTED VS. ACTUAL CORRELATIONS).
badFiles <- NULL

if(!file.exists(file.path(outputDir, rdsFile))) {
  for (i in 1:length(nscSet)) {
    nsc <- nscSet[i]
    for (dataType in dataTypes){
      rdataFile <- paste0("DATA_", dataType, "_NSC_", nsc, ".Rdata")
      if (rdataFile %in% dataFiles) {
        #cat("RDATA: ", rdataFile, "\n")
        load(file.path(enDataDir, rdataFile)) # object name: elNetResults.
        if (!is.null(elNetResults$fullEnCvResults$cvPredRsqared)){
          cvCorMat[i, dataType] <- elNetResults$fullEnCvResults$cvPredR
        } else {
          badFiles <- c(badFiles, rdataFile)
        }
        rm(elNetResults)
      }
    }
  }  
  rds <- list(cvCorMat=cvCorMat, badFiles=badFiles)
  saveRDS(rds, file=file.path(outputDir, rdsFile))
} else {
  tmpFile <- file.path(outputDir, rdsFile)
  cat("RDS: ", tmpFile, "\n")

  rds <- readRDS(tmpFile)
  cvCorMat <- rds$cvCorMat
}

Exclude entries with missing values

#noNaCvCorMat <- na.exclude(cvCorMat)
noNaCvCorMat <- cvCorMat

tmpCvCorMat <- cvCorMat

NSC Counts

# All NSCs
nrow(cvCorMat)

# No NA NSCs
nrow(na.exclude(cvCorMat))

NSC and MOA Information

drugAnnot <- getFeatureAnnot(rcellminerData::drugData)[["drug"]]
fdaDrugAnnot <- drugAnnot[which(drugAnnot$FDA_STATUS == "FDA approved"), "NSC"]
ctDrugAnnot  <- drugAnnot[which(drugAnnot$FDA_STATUS == "Clinical trial"), "NSC"]

isFdaApproved <- rownames(cvCorMat) %in% fdaDrugAnnot
drugNames <- getDrugName(rownames(cvCorMat))

enDrugInfo <- data.frame(nsc=rownames(cvCorMat), isFdaApproved=isFdaApproved, drugNames=drugNames, moaStr=getMoaStr(rownames(cvCorMat)))

write.table(enDrugInfo, file=file.path(outputDir, "enDrugInfo.txt"), sep="\t", row.names=FALSE, col.names=TRUE, quote=FALSE)
write.table(Drug_MOA_Key, file=file.path(outputDir, "Drug_MOA_Key.txt"), sep="\t", row.names=FALSE, col.names=TRUE, quote=FALSE)

Overall Dataset Comparison

#boxplot(na.exclude(cvCorMat), las=2, cex.axis=0.7)
x1 <- cvCorMat[,1]
x2 <- cvCorMat[,2]
x3 <- cvCorMat[,3]
x4 <- cvCorMat[,4]
x5 <- cvCorMat[,5]
x6 <- cvCorMat[,6]
x7 <- cvCorMat[,7]

x1 <- x1[!is.na(x1)]
x2 <- x2[!is.na(x2)]
x3 <- x3[!is.na(x3)]
x4 <- x4[!is.na(x4)]
x5 <- x5[!is.na(x5)]
x6 <- x6[!is.na(x6)]
x7 <- x7[!is.na(x7)]

outputFilePath <- file.path(outputDir, vioplotFile)

pdf(file=outputFilePath, width=9, height=5)
vioplot(x1, x2, x3, x4, x5, x6, x7, names=colnames(tmpCvCorMat), col="lightblue")
title("Distribution of Correlation Values\n(Elastic Net CV-Predicted vs Actual)")
dev.off() 

Re-order Drugs

cvCorMatList <- list(all=tmpCvCorMat)

# swaSamples <- grepl("swa", colnames(tmpCvCorMat))
# expSamples <- grepl("exp", colnames(tmpCvCorMat))
# 
# diff <- rowMeans(tmpCvCorMat[, swaSamples]) - rowMeans(tmpCvCorMat[, !swaSamples])
# diff2 <- tmpCvCorMat[, "swa"] - tmpCvCorMat[, "exp"]
# 
# a <- diff < quantile(diff, 0.10, na.rm=TRUE)
# b <- diff > quantile(diff, 0.90, na.rm=TRUE)
# d <- c(which(a), which(b))
# 
# # Can comment out line below (and one further below) to obtain plot for all drugs. (HERE)
# tmpCvCorMat <- tmpCvCorMat[d,]
# 
# sorted <- sort.int(diff[d], decreasing=TRUE, index.return=TRUE)
# sorted2 <- sort.int(diff2[d], decreasing=TRUE, index.return=TRUE)
# 
# #e <- names(sorted2$x[which(names(sorted$x) %in% names(d))])
# 
# # Can comment out line below to obtain plot for all drugs. (HERE)
# tmpCvCorMat <- tmpCvCorMat[sorted$ix, ]

# cvCorMatList[["partial"]] <- tmpCvCorMat[sorted$ix, ]

cvCorVals <- na.exclude(as.numeric(tmpCvCorMat))
cvCorThreshold <- unname(quantile(cvCorVals, probs = 0.60))

cvCorMaxVals <- apply(tmpCvCorMat, MARGIN = 1, FUN = function(x){
  if (!all(is.na(x))){
    return(max(x, na.rm = TRUE))
  } else{
    return(NA)
  }
})
cvCorMatList[["partial"]] <- tmpCvCorMat[(which(cvCorMaxVals > cvCorThreshold)), ]

Select Data for Plots

tmpCvCorMat <- cvCorMatList[["all"]]

if(partialFlag) {
  tmpCvCorMat <- cvCorMatList[["partial"]]
}

Generate Colors

Mechanism of Action

tmp <- getMoaToCompounds()
tmpNscSet <- rownames(tmpCvCorMat)
moas <- searchListOfVectors(tmpNscSet, tmp)

kinaseInhibitor <- c("STK", "PK:PRKCA", "PK:PIK3", "PK:MTOR", "YK", 
                     "PK:SYK", "PK:FLT,NTRK", "PK:AKT", "PK:CDK", "PK:MAP2K",
                     "PK:EGFR,ERBB2", "PK:EGFR", "PK:EGFR,ERBB2", "PDGFR", "PK:KIT",
                     "PK:VEGFR", "PK:CHEK", "PK:KIT,VEGFR", "PK:MET", "PK:EGFR,VEGFR",
                     "PK:MET,VEGFR", "PK:BRAF", "STAT", "PK:SRC")
dnaDamaging <- c("A2", "A6", "A7", "T1", "T2", "PARP", "Ds", "Rs", "Df", "Db", "ROS1")

Set Class Colors

moaClasses <- NULL 
statusClasses <- NULL 

for(nsc in names(moas)) {
  idx <- moas[[nsc]]

  if(any(names(tmp[idx]) %in% kinaseInhibitor)) {
    moaClasses <- c(moaClasses, "Kinase")
  } else if(any(names(tmp[idx]) %in% dnaDamaging)) {
    moaClasses <- c(moaClasses, "DNA")
  } else if(any(names(tmp[idx]) %in% "Ho")) {
    moaClasses <- c(moaClasses, "Ho")
  } else if(any(names(tmp[idx]) %in% "AM")) {
    moaClasses <- c(moaClasses, "AM")
  } else if(any(names(tmp[idx]) %in% "Tu")) {
    moaClasses <- c(moaClasses, "Tu")
  } else if(any(names(tmp[idx]) %in% "HDAC")) {
    moaClasses <- c(moaClasses, "HDAC")
  } else {
    moaClasses <- c(moaClasses, "Other")
  }

  if(nsc %in% fdaDrugAnnot) {
    statusClasses <- c(statusClasses, "FDA")
  } else if(nsc %in% ctDrugAnnot) {
    statusClasses <- c(statusClasses, "CT")
  } 
}

annotation_row <- data.frame(
    MOA=moaClasses,
    Status=statusClasses
  )
rownames(annotation_row) = names(moas)

Sample Type

initNames <- rep("FALSE", ncol(tmpCvCorMat))

swaColors <- initNames
swaColors[grepl("swa", colnames(tmpCvCorMat))] <- "TRUE"

expColors <- initNames
expColors[grepl("exp", colnames(tmpCvCorMat))] <- "TRUE"

mutColors <- initNames
mutColors[grepl("mut", colnames(tmpCvCorMat))] <- "TRUE"

annotation_col <- data.frame(
    MUT=mutColors,
    SWA=swaColors,
    EXP=expColors
  )
rownames(annotation_col) = colnames(tmpCvCorMat)

# Unused in pheatmap
#colColors <- cbind(swaColors, expColors, mutColors)
#colnames(colColors) <- c("SWATH", "Expression", "Mutations")

Make Heat Map

par(cex.main=0.7)

Status <- c("black", "white")
names(Status) <- c("CT", "FDA")

EXP <- c("black", "white")
names(EXP) <- c("TRUE", "FALSE")

SWA <- c("black", "white")
names(SWA) <- c("TRUE", "FALSE")

MUT <- c("black", "white")
names(MUT) <- c("TRUE", "FALSE")

MOA <-brewer.pal(7, "Set1")
names(MOA) <-   c("Kinase", "DNA", "Ho", "AM", "Tu", "HDAC", "Other")

anno_colors <- list(EXP=EXP, SWA=SWA, MUT=MUT, Status=Status, MOA=MOA)

rowLabels <- paste0(getDrugName(rownames(tmpCvCorMat)), " (NSC", rownames(tmpCvCorMat), ")")

plotTitle <- "Correlation Matrix\n(Elastic Net CV-Predicted vs Actual)"

outputFilePath <- file.path(outputDir, pdfFile)

# Cluster using daisy GOWER function that works with NA values
# NOTE: This still will not work if all entries are NA
# From: http://stackoverflow.com/questions/20438019/how-to-perform-clustering-without-removing-rows-where-na-is-present-in-r
distfunc <- function(x) { daisy(x, metric="gower") }
hclustfunc <- function(x) hclust(x, method="complete")

idx <- apply(tmpCvCorMat, 1, function(x) all(is.na(x)))
heatmapMat <- tmpCvCorMat[!idx, ]

d <- distfunc(heatmapMat)
hclustObj <- hclustfunc(d)

#drows = dist(tmpCvCorMat, method = "minkwski")
#pheatmap(test, clustering_distance_rows = drows, clustering_distance_cols = dcols)
#callback <- function(hc, ...){ dendsort(hc) }

# pheatmap(tmpCvCorMat,
#        main=plotTitle,
#        cluster_rows=hclustObj,
#        annotation_row=annotation_row,
#        annotation_col=annotation_col,
#        labels_row=rowLabels,
#        labels_col=rep("", ncol(tmpCvCorMat)),
#        annotation_colors=anno_colors,
#        legend = TRUE,
#        filename=outputFilePath,
#        width=7.75,
#        height=12,
#        fontsize=10,
#        fontsize_row=4
#        )

pheatmap(tmpCvCorMat[, c("exp", "swa", "exp_swa", "exp_mut_swa", "mut_swa", "exp_mut", "mut")],
       main=plotTitle,
       cluster_rows=hclustObj,
       cluster_cols=FALSE,
       annotation_row=annotation_row,
       annotation_col=annotation_col,
       labels_row=rowLabels,
       labels_col=rep("", ncol(tmpCvCorMat)),
       annotation_colors=anno_colors,
       legend = TRUE,
       filename=outputFilePath,
       width=7.75,
       height=12,
       fontsize=10,
       fontsize_row=4
       )
namedTmpCvCorMat <- data.frame(name=rowLabels, moaStr=getMoaStr(rownames(tmpCvCorMat)), tmpCvCorMat)

write.table(namedTmpCvCorMat, file=file.path(outputDir, namedTmpCvCorMatFile), sep="\t", row.names=FALSE, col.names=TRUE, quote=FALSE)

Top Drugs SWATH-Only

tmp <- namedTmpCvCorMat[with(namedTmpCvCorMat, order(-swa)), ]

kable(head(tmp, 20), row.names=FALSE)

Top Drugs Expression-SWATH

tmp <- namedTmpCvCorMat[with(namedTmpCvCorMat, order(-exp_swa)), ]

kable(head(tmp, 10), row.names=FALSE)

Top Drugs Mutation-SWATH

tmp <- namedTmpCvCorMat[with(namedTmpCvCorMat, order(-mut_swa)), ]

kable(head(tmp, 10), row.names=FALSE)

Top Drugs Expression-Mutation-SWATH

tmp <- namedTmpCvCorMat[with(namedTmpCvCorMat, order(-exp_mut_swa)), ]

kable(head(tmp, 10), row.names=FALSE)

Session Info

sessionInfo()


CBIIT/rcellminerElasticNet documentation built on Sept. 8, 2020, 6:21 p.m.