require("knitr") opts_knit$set(root.dir="..") opts_chunk$set(fig.align="center", fig.width=6, fig.height=6, dpi=300)
library(rcellminer) library(rcellminerElasticNet) library(stringr) library(RColorBrewer) library(pheatmap) library(vioplot) library(cluster) # Parameters are set at top in YAML section commonFlag <- params$commonFlag partialFlag <- params$partialFlag dataDir <- params$dataDir outputDir <- params$outputDir commonFlag partialFlag dataDir outputDir #DEBUG #commonFlag <- TRUE #partialFlag <- TRUE enDataDir <- file.path(dataDir, allDataDir) outputDir <- outputDir pdfFile <- "en_cvresults_heatmap.pdf" vioplotFile <- "en_cvresults_vioplot.pdf" rdsFile <- "en_cvresults_cvCorMat.rds" namedTmpCvCorMatFile <- "en_cvresults_heatmap.txt" if(commonFlag) { enDataDir <- file.path(dataDir, commonDataDir) pdfFile <- paste0("common_", pdfFile) vioplotFile <- paste0("common_", vioplotFile) rdsFile <- paste0("common_", rdsFile) namedTmpCvCorMatFile <- paste0("common_", namedTmpCvCorMatFile) } if(partialFlag) { pdfFile <- paste0("partial_", pdfFile) namedTmpCvCorMatFile <- paste0("partial_", namedTmpCvCorMatFile) } dataFiles <- list.files(path=enDataDir, pattern = "*.Rdata") colors <- brewer.pal(12, "Set3") nscSet <- NULL for (fName in dataFiles){ tmp <- str_split(string = fName, pattern = "NSC_")[[1]][2] nsc <- str_split(string = tmp, pattern = "\\.")[[1]][1] if (!(nsc %in% nscSet)){ nscSet <- c(nscSet, nsc) } } dataTypes <- c("exp", "mut", "swa", "exp_mut", "exp_swa", "mut_swa", "exp_mut_swa") # Initialize cvCorMat cvCorMat <- matrix(NA, nrow=length(nscSet), ncol=length(dataTypes)) rownames(cvCorMat) <- nscSet colnames(cvCorMat) <- dataTypes
# FILL CORRELATION MATRIX (FULL ELASTIC CV-PREDICTED VS. ACTUAL CORRELATIONS). badFiles <- NULL if(!file.exists(file.path(outputDir, rdsFile))) { for (i in 1:length(nscSet)) { nsc <- nscSet[i] for (dataType in dataTypes){ rdataFile <- paste0("DATA_", dataType, "_NSC_", nsc, ".Rdata") if (rdataFile %in% dataFiles) { #cat("RDATA: ", rdataFile, "\n") load(file.path(enDataDir, rdataFile)) # object name: elNetResults. if (!is.null(elNetResults$fullEnCvResults$cvPredRsqared)){ cvCorMat[i, dataType] <- elNetResults$fullEnCvResults$cvPredR } else { badFiles <- c(badFiles, rdataFile) } rm(elNetResults) } } } rds <- list(cvCorMat=cvCorMat, badFiles=badFiles) saveRDS(rds, file=file.path(outputDir, rdsFile)) } else { tmpFile <- file.path(outputDir, rdsFile) cat("RDS: ", tmpFile, "\n") rds <- readRDS(tmpFile) cvCorMat <- rds$cvCorMat }
#noNaCvCorMat <- na.exclude(cvCorMat) noNaCvCorMat <- cvCorMat tmpCvCorMat <- cvCorMat
# All NSCs nrow(cvCorMat) # No NA NSCs nrow(na.exclude(cvCorMat))
drugAnnot <- getFeatureAnnot(rcellminerData::drugData)[["drug"]] fdaDrugAnnot <- drugAnnot[which(drugAnnot$FDA_STATUS == "FDA approved"), "NSC"] ctDrugAnnot <- drugAnnot[which(drugAnnot$FDA_STATUS == "Clinical trial"), "NSC"] isFdaApproved <- rownames(cvCorMat) %in% fdaDrugAnnot drugNames <- getDrugName(rownames(cvCorMat)) enDrugInfo <- data.frame(nsc=rownames(cvCorMat), isFdaApproved=isFdaApproved, drugNames=drugNames, moaStr=getMoaStr(rownames(cvCorMat))) write.table(enDrugInfo, file=file.path(outputDir, "enDrugInfo.txt"), sep="\t", row.names=FALSE, col.names=TRUE, quote=FALSE) write.table(Drug_MOA_Key, file=file.path(outputDir, "Drug_MOA_Key.txt"), sep="\t", row.names=FALSE, col.names=TRUE, quote=FALSE)
#boxplot(na.exclude(cvCorMat), las=2, cex.axis=0.7) x1 <- cvCorMat[,1] x2 <- cvCorMat[,2] x3 <- cvCorMat[,3] x4 <- cvCorMat[,4] x5 <- cvCorMat[,5] x6 <- cvCorMat[,6] x7 <- cvCorMat[,7] x1 <- x1[!is.na(x1)] x2 <- x2[!is.na(x2)] x3 <- x3[!is.na(x3)] x4 <- x4[!is.na(x4)] x5 <- x5[!is.na(x5)] x6 <- x6[!is.na(x6)] x7 <- x7[!is.na(x7)] outputFilePath <- file.path(outputDir, vioplotFile) pdf(file=outputFilePath, width=9, height=5) vioplot(x1, x2, x3, x4, x5, x6, x7, names=colnames(tmpCvCorMat), col="lightblue") title("Distribution of Correlation Values\n(Elastic Net CV-Predicted vs Actual)") dev.off()
cvCorMatList <- list(all=tmpCvCorMat) # swaSamples <- grepl("swa", colnames(tmpCvCorMat)) # expSamples <- grepl("exp", colnames(tmpCvCorMat)) # # diff <- rowMeans(tmpCvCorMat[, swaSamples]) - rowMeans(tmpCvCorMat[, !swaSamples]) # diff2 <- tmpCvCorMat[, "swa"] - tmpCvCorMat[, "exp"] # # a <- diff < quantile(diff, 0.10, na.rm=TRUE) # b <- diff > quantile(diff, 0.90, na.rm=TRUE) # d <- c(which(a), which(b)) # # # Can comment out line below (and one further below) to obtain plot for all drugs. (HERE) # tmpCvCorMat <- tmpCvCorMat[d,] # # sorted <- sort.int(diff[d], decreasing=TRUE, index.return=TRUE) # sorted2 <- sort.int(diff2[d], decreasing=TRUE, index.return=TRUE) # # #e <- names(sorted2$x[which(names(sorted$x) %in% names(d))]) # # # Can comment out line below to obtain plot for all drugs. (HERE) # tmpCvCorMat <- tmpCvCorMat[sorted$ix, ] # cvCorMatList[["partial"]] <- tmpCvCorMat[sorted$ix, ] cvCorVals <- na.exclude(as.numeric(tmpCvCorMat)) cvCorThreshold <- unname(quantile(cvCorVals, probs = 0.60)) cvCorMaxVals <- apply(tmpCvCorMat, MARGIN = 1, FUN = function(x){ if (!all(is.na(x))){ return(max(x, na.rm = TRUE)) } else{ return(NA) } }) cvCorMatList[["partial"]] <- tmpCvCorMat[(which(cvCorMaxVals > cvCorThreshold)), ]
tmpCvCorMat <- cvCorMatList[["all"]] if(partialFlag) { tmpCvCorMat <- cvCorMatList[["partial"]] }
tmp <- getMoaToCompounds() tmpNscSet <- rownames(tmpCvCorMat) moas <- searchListOfVectors(tmpNscSet, tmp) kinaseInhibitor <- c("STK", "PK:PRKCA", "PK:PIK3", "PK:MTOR", "YK", "PK:SYK", "PK:FLT,NTRK", "PK:AKT", "PK:CDK", "PK:MAP2K", "PK:EGFR,ERBB2", "PK:EGFR", "PK:EGFR,ERBB2", "PDGFR", "PK:KIT", "PK:VEGFR", "PK:CHEK", "PK:KIT,VEGFR", "PK:MET", "PK:EGFR,VEGFR", "PK:MET,VEGFR", "PK:BRAF", "STAT", "PK:SRC") dnaDamaging <- c("A2", "A6", "A7", "T1", "T2", "PARP", "Ds", "Rs", "Df", "Db", "ROS1")
moaClasses <- NULL statusClasses <- NULL for(nsc in names(moas)) { idx <- moas[[nsc]] if(any(names(tmp[idx]) %in% kinaseInhibitor)) { moaClasses <- c(moaClasses, "Kinase") } else if(any(names(tmp[idx]) %in% dnaDamaging)) { moaClasses <- c(moaClasses, "DNA") } else if(any(names(tmp[idx]) %in% "Ho")) { moaClasses <- c(moaClasses, "Ho") } else if(any(names(tmp[idx]) %in% "AM")) { moaClasses <- c(moaClasses, "AM") } else if(any(names(tmp[idx]) %in% "Tu")) { moaClasses <- c(moaClasses, "Tu") } else if(any(names(tmp[idx]) %in% "HDAC")) { moaClasses <- c(moaClasses, "HDAC") } else { moaClasses <- c(moaClasses, "Other") } if(nsc %in% fdaDrugAnnot) { statusClasses <- c(statusClasses, "FDA") } else if(nsc %in% ctDrugAnnot) { statusClasses <- c(statusClasses, "CT") } } annotation_row <- data.frame( MOA=moaClasses, Status=statusClasses ) rownames(annotation_row) = names(moas)
initNames <- rep("FALSE", ncol(tmpCvCorMat)) swaColors <- initNames swaColors[grepl("swa", colnames(tmpCvCorMat))] <- "TRUE" expColors <- initNames expColors[grepl("exp", colnames(tmpCvCorMat))] <- "TRUE" mutColors <- initNames mutColors[grepl("mut", colnames(tmpCvCorMat))] <- "TRUE" annotation_col <- data.frame( MUT=mutColors, SWA=swaColors, EXP=expColors ) rownames(annotation_col) = colnames(tmpCvCorMat) # Unused in pheatmap #colColors <- cbind(swaColors, expColors, mutColors) #colnames(colColors) <- c("SWATH", "Expression", "Mutations")
par(cex.main=0.7) Status <- c("black", "white") names(Status) <- c("CT", "FDA") EXP <- c("black", "white") names(EXP) <- c("TRUE", "FALSE") SWA <- c("black", "white") names(SWA) <- c("TRUE", "FALSE") MUT <- c("black", "white") names(MUT) <- c("TRUE", "FALSE") MOA <-brewer.pal(7, "Set1") names(MOA) <- c("Kinase", "DNA", "Ho", "AM", "Tu", "HDAC", "Other") anno_colors <- list(EXP=EXP, SWA=SWA, MUT=MUT, Status=Status, MOA=MOA) rowLabels <- paste0(getDrugName(rownames(tmpCvCorMat)), " (NSC", rownames(tmpCvCorMat), ")") plotTitle <- "Correlation Matrix\n(Elastic Net CV-Predicted vs Actual)" outputFilePath <- file.path(outputDir, pdfFile) # Cluster using daisy GOWER function that works with NA values # NOTE: This still will not work if all entries are NA # From: http://stackoverflow.com/questions/20438019/how-to-perform-clustering-without-removing-rows-where-na-is-present-in-r distfunc <- function(x) { daisy(x, metric="gower") } hclustfunc <- function(x) hclust(x, method="complete") idx <- apply(tmpCvCorMat, 1, function(x) all(is.na(x))) heatmapMat <- tmpCvCorMat[!idx, ] d <- distfunc(heatmapMat) hclustObj <- hclustfunc(d) #drows = dist(tmpCvCorMat, method = "minkwski") #pheatmap(test, clustering_distance_rows = drows, clustering_distance_cols = dcols) #callback <- function(hc, ...){ dendsort(hc) } # pheatmap(tmpCvCorMat, # main=plotTitle, # cluster_rows=hclustObj, # annotation_row=annotation_row, # annotation_col=annotation_col, # labels_row=rowLabels, # labels_col=rep("", ncol(tmpCvCorMat)), # annotation_colors=anno_colors, # legend = TRUE, # filename=outputFilePath, # width=7.75, # height=12, # fontsize=10, # fontsize_row=4 # ) pheatmap(tmpCvCorMat[, c("exp", "swa", "exp_swa", "exp_mut_swa", "mut_swa", "exp_mut", "mut")], main=plotTitle, cluster_rows=hclustObj, cluster_cols=FALSE, annotation_row=annotation_row, annotation_col=annotation_col, labels_row=rowLabels, labels_col=rep("", ncol(tmpCvCorMat)), annotation_colors=anno_colors, legend = TRUE, filename=outputFilePath, width=7.75, height=12, fontsize=10, fontsize_row=4 )
namedTmpCvCorMat <- data.frame(name=rowLabels, moaStr=getMoaStr(rownames(tmpCvCorMat)), tmpCvCorMat) write.table(namedTmpCvCorMat, file=file.path(outputDir, namedTmpCvCorMatFile), sep="\t", row.names=FALSE, col.names=TRUE, quote=FALSE)
tmp <- namedTmpCvCorMat[with(namedTmpCvCorMat, order(-swa)), ] kable(head(tmp, 20), row.names=FALSE)
tmp <- namedTmpCvCorMat[with(namedTmpCvCorMat, order(-exp_swa)), ] kable(head(tmp, 10), row.names=FALSE)
tmp <- namedTmpCvCorMat[with(namedTmpCvCorMat, order(-mut_swa)), ] kable(head(tmp, 10), row.names=FALSE)
tmp <- namedTmpCvCorMat[with(namedTmpCvCorMat, order(-exp_mut_swa)), ] kable(head(tmp, 10), row.names=FALSE)
sessionInfo()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.