## ----knitrSetup, include=FALSE-------------------------------------------
library(knitr)
opts_chunk$set(out.extra='style="display:block; margin: auto"',
fig.align="center", fig.width=8, fig.height=8, tidy=FALSE)
## ----install, eval=FALSE-------------------------------------------------
## if (!requireNamespace("BiocManager", quietly=TRUE))
## install.packages("BiocManager")
## BiocManager::install("rcellminer")
## BiocManager::install("rcellminerData")
## ----loadLibrary, message=FALSE, warning=FALSE---------------------------
library(rcellminer)
library(rcellminerData)
## ----searchHelp, eval=FALSE, tidy=FALSE----------------------------------
## help.search("rcellminer")
## ----searchForNscs-------------------------------------------------------
searchForNscs("nib$")
## ----plotCellminer-------------------------------------------------------
# Get Cellminer data
drugAct <- exprs(getAct(rcellminerData::drugData))
molData <- getMolDataMatrices()
# One drug
nsc <- "94600"
plots <- c("drug")
plotCellMiner(drugAct, molData, plots, nsc, NULL)
# One expression
gene <- "TP53"
plots <- c("exp")
plotCellMiner(drugAct, molData, plots, NULL, gene)
# Two drugs: Irinotecan and topotecan
nsc <- c("616348", "609699")
plots <- c("drug", "drug")
plotCellMiner(drugAct, molData, plots, nsc, NULL)
# Two genes
# NOTE: subscript out of bounds Errors likely mean the gene is not present for that data type
gene <- c("TP53", "MDM2")
plots <- c("exp", "mut", "exp")
plotCellMiner(drugAct, molData, plots, NULL, gene)
# Gene and drug to plot
nsc <- "609699"
gene <- "SLFN11"
plots <- c("exp", "cop", "drug")
plotCellMiner(drugAct, molData, plots, nsc, gene)
## ----plotDrugSets--------------------------------------------------------
# Get CellMiner data
drugAct <- exprs(getAct(rcellminerData::drugData))
drugs <- searchForNscs("camptothecin")
drugAct <- drugAct[drugs,]
mainLabel <- paste("Drug Set: Camptothecin Derivatives, Drugs:", length(drugs), sep=" ")
plotDrugSets(drugAct, drugs, mainLabel)
## ----plotStructures, fig.width=3, fig.height=3---------------------------
plotStructuresFromNscs("609699")
## ----makeDrugInfoTable, results='hide', message=FALSE--------------------
# getFeatureAnnot() returns a named list of data frames with annotation data
# for drugs ("drug") and drug activity repeat experiments ("drugRepeat").
drugAnnot <- getFeatureAnnot(rcellminerData::drugData)[["drug"]]
# Get the names and MOA for compounds with MOA entries
knownMoaDrugs <- unique(c(getMoaToCompounds(), recursive = TRUE))
knownMoaDrugInfo <- data.frame(NSC=knownMoaDrugs, stringsAsFactors = FALSE)
knownMoaDrugInfo$Name <- drugAnnot[knownMoaDrugInfo$NSC, "NAME"]
# Process all NSCs
knownMoaDrugInfo$MOA <- sapply(knownMoaDrugInfo$NSC, getMoaStr)
# Order drugs by mechanism of action.
knownMoaDrugInfo <- knownMoaDrugInfo[order(knownMoaDrugInfo$MOA), ]
# Drug_MOA_Key data frame provides further details on MOA abbrevations.
Drug_MOA_Key[c("A2", "A7"), ]
## ----computeGI50Data, results='hide', message=FALSE----------------------
negLogGI50Data <- getDrugActivityData(nscSet = knownMoaDrugInfo$NSC)
gi50Data <- 10^(-negLogGI50Data)
## ----makeIntegratedTable, results='hide', message=FALSE------------------
knownMoaDrugAct <- as.data.frame(cbind(knownMoaDrugInfo, gi50Data), stringsAsFactors = FALSE)
# This table can be written out to a file
#write.table(knownMoaDrugAct, file="knownMoaDrugAct.txt", quote=FALSE, sep="\t", row.names=FALSE, col.names=TRUE, na="NA")
## ----compareFingerPrints, results='hide', message=FALSE, warning=FALSE----
# Load sqldf
library(sqldf)
# Set up necessary data
# getFeatureAnnot() returns a named list of data frames with annotation data
# for drugs ("drug") and drug activity repeat experiments ("drugRepeat").
df <- getFeatureAnnot(rcellminerData::drugData)[["drug"]]
## Drug activities
drugAct <- exprs(getAct(rcellminerData::drugData))
## Molecular profiling data
molData <- getMolDataMatrices()
# Example filter on particular properties of the compounds
tmpDf <- sqldf("SELECT NSC, SMILES FROM df WHERE SMILES != ''")
# Compare against known MOA compounds demonstration
knownMoaDrugs <- unique(c(getMoaToCompounds(), recursive=TRUE))
ids <- tmpDf[tmpDf$NSC %in% knownMoaDrugs, "NSC"]
smiles <- tmpDf[tmpDf$NSC %in% knownMoaDrugs, "SMILES"]
# All public
#ids <- tmpDf$nsc
#smiles <- tmpDf$smiles
drugOfInterest <- "Camptothecin"
smilesOfInterest <- "CCC1(C2=C(COC1=O)C(=O)N3CC4=CC5=CC=CC=C5N=C4C3=C2)O"
# Make a vector of all the compounds to be pairwise compared
ids <- c(drugOfInterest, ids)
smiles <- c(smilesOfInterest, smiles)
## ----runComparison, message=FALSE----------------------------------------
# Run fingerprint comparison
results <- compareFingerprints(ids, smiles, verbose=FALSE)
# Display the top 5 tanimoto similarity coefficient results. The first compound
# is a self comparison.
results[1:5]
## ----plotSimilarStructures, fig.height=2, fig.width=8--------------------
# Plot top 4 results
resultsIdx <- sapply(names(results)[2:5], function(x) { which(tmpDf$NSC == x) })
resultsIds <- names(results)[2:5]
resultsSmiles <- tmpDf$SMILES[resultsIdx]
resultsIds <- c(drugOfInterest, resultsIds)
resultsSmiles <- c(smilesOfInterest, resultsSmiles)
# plotStructures() allows plotting user-included SMILES-based structures
plotStructures(resultsIds, resultsSmiles, titleCex=0.5, mainLabel="Fingerprint Results")
## ----plotCellMiner-------------------------------------------------------
nscs <- names(results)[2:5]
plotCellMiner(drugAct=drugAct, molData=molData, plots=rep("drug", length(nscs)), nscs, NULL)
## ----reinhold2015--------------------------------------------------------
# Gene of interest
gene <- "PTEN"
# Get Data
drugAct <- exprs(getAct(rcellminerData::drugData))
molData <- getMolDataMatrices()
# Get the cell lines names for cell lines meeting particular thresholds
copKnockdown <- names(which(molData[["cop"]][paste0("cop", gene), ] < -1))
expKnockdown <- names(which(molData[["exp"]][paste0("exp", gene), ] < -1.5))
mutKnockdown <- names(which(molData[["mut"]][paste0("mut", gene), ] == 1))
# Make composite pattern
pattern <- rep(0, length(molData[["cop"]][paste0("cop", gene), ]))
names(pattern) <- names(molData[["cop"]][paste0("cop", gene), ])
tmp <- Reduce(union, list(copKnockdown, expKnockdown, mutKnockdown))
pattern[tmp] <- 1
# Composite plot data
extraPlot <- list()
extraPlot[["title"]] <- "Composite Pattern"
extraPlot[["label"]] <- "Knockdown Composite (Binary)"
extraPlot[["values"]] <- pattern
# Plot data
plotCellMiner(molData=molData, plots=c("cop", "exp", "mut"), gene=gene, extraPlot=extraPlot)
# Significant drug correlations to FDA-approved/Clinical trial drugs
# getFeatureAnnot() returns a named list of data frames with annotation data
# for drugs ("drug") and drug activity repeat experiments ("drugRepeat").
drugAnnot <- getFeatureAnnot(rcellminerData::drugData)[["drug"]]
tmpDA <- drugAnnot[drugAnnot$FDA_STATUS != "-", c("NSC", "FDA_STATUS")]
tmpDrugAct <- drugAct[rownames(drugAct) %in% tmpDA$NSC,]
results <- patternComparison(pattern, tmpDrugAct)
sigResults <- results[results$PVAL < 0.01, ]
# Get names of drugs that are FDA approved or in clinical trials
getDrugName(rownames(sigResults))
## ----varma2014, fig.height=7.5, fig.width=6------------------------------
molData <- getMolDataMatrices()
# Get Data
copGenes <- removeMolDataType(rownames(molData[["cop"]]))
expGenes <- removeMolDataType(rownames(molData[["exp"]]))
genes <- intersect(copGenes, expGenes)
# Generate the appropriate rownames
expGeneLabels <- paste0("exp", genes)
copGeneLabels <- paste0("cop", genes)
a <- molData[["exp"]][expGeneLabels,]
b <- molData[["cop"]][copGeneLabels,]
allGenes <- rowCors(a, b)
#selectedOncogenes <- c("FZD1", "JAK2", "ALK", "PIK3CG", "RET", "CDC25A", "PDGFB", "PIK3CB", "WNT3")
selectedOncogenes <- c("ABL1", "ALK", "BRAF", "CCND1", "CCND3", "CCNE1", "CCNE2",
"CDC25A", "EGFR", "ERBB2", "EZH2", "FOS", "FOXL2", "HRAS",
"IDH1", "IDH2", "JAK2", "KIT", "KRAS", "MET", "MOS", "MYC",
"NRAS", "PDGFB", "PDGFRA", "PIK3CA", "PIK3CB", "PIK3CD",
"PIK3CG", "PIM1", "PML", "RAF1", "REL", "RET", "SRC", "STK11",
"TP63", "WNT10B", "WNT4", "WNT2B", "WNT9A", "WNT3", "WNT5A",
"WNT5B", "WNT10A", "WNT11", "WNT2", "WNT1", "WNT7B", "WISP1",
"WNT8B", "WNT7A", "WNT16", "WISP2", "WISP3", "FZD5", "FZD1")
# Generate the appropriate rownames
expGeneLabels <- paste0("exp", selectedOncogenes)
copGeneLabels <- paste0("cop", selectedOncogenes)
a <- molData[["exp"]][expGeneLabels,]
b <- molData[["cop"]][copGeneLabels,]
selectedOncogenesCor <- rowCors(a, b)
hist(allGenes$cor, main="", xlab="Pearson correlation between expression and copy number", breaks=200, col="lightgray", border="lightgray")
segments(x0=median(allGenes$cor), y0=0, x1=median(allGenes$cor), y1=175, lty=2, lwd=2)
text(median(allGenes$cor)+0.02, y=175, adj=0, labels="Median Correlation\nAll Genes", cex=0.75)
segments(x0=median(selectedOncogenesCor$cor), y0=0, x1=median(selectedOncogenesCor$cor), y1=140, lty=2, lwd=2, col="red")
text(median(selectedOncogenesCor$cor)+0.02, y=140, adj=0, labels="Median Correlation\nOncogenes", cex=0.75)
rug(selectedOncogenesCor$cor, col="red")
## ----idTopo1InhCors------------------------------------------------------
# Get normalized (Z-score) NCI-60 gene expression and drug activity data.
nci60DrugActZ <- exprs(getAct(rcellminerData::drugData))
nci60GeneExpZ <- getAllFeatureData(rcellminerData::molData)[["exp"]]
antiApoptosisGenes <- c("BAG1", "BAG3", "BAG4", "BCL10", "BCL2",
"BCL2A1", "BCL2L1", "BCL2L10", "BCL2L2",
"BFAR", "BIRC3", "BIRC6", "BNIP1", "BNIP2",
"BNIP3", "BNIP3L", "BRAF", "CASP3", "CD27",
"CD40LG", "CFLAR", "CIDEA", "DAPK1", "DFFA",
"FAS", "IGF1R", "MCL1", "NOL3", "TMBIM1",
"TP53", "TP73", "XIAP")
camptothecinNsc <- "94600"
# Compute table of correlations between camptothecin activity and anti-apoptosis gene expression.
pcResults <- patternComparison(nci60DrugActZ[camptothecinNsc, ], nci60GeneExpZ[antiApoptosisGenes, ])
# Adjust p-values for multiple comparisons, sort with respect to q-values.
pcResults$QVAL <- p.adjust(pcResults$PVAL, method = "fdr")
pcResults <- pcResults[order(pcResults$QVAL), ]
# Identify genes with significant negative correlations (FDR < 0.1)
pcResults[((pcResults$COR < 0) & (pcResults$QVAL < 0.10)), ]
## ----plotCptActivityVsGeneExp--------------------------------------------
colorTab <- loadNciColorSet(returnDf = TRUE)
tissueColorTab <- unique(colorTab[, c("tissues", "colors")])
plotData <- as.data.frame(t(rbind(nci60DrugActZ[camptothecinNsc, , drop=FALSE],
nci60GeneExpZ[c("TMBIM1", "BCL2L2"), ])))
colnames(plotData) <- c("Camptothecin", "TMBIM1", "BCL2L2")
plot(x=plotData$TMBIM1, y=plotData$Camptothecin, col=colorTab$colors, pch=16,
xlab="TMBIM1 mRNA exp (Z-score)", ylab="Camptothecin Activity (Z-score)",
main=paste0("Camptothecin Activity vs. TMBIM Expression (r = ",
round(pcResults["TMBIM1", "COR"], 2), ")"))
abline(lm(formula("Camptothecin ~ TMBIM1"), plotData), col="red")
legend("bottomleft", legend=tissueColorTab$tissues, col=tissueColorTab$colors,
cex=0.7, pch = 16)
plot(x=plotData$BCL2L2, y=plotData$Camptothecin, col=colorTab$colors, pch=16,
xlab="BCL2L2 mRNA exp (Z-score)", ylab="Camptothecin Activity (Z-score)",
main=paste0("Camptothecin Activity vs. BCL2L2 Expression (r = ",
round(pcResults["BCL2L2", "COR"], 2), ")"))
abline(lm(formula("Camptothecin ~ BCL2L2"), plotData), col="red")
legend("bottomleft", legend=tissueColorTab$tissues, col=tissueColorTab$colors,
cex=0.7, pch = 16)
## ----getCellMinerData----------------------------------------------------
# Get Cellminer data
# getFeatureAnnot() returns a named list of data frames with annotation data
# for drugs ("drug") and drug activity repeat experiments ("drugRepeat").
drugAnnot <- getFeatureAnnot(rcellminerData::drugData)[["drug"]]
fdaDrugAnnot <- drugAnnot[which(drugAnnot$FDA_STATUS == "FDA approved"), ]
nci60FdaDrugAct <- getDrugActivityData(nscSet = fdaDrugAnnot$NSC)
nci60GeneExp <- getAllFeatureData(rcellminerData::molData)[["xai"]]
nci60GeneMut <- getAllFeatureData(rcellminerData::molData)[["mut"]]
## ----vizDnarGeneMutations, fig.width=10, fig.height=7--------------------
dnarGeneSet <- c("APC", "APLF", "ATAD5", "ATM", "CLSPN", "ERCC6", "FANCI",
"FANCM", "GEN1", "HLTF", "MLH1", "POLD1", "POLE", "POLG",
"POLQ", "RAD54L", "REV3L", "RMI1", "SLX4", "SMARCA4", "SMC2",
"TP53", "WRN")
dnarGeneMut <- nci60GeneMut[dnarGeneSet, ]
# Identify most frequently mutated genes.
numMutLines <- apply(dnarGeneMut, MARGIN = 1, FUN = sum)
sort(numMutLines, decreasing = TRUE)
dnarGeneMutPlotData <- dnarGeneMut[order(numMutLines), ]
heatmap(dnarGeneMutPlotData, scale="none", Rowv = NA, Colv = NA, col = c("grey", "red"),
main="Deleterious mutations")
## ----vizDnarGeneExpKnockouts, fig.width=10, fig.height=7-----------------
dnarGeneExp <- nci60GeneExp[dnarGeneSet, ]
hist(dnarGeneExp, xlab="mRNA expression (log2 intensity)",
main="Distribution of DNA repair gene expression values")
# Set low expression threshold to 1st percentile of expression values.
lowExpThreshold <- quantile(dnarGeneExp, na.rm = TRUE, probs = c(0.01))
lowExpThreshold
# Construct binary (potential) expression knockout matrix.
dnarGeneExpKo <- matrix(0, nrow=nrow(dnarGeneExp), ncol=ncol(dnarGeneExp))
rownames(dnarGeneExpKo) <- rownames(dnarGeneExp)
colnames(dnarGeneExpKo) <- colnames(dnarGeneExp)
dnarGeneExpKo[which(dnarGeneExp < lowExpThreshold)] <- 1
# Restrict to wild type expression knockouts.
dnarGeneExpKo[which(dnarGeneMut == 1)] <- 0
# Identify genes with most frequent loss of expression.
numExpKoLines <- apply(dnarGeneExpKo, MARGIN = 1, FUN = sum)
sort(numExpKoLines, decreasing = TRUE)
dnarGeneExpKoPlotData <- dnarGeneExpKo[order(numExpKoLines), ]
heatmap(dnarGeneExpKoPlotData, scale="none", Rowv = NA, Colv = NA,
col = c("grey", "blue"), main="Potential expression knockouts")
## ----vizDnarGeneAlterations, fig.width=10, fig.height=7------------------
dnarGeneAlt <- matrix(0, nrow=nrow(dnarGeneExp), ncol=ncol(dnarGeneExp))
rownames(dnarGeneAlt) <- rownames(dnarGeneExp)
colnames(dnarGeneAlt) <- colnames(dnarGeneExp)
dnarGeneAlt[which(dnarGeneMut == 1)] <- 1
dnarGeneAlt[which(dnarGeneExpKo == 1)] <- 2
# Identify genes with most frequent alterations (by mutation or lack of expression).
numAltLines <- apply(dnarGeneAlt, MARGIN = 1, FUN = sum)
sort(numAltLines, decreasing = TRUE)
dnarGeneAltPlotData <- dnarGeneAlt[order(numAltLines), ]
heatmap(dnarGeneAltPlotData, scale="none", Rowv = NA, Colv = NA,
col = c("grey", "red", "blue"),
main="Altered genes by mutation (red) or low expression (blue)")
## ----checkAplfDrugAssocs, fig.width=10, fig.height=7---------------------
geneName <- "SLX4"
altLineIndices <- which(dnarGeneAlt[geneName, ] != 0)
nonAltLineIndices <- which(dnarGeneAlt[geneName, ] == 0)
drugAssocTab <- data.frame(GENE=geneName, DRUG_NAME=fdaDrugAnnot$NAME,
DRUG_NSC=fdaDrugAnnot$NSC, TTEST_PVAL=NA,
ADJ_PVAL=NA, MEAN_ACT_DIFF=NA,
stringsAsFactors = FALSE)
rownames(drugAssocTab) <- drugAssocTab$DRUG_NSC
for (drugNsc in drugAssocTab$DRUG_NSC){
ttestResult <- t.test(x=nci60FdaDrugAct[drugNsc, altLineIndices],
y=nci60FdaDrugAct[drugNsc, nonAltLineIndices])
drugAssocTab[drugNsc, "TTEST_PVAL"] <- ttestResult$p.value
drugAssocTab[drugNsc, "MEAN_ACT_DIFF"] <-
ttestResult$estimate[1] - ttestResult$estimate[2]
}
drugAssocTab$ADJ_PVAL <- p.adjust(drugAssocTab$TTEST_PVAL, method = "bonferroni")
drugAssocTab <- drugAssocTab[order(drugAssocTab$ADJ_PVAL), ]
drugAssocTab[drugAssocTab$ADJ_PVAL < 0.05,
c("GENE", "DRUG_NAME", "DRUG_NSC", "ADJ_PVAL", "MEAN_ACT_DIFF")]
meanActDiff <- drugAssocTab$MEAN_ACT_DIFF
negLog10Pval <- -log10(drugAssocTab$TTEST_PVAL)
plot(x=meanActDiff, y=negLog10Pval, xlim = c((min(meanActDiff)-1), (max(meanActDiff)+1)),
xlab="Difference between -logGI50 means (altered vs. non-altered lines)",
ylab="-log10(p-value)", main=(paste0(geneName, " Drug Response Associations")))
ptLabels <- character(length(drugAssocTab$DRUG_NAME))
numLabeledPts <- 7 # label the k most significant drug associations
ptLabels[1:numLabeledPts] <- drugAssocTab$DRUG_NAME[1:numLabeledPts]
text(x=meanActDiff, negLog10Pval, ptLabels, cex=0.9, pos=4, col="red")
## ----comparePlots, eval=FALSE--------------------------------------------
## runShinyApp("shinyComparePlots")
## ----compoundBrowser, eval=FALSE-----------------------------------------
## runShinyApp("shinyReprodPlots")
## ----compareStructures, eval=FALSE---------------------------------------
## runShinyApp("shinyCompareStructures")
## ----sessionInfo---------------------------------------------------------
sessionInfo()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.