This markdown assesses gene expression changes the correlate with drug sensitivity.
knitr::opts_chunk$set(echo = TRUE) require(remotes) if (!require(mpnstXenoModeling)) { remotes::install_github('sgosline/mpnstXenoModeling') library(mpnstXenoModeling) } mpnstXenoModeling::loadSynapse() if (!require('DT')) { install.packages("DT") library(DT) } library(dplyr) library(ggrepel)
#here we need mT drug sensitivity and gene expression #update to use `RNAseq` column pdx_data <- mpnstXenoModeling::loadPDXData() clin.tab <<- data.tab %>% dplyr::select(Sample, Age, Sex, MicroTissueQuality, MPNST, `PRC2 Status`, `Clinical Status`, Size) %>% distinct() %>% mutate(MicroTissueQuality = unlist(MicroTissueQuality)) %>% mutate(MicroTissueQuality = stringr::str_replace_all(MicroTissueQuality, 'NaN', 'ND')) %>% mutate(MicroTissueQuality = stringr::str_replace_all(MicroTissueQuality, 'Usable', 'Good'))%>% mutate(`Clinical Status` = stringr::str_replace_all(`Clinical Status`,'Alive with metastatic disease','Alive'))%>% mutate(`Clinical Status` = stringr::str_replace_all(`Clinical Status`,'NED','Alive')) rnaSeq <<- mpnstXenoModeling::loadRNASeqData() mtDrugData <- loadMicrotissueDrugData()
We can now evaluate drugs that behave differently across samples. We are primarily focused on four samples and four drugs. However, we anticipate pooling the results for each drug to carry out differential expression.
The first thing we need to do is establish good metrics for evaluating drug sensitivity. We experiment with a few here.
mtStats <- mtDrugData %>% mutate(maxKilling = 1 - (MinViability / MaxViability)) good.mt <- subset(clin.tab, MicroTissueQuality %in% c('Good', 'Robust', 'Usable')) %>% distinct() print(paste('Looking at ', nrow(good.mt), 'micro tisues')) mtStats <- mtStats %>% subset(CellLine %in% good.mt$Sample) sh <- grep("99.00", mtStats$Drug) mtStats$Drug[sh] <- rep("SHP099", length(sh)) minDrugs <- mtStats%>% mutate(hasRNASeq=(CellLine%in%rnaSeq$Sample))%>% group_by(Drug)%>% summarise(samps=n_distinct(CellLine),samplesWithRNA=count(hasRNASeq==TRUE))%>% subset(samps>2) DT::datatable(minDrugs) ##these are the drugs we want to evaluate!!! eval.drugs <- c( 'Mirdametinib', 'Mirdametinib;Trabectedin', 'Trabectedin', 'Olaparib', 'Olaparib;Trabectedin' )#, # 'Ribociclib;SHP099','Ribociclib', # 'SHP099','Trametinib','SHP099;Trametinib') drug.colors <- list( Mirdametinib = '#4e8e00', Olaparib = '#9437ff', Trabectedin = '#0a33ff', `Olaparib;Trabectedin` = '#F94040', `Mirdametinib;Trabectedin` = 'plum3' ) cbPalette <- c("#999999", "#E69F00", "#56B4E9", "#009E73", "#F94040", "#0072B2", "#D55E00", "#CC79A7") drug.colors<-cbPalette[c(2,3,4,5,8)] names(drug.colors)<-c('Mirdametinib','Olaparib','Trabectedin','Olaparib;Trabectedin','Mirdametinib;Trabectedin') eval.samps<-c('MN-2','JH-2-079c','JH-2-002','WU-225')
Now we have three metrics: maxKilling, AUC, and IC50. How do these compare across tissues?
library(pheatmap) auc.mat <- mtStats %>% dplyr::select(Drug, CellLine, auc) %>% tidyr::pivot_wider( values_from = auc, names_from = CellLine, values_fn = list(auc = mean), values_fill = 0.0 ) %>% tibble::column_to_rownames('Drug') %>% as.matrix() ##single treatments only combos <- rownames(auc.mat)[grep(';', rownames(auc.mat))] singls <- setdiff(rownames(auc.mat), combos) #pheatmap(auc.mat[singls,],cellwidth = 10,cellheight=10,main='AUC across MT data',filename='AUC_MT_single.png') # pheatmap(auc.mat[combos,],cellwidth = 10,cellheight=10,main='AUC across MT data',filename='AUC_MT_combo.png') #pheatmap(auc.mat,cellwidth = 10,cellheight=10,main='AUC across MT data') ##now let's try barplotss mtStats %>% subset(Drug %in% singls) %>% ggplot(aes( x = Drug, y = auc, ymin = auc_low, ymax = auc_hi, fill = Drug )) + geom_bar(stat = 'identity', position = 'dodge') + geom_errorbar() + facet_grid(CellLine ~ .) + ggtitle("AUC of single agents") + guides(x = guide_axis(angle = 90)) ggsave('mt_single_agent_auc_barplots.pdf') ##combos mtStats %>% subset(Drug %in% combos) %>% ggplot(aes( x = Drug, y = auc, ymin = auc_low, ymax = auc_hi, fill = Drug )) + geom_bar(stat = 'identity', position = 'dodge') + geom_errorbar(width = 0.2) + facet_grid(CellLine ~ .) + ggtitle("AUC of drug combinations") + guides(x = guide_axis(angle = 90)) ggsave('mt_combo_agent_auc_barplots.pdf') mtStats %>% subset(Drug %in% eval.drugs) %>% subset(CellLine %in% eval.samps) %>% ggplot(aes( x = Drug, y = auc, ymin = auc_low, ymax = auc_hi, fill = Drug )) + geom_bar(stat = 'identity', position = 'dodge') + geom_errorbar(width = 0.2) + facet_grid(CellLine ~ .) + ggtitle("AUC of selected drugs") + guides(x = guide_axis(n.dodge = 2)) + scale_fill_manual(values = drug.colors) ggsave('mt_selectedDrugs_auc_barplots.pdf') ##now plot IC50 mtStats$ic50[!is.finite(mtStats$ic50)] <- 0.0 tgi.mat <- mtStats %>% dplyr::select(Drug, CellLine, ic50) %>% tidyr::pivot_wider( values_from = ic50, names_from = CellLine, values_fn = list(ic50 = mean), values_fill = 0.0 ) %>% tibble::column_to_rownames('Drug') %>% as.matrix() skip <- which(rowMeans(tgi.mat) %in% c(1, 0)) if (length(skip) > 0) tgi.mat <- tgi.mat[-skip, ] try(pheatmap( tgi.mat, cellwidth = 10, cellheight = 10, main = 'IC50 across MT data', filename = 'ic50_mt.png' )) try(pheatmap(tgi.mat, cellwidth = 10, cellheight = 10, main = 'IC50 across MT data')) spi.mat <- mtStats %>% dplyr::select(Drug, CellLine, maxKilling) %>% tidyr::pivot_wider( values_from = maxKilling, names_from = CellLine, values_fn = list(maxKilling = mean), values_fill = 0 ) %>% tibble::column_to_rownames('Drug') %>% as.matrix() pheatmap(spi.mat, cellwidth = 10, cellheight = 10, main = 'max killing across mT data') pheatmap( spi.mat, cellwidth = 10, cellheight = 10, main = 'max killing across mT data', filename = 'MK_mt.png' ) ##this wont' work because it's all 1s #pheatmap(spi.mat[eval.drugs, ], # cellwidth = 10, # cellheight = 10, # main = 'max killing across mT data') #pheatmap( # spi.mat[eval.drugs, ], # cellwidth = 10, # cellheight = 10, # main = 'max killing across mT data', # filename = 'MK_mt_selDrugs.png' #)
Which metrics are best for analyzing the data?
We apparently have a lot of variability here, which is a good thing. We can now distinguish cells that are sensitive or resistant by comparing them to other cells in the collection and establishing custom, drug-specific thresholds.
drugMids <- mtStats %>% group_by(Drug) %>% summarize(aucMid = median(auc), mkMid = median(maxKilling)) #DT::datatable(drugMids) auc.thresh = 0.6 library(ggplot2) sens.class <- mtStats %>% left_join(drugMids) %>% rowwise() %>% mutate(sens = (auc <= aucMid), mksens = (maxKilling >= mkMid)) DT::datatable(sens.class)
We can now collect gene expression values by each sample to identify gene signatures in the drugs of interest.
library(ensembldb) library("EnsDb.Hsapiens.v86") library(ggrepel) database <- EnsDb.Hsapiens.v86 pmap <- ensembldb::select( database, keys = list( GeneIdFilter(rnaSeq$GENE), TxBiotypeFilter("protein_coding") ), columns = c("GENENAME") ) %>% dplyr::rename(GENE = 'GENEID') %>% right_join(rnaSeq) %>% subset(TXBIOTYPE == 'protein_coding') %>% dplyr::select(GENE, GENENAME, GENEID) %>% distinct() dds <- rnaSeq %>% subset(GENE %in% pmap$GENE) %>% mpnstXenoModeling::deseq2NormFilter() genemat <- counts(dds, normalize = TRUE) %>% as.data.frame() %>% tibble::rownames_to_column("GENEID") %>% tidyr::pivot_longer(-GENEID, names_to = 'Sample', values_to = 'gcounts') %>% left_join(pmap) %>% group_by(Sample, GENENAME) %>% summarize(gcounts = sum(gcounts)) %>% tidyr::pivot_wider(values_from = gcounts, names_from = 'Sample') %>% tibble::column_to_rownames('GENENAME') %>% as.matrix() pc <- prcomp(t(genemat), scale. = TRUE) ex.df <- as.data.frame(pc$x)%>% tibble::rownames_to_column('Sample')%>% dplyr::select('Sample','PC1','PC2')%>% left_join(clin.tab) p<-ex.df%>% ggplot(aes(x=PC1,y=PC2,col=Age,shape=MicroTissueQuality))+ geom_point()+ geom_text_repel(data = ex.df, aes(label = Sample)) p ggsave("UMAP_mt_qual_GeneExpression.pdf", p) pmap <- pmap %>% tibble::column_to_rownames('GENEID')
There is clear separation between most microtissues that are 'good/usable' (aside from 225) and those that are not. Are there any biological pathways enriched?
goodsamps <- clin.tab %>% subset(MicroTissueQuality %in% c('Good', 'Robust')) badsamps <- clin.tab %>% subset(MicroTissueQuality %in% c('Unusable')) drug.res <- ds2FactorDE( dds, ids1 = goodsamps$Sample, ids2 = badsamps$Sample, name = 'MTqual', doShrinkage = TRUE ) if (nrow(drug.res) > 0) try(mpnstXenoModeling::plotTopGenesHeatmap( drug.res, dds, pmap, paste0('MTqual'), patients = c(goodsamps$Sample, badsamps$Sample), adjpval = 0.01, upload = TRUE, parentID = 'syn25323411' )) gs.res <- drug.res %>% tibble::rownames_to_column('GENEID') %>% left_join(tibble::rownames_to_column(pmap, 'GENEID')) %>% dplyr::select(Gene = 'GENENAME', value = 'log2FoldChange') %>% mpnstXenoModeling::doGSEA(., prefix =paste0('GSEA_MTqual'), compress_output = TRUE) sig.genes <- drug.res %>% tibble::rownames_to_column('GENEID') %>% left_join(tibble::rownames_to_column(pmap, 'GENEID')) %>% subset(padj < 0.01) %>% select(GENENAME) go.res <- mpnstXenoModeling::doRegularGo(sig.genes$GENENAME, prefix = paste0('GO_MTqual'), compress_output = TRUE) annotes <- clin.tab%>% dplyr::select(Sample, Age, Sex, `PRC2 Status`, `Clinical Status`, MicroTissueQuality)%>% tibble::remove_rownames()%>% tibble::column_to_rownames('Sample') pheatmap(log10(0.01+genemat[sig.genes$GENENAME,]), annotation_col = annotes, clustering_distance_cols = 'correlation', clustering_method = 'ward.D2', filename='allMTqual_diffex_heatmap.pdf',show_rownames = FALSE)
The goal for this is to subset the patients by whether or not they are sensitive to different drugs then compare the expression for each. There are $nrow(diff.drugs)
drugs that exhibit variable behavior with an AUC less than $auc.thresh
.
##let's rename the samples a bit? all.res <- lapply(unique(eval.drugs), function(x) { ddf <- sens.class %>% subset(Drug == x) %>% dplyr::select(Sample = 'CellLine', 'auc') %>% right_join(ex.df) %>% tidyr::replace_na(list(auc = 1.0)) %>% mutate(auc = as.numeric(auc)) p <- ddf %>% ggplot(aes( x = PC1, y = PC2, color = auc, shape = MicroTissueQuality )) + geom_point() + geom_text_repel(data = ddf, aes(label = Sample)) + ggtitle(paste0('Expression by ', x, ' AUC')) p #ggsave(paste0('umapBy',x,'MT_AUC.png'),p) sens.pats <- subset(sens.class, Drug == x) %>% subset(sens == TRUE) res.pats <- subset(sens.class, Drug == x) %>% subset(sens == FALSE) ##add col data to dds drug.res <- ds2FactorDE( dds, ids1 = sens.pats$CellLine, ids2 = res.pats$CellLine, name = x, doShrinkage = TRUE ) if (nrow(drug.res) > 0) try(mpnstXenoModeling::plotTopGenesHeatmap( drug.res, dds, pmap, paste0('AUC_MT', x), patients = c(sens.pats$CellLine, res.pats$CellLine), adjpval = 0.01, upload = FALSE, parentID = 'syn25323411' )) gs.res <- drug.res %>% tibble::rownames_to_column('GENEID') %>% left_join(tibble::rownames_to_column(pmap, 'GENEID')) %>% dplyr::select(Gene = 'GENENAME', value = 'log2FoldChange') %>% doGSEA(., prefix = paste0('GSEA_AUC', x), compress_output = TRUE) sig.genes <- drug.res %>% tibble::rownames_to_column('GENEID') %>% left_join(tibble::rownames_to_column(pmap, 'GENEID')) %>% subset(padj < 0.01) %>% select(GENENAME) go.res <- doRegularGo(sig.genes$GENENAME, prefix = paste0('GO_AUC', x), compress_output = TRUE) return(drug.res %>% mutate(Drug = x)) }) full.res <- do.call(rbind, all.res) write.table(full.res, file = 'AUCMTGenesAtMidAUC.csv') full.res %>% group_by(Drug) %>% summarize(sigGenes = count(padj < 0.05, na.rm = T)) %>% DT::datatable()
How do we visualize the overlap between genes? - We can plot the similarity of drugs by how similar their gene expression values are... - We can do massive venn diagrams
##spread LFC values across a matrix, then make PCA of matrix merged <- full.res %>% tibble::rownames_to_column('geneid') %>% tidyr::separate(geneid, into = c('GENE', 'Version'), sep = '\\.') %>% left_join(pmap) %>% dplyr::select(GENENAME, log2FoldChange, Drug) %>% subset(!is.na(GENENAME)) %>% tidyr::pivot_wider( values_from = log2FoldChange, names_from = Drug, values_fill = list(log2FoldChange = 0.0), values_fn = list(log2FoldChange = mean) ) mmat <- merged %>% tibble() %>% column_to_rownames('GENENAME') %>% as.matrix() mmat[which(is.na(mmat), arr.ind = T)] <- 0.0 pc <- prcomp(t(mmat), scale. = FALSE) library(ggrepel) ##which are outliers? df <- pc$x %>% as.data.frame() %>% tibble::rownames_to_column('Drug') p <- df %>% ggplot(aes(x = PC1, y = PC2, col = Drug)) + geom_point() + geom_text_repel(data = subset(df, abs(PC1) > 50 | abs(PC2) > 15), aes(label = Drug)) p ggsave('PCA_MT_byDiffExAUC.png', p)
By running GO enrichment on each set of genes we can determine if there are specific GO terms that are enriched or depleted in each set.
###now let's do GO enrichment go.res <- lapply(all.res, function(drug.res) { x = drug.res$Drug[1] genes = pmap[rownames(subset(drug.res, padj < 0.01)), 'GENENAME'] res2 <- mpnstXenoModeling::doRegularGo(genes, prefix = paste0('MT_AUC', x),compress_output = TRUE) res2 %>% mutate(Drug = x) }) full.go <- do.call(rbind, go.res) full.go %>% group_by(Drug) %>% summarize(sigTerms = count(p.adjust < 0.05, na.rm = T)) %>% DT::datatable()
The goal for this is to subset the patients by whether or not they are sensitive to different drugs then compare the expression for each. There are $nrow(diff.drugs)
drugs that exhibit variable behavior with an AUC less than $auc.thresh
.
##let's rename the samples a bit? mk.all.res <- lapply(eval.drugs, function(x) { print(x) ddf <- sens.class %>% subset(Drug == x) %>% dplyr::select(Sample = 'CellLine', 'maxKilling') %>% right_join(ex.df) %>% tidyr::replace_na(list(maxKilling = 0)) %>% mutate(maxKilling = as.numeric(maxKilling)) p <- ddf %>% ggplot(aes( x = PC1, y = PC2, color = maxKilling, shape = MicroTissueQuality )) + geom_point() + geom_text_repel(data = ddf, aes(label = Sample)) + ggtitle(paste0('Expression by ', x, ' MaxKilling')) p # ggsave(paste0('umapBy', x, 'MT_MK.png'), p) sens.pats <- subset(sens.class, Drug == x) %>% subset(mksens == TRUE) res.pats <- subset(sens.class, Drug == x) %>% subset(mksens == FALSE) ##add col data to dds if (nrow(sens.pats) == 0 || nrow(res.pats) == 0) return(NULL) drug.res <- ds2FactorDE( dds, ids1 = sens.pats$CellLine, ids2 = res.pats$CellLine, name = x, doShrinkage = TRUE ) if (nrow(drug.res) > 0) try(mpnstXenoModeling::plotTopGenesHeatmap( drug.res, dds, pmap, paste0('MT_MK', x), patients = c(sens.pats$CellLine, res.pats$CellLine), adjpval = 0.01, upload = FALSE, parentID = 'syn25323411' )) gs.res <- drug.res %>% tibble::rownames_to_column('GENEID') %>% left_join(tibble::rownames_to_column(pmap, 'GENEID')) %>% dplyr::select(Gene = 'GENENAME', value = 'log2FoldChange') %>% doGSEA(., prefix = paste0('GSEA_MK', x), compress_output = TRUE) sig.genes <- drug.res %>% tibble::rownames_to_column('GENEID') %>% left_join(tibble::rownames_to_column(pmap, 'GENEID')) %>% subset(padj < 0.01) %>% select(GENENAME) go.res <- doRegularGo(sig.genes$GENENAME, prefix = paste0('GO_MK', x), compress_output = TRUE) return(drug.res %>% mutate(Drug = x)) }) full.mk.res <- do.call(rbind, mk.all.res) write.table(full.mk.res, file = 'maxKillMTGenesAtMidMK.csv') full.mk.res %>% group_by(Drug) %>% summarize(sigGenes = count(padj < 0.05, na.rm = T)) %>% DT::datatable()
Now we can plot gene expression similarity by sensitivity via max killing
merged <- full.mk.res %>% tibble::rownames_to_column('geneid') %>% tidyr::separate(geneid, into = c('GENE', 'Version'), sep = '\\.') %>% left_join(pmap) %>% dplyr::select(GENENAME, log2FoldChange, Drug) %>% subset(!is.na(GENENAME)) %>% tidyr::pivot_wider( values_from = log2FoldChange, names_from = Drug, values_fill = list(log2FoldChange = 0.0), values_fn = list(log2FoldChange = mean) ) mmat <- merged %>% tibble::column_to_rownames('GENENAME') %>% as.matrix() mmat[which(is.na(mmat), arr.ind = T)] <- 0.0
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.