This markdown assesses gene expression changes the correlate with drug sensitivity.
knitr::opts_chunk$set(echo = TRUE) if(!require('DT')){ install.packages("DT") library(DT) } library(dplyr)
#mpnstXenoModeling::loadPDXData() mpnstXenoModeling::loadSynapse() rnaSeq <- mpnstXenoModeling::loadRNASeqData() syn = mpnstXenoModeling::synapseLogin() drugStats <- syn$tableQuery("select * from syn25955439")$asDataFrame() minDrugs <- drugStats %>% mutate(hasRNASeq = (individualID %in% rnaSeq$Sample)) %>% group_by(drug) %>% summarise(samples = n_distinct(individualID), samplesWithRNA = count(hasRNASeq == TRUE)) %>% subset(samples > 2) DT::datatable(minDrugs)
We can now evaluate drugs that behave differently across samples. We are primarily focused on four samples and four drugs. However, we anticipate pooling the results for each drug to carry out differential expression.
For the PDXs, have 10 drugs to choose from at this point, 8 of which have at least 4 samples (let's remove olaparib and trabectidin)
##let's plot the stats for each drug library(pheatmap) auc.mat <- drugStats %>% dplyr::select(drug, individualID, AUC) %>% tidyr::pivot_wider( values_from = AUC, names_from = individualID, values_fill = list(AUC = 0) ) %>% tibble::column_to_rownames('drug') %>% as.matrix() tgi.mat <- drugStats %>% dplyr::select(drug, individualID, TGI) %>% tidyr::pivot_wider( values_from = TGI, names_from = individualID, values_fill = list(TGI = 0) ) %>% tibble::column_to_rownames('drug') %>% as.matrix() spi.mat <- drugStats %>% dplyr::select(drug, individualID, SPI) %>% tidyr::pivot_wider( values_from = SPI, names_from = individualID, values_fill = list(SPI = 0) ) %>% tibble::column_to_rownames('drug') %>% as.matrix()
It appears there is an outlier, so we can remove it.
#drugStats<-drugStats%>% # subset(individualID!='WU-545') DT::datatable(drugStats) auc.mat <- drugStats %>% dplyr::select(drug, individualID, AUC) %>% tidyr::pivot_wider( values_from = AUC, names_from = individualID, values_fill = list(AUC = 0) ) %>% tibble::column_to_rownames('drug') %>% as.matrix() pheatmap(auc.mat, cellwidth = 10, cellheight = 10, main = 'AUC across PDX data') pheatmap( auc.mat, cellwidth = 10, cellheight = 10, main = 'AUC across PDX data', filename = 'AUC_pdx.png' ) tgi.mat <- drugStats %>% dplyr::select(drug, individualID, TGI) %>% tidyr::pivot_wider( values_from = TGI, names_from = individualID, values_fill = list(TGI = 0) ) %>% tibble::column_to_rownames('drug') %>% as.matrix() pheatmap(tgi.mat, cellwidth = 10, cellheight = 10, main = 'TGI across PDX data') pheatmap( tgi.mat, cellwidth = 10, cellheight = 10, main = 'TGI across PDX data', filename = 'TGI_pdx.png' ) spi.mat <- drugStats %>% dplyr::select(drug, individualID, SPI) %>% tidyr::pivot_wider( values_from = SPI, names_from = individualID, values_fill = list(SPI = 0) ) %>% tibble::column_to_rownames('drug') %>% as.matrix() pheatmap(spi.mat, cellwidth = 10, cellheight = 10, main = 'SPI across PDX data') pheatmap( spi.mat, cellwidth = 10, cellheight = 10, main = 'SPI across PDX data', filename = 'SPI_pdx.png' )
We are still missing a lot of data here!!!
We apparently have a lot of variability here, which is a good thing. We can now distinguish cells that are sensitive by looking at the median value of each statistic.
drugMids <- drugStats %>% group_by(drug) %>% summarize(aucMid = median(AUC), spiMid = median(SPI), tgiMid = median(TGI)) DT::datatable(drugMids)
Now we can use these customized mid points to assess sensitivty.
library(ggplot2) sampCounts <- drugStats %>% group_by(drug) %>% summarize(n_samps = n_distinct(individualID)) %>% left_join(drugMids) sens.classes <- drugStats %>% left_join(sampCounts) %>% subset(n_samps > 2) %>% mutate( aucSens = (AUC < aucMid), spiSens = (SPI < spiMid), tgiSens = (TGI < tgiMid) ) #drugStats%>%left_join(sampCounts)%>% # mutate(aucSens=(AUC<aucMid),spiSens=(SPI<spiMid),tgiSens=(TGI<tgiMid)) pl <- lapply(c('AUC', 'SPI', 'TGI'), function(met) { p <- drugStats %>% subset(drug %in% sens.classes$drug) %>% dplyr::rename(Metric = met) %>% ggplot(aes(y = Metric, x = individualID, fill = drug)) + geom_bar(stat = 'identity', position = 'dodge') + ggtitle(met) print(p) }) # cowplot::plot_grid(plotlist=pl) # #diff.drugs=sens.classes%>% # # group_by(drug)%>% # # summarize(res=count(!sens),sens=count(sens))%>% # # subset(sens>1)%>% # # subset(res>1) # # library(ggplot2) # # p<-ggplot(subset(drugStats,drug%in%sampCounts$drug),aes(x=drug,y=AUC,fill=individualID))+ # geom_bar(stat='identity',position='dodge')+scale_x_discrete(guide = guide_axis(angle=90)) # #ggsave(paste0('drugsWith2SensAt',auc.thresh,'.png'),p) # p # # p<-ggplot(subset(drugStats,drug%in%sampCounts$drug),aes(x=drug,y=TGI,fill=individualID))+ # geom_bar(stat='identity',position='dodge')+scale_x_discrete(guide = guide_axis(angle=90)) # #ggsave(paste0('drugsWith2SensAt',auc.thresh,'.png'),p) # p
We now grab the gene expression data and determine expression-based signatures of drug response.
library(ensembldb) library(EnsDb.Hsapiens.v86) database <- EnsDb.Hsapiens.v86 pmap <- ensembldb::select( database, keys = list( GeneIdFilter(rnaSeq$GENE), TxBiotypeFilter("protein_coding") ), columns = c("GENENAME") ) %>% dplyr::rename(GENE = 'GENEID') %>% right_join(rnaSeq) %>% subset(TXBIOTYPE == 'protein_coding') %>% dplyr::select(GENE, GENENAME, GENEID) %>% distinct() %>% tibble::column_to_rownames('GENEID') dds <- rnaSeq %>% subset(GENE %in% pmap$GENE) %>% mpnstXenoModeling::deseq2NormFilter()
We take the midpoint of the SPI value for each drug and compare samples that are above vs. below that midpoint
##let's rename the samples a bit? all.res <- lapply(c('trabectedin','olaparib'), function(x) { sens.pats <- subset(sens.classes, drug == x) %>% subset(spiSens == TRUE) res.pats <- subset(sens.classes, drug == x) %>% subset(spiSens == FALSE) ##add col data to dds #print(x) #print(sens.pats) #print(res.pats) drug.res <- mpnstXenoModeling::ds2FactorDE( dds, ids1 = sens.pats$individualID, ids2 = res.pats$individualID, name = x, doShrinkage = TRUE ) sigs <- subset(drug.res, padj < 0.01) goTerms = data.frame() if (nrow(sigs > 2)) { mpnstXenoModeling::plotTopGenesHeatmap( drug.res, dds, pmap, x, patients = c(sens.pats$individualID, res.pats$individualID), adjpval = 0.01, upload = FALSE, parentID = 'syn25323411' ) #goTerms<-pmap[rownames(subset(drug.res,padj<0.01)),'GENENAME']%>%doRegularGo(prefix=paste0('GO_',x)) } return(mutate(drug.res, Drug = x)) }) full.genes <- do.call(rbind, all.res) #full.genes<-do.call(rbind,lapply(all.res,function(x) x$genes)) #full.go<-do.call(rbind,lapply(all.res,function(x) x$goTerms)) #full.gsea<-do.call(rbind,lapply(all.res,function(x) x$gseaTerms)) write.table(full.genes, file = 'SPIPDXGenesAtmid.csv') full.genes %>% group_by(Drug) %>% summarize(sigGenes = count(padj < 0.01, na.rm = T)) %>% DT::datatable()
How do we visualize the overlap between genes? - We can plot the similarity of drugs by how similar their gene expression values are... - We can do massive venn diagrams
##spread LFC values across a matrix, then make PCA of matrix merged <- full.genes %>% tibble::rownames_to_column('geneid') %>% tidyr::separate(geneid, into = c('GENE', 'Version'), sep = '\\.') %>% left_join(pmap) %>% dplyr::select(GENENAME, log2FoldChange, Drug) %>% subset(!is.na(GENENAME)) %>% tidyr::pivot_wider( values_from = log2FoldChange, names_from = Drug, values_fill = list(log2FoldChange = 0.0), values_fn = list(log2FoldChange = mean) ) mmat <- merged %>% column_to_rownames('GENENAME') %>% as.matrix() mmat[which(is.na(mmat), arr.ind = T)] <- 0.0 pc <- prcomp(t(mmat), scale. = FALSE) library(ggrepel) ##which are outliers? df <- pc$x %>% as.data.frame() %>% tibble::rownames_to_column('Drug') p <- df %>% ggplot(aes(x = PC1, y = PC2, col = Drug)) + geom_point() + geom_text_repel(data = df, aes(label = Drug)) p ggsave('PCA_PDX_byDiffExSPI.png', p)
Then we can do GSEA on the genes resulting from this
go.res <- lapply(all.res, function(drug.res) { x = drug.res$Drug[1] genes.with.values <- cbind(drug.res, pmap[rownames(drug.res), ]) %>% dplyr::select(Gene = 'GENENAME', value = 'log2FoldChange') res2 = mpnstXenoModeling::doGSEA( genes.with.values, prot.univ = NULL, prefix = paste0('GSEA_PDX_SPI', x), useEns = FALSE, compress_output = TRUE ) sig.genes <- cbind(drug.res, pmap[rownames(drug.res), ])%>% subset(padj < 0.05) res3<-mpnstXenoModeling::doRegularGo(sig.genes$GENENAME,prefix=paste0('GO_PDX_SPI',x),compress_output=FALSE) #drug.res res2 %>% mutate(Drug = x) }) full.go <- do.call(rbind, go.res) full.go %>% group_by(Drug) %>% summarize(sigTerms = count(p.adjust < 0.05, na.rm = T)) %>% DT::datatable()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.