In sgosline/mpnstXenoModeling: MPNST Xenograft Modeling

This markdown evaluates the behavior of pairs of drugs across samples via correlation and then computes the expression changes that occur upon synergistic activity.

knitr::opts_chunk$set(echo = TRUE)

require(remotes)
if(!require(mpnstXenoModeling)){
  remotes::install_github('sgosline/mpnstXenoModeling')
  library(mpnstXenoModeling)
}


if(!require('DT')){
  install.packages("DT",repos = "http://cran.us.r-project.org")
  library(DT)
}

library(dplyr)
library(ggrepel)

Load drug data and expression data

if(!exists("dataLoaded")){
  dataLoaded=TRUE
  mpnstXenoModeling::loadPDXData()
  mtDrugData <- loadMicrotissueDrugData()
  res<-loadPDXDrugData()
  rnaSeq<-loadRNASeqData()
#  pdxDrugStats<-res$summary
 # drugData=res$dose, summary = pdxDrugStats
}

Synergy detection in Microtissues

For each combination and each sample, we want to see if the AUC is lower than what we see in the individual drugs. So we filter out the combinations and label them as synergistic as such.

mtStats <- mtDrugData

good.mt <- subset(clin.tab,MicroTissueQuality%in%c('Good','Robust','Usable'))%>%distinct()

#print(paste('Looking at ',nrow(good.mt),'micro tisues'))

#i think this is done?
sh<-grep("99.00",mtStats$Drug)
if(length(sh)>0)
  mtStats$Drug[sh]<-rep("SHP099",length(sh))


##these are the drugs we want to evaluate!!!
combos=mtStats$Drug[grep(';',mtStats$Drug)]
singles<-setdiff(mtStats$Drug,combos)

cdrugs<-lapply(combos,function(x) unlist(stringr::str_split(x,';')))
names(cdrugs)<-combos

comboStats<-mtStats%>%subset(Drug%in%combos)%>%dplyr::select(Drug,CellLine,auc)%>%
  tidyr::separate(Drug,into=c("Drug1","Drug2"),sep=';')

singleStats<-mtStats%>%subset(Drug%in%unlist(cdrugs))%>%dplyr::select(Drug,CellLine,auc)

combined<-comboStats%>%
  left_join(dplyr::rename(singleStats,Drug1='Drug',auc1='auc'))%>%
  left_join(dplyr::rename(singleStats,Drug2='Drug',auc2='auc'))%>%
  mutate(synergistic=(auc<auc1&auc<auc2))%>%
  arrange(synergistic)%>%
  mutate(synergistic=as.numeric(synergistic))

comb.stats<-combined%>%
  group_by(Drug1,Drug2)%>%
  mutate(nSyn=sum(synergistic))

DT::datatable(comb.stats)

write.csv(comb.stats,file='drugCombinationStats.csv')

Can we do the same for max killing? Or is it pointless?

mtStats<-mtStats%>%
  mutate(maxKilling=(MaxViability-MinViability))

comboStats<-mtStats%>%
  subset(Drug%in%combos)%>%
  dplyr::select(Drug,CellLine,maxKilling)%>%
  tidyr::separate(Drug,into=c("Drug1","Drug2"),sep=';')

singleStats<-mtStats%>%subset(Drug%in%unlist(cdrugs))%>%
  dplyr::select(Drug,CellLine,maxKilling)

combined<-comboStats%>%
  left_join(dplyr::rename(singleStats,Drug1='Drug',mk1='maxKilling'))%>%
  left_join(dplyr::rename(singleStats,Drug2='Drug',mk2='maxKilling'))%>%
  mutate(mksynergistic=(maxKilling>mk1&maxKilling>mk2))%>%
  arrange(mksynergistic)%>%
      mutate(mksynergistic=as.numeric(mksynergistic))


mk.comb.stats<-combined%>%group_by(Drug1,Drug2)%>%
  mutate(nSynMK=sum(mksynergistic))

DT::datatable(mk.comb.stats)

write.csv(mk.comb.stats,file='drugMKCombinationStats.csv')

Max killing doesn't really identify any synergistic compounds

mtStats <- mtDrugData

good.mt <- subset(clin.tab,MicroTissueQuality%in%c('Good','Robust','Usable'))%>%distinct()

#print(paste('Looking at ',nrow(good.mt),'micro tisues'))

#i think this is done?
sh<-grep("99.00",mtStats$Drug)
if(length(sh)>0)
  mtStats$Drug[sh]<-rep("SHP099",length(sh))


##these are the drugs we want to evaluate!!!
combos=mtStats$Drug[grep(';',mtStats$Drug)]
singles<-setdiff(mtStats$Drug,combos)

cdrugs<-lapply(combos,function(x) unlist(stringr::str_split(x,';')))
names(cdrugs)<-combos

comboStats<-mtStats%>%subset(Drug%in%combos)%>%dplyr::select(Drug,CellLine,ic50)%>%
  tidyr::separate(Drug,into=c("Drug1","Drug2"),sep=';')

singleStats<-mtStats%>%subset(Drug%in%unlist(cdrugs))%>%
  dplyr::select(Drug,CellLine,ic50)
combined<-comboStats%>%
  left_join(dplyr::rename(singleStats,Drug1='Drug',ic1='ic50'))%>%
  left_join(dplyr::rename(singleStats,Drug2='Drug',ic2='ic50'))%>%
  mutate(icSyn=(ic50>ic1&ic50>ic2))%>%
  arrange(icSyn)%>%
  mutate(icSyn=as.numeric(icSyn))

comb.stats<-combined%>%
  group_by(Drug1,Drug2)%>%
  mutate(nSyn=sum(icSyn))

DT::datatable(comb.stats)

write.csv(comb.stats,file='drugCombinationIC50Stats.csv')

Drug correlation

We now want to evaluate the behavior of single drugs to determine which ones were anti-correlated. Are drugs that are correlated or anti-correlated more likely to be synergystic? First we check by AUC

library(pheatmap)
auc.mat<-mtStats%>%
  dplyr::select(Drug,CellLine,auc)%>%
  tidyr::pivot_wider(values_from=auc,names_from=CellLine,
                     values_fn=list(auc=mean),values_fill=0.0)%>%
  tibble::column_to_rownames('Drug')%>%
  as.matrix()

  ##single treatments only
  combos<-rownames(auc.mat)[grep(';',rownames(auc.mat))]
  singls<-setdiff(rownames(auc.mat),combos)

  ##can we plot correlation between AUC values? 
  pheatmap(cor(t(auc.mat[singls,]),method='spearman'),cellheight = 10,cellwidth = 10,filename='auc_corHeatmap.pdf')

  cortab<-cor(t(auc.mat[singls,]))%>%as.data.frame()%>%
    tibble::rownames_to_column('firstDrug')%>%
    tidyr::pivot_longer(cols=c(-firstDrug),values_to='corVal',names_to='otherDrug')%>%
    arrange(corVal)%>%
    tidyr::unite(c(firstDrug,otherDrug),col='combo',sep=';')%>%
    rowwise()%>%
    mutate(combo=paste(sort(unlist(strsplit(combo,split=';'))),collapse=';'))%>%
    dplyr::select(combo,corVal)%>%distinct()%>%
    arrange(corVal)%>%
    subset(corVal!=1.0)

  #cortab$combo<-as.factor(cortab$combo)

  mt.combs<-comb.stats%>%
    tidyr::unite(Drug1,Drug2,sep=';',col='combo')%>%
    dplyr::select(combo,nSyn)%>%distinct()%>%
    mutate(label=combo)

  cortab%>%
    inner_join(mt.combs)%>%mutate(combo=as.factor(combo))%>%
    ggplot(aes(x=nSyn,y=corVal,col=nSyn))+
    geom_point()+geom_text(aes(label=label))
  ggsave('aucCorrelationDotPlot.pdf',width=12)
  cortab%>%
    left_join(mt.combs)%>%    
    mutate(combo=as.factor(combo))%>%
    ggplot(aes(x=combo,y=corVal,fill=nSyn))+geom_bar(stat='identity')+
    geom_text(aes(label=label),position='dodge')
  ggsave('aucCorrelationBarplot.pdf',width=12)

  cortab%>%subset(abs(corVal)>0.6)%>%DT::datatable()

Then we check by IC50

auc.mat<-mtStats%>%
  dplyr::select(Drug,CellLine,ic50)%>%
  tidyr::pivot_wider(values_from=ic50,names_from=CellLine,
                     values_fn=list(ic50=mean),values_fill=0.0)%>%
  tibble::column_to_rownames('Drug')%>%
  as.matrix()

  ##single treatments only
  combos<-rownames(auc.mat)[grep(';',rownames(auc.mat))]
  singls<-setdiff(rownames(auc.mat),combos)

  ##can we plot correlation between AUC values? 
  pheatmap(cor(t(auc.mat[singls,]),method='spearman'),cellheight = 10,cellwidth = 10,filename='ic50_corHeatmap.pdf')

  cortab<-cor(t(auc.mat[singls,]))%>%as.data.frame()%>%
    tibble::rownames_to_column('firstDrug')%>%
    tidyr::pivot_longer(cols=c(-firstDrug),values_to='corVal',names_to='otherDrug')%>%
    arrange(corVal)%>%
    tidyr::unite(c(firstDrug,otherDrug),col='combo',sep=';')%>%
    rowwise()%>%
    mutate(combo=paste(sort(unlist(strsplit(combo,split=';'))),collapse=';'))%>%
    dplyr::select(combo,corVal)%>%distinct()%>%
    arrange(corVal)%>%
    subset(corVal!=1.0)

  #cortab$combo<-as.factor(cortab$combo)

  mt.combs<-comb.stats%>%
    tidyr::unite(Drug1,Drug2,sep=';',col='combo')%>%dplyr::select(combo,nSyn)%>%distinct()%>%
    mutate(label=combo)

  cortab%>%
    inner_join(mt.combs)%>%mutate(combo=as.factor(combo))%>%
    ggplot(aes(x=nSyn,y=corVal,col=nSyn))+
    geom_point()+geom_text(aes(label=label))
  ggsave('ic50CorrelationDotPlot.pdf',width=12)
  cortab%>%
    left_join(mt.combs)%>%    
    mutate(combo=as.factor(combo))%>%
    ggplot(aes(x=combo,y=corVal,fill=nSyn))+geom_bar(stat='identity')+
    geom_text(aes(label=label),position='dodge')
  ggsave('ic50CorrelationBarplot.pdf',width=12)

  cortab%>%subset(abs(corVal)>0.6)%>%DT::datatable()

Also, how does this correlation in MTs perform in the PDXs?

Synergy detection in PDXs

For each combination and each sample, we want to see if the AUC is lower than what we see in the individual drugs.

##these are the drugs we want to evaluate!!!
combos=pdxDrugStats$drug[grep('+',pdxDrugStats$drug,fixed=TRUE)]

cdrugs<-lapply(combos,function(x) unlist(stringr::str_split_fixed(x,'\\+',n=2)))
names(cdrugs)<-combos

pcomboStats<-pdxDrugStats%>%subset(drug%in%combos)%>%dplyr::select(drug,individualID,AUC)%>%
  tidyr::separate(drug,into=c("Drug1","Drug2"),sep='\\+')

psingleStats<-pdxDrugStats%>%subset(drug%in%unlist(cdrugs))%>%dplyr::select(drug,individualID,AUC)

combined<-pcomboStats%>%
  left_join(dplyr::rename(psingleStats,Drug1='drug',auc1='AUC'))%>%
  left_join(dplyr::rename(psingleStats,Drug2='drug',auc2='AUC'))%>%
  mutate(synergistic=(AUC<auc1&AUC<auc2))%>%
  mutate(synergistic=as.numeric(synergistic))%>%
  arrange(synergistic)

pcomb.stats<-combined%>%group_by(Drug1,Drug2)%>%
  mutate(nSyn=sum(synergistic))

DT::datatable(pcomb.stats)

write.csv(pcomb.stats,file='drugCombinationStatsInPDX.csv')

RNA Expression

We can now collect gene expression values by each sample to identify gene signatures in the drugs of interest.

 library(ensembldb)
 library("EnsDb.Hsapiens.v86")
  library(ggrepel)
  database <- EnsDb.Hsapiens.v86
  pmap <- ensembldb::select(database, 
                              keys=list(GeneIdFilter(rnaSeq$GENE),
                                        TxBiotypeFilter("protein_coding")),
        columns = c("GENENAME"))%>%
      dplyr::rename(GENE='GENEID')%>%
      right_join(rnaSeq)%>%
      subset(TXBIOTYPE=='protein_coding')%>%
      dplyr::select(GENE,GENENAME,GENEID)%>%distinct()

  dds<-rnaSeq%>%
    subset(GENE%in%pmap$GENE)%>%
    mpnstXenoModeling::deseq2NormFilter()

  genemat<-counts(dds,normalize=TRUE)%>%
    as.data.frame()%>%
    tibble::rownames_to_column("GENEID")%>%
    tidyr::pivot_longer(-GENEID,names_to='Sample',values_to='gcounts')%>%
    left_join(pmap)%>%
    group_by(Sample,GENENAME)%>%
    summarize(gcounts=sum(gcounts))%>%
    tidyr::pivot_wider(values_from=gcounts,names_from='Sample')%>%
    tibble::column_to_rownames('GENENAME')%>%
    as.matrix()


  diffexFromTab<-function(tab,dds,pmap){
      x=unique(paste0(tab$Drug1,tab$Drug2))
      print(x)
      sens=tab$CellLine[tab$synergistic]
      nonsens=tab$CellLine[!tab$synergistic]
  ##add col data to dds

    drug.res <- ds2FactorDE(dds, ids1=sens,
                                    ids2=nonsens,name=x,doShrinkage=TRUE)
    if(nrow(drug.res)>0)
      try(mpnstXenoModeling::plotTopGenesHeatmap(drug.res, dds,
                                             pmap,                                        
                                             paste0('AUC_MT',x),
                                          patients=c(sens,nonsens),
                                           adjpval=0.05, 
                                           upload=FALSE, 
                                           parentID='syn25323411'))

    return(drug.res%>%mutate(Drug=x))  
  }

  combos.to.eval<-subset(comb.stats,nSyn%in%c(2,3))%>%
    dplyr::select(Drug1,Drug2,CellLine,synergistic)%>%
    group_by(Drug1,Drug2)%>%
    group_map( ~ diffexFromTab(.x,dds,pmap),.keep=TRUE)


  full.res<-do.call(rbind,combos.to.eval)

  write.table(full.res,file='AUCMTGenesSynAUC.csv')


full.res%>%
  group_by(Drug)%>%
  summarize(sigGenes=count(padj<0.05,na.rm=T))%>%
  DT::datatable()

GO Enrichment

By running GO enrichment on each set of genes we can determine if there are specific GO terms that are enriched or depleted in each set.

###now let's do GO enrichment
go.res<-lapply(combos.to.eval,function(drug.res){
    x=drug.res$Drug[1]

    genes<-drug.res%>%
      subset(padj<0.01)%>%
      tibble::rownames_to_column('GENEID')%>%
      tidyr::separate(GENEID,into=c("GENE","VERSION"))%>%
      left_join(pmap)%>%
        dplyr::select(GENENAME)%>%
        distinct()
    genes=genes$GENENAME
    #pmap[rownames(subset(drug.res,padj<0.01)),'GENENAME']
    res2<-mpnstXenoModeling::doRegularGo(genes,prefix=paste0('MT_AUC',x)) 
    #genes.with.values<-cbind(drug.res,pmap[rownames(drug.res),])%>%
    #dplyr::select(Gene='GENENAME',value='log2FoldChange')

  #res2=doGSEA(genes.with.values,prot.univ=NULL,prefix=paste0('AUC_MT_',x),useEns=FALSE)
  #drug.res
  res2%>%mutate(Drug=x)
})

full.go<-do.call(rbind,go.res)

full.go%>%
  group_by(Drug)%>%
  summarize(sigTerms=count(p.adjust<0.05,na.rm=T))%>%
  DT::datatable()