In sgosline/mpnstXenoModeling: MPNST Xenograft Modeling

This markdown assesses gene expression changes the correlate with drug sensitivity.

knitr::opts_chunk$set(echo = TRUE)

require(remotes)
if (!require(mpnstXenoModeling)) {
  remotes::install_github('sgosline/mpnstXenoModeling')
  library(mpnstXenoModeling)
}

mpnstXenoModeling::loadSynapse()

if (!require('DT')) {
  install.packages("DT")
  library(DT)
}

library(dplyr)
library(ggrepel)

Load drug data and expression data

#here we need mT drug sensitivity and gene expression
#update to use `RNAseq` column
pdx_data <- mpnstXenoModeling::loadPDXData()


clin.tab <<- data.tab %>%
  dplyr::select(Sample,
                Age,
                Sex,
                MicroTissueQuality,
                MPNST,
                `PRC2 Status`,
                `Clinical Status`,
                Size) %>%
  distinct() %>%
  mutate(MicroTissueQuality = unlist(MicroTissueQuality)) %>%
  mutate(MicroTissueQuality = stringr::str_replace_all(MicroTissueQuality, 'NaN', 'ND')) %>%
  mutate(MicroTissueQuality = stringr::str_replace_all(MicroTissueQuality, 'Usable', 'Good'))%>%
  mutate(`Clinical Status` = stringr::str_replace_all(`Clinical Status`,'Alive with metastatic disease','Alive'))%>%
    mutate(`Clinical Status` = stringr::str_replace_all(`Clinical Status`,'NED','Alive'))


rnaSeq <<- mpnstXenoModeling::loadRNASeqData()
mtDrugData <- loadMicrotissueDrugData()

Drug Plots

We can now evaluate drugs that behave differently across samples. We are primarily focused on four samples and four drugs. However, we anticipate pooling the results for each drug to carry out differential expression.

Microtissue Metrics

The first thing we need to do is establish good metrics for evaluating drug sensitivity. We experiment with a few here.

mtStats <- mtDrugData %>%
  mutate(maxKilling = 1 - (MinViability / MaxViability))


good.mt <-
  subset(clin.tab, MicroTissueQuality %in% c('Good', 'Robust', 'Usable')) %>%
  distinct()

print(paste('Looking at ', nrow(good.mt), 'micro tisues'))

mtStats <- mtStats %>%
  subset(CellLine %in% good.mt$Sample)

sh <- grep("99.00", mtStats$Drug)
mtStats$Drug[sh] <- rep("SHP099", length(sh))

minDrugs <- mtStats%>%
   mutate(hasRNASeq=(CellLine%in%rnaSeq$Sample))%>%
  group_by(Drug)%>%
  summarise(samps=n_distinct(CellLine),samplesWithRNA=count(hasRNASeq==TRUE))%>%
  subset(samps>2)

DT::datatable(minDrugs)

##these are the drugs we want to evaluate!!!

eval.drugs <-
  c(
    'Mirdametinib',
    'Mirdametinib;Trabectedin',
    'Trabectedin',
    'Olaparib',
    'Olaparib;Trabectedin'
  )#,
#                'Ribociclib;SHP099','Ribociclib',
#                'SHP099','Trametinib','SHP099;Trametinib')

drug.colors <- list(
  Mirdametinib = '#4e8e00',
  Olaparib = '#9437ff',
  Trabectedin = '#0a33ff',
  `Olaparib;Trabectedin` = '#F94040',
  `Mirdametinib;Trabectedin` = 'plum3'
)

cbPalette <- c("#999999", "#E69F00", "#56B4E9", "#009E73", "#F94040", "#0072B2", "#D55E00", "#CC79A7")
drug.colors<-cbPalette[c(2,3,4,5,8)]
names(drug.colors)<-c('Mirdametinib','Olaparib','Trabectedin','Olaparib;Trabectedin','Mirdametinib;Trabectedin')
eval.samps<-c('MN-2','JH-2-079c','JH-2-002','WU-225')

Now we have three metrics: maxKilling, AUC, and IC50. How do these compare across tissues?

library(pheatmap)

auc.mat <- mtStats %>%
  dplyr::select(Drug, CellLine, auc) %>%
  tidyr::pivot_wider(
    values_from = auc,
    names_from = CellLine,
    values_fn = list(auc = mean),
    values_fill = 0.0
  ) %>%
  tibble::column_to_rownames('Drug') %>%
  as.matrix()

##single treatments only
combos <- rownames(auc.mat)[grep(';', rownames(auc.mat))]
singls <- setdiff(rownames(auc.mat), combos)

#pheatmap(auc.mat[singls,],cellwidth = 10,cellheight=10,main='AUC across MT data',filename='AUC_MT_single.png')
# pheatmap(auc.mat[combos,],cellwidth = 10,cellheight=10,main='AUC across MT data',filename='AUC_MT_combo.png')
#pheatmap(auc.mat,cellwidth = 10,cellheight=10,main='AUC across MT data')

##now let's try barplotss
mtStats %>% subset(Drug %in% singls) %>%
  ggplot(aes(
    x = Drug,
    y = auc,
    ymin = auc_low,
    ymax = auc_hi,
    fill = Drug
  )) +
  geom_bar(stat = 'identity', position = 'dodge') + geom_errorbar() +
  facet_grid(CellLine ~ .) +
  ggtitle("AUC of single agents") + guides(x =  guide_axis(angle = 90))

ggsave('mt_single_agent_auc_barplots.pdf')
##combos
mtStats %>% subset(Drug %in% combos) %>%
  ggplot(aes(
    x = Drug,
    y = auc,
    ymin = auc_low,
    ymax = auc_hi,
    fill = Drug
  )) +
  geom_bar(stat = 'identity', position = 'dodge') + geom_errorbar(width =
                                                                    0.2) + facet_grid(CellLine ~ .) +
  ggtitle("AUC of drug combinations") + guides(x =  guide_axis(angle = 90))
ggsave('mt_combo_agent_auc_barplots.pdf')

mtStats %>% subset(Drug %in% eval.drugs) %>%
  subset(CellLine %in% eval.samps) %>%
  ggplot(aes(
    x = Drug,
    y = auc,
    ymin = auc_low,
    ymax = auc_hi,
    fill = Drug
  )) +
  geom_bar(stat = 'identity', position = 'dodge') + geom_errorbar(width =
                                                                    0.2) + facet_grid(CellLine ~ .) +
  ggtitle("AUC of selected drugs") + guides(x =  guide_axis(n.dodge =
                                                              2)) +
  scale_fill_manual(values = drug.colors)
ggsave('mt_selectedDrugs_auc_barplots.pdf')


##now plot IC50
mtStats$ic50[!is.finite(mtStats$ic50)] <- 0.0

tgi.mat <- mtStats %>%
  dplyr::select(Drug, CellLine, ic50) %>%
  tidyr::pivot_wider(
    values_from = ic50,
    names_from = CellLine,
    values_fn = list(ic50 = mean),
    values_fill = 0.0
  ) %>%
  tibble::column_to_rownames('Drug') %>%
  as.matrix()

skip <- which(rowMeans(tgi.mat) %in% c(1, 0))
if (length(skip) > 0)
  tgi.mat <- tgi.mat[-skip, ]

try(pheatmap(
  tgi.mat,
  cellwidth = 10,
  cellheight = 10,
  main = 'IC50 across MT data',
  filename = 'ic50_mt.png'
))

try(pheatmap(tgi.mat,
             cellwidth = 10,
             cellheight = 10,
             main = 'IC50 across MT data'))

spi.mat <- mtStats %>%
  dplyr::select(Drug, CellLine, maxKilling) %>%
  tidyr::pivot_wider(
    values_from = maxKilling,
    names_from = CellLine,
    values_fn = list(maxKilling = mean),
    values_fill = 0
  ) %>%
  tibble::column_to_rownames('Drug') %>%
  as.matrix()

pheatmap(spi.mat,
         cellwidth = 10,
         cellheight = 10,
         main = 'max killing across mT data')
pheatmap(
  spi.mat,
  cellwidth = 10,
  cellheight = 10,
  main = 'max killing across mT data',
  filename = 'MK_mt.png'
)

##this wont' work because it's all 1s
#pheatmap(spi.mat[eval.drugs, ],
#         cellwidth = 10,

#         cellheight = 10,
#         main = 'max killing across mT data')
#pheatmap(
#  spi.mat[eval.drugs, ],
#  cellwidth = 10,
#  cellheight = 10,
#  main = 'max killing across mT data',
#  filename = 'MK_mt_selDrugs.png'
#)

Which metrics are best for analyzing the data?

Drug stats from AUC threshold vs maxKilling threshold

We apparently have a lot of variability here, which is a good thing. We can now distinguish cells that are sensitive or resistant by comparing them to other cells in the collection and establishing custom, drug-specific thresholds.

drugMids <- mtStats %>%
  group_by(Drug) %>%
  summarize(aucMid = median(auc), mkMid = median(maxKilling))

#DT::datatable(drugMids)


auc.thresh = 0.6

library(ggplot2)
sens.class <- mtStats %>%
  left_join(drugMids) %>%
  rowwise() %>%
  mutate(sens = (auc <= aucMid),
         mksens = (maxKilling >= mkMid))

DT::datatable(sens.class)

RNA Expression

We can now collect gene expression values by each sample to identify gene signatures in the drugs of interest.

library(ensembldb)
library("EnsDb.Hsapiens.v86")
library(ggrepel)
database <- EnsDb.Hsapiens.v86
pmap <- ensembldb::select(
  database,
  keys = list(
    GeneIdFilter(rnaSeq$GENE),
    TxBiotypeFilter("protein_coding")
  ),
  columns = c("GENENAME")
) %>%
  dplyr::rename(GENE = 'GENEID') %>%
  right_join(rnaSeq) %>%
  subset(TXBIOTYPE == 'protein_coding') %>%
  dplyr::select(GENE, GENENAME, GENEID) %>% distinct()

dds <- rnaSeq %>%
  subset(GENE %in% pmap$GENE) %>%
  mpnstXenoModeling::deseq2NormFilter()

genemat <- counts(dds, normalize = TRUE) %>%
  as.data.frame() %>%
  tibble::rownames_to_column("GENEID") %>%
  tidyr::pivot_longer(-GENEID, names_to = 'Sample', values_to = 'gcounts') %>%
  left_join(pmap) %>%
  group_by(Sample, GENENAME) %>%
  summarize(gcounts = sum(gcounts)) %>%
  tidyr::pivot_wider(values_from = gcounts, names_from = 'Sample') %>%
  tibble::column_to_rownames('GENENAME') %>%
  as.matrix()

  pc <- prcomp(t(genemat), scale. = TRUE)


   ex.df <- as.data.frame(pc$x)%>%
    tibble::rownames_to_column('Sample')%>%
    dplyr::select('Sample','PC1','PC2')%>%
    left_join(clin.tab)

  p<-ex.df%>%
    ggplot(aes(x=PC1,y=PC2,col=Age,shape=MicroTissueQuality))+
    geom_point()+
    geom_text_repel(data = ex.df, aes(label = Sample))

p

ggsave("UMAP_mt_qual_GeneExpression.pdf", p)

pmap <- pmap %>%
  tibble::column_to_rownames('GENEID')

There is clear separation between most microtissues that are 'good/usable' (aside from 225) and those that are not. Are there any biological pathways enriched?

goodsamps <-
  clin.tab %>% subset(MicroTissueQuality %in% c('Good', 'Robust'))
badsamps <- clin.tab %>% subset(MicroTissueQuality %in% c('Unusable'))

drug.res <- ds2FactorDE(
  dds,
  ids1 = goodsamps$Sample,
  ids2 = badsamps$Sample,
  name = 'MTqual',
  doShrinkage = TRUE
)

if (nrow(drug.res) > 0)
  try(mpnstXenoModeling::plotTopGenesHeatmap(
    drug.res,
    dds,
    pmap,
    paste0('MTqual'),
    patients = c(goodsamps$Sample, badsamps$Sample),
    adjpval = 0.01,
    upload = TRUE,
    parentID = 'syn25323411'
  ))

gs.res <- drug.res %>%
  tibble::rownames_to_column('GENEID') %>%
  left_join(tibble::rownames_to_column(pmap, 'GENEID')) %>%
  dplyr::select(Gene = 'GENENAME', value = 'log2FoldChange') %>% 
  mpnstXenoModeling::doGSEA(., prefix =paste0('GSEA_MTqual'),
            compress_output = TRUE)

sig.genes <- drug.res %>%
  tibble::rownames_to_column('GENEID') %>%
  left_join(tibble::rownames_to_column(pmap, 'GENEID')) %>%
  subset(padj < 0.01) %>% select(GENENAME)

go.res <- mpnstXenoModeling::doRegularGo(sig.genes$GENENAME, 
                                         prefix = paste0('GO_MTqual'), 
                                         compress_output = TRUE)

annotes <- clin.tab%>%
  dplyr::select(Sample,
                Age,
                Sex,
                `PRC2 Status`,
                `Clinical Status`,
                MicroTissueQuality)%>%
  tibble::remove_rownames()%>%
  tibble::column_to_rownames('Sample')

pheatmap(log10(0.01+genemat[sig.genes$GENENAME,]), 
         annotation_col = annotes, clustering_distance_cols = 'correlation',
         clustering_method = 'ward.D2',
         filename='allMTqual_diffex_heatmap.pdf',show_rownames = FALSE)

Genes correlated with MT hits by AUC

The goal for this is to subset the patients by whether or not they are sensitive to different drugs then compare the expression for each. There are $nrow(diff.drugs)drugs that exhibit variable behavior with an AUC less than $auc.thresh.

##let's rename the samples a bit? 


all.res <- lapply(unique(eval.drugs), function(x) {
  ddf <- sens.class %>%
    subset(Drug == x) %>%
    dplyr::select(Sample = 'CellLine', 'auc') %>%
    right_join(ex.df) %>%
    tidyr::replace_na(list(auc = 1.0)) %>%
    mutate(auc = as.numeric(auc))

  p <- ddf %>%
    ggplot(aes(
      x = PC1,
      y = PC2,
      color = auc,
      shape = MicroTissueQuality
    )) +
    geom_point() +
    geom_text_repel(data = ddf, aes(label = Sample)) +
    ggtitle(paste0('Expression by ', x, ' AUC'))
  p
  #ggsave(paste0('umapBy',x,'MT_AUC.png'),p)


  sens.pats <- subset(sens.class, Drug == x) %>% subset(sens == TRUE)
  res.pats <- subset(sens.class, Drug == x) %>% subset(sens == FALSE)

  ##add col data to dds

  drug.res <- ds2FactorDE(
    dds,
    ids1 = sens.pats$CellLine,
    ids2 = res.pats$CellLine,
    name = x,
    doShrinkage = TRUE
  )

  if (nrow(drug.res) > 0)
    try(mpnstXenoModeling::plotTopGenesHeatmap(
      drug.res,
      dds,
      pmap,
      paste0('AUC_MT', x),
      patients = c(sens.pats$CellLine, res.pats$CellLine),
      adjpval = 0.01,
      upload = FALSE,
      parentID = 'syn25323411'
    ))

  gs.res <- drug.res %>%
    tibble::rownames_to_column('GENEID') %>%
    left_join(tibble::rownames_to_column(pmap, 'GENEID')) %>%
    dplyr::select(Gene = 'GENENAME', value = 'log2FoldChange') %>% 
    doGSEA(., prefix = paste0('GSEA_AUC', x), compress_output = TRUE)

  sig.genes <- drug.res %>%
    tibble::rownames_to_column('GENEID') %>%
    left_join(tibble::rownames_to_column(pmap, 'GENEID')) %>%
    subset(padj < 0.01) %>% select(GENENAME)
  go.res <- doRegularGo(sig.genes$GENENAME, prefix = paste0('GO_AUC', x), 
                        compress_output = TRUE)
  return(drug.res %>% mutate(Drug = x))

})

full.res <- do.call(rbind, all.res)

write.table(full.res, file = 'AUCMTGenesAtMidAUC.csv')

full.res %>%
  group_by(Drug) %>%
  summarize(sigGenes = count(padj < 0.05, na.rm = T)) %>%
  DT::datatable()

Gene overlap

How do we visualize the overlap between genes? - We can plot the similarity of drugs by how similar their gene expression values are... - We can do massive venn diagrams

##spread LFC values across a matrix, then make PCA of matrix

merged <- full.res %>%
  tibble::rownames_to_column('geneid') %>%
  tidyr::separate(geneid, into = c('GENE', 'Version'), sep = '\\.') %>%
  left_join(pmap) %>%
  dplyr::select(GENENAME, log2FoldChange, Drug) %>%
  subset(!is.na(GENENAME)) %>%
  tidyr::pivot_wider(
    values_from = log2FoldChange,
    names_from = Drug,
    values_fill = list(log2FoldChange = 0.0),
    values_fn = list(log2FoldChange = mean)
  )

mmat <-
  merged %>% tibble() %>% column_to_rownames('GENENAME') %>% as.matrix()
mmat[which(is.na(mmat), arr.ind = T)] <- 0.0

pc <- prcomp(t(mmat), scale. = FALSE)

library(ggrepel)

##which are outliers?
df <- pc$x %>%
  as.data.frame() %>%
  tibble::rownames_to_column('Drug')

p <- df %>%
  ggplot(aes(x = PC1, y = PC2, col = Drug)) +
  geom_point() +
  geom_text_repel(data = subset(df, abs(PC1) > 50 |
                                  abs(PC2) > 15), aes(label = Drug))

p

ggsave('PCA_MT_byDiffExAUC.png', p)

GO Enrichment

By running GO enrichment on each set of genes we can determine if there are specific GO terms that are enriched or depleted in each set.

###now let's do GO enrichment
go.res <- lapply(all.res, function(drug.res) {
  x = drug.res$Drug[1]

  genes = pmap[rownames(subset(drug.res, padj < 0.01)), 'GENENAME']
  res2 <-
    mpnstXenoModeling::doRegularGo(genes, prefix = paste0('MT_AUC', x),compress_output = TRUE)

  res2 %>% mutate(Drug = x)
})

full.go <- do.call(rbind, go.res)

full.go %>%
  group_by(Drug) %>%
  summarize(sigTerms = count(p.adjust < 0.05, na.rm = T)) %>%
  DT::datatable()

Genes correlated with MT hits by max killing

##let's rename the samples a bit? 

mk.all.res <- lapply(eval.drugs, function(x) {
  print(x)

  ddf <- sens.class %>%
    subset(Drug == x) %>%
    dplyr::select(Sample = 'CellLine', 'maxKilling') %>%
    right_join(ex.df) %>%
    tidyr::replace_na(list(maxKilling = 0)) %>%
    mutate(maxKilling = as.numeric(maxKilling))

  p <- ddf %>%
    ggplot(aes(
      x = PC1,
      y = PC2,
      color = maxKilling,
      shape = MicroTissueQuality
    )) +
    geom_point() +
    geom_text_repel(data = ddf, aes(label = Sample)) +
    ggtitle(paste0('Expression by ', x, ' MaxKilling'))
  p
 # ggsave(paste0('umapBy', x, 'MT_MK.png'), p)


  sens.pats <-
    subset(sens.class, Drug == x) %>% subset(mksens == TRUE)
  res.pats <-
    subset(sens.class, Drug == x) %>% subset(mksens == FALSE)

  ##add col data to dds
  if (nrow(sens.pats) == 0 || nrow(res.pats) == 0)
    return(NULL)
  drug.res <- ds2FactorDE(
    dds,
    ids1 = sens.pats$CellLine,
    ids2 = res.pats$CellLine,
    name = x,
    doShrinkage = TRUE
  )

  if (nrow(drug.res) > 0)
    try(mpnstXenoModeling::plotTopGenesHeatmap(
      drug.res,
      dds,
      pmap,
      paste0('MT_MK', x),
      patients = c(sens.pats$CellLine, res.pats$CellLine),
      adjpval = 0.01,
      upload = FALSE,
      parentID = 'syn25323411'
    ))

   gs.res <- drug.res %>%
    tibble::rownames_to_column('GENEID') %>%
    left_join(tibble::rownames_to_column(pmap, 'GENEID')) %>%
    dplyr::select(Gene = 'GENENAME', value = 'log2FoldChange') %>% 
    doGSEA(., prefix = paste0('GSEA_MK', x), compress_output = TRUE)

  sig.genes <- drug.res %>%
    tibble::rownames_to_column('GENEID') %>%
    left_join(tibble::rownames_to_column(pmap, 'GENEID')) %>%
    subset(padj < 0.01) %>% select(GENENAME)
  go.res <- doRegularGo(sig.genes$GENENAME, prefix = paste0('GO_MK', x), compress_output = TRUE)

  return(drug.res %>% mutate(Drug = x))

})

full.mk.res <- do.call(rbind, mk.all.res)

write.table(full.mk.res, file = 'maxKillMTGenesAtMidMK.csv')


full.mk.res %>%
  group_by(Drug) %>%
  summarize(sigGenes = count(padj < 0.05, na.rm = T)) %>%
  DT::datatable()

Now we can plot gene expression similarity by sensitivity via max killing

merged <- full.mk.res %>%
  tibble::rownames_to_column('geneid') %>%
  tidyr::separate(geneid, into = c('GENE', 'Version'), sep = '\\.') %>%
  left_join(pmap) %>%
  dplyr::select(GENENAME, log2FoldChange, Drug) %>%
  subset(!is.na(GENENAME)) %>%
  tidyr::pivot_wider(
    values_from = log2FoldChange,
    names_from = Drug,
    values_fill = list(log2FoldChange = 0.0),
    values_fn = list(log2FoldChange = mean)
  )

mmat <-
  merged %>% tibble::column_to_rownames('GENENAME') %>% as.matrix()
mmat[which(is.na(mmat), arr.ind = T)] <- 0.0