In sgosline/mpnstXenoModeling: MPNST Xenograft Modeling

knitr::opts_chunk$set(echo = TRUE)
require(remotes)

if(!require(mpnstXenoModeling)){
    remotes::install_github('sgosline/mpnstXenoModeling')
    library(mpnstXenoModeling)
}

if(!require('dplyr')){
  BiocManager::install('dplyr')
  library(dplyr)
}
if(!require('data.table')){
  BiocManager::install('data.table')
  library(data.table)
}
if(!require('pheatmap')){
  install.packages('pheatmap')
  library(pheatmap)
}
if(!require('ensembldb')){
  BiocManager::install("ensembldb")
  library(ensembldb)
}
if(!require('EnsDb.Hsapiens.v86')){
  BiocManager::install("EnsDb.Hsapiens.v86")
  library(EnsDb.Hsapiens.v86)
}

Mutation data and gene expression data

Load the data from the R package. We want to both evaluate the mutational profile and also the gene expression signatures that fall from this data.

mpnstXenoModeling::loadSynapse()

pdx_data <- mpnstXenoModeling::loadPDXData()

varData <- mpnstXenoModeling::loadVariantData()
rnaSeq <- mpnstXenoModeling::loadRNASeqData()

varData %>%
  subset(AD > 0) %>%
  group_by(specimenID) %>%
  summarize(nMuts = n_distinct(Symbol))

These data comprise tumor, normal, and xenograft so we need to remove and keep only xenograft.

Mutation heatmaps

We now can select specific mutations of interest and put them in a heatmap

##filter for only PDX samples


samps <- unique(varData$specimenID)
tums <- samps[grep('tumor', samps)]
norms <- samps[grep('normal', samps)]

pdx.samps <- setdiff(samps, union(tums, norms))

##spread mutational data to matrix...
mutMat <- varData %>%
  dplyr::filter(specimenID %in% pdx.samps) %>%
  mutate(AD = as.numeric(AD)) %>%
  subset(Symbol != "") %>%
  dplyr::select(
    -c(
      individualID,
      specimenID,
      synid,
      Age,
      Sex,
      MicroTissueQuality,
      Location,
      Size,
      Clinical.Status,
      PRC2.Status,
      AD
    )
  ) %>%
  distinct() %>%
  tidyr::pivot_wider(
    names_from = Sample,
    values_from = Tumor_AF,
    values_fn = list(Tumor_AF = max),
    values_fill = list(Tumor_AF = 0.0)
  ) %>%
  tibble::column_to_rownames('Symbol')

annotes <- varData %>%
  dplyr::filter(specimenID %in% pdx.samps) %>%
  dplyr::select(c(
    Sample,
    Age,
    Sex,
    MicroTissueQuality,
    PRC2.Status,
    Size,
    Clinical.Status
  )) %>%
  mutate(MicroTissueQuality = unlist(MicroTissueQuality)) %>%
  mutate(`Clinical Status` = unlist(Clinical.Status)) %>%
  dplyr::rename(`PRC2 Status`='PRC2.Status')%>%
  dplyr::select(-Clinical.Status) %>%
  #tidyr::separate(specimenID,into=c('patient','sample'),remove=TRUE)%>%
  distinct() %>% tibble::remove_rownames() %>%
  tibble::column_to_rownames('Sample')#%>%
#    dplyr::filter(sample!='normal')

mutMat <- mutMat[, rownames(annotes)]

pheatmap(
  log10(0.01 + mutMat),
  clustering_distance_cols = 'correlation',
  cellwidth = 10,
  annotation_col = annotes,
  labels_row = rep("", nrow(mutMat))
)

pheatmap(
  log10(0.01 + mutMat),
  clustering_distance_cols = 'correlation',
  cellwidth = 10,
  annotation_col = annotes,
  labels_row = rep("", nrow(mutMat)),
  filename = 'allMutations.png'
)

Here we see that some samples are more mutated than others. But really, we are only interested in a handful of mutations.

#Curated, keep this gene list of interest
genelist = c(
  'NF1',
  'CDKN2A',
  'TP53',
  'EED',
  'SUZ12',
  'EGFR',
  'PDGFRA',
  'MET',
  'BRAF',
  'TYK2',
  'PIK3CA',
  'AURKA2',
  'ATRX',
  'TSC1',
  'TSC2',
  'NF2'
)
allgenes = rownames(mutMat)
smutMat <-
  mutMat[intersect(genelist, allgenes), ]#DT_result[result==TRUE,]$allgenes,rownames(annotes)]


pheatmap(
  log10(0.01 + smutMat),
  cellheight = 10,
  cellwidth = 10,
  annotation_col = annotes
)

pheatmap(
  log10(0.01 + smutMat),
  cellheight = 10,
  cellwidth = 10,
  annotation_col = annotes,
  filename = 'selectedMutations.png'
)

Lastly we want to add any recurrent mutations that could be of interest to our analysis.

topMuts = dplyr::filter(varData, AD > 0) %>%
  subset(!is.na(Symbol)) %>%
  subset(Symbol != "") %>%
  group_by(Symbol) %>%
  summarize(nSamps = n_distinct(individualID)) %>%
  dplyr::filter(nSamps > 5) %>%
  dplyr::select(Symbol)

rmutMat <-
  mutMat[intersect(union(topMuts$Symbol, genelist), rownames(mutMat)), rownames(annotes)]

pheatmap(
  log10(0.01 + rmutMat),
  cellheight = 10,
  cellwidth = 10,
  annotation_col = annotes
)
pheatmap(
  log10(0.01 + rmutMat),
  cellheight = 10,
  cellwidth = 10,
  annotation_col = annotes,
  filename = 'selectedAndRecMutations.png'
)

These results show the mutations of interest that we want to compare below.

RNA signatures of mutations

For each mutation in the rmutMat matrix shown above, we can separate out patients that have the mutation and do not. Then we can compare differential expression using the tools described previously (the third markdown):

Group the samples into two groups, then compute the log2fold change and significance via the linear model
Plot the significant gene in a heatmap
Evaluate the functional enrichment of the significant genes only
Evaluate the functional enrichment of the ranking of all genes using GSEA. This is particularly useful when we have no significantly different genes.

Collect RNASeq data

library(ensembldb)
library("EnsDb.Hsapiens.v86")
library(ggrepel)
database <- EnsDb.Hsapiens.v86
pmap <- ensembldb::select(
  database,
  keys = list(
    GeneIdFilter(rnaSeq$GENE),
    TxBiotypeFilter("protein_coding")
  ),
  columns = c("GENENAME")
) %>%
  dplyr::rename(GENE = 'GENEID') %>%
  right_join(rnaSeq) %>%
  subset(TXBIOTYPE == 'protein_coding') %>%
  dplyr::select(GENE, GENENAME, GENEID) %>% distinct()

dds <- rnaSeq %>%
  subset(GENE %in% pmap$GENE) %>%
  mpnstXenoModeling::deseq2NormFilter()

Loop through each gene and identify differential expression, Gene enrichment

Now we go through all the genes of interest and determine if there is a signature of enrichment

#diffex.genes <- lapply(genelist,function(x){
selgenes <- c("NF1", "TSC2", "NF2", "SUZ12", 'TSC1')
all.res <- lapply(intersect(genelist, varData$Symbol), function(x) {
  modSamps = subset(varData, Symbol == x) %>%
    dplyr::select(individualID) %>%
    distinct()
  print(x)
  modSamps <- modSamps$individualID

  wtSamps = setdiff(varData$individualID, modSamps)

  #sens.pats <- subset(sens.class,Drug==x)%>%subset(sens==TRUE)
  #res.pats <- subset(sens.class,Drug==x)%>%subset(sens==FALSE)

  ##add col data to dds
  mut.res <- ds2FactorDE(
    dds,
    ids1 = modSamps,
    ids2 = wtSamps,
    name = x,
    doShrinkage = TRUE
  )

  if (nrow(mut.res) > 0){
    try(mpnstXenoModeling::plotTopGenesHeatmap(
      mut.res,
      dds,
      pmap,
      paste0('mutated_', x),
      patients = c(modSamps, wtSamps),
      adjpval = 0.05,
      upload = FALSE,
      parentID = 'syn25323411'
    ))

  gs.res <- mut.res %>%
    tibble::rownames_to_column('GENEID') %>%
    left_join(pmap) %>%
    #  left_join(tibble::rownames_to_column(pmap,'GENEID'))%>%
    dplyr::select(Gene = 'GENENAME', value = 'log2FoldChange') %>%
    mpnstXenoModeling::doGSEA(., prefix = paste0('GSEA_mutated', x, compress_output=FALSE))

  gsea_compressed <- mut.res %>%
    tibble::rownames_to_column('GENEID') %>%
    left_join(pmap) %>%
    dplyr::select(Gene = 'GENENAME', value = 'log2FoldChange') %>%
    mpnstXenoModeling::doGSEA(., prefix = paste0('GSEA_mutated_compressed', x, compress_output=TRUE))

  sig.genes <- mut.res %>%
    tibble::rownames_to_column('GENEID') %>%
    left_join(pmap) %>%
    subset(padj < 0.05) %>% 
    select(GENENAME)

  if (nrow(sig.genes) > 0)
    go.res <- mpnstXenoModeling::doRegularGo(sig.genes$GENENAME,
                          prefix = paste0('GO_mutated', x))

    compressed_go <- mpnstXenoModeling::doRegularGo(sig.genes$GENENAME,
                          prefix = paste0('GO_mutate_compressed', x),
                          compress_output=TRUE)
  #pmap[rownames(subset(drug.res,padj<0.01)),'GENENAME']%>%doRegularGo(prefix=x)
  }
  return(mut.res %>% mutate(Mutation = x))

})

full.res <- do.call(rbind, all.res)

sumtab <-
  full.res %>% subset(padj < 0.05) %>% group_by(Mutation) %>% summarize(count =n())
DT::datatable(sumtab)

x = c("SUZ12",'EED')
modSamps = subset(varData, Symbol %in% x) %>%
  dplyr::select(individualID) %>%
  distinct()

modSamps <- modSamps$individualID

wtSamps = setdiff(varData$individualID, modSamps)

#sens.pats <- subset(sens.class,Drug==x)%>%subset(sens==TRUE)
#res.pats <- subset(sens.class,Drug==x)%>%subset(sens==FALSE)

##add col data to dds
mut.res <- ds2FactorDE(
  dds,
  ids1 = modSamps,
  ids2 = wtSamps,
  name = x,
  doShrinkage = TRUE
)

if (nrow(mut.res) > 0)
  try(mpnstXenoModeling::plotTopGenesHeatmap(
    mut.res,
    dds,
    pmap,
    paste0('mutated_', paste(x,collapse='_')),
    patients = c(modSamps, wtSamps),
    adjpval = 0.05,
    upload = FALSE,
    parentID = 'syn25323411'
  ))
gs.res <- mut.res %>%
  tibble::rownames_to_column('GENEID') %>%
  left_join(pmap) %>%
  #  left_join(tibble::rownames_to_column(pmap,'GENEID'))%>%
  dplyr::select(Gene = 'GENENAME', value = 'log2FoldChange') %>%
  doGSEA(., prefix = paste0('GSEA_mutated_compressed', paste(x,collapse='_')), compress_output = TRUE)

gs.res <- mut.res %>%
  tibble::rownames_to_column('GENEID') %>%
  left_join(pmap) %>%
  #  left_join(tibble::rownames_to_column(pmap,'GENEID'))%>%
  dplyr::select(Gene = 'GENENAME', value = 'log2FoldChange') %>%
  doGSEA(., prefix = paste0('GSEA_mutated', paste(x,collapse='_')), compress_output = FALSE)

sig.genes <- mut.res %>%
  tibble::rownames_to_column('GENEID') %>%
  left_join(pmap) %>%
  subset(padj < 0.05) %>% select(GENENAME)
if (nrow(sig.genes) > 0)
  go.res <- doRegularGo(
    sig.genes$GENENAME,
    prefix = paste0('GO_mutated_compressed', x),
    compress_output=TRUE
    )

#pmap[rownames(subset(drug.res,padj<0.01)),'GENENAME']%>%doRegularGo(prefix=x)