knitr::opts_chunk$set(echo = TRUE) require(remotes) if(!require(mpnstXenoModeling)){ remotes::install_github('sgosline/mpnstXenoModeling') library(mpnstXenoModeling) } if(!require('dplyr')){ BiocManager::install('dplyr') library(dplyr) } if(!require('data.table')){ BiocManager::install('data.table') library(data.table) } if(!require('pheatmap')){ install.packages('pheatmap') library(pheatmap) } if(!require('ensembldb')){ BiocManager::install("ensembldb") library(ensembldb) } if(!require('EnsDb.Hsapiens.v86')){ BiocManager::install("EnsDb.Hsapiens.v86") library(EnsDb.Hsapiens.v86) }
Load the data from the R package. We want to both evaluate the mutational profile and also the gene expression signatures that fall from this data.
mpnstXenoModeling::loadSynapse() pdx_data <- mpnstXenoModeling::loadPDXData() varData <- mpnstXenoModeling::loadVariantData() rnaSeq <- mpnstXenoModeling::loadRNASeqData() varData %>% subset(AD > 0) %>% group_by(specimenID) %>% summarize(nMuts = n_distinct(Symbol))
These data comprise tumor, normal, and xenograft so we need to remove and keep only xenograft.
We now can select specific mutations of interest and put them in a heatmap
##filter for only PDX samples samps <- unique(varData$specimenID) tums <- samps[grep('tumor', samps)] norms <- samps[grep('normal', samps)] pdx.samps <- setdiff(samps, union(tums, norms)) ##spread mutational data to matrix... mutMat <- varData %>% dplyr::filter(specimenID %in% pdx.samps) %>% mutate(AD = as.numeric(AD)) %>% subset(Symbol != "") %>% dplyr::select( -c( individualID, specimenID, synid, Age, Sex, MicroTissueQuality, Location, Size, Clinical.Status, PRC2.Status, AD ) ) %>% distinct() %>% tidyr::pivot_wider( names_from = Sample, values_from = Tumor_AF, values_fn = list(Tumor_AF = max), values_fill = list(Tumor_AF = 0.0) ) %>% tibble::column_to_rownames('Symbol') annotes <- varData %>% dplyr::filter(specimenID %in% pdx.samps) %>% dplyr::select(c( Sample, Age, Sex, MicroTissueQuality, PRC2.Status, Size, Clinical.Status )) %>% mutate(MicroTissueQuality = unlist(MicroTissueQuality)) %>% mutate(`Clinical Status` = unlist(Clinical.Status)) %>% dplyr::rename(`PRC2 Status`='PRC2.Status')%>% dplyr::select(-Clinical.Status) %>% #tidyr::separate(specimenID,into=c('patient','sample'),remove=TRUE)%>% distinct() %>% tibble::remove_rownames() %>% tibble::column_to_rownames('Sample')#%>% # dplyr::filter(sample!='normal')
mutMat <- mutMat[, rownames(annotes)] pheatmap( log10(0.01 + mutMat), clustering_distance_cols = 'correlation', cellwidth = 10, annotation_col = annotes, labels_row = rep("", nrow(mutMat)) ) pheatmap( log10(0.01 + mutMat), clustering_distance_cols = 'correlation', cellwidth = 10, annotation_col = annotes, labels_row = rep("", nrow(mutMat)), filename = 'allMutations.png' )
Here we see that some samples are more mutated than others. But really, we are only interested in a handful of mutations.
#Curated, keep this gene list of interest genelist = c( 'NF1', 'CDKN2A', 'TP53', 'EED', 'SUZ12', 'EGFR', 'PDGFRA', 'MET', 'BRAF', 'TYK2', 'PIK3CA', 'AURKA2', 'ATRX', 'TSC1', 'TSC2', 'NF2' ) allgenes = rownames(mutMat) smutMat <- mutMat[intersect(genelist, allgenes), ]#DT_result[result==TRUE,]$allgenes,rownames(annotes)] pheatmap( log10(0.01 + smutMat), cellheight = 10, cellwidth = 10, annotation_col = annotes ) pheatmap( log10(0.01 + smutMat), cellheight = 10, cellwidth = 10, annotation_col = annotes, filename = 'selectedMutations.png' )
Lastly we want to add any recurrent mutations that could be of interest to our analysis.
topMuts = dplyr::filter(varData, AD > 0) %>% subset(!is.na(Symbol)) %>% subset(Symbol != "") %>% group_by(Symbol) %>% summarize(nSamps = n_distinct(individualID)) %>% dplyr::filter(nSamps > 5) %>% dplyr::select(Symbol) rmutMat <- mutMat[intersect(union(topMuts$Symbol, genelist), rownames(mutMat)), rownames(annotes)] pheatmap( log10(0.01 + rmutMat), cellheight = 10, cellwidth = 10, annotation_col = annotes ) pheatmap( log10(0.01 + rmutMat), cellheight = 10, cellwidth = 10, annotation_col = annotes, filename = 'selectedAndRecMutations.png' )
These results show the mutations of interest that we want to compare below.
For each mutation in the rmutMat
matrix shown above, we can separate out patients that have the mutation and do not. Then we can compare differential expression using the tools described previously (the third markdown):
library(ensembldb) library("EnsDb.Hsapiens.v86") library(ggrepel) database <- EnsDb.Hsapiens.v86 pmap <- ensembldb::select( database, keys = list( GeneIdFilter(rnaSeq$GENE), TxBiotypeFilter("protein_coding") ), columns = c("GENENAME") ) %>% dplyr::rename(GENE = 'GENEID') %>% right_join(rnaSeq) %>% subset(TXBIOTYPE == 'protein_coding') %>% dplyr::select(GENE, GENENAME, GENEID) %>% distinct() dds <- rnaSeq %>% subset(GENE %in% pmap$GENE) %>% mpnstXenoModeling::deseq2NormFilter()
Now we go through all the genes of interest and determine if there is a signature of enrichment
#diffex.genes <- lapply(genelist,function(x){ selgenes <- c("NF1", "TSC2", "NF2", "SUZ12", 'TSC1') all.res <- lapply(intersect(genelist, varData$Symbol), function(x) { modSamps = subset(varData, Symbol == x) %>% dplyr::select(individualID) %>% distinct() print(x) modSamps <- modSamps$individualID wtSamps = setdiff(varData$individualID, modSamps) #sens.pats <- subset(sens.class,Drug==x)%>%subset(sens==TRUE) #res.pats <- subset(sens.class,Drug==x)%>%subset(sens==FALSE) ##add col data to dds mut.res <- ds2FactorDE( dds, ids1 = modSamps, ids2 = wtSamps, name = x, doShrinkage = TRUE ) if (nrow(mut.res) > 0){ try(mpnstXenoModeling::plotTopGenesHeatmap( mut.res, dds, pmap, paste0('mutated_', x), patients = c(modSamps, wtSamps), adjpval = 0.05, upload = FALSE, parentID = 'syn25323411' )) gs.res <- mut.res %>% tibble::rownames_to_column('GENEID') %>% left_join(pmap) %>% # left_join(tibble::rownames_to_column(pmap,'GENEID'))%>% dplyr::select(Gene = 'GENENAME', value = 'log2FoldChange') %>% mpnstXenoModeling::doGSEA(., prefix = paste0('GSEA_mutated', x, compress_output=FALSE)) gsea_compressed <- mut.res %>% tibble::rownames_to_column('GENEID') %>% left_join(pmap) %>% dplyr::select(Gene = 'GENENAME', value = 'log2FoldChange') %>% mpnstXenoModeling::doGSEA(., prefix = paste0('GSEA_mutated_compressed', x, compress_output=TRUE)) sig.genes <- mut.res %>% tibble::rownames_to_column('GENEID') %>% left_join(pmap) %>% subset(padj < 0.05) %>% select(GENENAME) if (nrow(sig.genes) > 0) go.res <- mpnstXenoModeling::doRegularGo(sig.genes$GENENAME, prefix = paste0('GO_mutated', x)) compressed_go <- mpnstXenoModeling::doRegularGo(sig.genes$GENENAME, prefix = paste0('GO_mutate_compressed', x), compress_output=TRUE) #pmap[rownames(subset(drug.res,padj<0.01)),'GENENAME']%>%doRegularGo(prefix=x) } return(mut.res %>% mutate(Mutation = x)) }) full.res <- do.call(rbind, all.res) sumtab <- full.res %>% subset(padj < 0.05) %>% group_by(Mutation) %>% summarize(count =n()) DT::datatable(sumtab)
x = c("SUZ12",'EED') modSamps = subset(varData, Symbol %in% x) %>% dplyr::select(individualID) %>% distinct() modSamps <- modSamps$individualID wtSamps = setdiff(varData$individualID, modSamps) #sens.pats <- subset(sens.class,Drug==x)%>%subset(sens==TRUE) #res.pats <- subset(sens.class,Drug==x)%>%subset(sens==FALSE) ##add col data to dds mut.res <- ds2FactorDE( dds, ids1 = modSamps, ids2 = wtSamps, name = x, doShrinkage = TRUE ) if (nrow(mut.res) > 0) try(mpnstXenoModeling::plotTopGenesHeatmap( mut.res, dds, pmap, paste0('mutated_', paste(x,collapse='_')), patients = c(modSamps, wtSamps), adjpval = 0.05, upload = FALSE, parentID = 'syn25323411' )) gs.res <- mut.res %>% tibble::rownames_to_column('GENEID') %>% left_join(pmap) %>% # left_join(tibble::rownames_to_column(pmap,'GENEID'))%>% dplyr::select(Gene = 'GENENAME', value = 'log2FoldChange') %>% doGSEA(., prefix = paste0('GSEA_mutated_compressed', paste(x,collapse='_')), compress_output = TRUE) gs.res <- mut.res %>% tibble::rownames_to_column('GENEID') %>% left_join(pmap) %>% # left_join(tibble::rownames_to_column(pmap,'GENEID'))%>% dplyr::select(Gene = 'GENENAME', value = 'log2FoldChange') %>% doGSEA(., prefix = paste0('GSEA_mutated', paste(x,collapse='_')), compress_output = FALSE) sig.genes <- mut.res %>% tibble::rownames_to_column('GENEID') %>% left_join(pmap) %>% subset(padj < 0.05) %>% select(GENENAME) if (nrow(sig.genes) > 0) go.res <- doRegularGo( sig.genes$GENENAME, prefix = paste0('GO_mutated_compressed', x), compress_output=TRUE ) #pmap[rownames(subset(drug.res,padj<0.01)),'GENENAME']%>%doRegularGo(prefix=x)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.