In sgosline/mpnstXenoModeling: MPNST Xenograft Modeling

This document focuses on loading the data and visualizing the PDX model data that we have.

# Chunk defaults: echo the code, hide warnings in the rendered document.
knitr::opts_chunk$set(echo = TRUE, warning = FALSE)

# Install the project package from GitHub only when it is not available yet.
# requireNamespace() tests for the package without attaching it, and the
# unconditional library() call below fails loudly if loading is impossible
# (require() would only return FALSE and let the script continue).
if (!requireNamespace("mpnstXenoModeling", quietly = TRUE)) {
  remotes::install_github("sgosline/mpnstXenoModeling")
}
library(mpnstXenoModeling)

library(ggplot2)
library(dplyr)

# Presumably creates the data objects used below (drugData, varData, rnaSeq)
# -- TODO confirm against the package documentation.
loadPDXData()

Load and plot PDX Data

PDX data is available on this Synapse project page that collates available PDX and tumor model data across the project.

For this project we create a function that loads and plots the data.

First let's summarize the data that we have currently.

#TODO: add in data summary table

PDX drug experiments

Now we can plot the individual experiments run on the PDXs to summarize.

# Build one plot object per Sample/batch combination. `.keep = TRUE` leaves
# the grouping columns in the data passed to plotPDXTreatmentBySample().
plots <- drugData %>%
  group_by(Sample, batch, .drop = FALSE) %>%
  group_map(~ plotPDXTreatmentBySample(.x), .keep = TRUE)

# Write every plot to a single PDF. The device is closed in `finally` so a
# failing print() cannot leave the PDF device open and capture later plots.
pdf("allTumorVolume.pdf", height = 8, width = 12)
res <- tryCatch(
  lapply(plots, print),
  finally = dev.off()
)

# Print the plots again so they also appear on the active (document) device.
res <- lapply(plots, print)

Somatic variant data

Now we can look at the mutational data in more detail. What samples do we have somatic variants for?

 # Gene-by-specimen matrix of AD values: one row per Symbol, one column per
# specimenID. Duplicate Symbol/specimen entries are averaged and missing
# combinations are filled with 0.
mutMat <- varData %>%
  mutate(AD = as.numeric(AD)) %>%
  dplyr::select(specimenID, Symbol, AD) %>% # ,tranche))%>%
  subset(Symbol != "") %>%
  tidyr::pivot_wider(
    names_from = specimenID,
    values_from = AD,
    values_fn = list(AD = mean),
    values_fill = 0.0
  ) %>%
  tibble::column_to_rownames("Symbol")

# Per-specimen annotations, keyed by specimenID so that they line up with the
# columns of mutMat.
annotes <- varData %>%
  dplyr::select(
    Sample, specimenID, Age, Sex, MicroTissueQuality, Location, Size
  ) %>%
  distinct() %>%
  tibble::column_to_rownames("specimenID") %>%
  mutate(
    Age = as.numeric(Age),
    Size = as.numeric(Size),
    MicroTissueQuality = unlist(MicroTissueQuality)
  )

# Install pheatmap on demand, then attach it; library() errors if that fails.
if (!requireNamespace("pheatmap", quietly = TRUE)) {
  install.packages("pheatmap")
}
library(pheatmap)

# Restrict to specimens that have annotations. drop = FALSE keeps the
# two-dimensional shape even when only a single specimen is shared.
mutMat <- mutMat[
  , intersect(rownames(annotes), colnames(mutMat)),
  drop = FALSE
]

# Log-scale the values once (0.01 offset avoids log10(0)) and reuse the result
# for both the on-screen and the PDF heatmap. Row/column labels are blanked.
logMutMat <- log10(0.01 + mutMat)
blankRows <- rep("", nrow(mutMat))
blankCols <- rep("", ncol(mutMat))

pheatmap(
  logMutMat,
  clustering_distance_cols = "correlation",
  cellwidth = 10,
  annotation_col = annotes,
  labels_row = blankRows,
  labels_col = blankCols
)

pheatmap(
  logMutMat,
  clustering_distance_cols = "correlation",
  cellwidth = 10,
  annotation_col = annotes,
  labels_row = blankRows,
  labels_col = blankCols,
  filename = "allMutations.pdf"
)

Because there are so many one-off somatic variants, let's keep only those that occur in at least 3 samples, and then add back TP53.

# Symbols with AD > 0 in more than two distinct individuals. AD is coerced
# with as.numeric(), matching how it is treated when mutMat is built.
topMuts <- varData %>%
  subset(as.numeric(AD) > 0) %>%
  subset(!is.na(Symbol)) %>%
  subset(Symbol != "") %>%
  group_by(Symbol) %>%
  summarize(nSamps = n_distinct(individualID)) %>%
  dplyr::filter(nSamps > 2) %>%
  dplyr::select(Symbol)

# Always include TP53, whether or not it passed the recurrence filter.
topMuts <- union("TP53", topMuts$Symbol)

# Index mutMat only by genes and specimens it actually contains: an unknown
# row name would produce an all-NA row and an unknown column name would raise
# "undefined columns selected". Both heatmaps now use the same sub-matrix.
recGenes <- intersect(topMuts, rownames(mutMat))
recSpecimens <- intersect(rownames(annotes), colnames(mutMat))
recMat <- log10(0.01 + mutMat[recGenes, recSpecimens, drop = FALSE])

pheatmap(
  recMat,
  cellwidth = 10,
  cellheight = 10,
  annotation_col = annotes,
  clustering_method = "ward.D2"
)

pheatmap(
  recMat,
  cellwidth = 10,
  cellheight = 10,
  annotation_col = annotes,
  filename = "recMutations.pdf",
  clustering_method = "ward.D2"
)

Gene expression data

We can also plot gene expression for the 50 most variable genes.

 # Gene-by-sample matrix of counts: one row per GENEID, one column per synid.
# The sample-level metadata columns are removed before widening; duplicate
# entries are averaged and missing combinations are filled with 0.
rnamat <- rnaSeq %>%
  subset(!is.na(GENEID)) %>%
  subset(!is.na(counts)) %>%
  dplyr::select(
    -c(Sample, Sex, Clinical.Status, Size, Location, Age, MicroTissueQuality)
  ) %>%
  distinct() %>% # View()
  tidyr::pivot_wider(
    names_from = synid,
    values_from = counts,
    values_fn = list(counts = mean),
    values_fill = list(counts = 0.0)
  ) %>%
  tibble::column_to_rownames("GENEID")

# rownames(rnaSeq)<-c()
# Per-sample annotations keyed by synid. NOTE: this overwrites the `annotes`
# object built for the variant data.
annotes <- rnaSeq %>%
  dplyr::select(Sample, synid, Age, Sex, MicroTissueQuality, Location, Size) %>%
  distinct() %>%
  tibble::column_to_rownames("synid") %>%
  mutate(
    Age = as.numeric(Age),
    Size = as.numeric(Size),
    MicroTissueQuality = unlist(MicroTissueQuality)
  )

# topMuts<-setdiff(topMuts,'TTN')
# Rank genes by variance across samples and keep at most the top 50. head()
# cannot run past the end of the vector, so no NA row names are introduced
# when fewer than 50 genes are available.
mv <- apply(rnamat, 1, var) %>% sort(decreasing = TRUE)
topVarGenes <- head(names(mv), 50)
topVarMat <- log10(0.01 + rnamat[topVarGenes, , drop = FALSE])

# Blank column labels must be sized from rnamat (the matrix being plotted),
# not from the mutation matrix.
blankCols <- rep("", ncol(rnamat))

pheatmap(
  topVarMat,
  clustering_distance_cols = "correlation",
  cellwidth = 10,
  # annotation_col = annotes,
  cellheight = 10,
  labels_col = blankCols
)

pheatmap(
  topVarMat,
  clustering_distance_cols = "correlation",
  cellwidth = 10,
  # annotation_col = annotes,
  cellheight = 10,
  labels_col = blankCols,
  filename = "RAWmostVariableTranscripts.pdf"
)