This document focuses on loading and visualizing the PDX model data that we have.
# Chunk defaults for the rendered document: show the code, hide warnings.
knitr::opts_chunk$set(echo = TRUE, warning = FALSE)

# remotes is needed to install the project package from GitHub.
# requireNamespace() only checks availability; library() then attaches the
# package and, unlike require(), stops with an error if it cannot be loaded.
if (!requireNamespace("remotes", quietly = TRUE)) {
  install.packages("remotes")
}
library(remotes)

# Install the project package only when it is not already available.
if (!requireNamespace("mpnstXenoModeling", quietly = TRUE)) {
  remotes::install_github("sgosline/mpnstXenoModeling")
}
library(mpnstXenoModeling)

library(ggplot2)
library(dplyr)

# Project helper from mpnstXenoModeling. The later chunks read drugData,
# varData and rnaSeq, which are presumably created by this call -- confirm
# against the package documentation.
loadPDXData()
PDX data is available on this Synapse project page that collates available PDX and tumor model data across the project.
For this project we create a function that loads and plots the data.
First let's summarize the data that we have currently.
#TODO: add in data summary table
Now we can plot the individual experiments run on the PDXs to summarize.
# Build one plot per Sample/batch group with the project helper
# plotPDXTreatmentBySample().
#   .drop = FALSE : groups are not dropped for empty factor-level combinations
#   .keep = TRUE  : the grouping columns stay in the data handed to the helper
plots <- drugData %>%
  group_by(Sample, batch, .drop = FALSE) %>%
  group_map(~ plotPDXTreatmentBySample(.x), .keep = TRUE)

# Write every plot to a single PDF. The device is closed in `finally` so a
# failure while printing one plot does not leave the PDF device open and
# capture all subsequent graphics output.
pdf("allTumorVolume.pdf", height = 8, width = 12)
res <- tryCatch(
  lapply(plots, print),
  finally = dev.off()
)

# Print the plots a second time so they also appear in the rendered document.
res <- lapply(plots, print)
Now we can look at the mutational data in more detail. What samples do we have somatic variants for?
# Gene-by-specimen matrix of AD values taken from varData.
# Rows with an empty Symbol are removed, duplicate Symbol/specimen entries are
# averaged, and Symbol/specimen pairs with no entry are filled with 0.
mutMat <- varData %>%
  mutate(AD = as.numeric(AD)) %>%
  dplyr::select(specimenID, Symbol, AD) %>%
  subset(Symbol != "") %>%
  tidyr::pivot_wider(
    names_from = specimenID,
    values_from = AD,
    values_fn = list(AD = mean),
    values_fill = 0.0
  ) %>%
  tibble::column_to_rownames("Symbol")

# Per-specimen annotation table (row names = specimenID) used to decorate the
# heatmap columns.
annotes <- varData %>%
  dplyr::select(
    Sample, specimenID, Age, Sex, MicroTissueQuality, Location, Size
  ) %>%
  distinct() %>%
  tibble::column_to_rownames("specimenID") %>%
  mutate(
    Age = as.numeric(Age),
    Size = as.numeric(Size),
    MicroTissueQuality = unlist(MicroTissueQuality)
  )

# Install pheatmap on demand, then attach it. library() errors if loading
# fails, whereas require() would only return FALSE.
if (!requireNamespace("pheatmap", quietly = TRUE)) {
  install.packages("pheatmap")
}
library(pheatmap)

# Restrict the matrix to specimens that also have an annotation row.
# drop = FALSE keeps a data frame even when only one specimen is shared.
sharedSpecimens <- intersect(rownames(annotes), colnames(mutMat))
mutMat <- mutMat[, sharedSpecimens, drop = FALSE]

# The 0.01 offset keeps the zero-filled cells finite under log10.
logMutMat <- log10(0.01 + mutMat)

# Heatmap of all variants: once on the active device, once to a PDF file.
# Row and column labels are blanked out.
pheatmap(
  logMutMat,
  clustering_distance_cols = "correlation",
  cellwidth = 10,
  annotation_col = annotes,
  labels_row = rep("", nrow(mutMat)),
  labels_col = rep("", ncol(mutMat))
)
pheatmap(
  logMutMat,
  clustering_distance_cols = "correlation",
  cellwidth = 10,
  annotation_col = annotes,
  labels_row = rep("", nrow(mutMat)),
  labels_col = rep("", ncol(mutMat)),
  filename = "allMutations.pdf"
)
There are so many one-off somatic variants that we filter for those that exist in at least 3 samples and then add back TP53.
# Symbols with AD > 0 in more than two distinct individualID values.
# AD is coerced with as.numeric() when mutMat is built, so the same coercion
# is applied here; without it, a character AD would be compared as text.
topMuts <- varData %>%
  subset(as.numeric(AD) > 0) %>%
  subset(!is.na(Symbol)) %>%
  subset(Symbol != "") %>%
  group_by(Symbol) %>%
  summarize(nSamps = n_distinct(individualID)) %>%
  dplyr::filter(nSamps > 2) %>%
  dplyr::select(Symbol)

# Always include TP53, whatever its recurrence.
topMuts <- union("TP53", topMuts$Symbol)

# Only plot genes and specimens that exist in mutMat. Indexing a data frame by
# a missing row name (e.g. TP53 when it has no variants) yields an all-NA row,
# and a missing column name is an error. topMuts itself is left unchanged.
plottedMuts <- intersect(topMuts, rownames(mutMat))
plottedSpecimens <- intersect(rownames(annotes), colnames(mutMat))
recMutMat <- log10(
  0.01 + mutMat[plottedMuts, plottedSpecimens, drop = FALSE]
)

# Heatmap of recurrent variants: once on the active device, once to a PDF
# file. Both calls use the same matrix.
pheatmap(
  recMutMat,
  cellwidth = 10,
  cellheight = 10,
  annotation_col = annotes,
  clustering_method = "ward.D2"
)
pheatmap(
  recMutMat,
  cellwidth = 10,
  cellheight = 10,
  annotation_col = annotes,
  filename = "recMutations.pdf",
  clustering_method = "ward.D2"
)
We can also plot the gene expression across highly variable genes.
# Gene-by-dataset matrix of counts (columns are synid values).
# Rows without a GENEID or a count are removed, the per-sample metadata
# columns are dropped, duplicate entries are averaged, and missing
# combinations are filled with 0.
rnamat <- rnaSeq %>%
  subset(!is.na(GENEID)) %>%
  subset(!is.na(counts)) %>%
  dplyr::select(
    -c(Sample, Sex, Clinical.Status, Size, Location, Age, MicroTissueQuality)
  ) %>%
  distinct() %>%
  tidyr::pivot_wider(
    names_from = synid,
    values_from = counts,
    values_fn = list(counts = mean),
    values_fill = list(counts = 0.0)
  ) %>%
  tibble::column_to_rownames("GENEID")

# Annotation table keyed by synid. NOTE: this overwrites the `annotes` object
# built earlier for the mutation heatmaps.
annotes <- rnaSeq %>%
  dplyr::select(Sample, synid, Age, Sex, MicroTissueQuality, Location, Size) %>%
  distinct() %>%
  tibble::column_to_rownames("synid") %>%
  mutate(
    Age = as.numeric(Age),
    Size = as.numeric(Size),
    MicroTissueQuality = unlist(MicroTissueQuality)
  )

# Per-gene variance across columns, largest first.
mv <- apply(rnamat, 1, var) %>%
  sort(decreasing = TRUE)

# Up to 50 most variable genes. head() avoids the NA row names that
# names(mv)[1:50] produces when fewer than 50 genes are available.
topVarGenes <- head(names(mv), 50)
topVarMat <- log10(0.01 + rnamat[topVarGenes, , drop = FALSE])

# Heatmap of the most variable genes: once on the active device, once to a PDF
# file. Column labels are blanked out; the label vector is sized from rnamat,
# the matrix being plotted, not from mutMat.
# annotation_col = annotes is left disabled, as in the original calls.
pheatmap(
  topVarMat,
  clustering_distance_cols = "correlation",
  cellwidth = 10,
  cellheight = 10,
  labels_col = rep("", ncol(rnamat))
)
pheatmap(
  topVarMat,
  clustering_distance_cols = "correlation",
  cellwidth = 10,
  cellheight = 10,
  labels_col = rep("", ncol(rnamat)),
  filename = "RAWmostVariableTranscripts.pdf"
)
Lastly, we have the microtissue data that we also have drug treatment for.
#TODO: summarize MT data as well
This is the data we have so far!
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.