knitr::opts_chunk$set(echo = TRUE) library(mpnstXenoModeling)
Since this is a bit of a side project we will download the data and manually udpate it for now.
syn <- mpnstXenoModeling::synapseLogin() library(dplyr) library(ggplot2) library(ggfortify) meta <- readxl::read_xlsx(syn$get('syn26955140')$path)%>% dplyr::rename(`Sample ID`='Sample ID\r\n(Original)') DT::datatable(meta) prot <- read.table(syn$get('syn26999591')$path,header=T,sep='\t') phos <- read.table(syn$get('syn26999590')$path,header=T,sep='\t') isc <- read.table(syn$get('syn27025914')$path,header=F,sep='\t') nas<- which(apply(prot,1,function(x) any(is.na(x)))) if(length(nas)>0) prot <- prot[-nas,] nas<- which(apply(phos,1,function(x) any(is.na(x)))) if(length(nas)>0) phos <- phos[-nas,] isc_sites<-unlist(sapply(paste0(isc[,2],'-'),function(x) grep(x,rownames(phos)))) pca_prot <- prcomp(t(prot),scale=TRUE) pca_phos <- prcomp(t(phos),scale=TRUE) pca_phos_isc <- prcomp(t(phos[-isc_sites,]),scale=TRUE) vars <- c('Gain of C8','Highest C8') #'CMYC > C8 gain','Highest C8','Cells w. indiv ratio > 2.0') meta<-meta%>% tidyr::separate(`Sample ID`,into=c('PTRC','FISH','Sample'),sep='_',remove=FALSE)%>% tibble::column_to_rownames('Sample ID') autoplot(pca_prot,data=dplyr::rename(meta[1:10,], C8Gain='Gain of C8'),colour="C8Gain",shape='Sample')+ ggtitle('Proteomics PCA') autoplot(pca_phos,data=dplyr::rename(meta[1:10,], C8Gain='Gain of C8'),colour="C8Gain",shape='Sample')+ ggtitle('Filtered phosphoproteomics PCA')
How do we get chr8 genes?
library(ensembldb) library(EnsDb.Hsapiens.v86) # this pkg is about 75 Mb edb <- EnsDb.Hsapiens.v86 g <- genes(edb,filter=AnnotationFilter(~seq_name=='8' & tx_biotype=='protein_coding' & gene_start>45200000)) chr8.genes<-as.data.frame(g)$symbol chr8.prot<-prot[intersect(rownames(prot),chr8.genes),] pca_prot <- prcomp(t(chr8.prot),scale=TRUE) autoplot(pca_prot,data=dplyr::rename(meta[1:10,], C8Gain='Gain of C8'),colour="C8Gain",shape='Sample')+ ggtitle('Chr8 Proteins PCA')
Chr8 proteins, while expressed, are not highly up-regulated at the protein level compared to others.
We will first look at samples that have over 40% chr8 compared to those that have less.
thresh=0.4 chr8_samps<-rownames(subset(meta,`Gain of C8`>thresh)) other_samps<-setdiff(colnames(prot),chr8_samps) prot_res<- limmaTwoFactorDEAnalysis(prot, chr8_samps,other_samps)%>% dplyr::rename(Protein='featureID',`protein fc`='logFC') library(pheatmap) pheatmap(prot[subset(prot_res,adj.P.Val<0.1)$Protein,], annotation_col = dplyr::select(meta,'Gain of C8'), cellwidth = 10,cellheight = 10)
There are indeed a few differentially expressed proteins between sample 2 and others.
phos_res <- limmaTwoFactorDEAnalysis(phos, chr8_samps,other_samps) filtered_phos_res <- limmaTwoFactorDEAnalysis(phos[-isc_sites,], chr8_samps,other_samps)%>% tidyr::separate(featureID,into=c('Protein','site'),sep='-')%>% dplyr::rename(`phospho fc`='logFC') pheatmap(phos[rownames(subset(filtered_phos_res,P.Value<0.005)),], annotation_col = dplyr::select(meta,'Gain of C8'), cellwidth = 10,cellheight = 10)
Not a lot changing here.
Let's try to build a network from the chr8 genes to the diff expressed phosphosites and proteins.
library(amlresistancenetworks) pres <- subset(prot_res,adj.P.Val<0.1) pr_vals <- pres$`protein fc` names(pr_vals) <- pres$Protein phres <-subset(phos_res,adj.P.Val<0.4) ph_vals <-phres$logFC names(ph_vals)<-phres$featureID ##now howw do we add dummies? res1<-computePhosphoNetwork(phos.vals=ph_vals,prot.vals=pr_vals,fname='protPhosChr8DiffExWithChr8',w=8,nrand=1000,dummies=chr8.genes) res2<-computePhosphoNetwork(phos.vals=ph_vals,prot.vals=pr_vals,nrand=1000,fname='protPhosChr8DiffEx') overlap=intersect(names(V(res1$graph)),names(V(res2$graph))) print(paste("Graphs have overlap of",length(overlap))) print(overlap)
Now we can look at correlation of each protein and phosphosite with each of the metadata variables, then calculated the enrichment of proteins and phosphosites.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.