knitr::opts_chunk$set(echo = TRUE)

library(mpnstXenoModeling)

if(!require('EnsDb.Hsapiens.v86')) {
  BiocManager::install("EnsDb.Hsapiens.v86")
  library('EnsDb.Hsapiens.v86')
}
if(!require('ensembldb')){
  BiocManager::install("ensembldb", repos='http://cran.us.r-project.org')
  library(ensembldb)
}

if(!require('apeglm')){
  BiocManager::install("apeglm", repos='http://cran.us.r-project.org')
  library(apeglm)
}
library(dplyr)

Here we bring in the RNASeq data and the leapR package to measure and identify correlated pathways across the samples.

Load data from synapse query

We have all the data in count.sf files so can read them in using the tximport package. Here is the data for which we have gene expression.

#this function simply loads all the data into memory

syn<-mpnstXenoModeling::synapseLogin()

##collecting the metadata
res<-syn$tableQuery('select id,diagnosis,individualID,sex,specimenID,tissue from syn38893832')$asDataFrame()


##reading the counts
full.mat<-do.call(rbind,lapply(res$id,function(x){
  tab<-read.table(syn$get(x)$path,header=T)
  tab$id<-x
  return(tab)
}))%>%left_join(res)%>%
  tidyr::separate(Name,into=c("GENEID","VERSION"),sep='\\.')

database <- EnsDb.Hsapiens.v86
pmap <-
  ensembldb::select(
    database,
    keys = list(
      GeneIdFilter(unique(full.mat$GENEID)),
      TxBiotypeFilter("protein_coding")
    ),
    columns = c("GENENAME")
  )%>% 
#  dplyr::rename(GENE = 'GENEID') %>%
  right_join(full.mat) %>%
  subset(TXBIOTYPE == 'protein_coding') 
#%>%
#  dplyr::select(GENE, GENENAME, GENEID) %>% distinct()#%>%
# tibble::column_to_rownames('GENEID')

counts <- pmap %>%
    dplyr::select(GENEID, NumReads, specimenID) %>%
  distinct()%>%
  as.data.frame()%>%
    tidyr::pivot_wider(values_from = 'NumReads', names_from = 'specimenID',values_fn=list(NumReads=sum)) %>%
    tibble::column_to_rownames('GENEID') %>%
    round()

coldata<-pmap%>%
  dplyr::select(specimenID,diagnosis,tissue,individualID,sex)%>%
  distinct()%>%
  tibble::column_to_rownames('specimenID')

dds <- DESeq2::DESeqDataSetFromMatrix(countData = counts,
                                        colData = coldata,
                                        design = ~ sex)# + Clinical.Status)


library(DESeq2)

vsd <- DESeq2::vst(dds)
keep <- rowSums(counts(dds)) >= 10
dds <- dds[keep, ]
dds <- DESeq2::DESeq(dds)

Tumor deconvolution code uses the immunedeconv library.

library(immunedeconv)  
library(pheatmap)
#' runImmuneDeconv
#' @param tab
#' @param method
#' @prefix 
#' @return Tidied data frame
runImmuneDeconv<-function(tab,method,prefix='PDX'){
  #run MCP counter
  mat<-reshape2::acast(tab,Symbol~Sample,value.var='counts',fun.aggregate=mean,na.rm=T)
  nas<-which(apply(mat,1,function(x) any(is.na(x))))
  if(length(nas)>0)
    mat<-mat[-nas,]
  res<-deconvolute(mat,method)

  df<-dplyr::select(tab,c(Sample,experimentalCondition))%>%
      unique()#%>%
  #    rename(study='studyName')
  rownames(df)<-df$specimenID
  #save as heatmap with metadata
  mtab<-res%>%select(-cell_type)%>%as.data.frame()
  rownames(mtab)<-res$cell_type
  library(pheatmap)

  ##now tidy up data to table
  td<-tidyr::gather(res,key="specimenID",value="score",-cell_type )%>%
    left_join(df,by='specimenID')
  td$method=method
  return(td)
}


count.mat <- assay(vsd)#counts(dds,normalized=TRUE)#[symbs,]

#keep <- which(rowMin(counts(dds, normalized = TRUE)) > 9)
rmap<-pmap%>%dplyr::select(GENENAME,GENEID)%>%distinct()
symb.mat <- geneIdToSymbolMatrix(count.mat, rmap)


pat.annote <- pmap %>%
  dplyr::select(specimenID, sex,tissue,diagnosis) %>%
  distinct() %>%
#  mutate(MicroTissueQuality = unlist(MicroTissueQuality)) %>%
  tibble::column_to_rownames('specimenID')

xc<-deconvolute(symb.mat,'xcell')
mc<-deconvolute(symb.mat,'mcp_counter')

pheatmap(tibble::column_to_rownames(mc,'cell_type'),
         annotation_col = pat.annote,
         clustering_method = 'ward.D2', 
         clustering_distance_cols = 'correlation',
         cellheight=10,
         clustering_distance_rows='correlation',
         filename='GEMmcpcounterDeconv.pdf')


xcmat<-tibble::column_to_rownames(xc,'cell_type')
res<-which(rowMeans(xcmat)>.0001)
score_scores<- grep('score',rownames(xcmat))
  pheatmap(xcmat[setdiff(res,score_scores),],
           annotation_col = pat.annote,
           clustering_method = 'ward.D2', 
           clustering_distance_cols = 'correlation',
           cellheight=10,
         #cell_width=10,
         clustering_distance_rows='correlation',
         filename='GEMxcellDeconv.pdf')


sgosline/mpnstXenoModeling documentation built on Dec. 10, 2022, 8:33 a.m.