# analyses/ComparisonCellLinesTissues_HER2.R

# Compare certain features of the cell lines and tissues
# such as a receptor expression, transcription factor expression
# also general phosphosites

# Load the plotting / data-wrangling packages and set the data directory
library(ggplot2)
library(plyr)
# NOTE(review): hard-coded absolute path. Every relative file name passed to
# fread() and pdf() further down is resolved against this directory, so the
# script only runs unchanged on a machine that has this exact location.
setwd("/users/roota/Documents/tasty/expdata")
library(data.table)
# Read one data table and tag it with where it came from.
#
# path:       file name handed to fread(); relative names are resolved
#             against the current working directory
# dataset:    value written to the `dataset` column (sample origin)
# alteration: value written to the `alteration` column (data layer)
#
# Returns the table produced by fread() with the `dataset` and `alteration`
# columns appended, in that order.
read_layer <- function(path, dataset, alteration) {
  layer <- fread(path)
  layer$dataset <- dataset
  layer$alteration <- alteration
  layer
}

# Cell-line tables, one per data layer
ccna <- read_layer("cellline_CNA.txt", "cell_line", "CNA")
crna <- read_layer("cellline_RNA.txt", "cell_line", "RNA")
cmut <- read_layer("cellline_MUT.txt", "cell_line", "MUT")
cpro <- read_layer("cellline_PRO.txt", "cell_line", "PRO")
csty <- read_layer("cellline_STY.txt", "cell_line", "STY")

# Tissue tables, same layers
tcna <- read_layer("Tissues_CNA.txt", "tissue", "CNA")
trna <- read_layer("Tissues_RNA.txt", "tissue", "RNA")
tmut <- read_layer("Tissues_MUT.txt", "tissue", "MUT")
tpro <- read_layer("Tissues_PRO.txt", "tissue", "PRO")
tsty <- read_layer("Tissues_STY.txt", "tissue", "STY")

# Stack everything into one long table; the `dataset` and `alteration`
# columns keep track of the origin of each row.
d1 <- rbind(ccna, crna, cmut, cpro, csty,
            tcna, trna, tmut, tpro, tsty)
head(d1)

# Rows whose gene name contains "ERBB2", leaving out the gene ERBB2IP
her2 <- d1[grepl("ERBB2", d1$gene) & !(d1$gene %in% "ERBB2IP"), ]

# Tile plot of norm per gene and data layer, one panel per dataset;
# the colour scale diverges around 0 (blue below, orange above).
ggplot(her2, aes(x = alteration, y = gene, fill = norm)) +
  geom_tile() +
  scale_fill_gradient2(low = "blue", mid = "white", high = "orange",
                       midpoint = 0) +
  facet_wrap(~dataset)






# ---- Reference tables ----
# pdb: its Node1 column provides the gene list used as receptors below.
# tdb: its TF column is used for the transcription-factor plot further down.
pdb <- fread("DB_Paths_111418.txt")
tdb <- fread("DB_TFtargets.txt")

# Sorted, de-duplicated Node1 entries
rec <- sort(unique(pdb$Node1))
rec

# ---- PRO values of the receptor genes, cell lines vs tissues ----
# Re-label the two PRO tables before stacking them (the tissue label used
# from here on is 'tissues').
cpro$dataset <- 'cell_line'
tpro$dataset <- 'tissues'
pro <- rbind(cpro, tpro)
head(pro)

# Build the scatter once and reuse it for the screen and for the PDF.
receptor_plot <- ggplot(pro[pro$gene %in% rec, ],
                        aes(x = norm, y = gene, colour = dataset)) +
  geom_point()
receptor_plot

# A ggplot object is only drawn when it is printed. Top-level auto-printing
# does not happen when the script is run through source(), which would leave
# the PDF without a page, so print explicitly while the device is open.
pdf('PlotL2_CelllinesTissues_ReceptorExpression.pdf')
print(receptor_plot)
dev.off()

head(tdb)

# PRO values of the genes listed in tdb$TF; genes are ordered on the y axis
# by the standard deviation of their norm values.
tf_plot <- ggplot(pro[pro$gene %in% tdb$TF, ],
                  aes(x = norm, y = reorder(gene, norm, sd),
                      colour = dataset)) +
  geom_point()

# Explicit print(): a ggplot is only rendered when printed, and auto-printing
# is not available when the script is run through source(). Without it the
# PDF device would be closed with nothing drawn on it.
pdf('PlotL2_CelllinesTissues_TFExpression.pdf', height = 68, width = 11)
print(tf_plot)
dev.off()


### Compare STY between cell lines and tissues
# Stack the two STY tables. The dataset labels set here become the suffixes
# of the wide columns created below (norm.cell_line / norm.tissues).
csty$dataset <- "cell_line"
tsty$dataset <- "tissues"
sty <- rbind(csty, tsty)
head(sty)

# Drop rows without a norm value
sty <- sty[!is.na(sty$norm), ]
head(sty)

# Mean norm per gene within each dataset
sty <- ddply(sty, c("gene", "dataset"), summarize,
             norm = mean(norm, na.rm = TRUE))

library(reshape)
# Long -> wide: one row per gene, one norm.<dataset> column per dataset
sty <- reshape(sty, idvar = "gene", timevar = "dataset", direction = "wide")
head(sty)

# Keep the genes that have a value in both datasets
in_both <- !(is.na(sty$norm.tissues) | is.na(sty$norm.cell_line))
sty2 <- sty[in_both, ]
head(sty2)
sty2

# Agreement between tissues and cell lines.
# NOTE(review): cor() is asked for Spearman while cor.test() runs with its
# default method, so the two reported coefficients are not the same
# statistic -- confirm this is intended.
cor(sty2$norm.tissues, sty2$norm.cell_line, method = "spearman")
plot(x = sty2$norm.cell_line, y = sty2$norm.tissues)
cor.test(sty2$norm.tissues, sty2$norm.cell_line)


head(sty2)
library(tidyr)

# Split the gene column at "_" into a protein part and a site part, for both
# the filtered table (sty2) and the full wide table (sty).
site_cols <- c("protein", "site")
sty2 <- separate(sty2, col = "gene", into = site_cols, sep = "_")
sty <- separate(sty, col = "gene", into = site_cols, sep = "_")

# Rows for EGFR and ERBB2 among the genes present in both datasets ...
subset(sty2, protein %in% "EGFR")
subset(sty2, protein %in% "ERBB2")
# ... and every ERBB2 row, including those present in only one dataset
subset(sty, protein %in% "ERBB2")
# alexrootgithub/tasty documentation built on May 8, 2019, 7:27 a.m.