# analyses/celllines.R

# script to analyze cell lines
# data from various sources
# proteomics from Huang et al 2018

# Work from the experiment-data directory; every file name below is resolved
# relative to it.
setwd("/users/roota/Documents/tasty/expdata")
library(data.table)

# Reference databases: the path database and the TF-target database.
# fread() is used in place of the earlier
# read.table(sep = "\t", header = TRUE, stringsAsFactors = FALSE) calls.
pdb1 <- fread("DB_Paths_111418.txt")
tdb1 <- fread("DB_TFtargets.txt")

# Cell line data tables, one file per data type.
cna1 <- fread("cellline_CNA.txt")
rna1 <- fread("cellline_RNA.txt")
mut1 <- fread("cellline_MUT.txt")
pro1 <- fread("cellline_PRO.txt")
sty1 <- fread("cellline_STY.txt")

# Key every data table on its `gene` column (sorts each table by reference).
setkeyv(cna1, "gene")
setkeyv(rna1, "gene")
setkeyv(mut1, "gene")
setkeyv(pro1, "gene")
setkeyv(sty1, "gene")

# Reduce the size of PDB: build the gene lists used for filtering ----

# Genes with at least one non-missing `norm` value in the STY table.
sty2keep <- sort(unique(sty1[!is.na(norm), gene]))
#sty2keep <- c(sty2keep, NA)

# Genes with |norm| >= 1 in the PRO table.
p2keep <- sort(unique(pro1[abs(norm) >= 1, gene]))

# Genes with |norm| >= 1 in any of the CNA, RNA or PRO tables.
# NA is deliberately not handed to unique(): as a second positional argument
# it is taken as `incomparables` rather than appended to the values, and
# sort() drops NA anyway, so the resulting vector never contains NA.
g2keep <- sort(unique(c(cna1[abs(norm) >= 1, gene],
                        rna1[abs(norm) >= 1, gene],
                        p2keep)))

# Print both lists for inspection.
p2keep
g2keep

# Add two columns to the path database:
#   TF  - the Gene<k> entry at the last position of the path
#   pTF - the Node<k> entry at the last position of the path
# where k is the row's pathLength (3 to 6). Rows with any other pathLength
# keep the placeholder string "NA" (a character string, not a missing value).
#
# Updates use `:=` so pdb1 is modified by reference instead of being copied
# once per `DT[i, ]$col <- value` replacement.
pdb1[, c("TF", "pTF") := "NA"]
pdb1[pathLength == 3, `:=`(TF = Gene3, pTF = Node3)]
pdb1[pathLength == 4, `:=`(TF = Gene4, pTF = Node4)]
pdb1[pathLength == 5, `:=`(TF = Gene5, pTF = Node5)]
pdb1[pathLength == 6, `:=`(TF = Gene6, pTF = Node6)]



# Prepare the TF scores ----
head(tdb1)
head(rna1)

# Attach the RNA values to every TF-target pair; targets without RNA data
# are kept (all.x = TRUE) and get a missing `norm`.
tdb2 <- merge(tdb1, rna1, by.x = "Target", by.y = "gene",
              all.x = TRUE, all.y = FALSE)
head(tdb2)
library(plyr)

# Flip the sign of `norm` for rows whose ControlType is 'Repression'
# (updated by reference).
tdb2[ControlType %in% "Repression", norm := -norm]

# Raw TF score: sum of the (sign-adjusted) target values per TF.
t1 <- ddply(tdb2, .(TF), summarize, TFscore = sum(norm, na.rm = TRUE))
head(t1)
class(t1)
summary(t1)
# 50 largest absolute scores; head() avoids padding with NA rows when there
# are fewer than 50 TFs.
head(t1[order(abs(t1$TFscore), decreasing = TRUE), ], 50)

t2 <- as.data.table(t1)
setkey(t2, TF)

# Standardise the scores. scale() returns a one-column matrix carrying
# "scaled:center"/"scaled:scale" attributes; coerce it to a plain numeric
# vector so the data.table column is an ordinary vector and the later
# logical subsets on TFscore receive a vector, not a matrix.
t2[, TFscore := as.numeric(scale(TFscore, center = TRUE, scale = TRUE))]
# Clamp the standardised scores to [-5, 5]; NA/NaN values are left as they are.
t2[, TFscore := pmin(pmax(TFscore, -5), 5)]

summary(t2$TFscore)


# Inspect the distinct TF / pTF values present in the path database.
sort(unique(pdb1$TF))
sort(unique(pdb1$pTF))

# TFs of the paths whose pTF is in sty2keep, and TFs that are in p2keep.
sort(unique(pdb1[pTF %in% sty2keep, TF]))
sort(unique(pdb1[TF %in% p2keep, TF]))

# TFs whose standardised score has absolute value of at least 5.
tf2keep <- unique(t2[abs(TFscore) >= 5, TF])

# Keep only the paths in which at least one of Node2..Node6 is in sty2keep.
# Further filters are currently disabled: Node1 in p2keep, TF in tf2keep,
# and Node1 / Gene2..Gene6 in g2keep.
pdb2 <- pdb1[
  Reduce(
    `|`,
    lapply(
      paste0("Node", 2:6),
      function(node_col) pdb1[[node_col]] %in% sty2keep
    )
  ),
]
# Distinct start nodes that survive the filter.
sort(unique(pdb2$Node1))

# Score the filtered paths. scorePaths() is not defined in this script.
out <- scorePaths(
  # input databases
  pdb = pdb2,
  tdb = t2,
  # input data
  cna = cna1,
  rna = rna1,
  mut = mut1,
  pro = pro1,
  sty = sty1,
  # input constants
  c_cna = 0.10,
  c_rna = 0.10,
  c_mut = 0.10,
  c_pro = 0.30,
  c_sty = 0.30,
  c_tdb = 0.10,
  nPerms = 10
)

# Quick look at the result and at the start nodes / TFs it covers.
head(out)
sort(unique(out$Node1))
sort(unique(out$TF))

# Collapse the scored paths to one row per distinct gene sequence
# (Node1, Gene2..Gene6), keeping the largest sumScore and the smallest padj.
out2 <- ddply(
  out,
  .(Node1, Gene2, Gene3, Gene4, Gene5, Gene6),
  summarize,
  maxScore = max(sumScore),
  adjpval = min(padj)
)

# First 50 rows ordered by maxScore: highest first, then lowest first.
out2[order(out2$maxScore, decreasing = TRUE), ][seq_len(50), ]
out2[order(out2$maxScore), ][seq_len(50), ]
# alexrootgithub/tasty documentation built on May 8, 2019, 7:27 a.m.