Network inference and analysis of CLL data

# LOCAL is TRUE only when the LOCAL environment variable is exactly "TRUE";
# assign LOCAL <- FALSE by hand to override it.
LOCAL <- identical(Sys.getenv("LOCAL"), "TRUE")
# Chunk options: purl follows LOCAL, output is collapsed and prefixed by "#>".
knitr::opts_chunk$set(purl = LOCAL, collapse = TRUE, comment = "#>")

Data preparation

Retrieve the full CLL dataset.

# Attach Patterns once. library() stops with an error when the package is
# missing, whereas require() only returns FALSE and lets later calls fail.
library(Patterns)

# Fetch CLL.RData from the package repository and load its contents into the
# workspace; the code below expects it to provide an object named CLL.
CLLfile <- "https://github.com/fbertran/Patterns/raw/master/add_data/CLL.RData"
repmis::source_data(CLLfile)

# Preview the first ten rows and first five columns.
CLL[1:10, 1:5]

Split the CLL dataset into healthy and aggressive, stimulated and unstimulated datasets.

# Position of each of the 48 columns inside its run of eight. Positions 1-4
# feed hea_US, the other positions (5, 6, 7 and 0) feed hea_S. The +2 offset
# skips the two leading columns of CLL.
hea_pos <- seq_len(48) %% 8
hea_first_four <- hea_pos > 0 & hea_pos < 5
hea_US <- CLL[, which(hea_first_four) + 2]
hea_S <- CLL[, which(!hea_first_four) + 2]

# Same split applied to the 40 columns that follow column 98.
agg_pos <- seq_len(40) %% 8
agg_first_four <- agg_pos > 0 & agg_pos < 5
agg_US <- CLL[, which(agg_first_four) + 98]
agg_S <- CLL[, which(!agg_first_four) + 98]

# Wrap every subset as a micro_array with the same four time values.
# NOTE(review): the third argument (6 / 5) is presumably the number of
# subjects per group -- confirm against the as.micro_array documentation.
time_points <- c(60, 90, 210, 390)
m_hea_US <- as.micro_array(hea_US, time_points, 6, name = CLL[, 1], gene_ID = CLL[, 2])
m_hea_S <- as.micro_array(hea_S, time_points, 6, name = CLL[, 1], gene_ID = CLL[, 2])

m_agg_US <- as.micro_array(agg_US, time_points, 5, name = CLL[, 1], gene_ID = CLL[, 2])
m_agg_S <- as.micro_array(agg_S, time_points, 5, name = CLL[, 1], gene_ID = CLL[, 2])

Focus on EGR1: run the code to get the graph of the expression values (pasted together for all the subjects) for all the probesets tagged as EGR1.

# Rows whose entry in the second column of CLL is "EGR1"; draw the log of
# their agg_S values, one line per row.
egr1_rows <- which(CLL[, 2] %in% "EGR1")
matplot(t(log(agg_S[egr1_rows, ])), type = "l", lty = 1)

Select genes according to their profiles.

# Four selections on the same pair of aggressive arrays. Only the last element
# of the "condition&time" specification (c(1, 1) up to c(4, 4)), the fourth
# argument and alpha change between the calls.
# NOTE(review): the last element presumably picks the time point and the
# fourth argument the number of genes to keep (-1 meaning no cap) -- confirm
# against the geneSelection documentation.
agg_pair <- list(m_agg_US, m_agg_S)
selection1 <- geneSelection(
  agg_pair, list("condition&time", c(1, 2), c(1, 1)), -1,
  alpha = 0.1
)
selection2 <- geneSelection(
  agg_pair, list("condition&time", c(1, 2), c(2, 2)), -1,
  alpha = 0.1
)
selection3 <- geneSelection(
  agg_pair, list("condition&time", c(1, 2), c(3, 3)), 50,
  alpha = 0.005
)
selection4 <- geneSelection(
  agg_pair, list("condition&time", c(1, 2), c(4, 4)), 50,
  alpha = 0.005
)

Merge the four selections into a single one.

# Union of the four selections, followed by a summary of the merged object.
selection_parts <- list(selection1, selection2, selection3, selection4)
selection <- Patterns::unionMicro(selection_parts)
summary(selection)

Number of genes in the merged selection.

# Number of entries stored in the gene_ID slot of the merged selection.
length(slot(selection, "gene_ID"))

Translate the probesets' names for the selection.

# biomaRt is mandatory for this step: fail immediately if it is missing.
library(biomaRt)

# Query Ensembl (human gene dataset) for the probesets kept in the selection.
ensembl <- useMart("ensembl", dataset = "hsapiens_gene_ensembl")
selected_probes <- CLL[CLL[, 1] %in% selection@name, 1]
infos <- getBM(
  attributes = c(
    "affy_hg_u133_plus_2", "ensembl_gene_id", "hgnc_symbol",
    "chromosome_name", "start_position", "end_position", "band"
  ),
  filters = "affy_hg_u133_plus_2",
  values = selected_probes,
  mart = ensembl,
  uniqueRows = TRUE,
  checkFilters = TRUE
)

# Replace gene_ID by a list holding, for each probeset name, the distinct HGNC
# symbols returned for it (empty when the probeset has no match in infos).
selection@gene_ID <- lapply(selection@name, function(x) {
  unique(infos[infos$affy_hg_u133_plus_2 == x, "hgnc_symbol"])
})

Network inference

Add grouping information according to the pre-merge selection membership to perform network inference.

# Start from an all-NA group vector named after the probesets.
selection@group <- rep(NA, length(selection@name))
names(selection@group) <- selection@name

# Label each probeset with the selection it belongs to. Groups are written
# from 4 down to 1, so a probeset present in several selections ends up with
# the lowest group number.
group_members <- list(selection1, selection2, selection3, selection4)
for (grp in c(4, 3, 2, 1)) {
  selection@group[selection@name %in% group_members[[grp]]@name] <- grp
}
plot(selection)

Check the length of the group slot of the selection object.

# Length of the group slot filled in above.
length(slot(selection, "group"))

Perform a lasso-based inference of the network, then print the structure of the network object.

# Infer the network with the "LASSO2" fitting function, using the cascade
# initial F matrix and F shape built for 4 and 4.
# NOTE(review): the two 4s presumably are the number of time points and the
# number of groups -- confirm against CascadeFinit / CascadeFshape.
network <- inference(
  selection,
  fitfun = "LASSO2",
  Finit = CascadeFinit(4, 4),
  Fshape = CascadeFshape(4, 4)
)
str(network)

Plot the inferred F matrix.

# Draw the F slot of the inferred network.
plotF(slot(network, "F"), choice = "F")

Save results.

# Write the merged selection and the annotation table to the working
# directory, one .RData file per object.
save(selection, file = "selection.RData")
save(infos, file = "infos.RData")

Focus on transcription factors.

Retrieve human transcription factors from HumanTFDB, extracted from AnimalTFDB 3.0: a comprehensive resource for annotation and prediction of animal transcription factors. Hui Hu, Ya-Ru Miao, Long-Hao Jia, Qing-Yang Yu, Qiong Zhang and An-Yuan Guo. Nucl. Acids Res. (2018).

# Download the tab-delimited human TF table from AnimalTFDB3.
doc <- read.delim(
  "http://bioinfo.life.hust.edu.cn/static/AnimalTFDB3/download/Homo_sapiens_TF",
  encoding = "UTF-8",
  header = TRUE
)

# Sorted character vector of the Symbol column; na.last = TRUE keeps missing
# symbols at the end instead of dropping them.
TF <- sort(as.character(doc[["Symbol"]]), na.last = TRUE)

The TF object holds the gene symbols of the human transcription factors. We retrieve those that are present in the selection object.

# Annotation rows for the probesets kept in the selection.
infos_selection <- infos[infos$affy_hg_u133_plus_2 %in% selection@name, ]

# Probesets annotated with at least one symbol that is a known TF.
tf_probes <- unique(
  infos_selection[infos_selection[, "hgnc_symbol"] %in% TF, "affy_hg_u133_plus_2"]
)

# Positions of those probesets in selection@name. The rows of infos come back
# from biomaRt in their own order and may repeat a probeset, so row numbers of
# infos_selection cannot be used to index selection@microarray.
# NOTE(review): assumes the rows of selection@microarray follow
# selection@name -- confirm against the micro_array class.
tfs <- which(selection@name %in% tf_probes)

Some plots of the TF found in the selection.

# Profiles of the rows flagged in tfs, then the centres of a 10-cluster
# k-means run on those profiles.
tf_profiles <- selection@microarray[tfs, ]
matplot(t(tf_profiles), type = "l", lty = 1)
kk <- kmeans(tf_profiles, 10)
matplot(t(kk$centers), type = "l", lty = 1)

# TODO: focus on the TFs that were not selected.
# tfs is overwritten here: it now holds the positions in TF of the symbols
# that are absent from selection@gene_ID.
tfs <- which(!(TF %in% selection@gene_ID))
indice <- which(CLL[, 2] %in% TF[tfs])

# Log ratio between agg_S and agg_US for the first 200 of those rows.
a <- 1:200
matplot(log(t(agg_S[indice[a], ] / agg_US[indice[a], ])), lty = 1, type = "l")

# 10-cluster k-means on the log ratios of all those rows, then the centres.
kkk <- kmeans(log(agg_S[indice, ] / agg_US[indice, ]), 10)
matplot(t(kkk$centers), type = "l", lty = 1)

# Rows of cluster 2 and their log ratios.
poi <- indice[which(kkk$cluster == 2)]
matmat <- log(agg_S[poi, ] / agg_US[poi, ])

# Insert an all-NA column between consecutive groups of `t` columns.
#
# mat: matrix or data frame with at least t * p columns.
# t:   number of columns in each group.
# p:   number of groups to keep.
#
# Returns the first p groups of t columns of `mat`, bound column-wise with one
# NA column between two consecutive groups (t * p + p - 1 columns in total).
addna <- function(mat, t, p) {
  # drop = FALSE keeps a two-dimensional object for single-row input or t == 1,
  # so nrow() below is always defined.
  mat2 <- mat[, 1:t, drop = FALSE]
  # seq_len(p - 1) + 1 is empty when p == 1, whereas 2:p would count backwards.
  for (i in seq_len(p - 1) + 1) {
    mat2 <- cbind(
      mat2,
      rep(NA, nrow(mat2)),
      mat[, 1:t + (i - 1) * t, drop = FALSE]
    )
  }
  mat2
}

# One page per k-means cluster with more than two rows: log ratio
# agg_S / agg_US, with an NA gap after every block of four columns
# (five blocks) so the lines break between blocks.
pdf("forgotten_TF.pdf", width = 15, height = 5)
# Iterate over the clusters kmeans actually produced rather than a hard-coded
# 15, which did not match the 10 centres requested for kkk.
for (i in seq_len(nrow(kkk$centers))) {
  poi <- indice[which(kkk$cluster == i)]
  if (length(poi) > 2) {
    matmat <- log(agg_S[poi, ] / agg_US[poi, ])
    matplot(t(addna(matmat, 4, 5)), lty = 1, type = "l")
  }
}
dev.off()
# NOTE(review): this runs after the PDF device is closed, so the lines are
# drawn on whatever device is current (or fail if none is open). If they were
# meant as separators on the PDF pages, the call belongs inside the loop.
abline(v = c(2, 6, 10, 14, 18))


# Log ratio agg_S / agg_US for the rows assigned to cluster 1.
cluster1_members <- which(kkk$cluster == 1)
poi <- indice[cluster1_members]
matplot(log(t(agg_S[poi, ] / agg_US[poi, ])), lty = 1, type = "l")
# Number of entries of the global TF vector that occur in x.
TFi <- function(x) {
  sum(TF %in% x)
}



# k-means with n = 40 clusters on the selected expression matrix; print the
# fit, then group the gene_ID entries by the cluster of their row.
n <- 40
kre <- kmeans(selection@microarray, centers = n)
kre
lll <- split(selection@gene_ID, kre$cluster)

require(DCGL)
require("clusterProfiler")
require("AnnotationFuncs")
require(org.Hs.eg.db)

# Translate the gene symbols of the first two clusters through the
# org.Hs.egSYMBOL2EG map. k stays a top-level variable because later code
# reads its final value.
pp <- list()

for (k in seq_len(2)) {
  print(k)
  pp[[k]] <- translate(lll[[k]], from = org.Hs.egSYMBOL2EG, simplify = TRUE)
  # A per-cluster enrichGO() call (ont = "MF", BH adjustment) was tried here
  # and left disabled; compareCluster() below is used instead.
}

names(pp) <- paste0("X", seq_len(2))
test <- compareCluster(
  pp,
  fun = "enrichGO",
  organism = "human",
  pvalueCutoff = 0.05
)


# Plot the compareCluster result.
plot(test)

# Translation of the cluster last visited by the loop (k keeps its last value).
translate(lll[[k]], from = org.Hs.egSYMBOL2EG, simplify = TRUE)

# Per element of pp: number of TF entries found in it (TFu) and its size (TFy).
# NOTE(review): pp holds the output of translate() while TF holds symbols --
# confirm that the two are comparable.
TFu <- unlist(lapply(pp, TFi))
TFy <- unlist(lapply(pp, length))

plot(TFu / TFy)
plot(TFu)
sum(TFu)

# Translate every gene_ID entry of the selection.
entrez <- translate(selection@gene_ID, from = org.Hs.egSYMBOL2EG, simplify = TRUE)

# NOTE(review): tf_positions are positions in TF but are used to index
# entrez -- verify that this is the intended lookup.
tf_positions <- which(TF %in% entrez)
geneName <- translate(entrez[tf_positions], from = org.Hs.egSYMBOL, simplify = TRUE)

# Position(s) of EGR1 among the gene_ID entries of the selection.
which(selection@gene_ID %in% "EGR1")


Try the Patterns package in your browser

Any scripts or data that you put into this service are public.

Patterns documentation built on March 21, 2021, 5:05 p.m.