# Vignette setup.
# LOCAL is TRUE only when the environment variable LOCAL is exactly "TRUE".
LOCAL <- identical(Sys.getenv("LOCAL"), "TRUE")
# LOCAL <- FALSE

# The option calls must sit on their own lines: on a single line they were
# swallowed by the comment above and never ran.
knitr::opts_chunk$set(
  purl = LOCAL,
  collapse = TRUE,
  comment = "#>"
)
Retrieve the full CLL dataset.
# Attach Patterns once; library() stops with an error if it is not installed,
# whereas require() would only return FALSE and let later calls fail.
library(Patterns)

# Location of the full CLL dataset (an .RData file hosted on GitHub).
CLLfile <- "https://github.com/fbertran/Patterns/raw/master/add_data/CLL.RData"

# Download the file and load its contents; the code below expects this to
# create the object `CLL`.
repmis::source_data(CLLfile)

# Preview the first ten rows and first five columns.
CLL[1:10, 1:5]
Split the CLL dataset into four datasets: healthy and aggressive subjects, each under unstimulated and stimulated conditions.
# TRUE for the positions 1..n whose index modulo 8 is 1, 2, 3 or 4, i.e. the
# first four columns of every run of eight.
first_four_of_eight <- function(n) {
  pos <- seq_len(n) %% 8
  pos > 0 & pos < 5
}

hea_unstim <- first_four_of_eight(48)
agg_unstim <- first_four_of_eight(40)

# Healthy columns start after an offset of 2, aggressive columns after an
# offset of 98. Within each run of eight columns the first four go to the
# "US" table and the last four to the "S" table.
hea_US <- CLL[, which(hea_unstim) + 2]
hea_S <- CLL[, which(!hea_unstim) + 2]
agg_US <- CLL[, which(agg_unstim) + 98]
agg_S <- CLL[, which(!agg_unstim) + 98]

# Shared measurement times passed to as.omics_array().
time_points <- c(60, 90, 210, 390)

# The third argument (6 or 5) is presumably the number of subjects
# (48 = 8 * 6 and 40 = 8 * 5 columns) -- confirm against as.omics_array().
# CLL[, 1] supplies the probeset names and CLL[, 2] the gene identifiers.
m_hea_US <- as.omics_array(hea_US, time_points, 6,
                           name = CLL[, 1], gene_ID = CLL[, 2])
m_hea_S <- as.omics_array(hea_S, time_points, 6,
                          name = CLL[, 1], gene_ID = CLL[, 2])
m_agg_US <- as.omics_array(agg_US, time_points, 5,
                           name = CLL[, 1], gene_ID = CLL[, 2])
m_agg_S <- as.omics_array(agg_S, time_points, 5,
                          name = CLL[, 1], gene_ID = CLL[, 2])
Focus on EGR1: run the code below to plot the expression values (pasted together for all the subjects) of every probeset tagged as EGR1.
# Rows of CLL whose second column is "EGR1".
egr1_rows <- which(CLL[, 2] %in% "EGR1")

# Log-transform the matching rows of agg_S and draw one solid line per row.
egr1_log <- log(agg_S[egr1_rows, ])
matplot(t(egr1_log), type = "l", lty = 1)
# All four selections compare the same pair of omics arrays.
agg_pair <- list(m_agg_US, m_agg_S)

# Builds the second argument of geneSelection(): a "condition&time" contrast
# between arrays 1 and 2 at the same index for both (presumably the
# time-point index -- confirm against the geneSelection() documentation).
contrast_at <- function(index) {
  list("condition&time", c(1, 2), rep(index, 2))
}

# Indices 1 and 2 use -1 as third argument with alpha = 0.1;
# indices 3 and 4 use 50 with alpha = 0.005.
selection1 <- geneSelection(agg_pair, contrast_at(1), -1, alpha = 0.1)
selection2 <- geneSelection(agg_pair, contrast_at(2), -1, alpha = 0.1)
selection3 <- geneSelection(agg_pair, contrast_at(3), 50, alpha = 0.005)
selection4 <- geneSelection(agg_pair, contrast_at(4), 50, alpha = 0.005)
Merge the four selections into a single one.
# Combine the four per-index selections into one object.
selection <- Patterns::unionOmics(
  list(selection1, selection2, selection3, selection4)
)

# Print a summary of the merged selection.
summary(selection)
Number of genes in the merged selection.
# Number of entries in the gene_ID slot of the merged selection.
length(slot(selection, "gene_ID"))
Translate the probesets' names for the selection.
# Attach biomaRt; library() fails right away if the package is missing.
library(biomaRt)

# Connect to the Ensembl mart for the human gene dataset (needs network access).
ensembl <- useMart("ensembl", dataset = "hsapiens_gene_ensembl")

# Probeset identifiers (first column of CLL) that belong to the selection.
selected_probesets <- CLL[CLL[, 1] %in% selection@name, 1]

# Query annotation for those probesets, filtering on the
# "affy_hg_u133_plus_2" attribute.
infos <- getBM(
  attributes = c(
    "affy_hg_u133_plus_2",
    "ensembl_gene_id",
    "hgnc_symbol",
    "chromosome_name",
    "start_position",
    "end_position",
    "band"
  ),
  filters = "affy_hg_u133_plus_2",
  values = selected_probesets,
  mart = ensembl,
  uniqueRows = TRUE,
  checkFilters = TRUE
)

# Replace gene_ID by a list with one element per probeset: the unique HGNC
# symbols found for it (empty when the probeset has no row in `infos`).
# %in% is used instead of == so that an NA identifier can never match.
selection@gene_ID <- lapply(selection@name, function(probeset) {
  unique(infos[infos$affy_hg_u133_plus_2 %in% probeset, "hgnc_symbol"])
})
Add grouping information, based on which pre-merge selection each probeset belongs to, so that network inference can be performed.
# Start with an NA group for every probeset, named by probeset.
selection@group <- rep(NA, length(selection@name))
names(selection@group) <- selection@name

# Assign groups from 4 down to 1: later assignments overwrite earlier ones,
# so a probeset present in several selections keeps the lowest group number.
pre_merge <- list(selection1, selection2, selection3, selection4)
for (g in c(4, 3, 2, 1)) {
  selection@group[selection@name %in% pre_merge[[g]]@name] <- g
}

plot(selection)
Check the length of the `group` slot of the `selection` object.
# Number of entries in the group slot of the selection.
length(slot(selection, "group"))
Perform a lasso-based inference of the network, then print the structure of the resulting network object.
# Infer the network from the grouped selection with the "LASSO2" fitting
# function. Finit and Fshape are built by CascadeFinit(4, 4) and
# CascadeFshape(4, 4); the two 4s presumably match the four groups and four
# time points used above -- confirm against the Patterns documentation.
network <- inference(
  selection,
  fitfun = "LASSO2",
  Finit = CascadeFinit(4, 4),
  Fshape = CascadeFshape(4, 4)
)

# Display the internal structure of the fitted object.
str(network)
Plot the inferred F matrix.
# Plot the F slot of the inferred network, using the "F" display choice.
plotF(slot(network, "F"), choice = "F")
Save results.
# Write the merged selection and the annotation table to the working
# directory, one .RData file per object.
save(selection, file = "selection.RData")
save(infos, file = "infos.RData")
Retrieve human transcription factors from HumanTFDB, extracted from AnimalTFDB 3.0: a comprehensive resource for annotation and prediction of animal transcription factors. Hui Hu, Ya-Ru Miao, Long-Hao Jia, Qing-Yang Yu, Qiong Zhang and An-Yuan Guo. Nucl. Acids Res. (2018).
# Download the human transcription-factor table. If the download fails,
# fall back to the `doc` dataset loaded with data(). The table is fetched
# only once: a second, unguarded read.delim() would stop the script whenever
# the server is unreachable, defeating the fallback.
tf_url <- "http://bioinfo.life.hust.edu.cn/static/AnimalTFDB3/download/Homo_sapiens_TF"
tf_download <- tryCatch(
  read.delim(tf_url, encoding = "UTF-8", header = TRUE),
  error = function(e) {
    message("Transcription factor download failed: ", conditionMessage(e))
    NULL
  }
)

# getTF records whether the online table was obtained.
getTF <- !is.null(tf_download)
if (getTF) {
  doc <- tf_download
} else {
  data(doc)
}

# Sorted vector of the values in the "Symbol" column.
TF <- as.character(doc[, "Symbol"])
TF <- TF[order(TF)]
The `TF` object holds the gene symbols of the human transcription factors. We retrieve those that are in the `selection` object.
# Annotation rows that belong to probesets of the selection.
infos_selection <- infos[infos$affy_hg_u133_plus_2 %in% selection@name, ]

# Probesets annotated with a transcription-factor symbol.
tf_probesets <- unique(
  infos_selection[
    infos_selection[, "hgnc_symbol"] %in% TF,
    "affy_hg_u133_plus_2"
  ]
)

# tfs is used later to index rows of selection@omicsarray, so it must hold
# positions within the selection. Row numbers of the annotation table are
# not aligned with the selection (the table's row order and row count per
# probeset are independent of it), hence the mapping through probeset IDs.
tfs <- which(selection@name %in% tf_probesets)
Some plots of the transcription factors found in the selection.
# Rows of the selection's data matrix indexed by tfs, one solid line per row.
tf_profiles <- selection@omicsarray[tfs, ]
matplot(t(tf_profiles), type = "l", lty = 1)
# Cluster the rows indexed by tfs into 10 groups. kmeans() picks random
# starting centres, so the result changes between runs unless a seed is set.
kk <- kmeans(selection@omicsarray[tfs, ], 10)

# Plot the 10 cluster centres, one solid line per centre.
matplot(t(kk$centers), type = "l", lty = 1)
# TO DO
# Focus on TF that were not selected.
#
# NOTE(review): this exploratory chunk is deliberately left commented out.
# It is marked "TO DO" and, written on a single line after the leading "#",
# it never executed. The statements are laid out one per line so they can be
# reviewed before being enabled.
#
# NOTE(review): the next statement assigns tfs inside the index expression,
# overwriting any earlier tfs with positions in TF.
# indice<-which(CLL[,2] %in% TF[tfs<-which(! TF %in% selection@gene_ID)])
# a<-1:200
# matplot(log(t(agg_S[indice[a],]/agg_US[indice[a],])),lty=1,type="l")
# kkk<-kmeans(log((agg_S[indice,]/agg_US[indice,])),10)
# matplot(t(kkk$centers),type="l",lty=1)
# poi<-indice[which(kkk$cluster==2 )]
# matmat<-log((agg_S[poi,]/agg_US[poi,]))
#
# addna() takes p consecutive groups of t columns from mat and inserts a
# column of NA between successive groups; it also prints the column indices
# of each group it appends.
# addna<-function(mat,t,p){
#   mat2<-mat[,1:t]
#   for(i in 2:p){
#     print(1:t+(i-1)*t)
#     mat2<-cbind(mat2,rep(NA,nrow(mat2)),mat[,1:t+(i-1)*t])
#   }
#   return(mat2)
# }
#
# NOTE(review): kmeans() above is asked for 10 clusters but the loop runs
# over 1:15; indices 11 to 15 never pass the length(poi)>2 test.
# pdf("forgotten_TF.pdf",width=15,height=5)
# for(i in 1:15){
#   poi<-indice[which(kkk$cluster==i )]
#   if(length(poi)>2){
#     matmat<-log((agg_S[poi,]/agg_US[poi,]))
#     #matplot(t(matmat),lty=1,type="l")
#     matplot(t(addna(matmat,4,5)),lty=1,type="l")}
# }
# dev.off()
#
# NOTE(review): abline() is called after dev.off(), so it does not draw into
# the PDF written above -- confirm which plot it was meant for.
# abline(v=c(2,6,10,14,18))
# poi<-indice[which(kkk$cluster==1 )]
# matplot(log(t(agg_S[poi,]/agg_US[poi,])),lty=1,type="l")
#
# TFi() counts how many entries of TF appear in x.
# TFi<-function(x) length(which(TF %in% x))
# n<-40
# kre<-kmeans(selection@omicsarray,n)
# kre
# lll<-split(selection@gene_ID,kre$cluster)
# require(DCGL)
# require("clusterProfiler")
# require("AnnotationFuncs")
# require(org.Hs.eg.db)
# pp<-list()
# for(k in 1:2){
#   print(k)
#   pp[[k]]<-translate(lll[[k]],from=org.Hs.egSYMBOL2EG,simplify=TRUE)
#   # GOs[[k]]<-enrichGO(pp, organism = "human", ont = "MF", pvalueCutoff = 0.05,
#   # pAdjustMethod = "BH", qvalueCutoff = 0.2, minGSSize = 5,
#   # readable = FALSE)
# }
# names(pp)<-paste("X",1:2,sep="")
# test<-compareCluster(pp,fun="enrichGO", organism="human", pvalueCutoff=0.05)
# plot(test)
# translate(lll[[k]],from=org.Hs.egSYMBOL2EG,simplify=TRUE)
#
# NOTE(review): TF is built from the "Symbol" column, while pp and entrez
# come from translate(..., from=org.Hs.egSYMBOL2EG), presumably Entrez IDs;
# the TF %in% ... comparisons below may therefore never match -- confirm.
# TFu<-(unlist(lapply(pp,TFi)))
# TFy<-unlist(lapply(pp,length))
# plot(TFu/TFy)
# plot(TFu)
# sum(TFu)
# entrez<-translate(selection@gene_ID,from=org.Hs.egSYMBOL2EG,simplify=TRUE)
# geneName<-translate(entrez[which(TF %in% entrez)],from=org.Hs.egSYMBOL,simplify=TRUE)
# which(selection@gene_ID %in% "EGR1")
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.