R/code20150507.r
In DSviaDRM: Exploring Disease Similarity in Terms of Dysfunctional Regulatory Mechanisms

#.packageName<- "DSviaDRM"


  #####################################################################################################
  ## a gene filtering function: Genes which have a Between-Experiment Mean  Expression Signal (BEMES) lower than the median of BEMES's 
  ## of all genes will be filtered out. 
  ## 'exprs' is the expression data for two conditions.
  # output: A data frame or matrix with a reduced number of rows.
  #####################################################################################################
"expressionBasedfilter" <- function(exprs) {
 	all.mean <- apply(exprs,1,mean)
  	median <- median(all.mean)
  	exprs.filter <- exprs[all.mean>median,]
  	return(exprs.filter)
}

  #################################################################################################### 
  ## a gene filtering function: Those genes not significantly more variable than the median gene are filtered out.
  ## 'exprs' is the expression data for two conditions.
  ## 'p' is the probability  cut-off of Chi Squared distribution.
  # output: A data frame or matrix with a reduced number of rows.
  ####################################################################################################
"varianceBasedfilter" <- function(exprs,p) {
 	n <- ncol(exprs)
 	Var.i <- apply(exprs,1,var)
 	Var.median <- median(Var.i)
 	quantity <- (n-1)*Var.i/Var.median
 	degree <- ncol(exprs)-1
 	prob <- pchisq(quantity, degree, ncp=0, lower.tail = F, log.p = FALSE)
 	exprs.filter <- exprs[prob<=p,]
 	return(exprs.filter)
}


###############################################################################
## DCgene&link: for unveil differential coexpression genes and links based on DCp and DCe in DCGL;
## input: disease and control expression profile;
## output: differential coexpression genes and links;
## 
###############################################################################
"DCEA" <- function(exprs.1,exprs.2,r.method=c('pearson','spearman')[1],link.method=c('qth','rth','percent')[1],cutoff=0.25,N=0,N.type=c('pooled','gene_by_gene')[1],q.method=c("BH","holm", "hochberg", "hommel", "bonferroni", "BY","fdr")[1],nbins=20,p=0.1) {
	cor.filtered.1<-cor.filtered.2<-NULL
	if (nrow(exprs.1)!=nrow(exprs.2)) stop('the two expression matrices must have the same number of rows (genes).')
	if (length(rownames(exprs.1))==0 | length(rownames(exprs.2))==0) stop('the expression matrices must have row names specifying the gene names.')
	if ( min(ncol(exprs.1),ncol(exprs.2))<3 ){
		stop('each expression matrix must have at least three or more columns.')
	} else if (min(ncol(exprs.1),ncol(exprs.2))<5 ) {
		warning('the minimum number of columns is less than five and the result may not be reliable.')
	}
	m <- nrow(exprs.1) # exprs.1, exprs.2 is the expression data for different conditions.
  if (m>5000) warning('the number of genes exceeds 5000 and the program may takes long time to run.')
	genes = rownames(exprs.1)
  	
	cor.filtered = switch(link.method,
		rth = rLinkfilter(exprs.1,exprs.2,r.method=r.method,cutoff=cutoff),
	 	qth =  	qLinkfilter(exprs.1,exprs.2,r.method=r.method,cutoff=cutoff),
		percent = percentLinkfilter(exprs.1,exprs.2,r.method=r.method,cutoff=cutoff)
	)
	rth.1 = cor.filtered$rth.1
	rth.2 = cor.filtered$rth.2
	cor.filtered.1 = cor.filtered$cor.filtered.1
	cor.filtered.2 = cor.filtered$cor.filtered.2
 
  #####################################################################################################################################################
  ## For one gene, there are n-1 correlation value pairs also q value pairs(From the two conditions). For a correlation pair for this gene,
  ## if one of the q values is less than the threshold, the pair will be retained. Then there are 'number.i.uniq' unique pairs retained that is there 
  ## are two vectors of correlation values. 
  ## Then a length normalized Euclidean Distance for the two vectors will be calculated (LNED). 
  ##################################################################################################################################################### 

	dC.length = calc.dC(cor.filtered.1,cor.filtered.2,genes)	
	dC = dC.length$dC
	number_uniq = dC.length$length
	
 ########################################################################################################################
 ## Disturb the sample labels for the two conditions and re-assign the samples to two datasets,then calculate the 'dC0' for 
 ## N times and then pool all the dC0 together to construct a 'NULL' distribution.
 #########################################################################################################################
	if(N>0){
		dC0 <- matrix(nrow=length(genes),ncol=N)
		rownames(dC0) <- genes
		exprs <- cbind(exprs.1,exprs.2)
		expSamples <- colnames(exprs)
		n.1 = ncol(exprs.1)
		n.2 = ncol(exprs.2)
		cat.j = 0
		for(j in 1:N) {
			if ( (j*100/N)%/%10>cat.j) {
				cat.j = cat.j+1
				cat(cat.j*10,'%','\n')
			}
			seq <- sample(n.1+n.2)
			exprs.1 <- exprs[,seq[1:n.1]];
			exprs.2 <- exprs[,seq[(n.1+1):(n.1+n.2)]];
			rownames(exprs.1) <- rownames(exprs.2) <- genes

			cor.filtered = switch(link.method,
                		rth = rLinkfilter(exprs.1,exprs.2,r.method=r.method,cutoff=cutoff),
                		qth =   qLinkfilter(exprs.1,exprs.2,r.method=r.method,cutoff=cutoff),
                		percent = percentLinkfilter(exprs.1,exprs.2,r.method=r.method,cutoff=cutoff)
        		)
			dC0.j <- calc.dC(cor.filtered$cor.filtered.1,cor.filtered$cor.filtered.2,cor.filtered$genes)
  		dC0[,j] = dC0.j$dC
		}

		p.value.DCp = switch(N.type,
			gene_by_gene = apply(cbind(dC0,dC),1,function(x) sum(x[1:(length(x)-1)]>x[length(x)],na.rm=T)/length(!is.na(x[1:(length(x)-1)]))),
			pooled = sapply(dC,function(x) sum(as.vector(dC0)>x,na.rm=T)/length(!is.na(as.vector(dC0))))
		)
  		q.value.DCp <- p.adjust(p.value.DCp,method=q.method)
  		Result.DCp <- data.frame(dC=dC,links=number_uniq,p.value=p.value.DCp,q.value=q.value.DCp);
  		row.names(Result.DCp) <- genes;
	} else { 
  		Result.DCp<- data.frame(dC=dC,links=number_uniq,p.value=rep(NA,length(dC)),q.value=rep(NA,length(dC)));
  		row.names(Result.DCp) <- genes;
  	}
  	
#############################################################
## decide three sets of correlation pairs and organize them into two-columned matrices.
#############################################################  	
  	
  	idx.same = (cor.filtered.1*cor.filtered.2)>0; idx.same[is.na(idx.same)] <- TRUE  ##fixing special cases where cor = NA (caused by at least one constant gene expression vector)
  	idx.diff = (cor.filtered.1*cor.filtered.2)<0; idx.diff[is.na(idx.diff)] <- FALSE
  	idx.switched = (cor.filtered.1*cor.filtered.2<0)& ( abs(cor.filtered.1)>=rth.1 & abs(cor.filtered.2)>=rth.2 ); idx.switched[is.na(idx.switched)] <- FALSE
  	
  	cor.same = cbind(cor.filtered.1[idx.same],cor.filtered.2[idx.same])
  	rownames(cor.same) <- names(cor.filtered.1)[idx.same]
  	cor.switched = cbind(cor.filtered.1[idx.switched],cor.filtered.2[idx.switched])
  	rownames(cor.switched) <- names(cor.filtered.1)[idx.switched]  	
  	cor.diff = cbind(cor.filtered.1[idx.diff & (!idx.switched)],cor.filtered.2[idx.diff & (!idx.switched)])
  	rownames(cor.diff) <- names(cor.filtered.1)[idx.diff & (!idx.switched)]

	n.switchedDCL = nrow(cor.switched)
	if ( is.null( rownames(cor.same) ) ) {name.same = NULL}
	if ( !is.null( rownames(cor.same) ) ) {
		name.same = strsplit(rownames(cor.same),',')
		name.same = matrix(unlist(name.same),length(name.same),2,byrow=T)
	}
	if ( is.null( rownames(cor.switched) ) ) {name.switched = NULL}
	if ( !is.null( rownames(cor.switched) ) ) {
		name.switched = strsplit(rownames(cor.switched),',')
		name.switched = matrix(unlist(name.switched),length(name.switched),2,byrow=T)
	}
	if ( is.null( rownames(cor.diff) ) ) {name.diff = NULL}
	if ( !is.null( rownames(cor.diff) ) ) {
		name.diff = strsplit(rownames(cor.diff),',')
		name.diff = matrix(unlist(name.diff),length(name.diff),2,byrow=T)
	}
	name.all = rbind(name.same,name.switched,name.diff)

  #############################################################
  ## Determine DCLs from same sign correlation pairs
  #############################################################
  	if(nrow(cor.same)>1){
		de.s = LFC(cor.same,nbins,p,sign="same")
		DCL.same = cor.same[de.s,]
		name.same = name.same[de.s,]
		n.sameDCL = nrow(DCL.same)
		DCL.same <- data.frame(name.same,DCL.same);
		colnames(DCL.same) <- c("Gene.1","Gene.2","cor.1","cor.2")
 	} else stop("only one or no same-signed pair in all!")

  #############################################################
  ## Determine DCLs from different sign correlation pairs
  #############################################################	
  	if(nrow(cor.diff)>1){
		de.d = LFC(cor.diff,nbins,p,sign="diff")
		DCL.diff = cor.diff[de.d,]
		name.diff = name.diff[de.d,]
  	n.diffDCL = nrow(DCL.diff)
  	DCL.diff <- data.frame(name.diff,DCL.diff);
		colnames(DCL.diff) <- c("Gene.1","Gene.2","cor.1","cor.2")
	} else stop("only one or no differently-signed pair in all!")

	
################################################################################################
## Determine Switched DCLs if they exist 
################################################################################################
	
	pairs=rbind(name.same,name.diff,name.switched);
	if(n.switchedDCL>0) {
		DCL.switched <- data.frame(name.switched,cor.switched);
		colnames(DCL.switched) <- c("Gene.1","Gene.2","cor.1","cor.2")
		cor.max <- apply(abs(cor.switched),1,max)
		middle <-sort(cor.max,method = "quick", index.return=TRUE,decreasing=TRUE)$ix ########
		DCL.switched<- DCL.switched[middle,]
	}

####################################
## All links
####################################
	g.all <- graph.data.frame(name.all);
	gene.all <- as.matrix(V(g.all)$name);
	de.all <- degree(g.all);  
#####################################
## DCLs
#####################################
	g <- graph.data.frame(pairs);
	gene.1 <- as.matrix(V(g)$name);
	de <- degree(g);  
######################################
##DCLs of same sign
######################################
	g.same <- graph.data.frame(name.same);
	g.same.name <- as.matrix(V(g.same)$name);
	degree.same <- as.matrix(degree(g.same));

########################################
## DCLs of different sign
########################################

	g.diff <- graph.data.frame(name.diff);
	g.diff.name <- as.matrix(V(g.diff)$name);
	degree.diff <- as.matrix(degree(g.diff));

#######################################
## DCLs of switched correlation
#######################################
	if(n.switchedDCL>0) {
		g.switch <- graph.data.frame(name.switched);
		g.switch.name <- as.matrix(V(g.switch)$name);
		degree.switch <- as.matrix(degree(g.switch));
	} else { degree.switch = matrix(0,1,1)
	DCL.switched = matrix("NULL",1,1)
	}

#######################################
## Numbers for DCLs of different type. 
#######################################

	degree.bind <- matrix(0,m,5)
	row.names(degree.bind) <- genes
	colnames(degree.bind) <- c("All.links","DC.links","DCL.same","DCL.diff","DCL.switched")

	degree.bind[gene.all,1]=de.all
	degree.bind[gene.1,2]=de
	degree.bind[g.same.name,3]=degree.same
	degree.bind[g.diff.name,4]=degree.diff
	if(n.switchedDCL>0) {
		degree.bind[g.switch.name,5]=degree.switch
	}


########################################################
## DCGs Identification
########################################################
#
# 	prob <- nrow(pairs)/nrow(name.all)
#	p.value.DCe <- pbinom(degree.bind[,'DC.links']-1, degree.bind[,'All.links'], prob, lower.tail = F, log.p = FALSE);
# 	q.value.DCe <- p.adjust(p.value,method=q.method);
# 
# 	degree.bind <- cbind(degree.bind,p.value.DCe,q.value.DCe)
# 	colnames(degree.bind) <- c("All.links","DC.links","DCL_same","DCL_diff","DCL_switch","p","q")
#
# 	middle <-sort(as.numeric(degree.bind[,'q']),method = "quick", decreasing=FALSE,index.return=TRUE)$ix 
# 	DCGs <- degree.bind[middle,]
# 
##########################################################
 
	DCLs <- rbind(data.frame(DCL.same,type='same signed'),data.frame(DCL.diff,type='diff signed'))
	if (n.switchedDCL>0) DCLs <- rbind(DCLs,data.frame(DCL.switched,type='switched opposites'))
	DCLs <- data.frame(DCLs,cor.diff=FALSE)
	DCLs[,'cor.diff'] <- abs(DCLs[,'cor.1']-DCLs[,'cor.2'])
 	Result.DCe <- DCLs

	Result<-list(genes=Result.DCp,links=Result.DCe)
	return(Result)
}

"DCpathway" <- function(DCEA.res=DCEA.res, DisName="COPD",pathways){
	##pathways: pathID geneID
	gene.dC<-merge(pathways, DCEA.res$genes, by.x="geneID", by.y="row.names", all.x=T)
#	gene.dC<-gene.dC[!is.na(gene.dC$dC),]
	pathway.factor<-factor(gene.dC$pathID, levels=unique(gene.dC$pathID),ordered=T)
	pathway.dC<-as.numeric(unlist(by(gene.dC$dC, pathway.factor, mean, na.rm=T)))
#	pathway.dC<-as.numeric(unlist(by(gene.dC$dC, pathway.factor, function(x) mean(x,na.rm=T))))
	Result<-data.frame(pathID=levels(pathway.factor),dC=pathway.dC)
	colnames(Result)[2]<-paste(DisName,"dC",sep=".")
#	rownames(Result)<-Result[,"pathID"]
#	Result
#	Result<-as.matrix(Result)
#	Result
#	Result<-Result[,-1]
#	Result
#	Result<-as.matrix(Result)
#	Result
#	colnames(Result)<-paste(DisName,"dC",sep=".")
#	Result
	return(Result)
}

"DS"<- function(DCpathway.disn,Ndis=3,DCEA.disn,
								DisNames=c("AA","IgA","T2D"), pathways, cutoff=0.05, Nper=0, 
								FigName="DisNetwork.pdf",vsize=5,lcex=0.5,ewidth=1.5 ) {
	DCpathway.disn<-DCpathway.disn[,!duplicated(colnames(DCpathway.disn))]
	colnames(DCpathway.disn)[2:ncol(DCpathway.disn)]<-DisNames
	if (ncol(DCpathway.disn)-1!=Ndis | length(names(DCEA.disn))!=Ndis | length(DisNames)!=Ndis) {
		stop (' "DCpathway.disn", "DCEA.disn", "DisNames" and "Ndis" must have the same number.')
	}
#	DCpathway.disn<-DCpathway.disn[,!duplicated(colnames(DCpathway.disn))]
	if (Nper>0){
		parcor0<-PartialCor(DCpathway.disn=DCpathway.disn)
		parcor.N<-matrix(,nrow(parcor0),0)
		for (i in 1:Nper){
#			pathway.i<-pathway2gene.random(pathways)
			pathway.i<-data.frame(pathID=pathways[,"pathID"],geneID=sample(pathways[,"geneID"]))
			DCpathway.disn.j<-matrix(,nrow(DCpathway.disn),0)
			for (j in 1:Ndis){
				Result.j<-DCpathway(DCEA.res=DCEA.disn[[j]],DisName=DisNames[j],pathway.i)
				DCpathway.disn.j<-cbind(DCpathway.disn.j,Result.j)
			}
			DCpathway.disn.j<-DCpathway.disn.j[,!duplicated(colnames(DCpathway.disn.j))]
			parcor.i<-PartialCor(DCpathway.disn=DCpathway.disn.j)
			parcor.N<-cbind(parcor.N,parcor.i)
		}
		p.value=apply(abs(cbind(parcor0,parcor.N)),1,function(x) sum(x[1:(length(x)-1)]>x[length(x)],na.rm=T)/length( x[1:(length(x)-1)] ))
		q.value=p.adjust(p.value,method="BH")
		Result<-data.frame(parcor0,p.value=p.value,q.value=q.value)
	} else {
		parcor0<-PartialCor(DCpathway.disn=DCpathway.disn)
		Result<-data.frame(parcor0,p.value=rep(NA,length(nrow(parcor0))),q.vlaue=rep(NA,length(nrow(parcor0))))
	}
#	return(Result)
###########################################################
## vis the disease associations.
## red line for positive relationship, green line for negative relationship, gray line for un-significant relationship
###########################################################	
	a<-t(matrix(unlist(strsplit(rownames(Result),",")),2,nrow(Result)))
	relation<-cbind(a,Result)
	colnames(relation)[1:2]<-c("dis1","dis2")
	if (nrow(relation)>1000) {
		warning ("the  number of significant disease pairs(>1000) is too large to display clearly and the program maybe need long time to run.\n")
	}
	node<-unique(c(as.character(relation$dis1),as.character(relation$dis2)))
	f <- graph.data.frame(relation)
#	if (length(V(f)) != nrow (node.classes)) {
#		stop('oops - why rows of node classes not equal to nodes of graph?\n')
#		} else {
#		node.classes = node.classes[V(f)$name,]
		V(f)$shape <- 'circle'
		V(f)$color <- 'skyblue'
		V(f)$label.color <- "black";
		V(f)$label.cex <- lcex;
		V(f)$size <- vsize
		V(f)$label <- V(f)$name
		E(f)$lty <- 1; 
		E(f)$arrow.mode <- '-'
		E(f)$color <- "gray"
		E(f)$color[relation$p.value<cutoff & relation$parcor>0] <- "red"
		E(f)$color[relation$p.value<cutoff & relation$parcor<0] <- "green"
		E(f)$width <- ewidth
		pdf(FigName)
		plot(f,layout=layout.fruchterman.reingold)
		dev.off()
		cat("The graph of", FigName, "has been completed and saved in your working directory.\n")
#	}
#	RIF_reg_rank<-RIF_reg_rank[order(-abs(as.numeric(RIF_reg_rank[,'RIFscore']))),]
	Result<-Result[order(as.numeric(Result[,'p.value'])),]
	return(Result)
}

"comDCGL"<-function(Ndis=3,DCEA.disn,DisNames=c("AA","CKD","T2D"),cutoff=0.05, tf2target ) {
	if ( length(names(DCEA.disn))!=Ndis | length(DisNames)!=Ndis) {
		stop (' "DCEA.disn", "DisNames" and "Ndis" must have the same number.')
	}
	
	for (i in 1:Ndis){
		DCEA.disn[[i]]$genes<-data.frame(DCGs=rownames(DCEA.disn[[i]]$genes),DCEA.disn[[i]]$genes)
		colnames(DCEA.disn[[i]]$genes)[2:ncol(DCEA.disn[[i]]$genes)]<-paste(colnames(DCEA.disn[[i]]$genes)[2:ncol(DCEA.disn[[i]]$genes)],DisNames[i],sep=".")
		colnames(DCEA.disn[[i]]$links)[3:ncol(DCEA.disn[[i]]$links)]<-paste(colnames(DCEA.disn[[i]]$links)[3:ncol(DCEA.disn[[i]]$links)],DisNames[i],sep=".")
	}
	comDCGs<-data.frame(DCGs=DCEA.disn[[1]]$genes[,1])
	comDCLs<-DCEA.disn[[1]]$links[,c("Gene.1","Gene.2")]
	for (i in 1:Ndis){
		genes.i<-DCEA.disn[[i]]$genes
#		colnames(genes.i)<-paste(colnames(genes.i),DisNames[i],sep=".")
		links.i<-DCEA.disn[[i]]$links
#		colnames(genes.i)<-paste(colnames(genes.i),DisNames[i],sep=".")
		if (sum(is.na(genes.i$p.value))==nrow(genes.i)){
			dCname<-paste("dC",DisNames[i],sep=".")
			genes.i <- genes.i[order(-genes.i[,dCname]),]
			DCGs.i <- genes.i[1:ceiling(nrow(genes.i)*cutoff),]
		} else {
			DCGs.i<-genes.i[genes.i$p.value<cutoff,]
		}
		comDCLs<-merge(links.i,comDCLs,by.x=c("Gene.1","Gene.2"),by.y=c("Gene.1","Gene.2"))
		comDCGs<-merge(DCGs.i,comDCGs,by.x="DCGs",by.y="DCGs")
#		colnames(comDCGs)[1]<-"row.names"
	}
	if (nrow(comDCGs)<1) warning('there is no intersection DCGs between', Ndis, 'diseases.')
	if (nrow(comDCLs)<1) warning('there is no intersection DCLs between', Ndis, 'diseases.')
	
	
	DCLs<-comDCLs
	DCG<-comDCGs$DCGs
	tf.target <- paste(tf2target[,'TF'],tf2target[,'gene'],sep='; ')
	DCL.pair1 <- paste(DCLs$Gene.1,DCLs$Gene.2, sep='; ')
	TF1.idx <- DCL.pair1 %in% tf.target
	DCL.pair2 <- paste(DCLs$Gene.2,DCLs$Gene.1, sep='; ')
	TF2.idx <- DCL.pair2 %in% tf.target
	DCLs <- data.frame(TF=NA,DCLs)
	DCLs[TF1.idx,'TF'] <- as.character(DCLs[TF1.idx,'Gene.1'])
	DCLs[TF2.idx,'TF'] <- as.character(DCLs[TF2.idx,'Gene.2'])
	DCLs[TF1.idx & TF2.idx, 'TF'] <- paste(as.character(DCLs[TF1.idx & TF2.idx,'Gene.1']),as.character(DCLs[TF1.idx & TF2.idx,'Gene.2']),sep='; ')
	colnames(DCLs)[1] <- 'internal.TF'
	DCG1.idx<-DCLs$Gene.1 %in% DCG
	DCG2.idx<-DCLs$Gene.2 %in% DCG
	DCLs<-data.frame(DCG=NA,DCLs)
	DCLs[DCG1.idx,'DCG']<-as.character(DCLs[DCG1.idx,'Gene.1'])
	DCLs[DCG2.idx,'DCG']<-as.character(DCLs[DCG2.idx,'Gene.2'])
	DCLs[DCG1.idx & DCG2.idx,'DCG']<-paste(as.character(DCLs[DCG1.idx & DCG2.idx,'Gene.1']),as.character(DCLs[DCG1.idx & DCG2.idx,'Gene.2']),sep=';')
	DCLs<-DCLs[,c(3,4,2,1,5:ncol(DCLs))]
	TF<-unique(sort(tf2target$TF))
	comDCGs<-data.frame(DCGisTF=FALSE,comDCGs)
	comDCGs[,'DCGisTF']<-comDCGs[,'DCGs'] %in% TF
	comDCGs<-comDCGs[,c(2,1,3:ncol(comDCGs))]
	Result<-list(comDCGs=comDCGs,comDCLs=DCLs)
	return(Result)
}

"comDCGLplot"<-function(comDCGL.res,FigName="comDCGL.pdf",tf2target,
												vsize=5,asize=0.3,lcex=0.3,ewidth=1.5){
#Gene.1  Gene.2 internal.TF  DCG    cor.1.T2D  cor.2.T2D    type.T2D cor.diff.T2D   cor.1.IgA  cor.2.IgA    type.IgA cor.diff.IgA    cor.1.AA    cor.2.AA
#1 AKR1A1  IGFBP7        <NA> <NA> -0.937724906  0.0780705 diff signed    1.0157954 -0.83962129  0.1085241 diff signed    0.9481454 -0.05800985 -0.78363613
#2   CCNC SH3BGRL        <NA> <NA>  0.427680424 -0.8203428 diff signed    1.2480232  0.29612477 -0.7735561 diff signed    1.0696809  0.91564968  0.13898127
#3  JOSD1   AP2M1        <NA> <NA> -0.033814698 -0.7297648 same signed    0.6959501 -0.07225289 -0.6868257 same signed    0.6145728  0.91416922 -0.17348650

	DCGs<-comDCGL.res$comDCGs
	DCLs<-comDCGL.res$comDCLs
	nodes<-unique(sort(c(as.character(DCLs$Gene.1),as.character(DCLs$Gene.2))))
	DCG<-DCGs$DCGs
	TF<-unique(sort(tf2target$TF))
	nodes.classes<-data.frame(TF=nodes %in% TF, DCG= nodes %in% DCG)
	rownames(nodes.classes) <- as.character(nodes)
	
	relation<-DCLs[,1:4]
	if (nrow(relation)>1000) {
		warning ("the edge number of common DCL(>1000) is too large to display clearly and the program maybe need long time to run.\n")
	}
	g <- graph.data.frame(relation)
	if (length(V(g))!=nrow(nodes.classes)) {
		stop('oops - why rows of node classes not equal to nodes of graph?\n')
	} else {
		nodes.classes = nodes.classes[V(g)$name,]
		V(g)$shape <- 'circle'; V(g)$shape[nodes.classes$TF] <- 'square'
		V(g)$color <- 'skyblue'; V(g)$color[nodes.classes$DCG] <- 'pink'
		E(g)$color <- "black"
		E(g)$width <- ewidth
		E(g)$arrow.mode <- '-'; E(g)$arrow.mode[!is.na(relation$internal.TF)] <- '>'
		E(g)$arrow.size <- asize
		V(g)$size <- vsize
		V(g)$label.color <- "black";
		V(g)$label.cex <- lcex; 
		V(g)$label <- V(g)$name
		pdf(FigName)
		plot(g,layout=layout.fruchterman.reingold)
		dev.off()
		cat("The graph of", FigName, "has been completed and saved in your working directory.\n")
	}
}

"qLinkfilter" <-function(exprs.1,exprs.2,cutoff=0.25,r.method=c('pearson','spearman')[1],q.method=c("BH","holm", "hochberg", "hommel", "bonferroni", "BY","fdr")[1]) {
        # m <- nrow(exprs.1) # exprs.1, exprs.2 is the expression data for different conditions.
        degree.1 <- ncol(exprs.1)-2
        degree.2 <- ncol(exprs.2)-2

        genes <- rownames(exprs.1)
        exprs.1 <- as.matrix(exprs.1)
        exprs.2 <- as.matrix(exprs.2)
        cor.1 <- cor(t(exprs.1),method=r.method,use="pairwise.complete.obs")
        cor.2 <- cor(t(exprs.2),method=r.method,use="pairwise.complete.obs")
        cor.1 <- cor.1[lower.tri(cor.1,diag=F)]
        cor.2 <- cor.2[lower.tri(cor.2,diag=F)]

        rm(exprs.1); rm(exprs.2)

	
        t.1 <- cor.1*sqrt(degree.1)/sqrt(1-cor.1*cor.1)
        t.2 <- cor.2*sqrt(degree.2)/sqrt(1-cor.2*cor.2)

        p0.1 <- 2*pt(-abs(t.1), degree.1, lower.tail = TRUE, log.p = FALSE)
        p0.2 <- 2*pt(-abs(t.2), degree.2, lower.tail = TRUE, log.p = FALSE)
        #diag(p0.1) <- NA
        #diag(p0.2) <- NA
        rm(t.1); rm(t.2)

        q.1<- p.adjust(p0.1, method = q.method)
        q.2<- p.adjust(p0.2, method = q.method)
        #dim(q.1)<- dim(p0.1); 
        #dim(q.2)<- dim(p0.2)
        #diag(q.1) <- 1
        #diag(q.2) <- 1
        rm(p0.1); rm(p0.2)

        rth.1 <- abs(cor.1[which.min(abs(q.1-cutoff))])
        rth.2 <- abs(cor.2[which.min(abs(q.2-cutoff))])
        cor.1[q.1>=cutoff & q.2>=cutoff] <- cor.2[q.1>=cutoff & q.2>=cutoff] <- 0
        
        #cor.1 <- cor.2 <- diag(rep(0,length(genes)))
        #cor.1[lower.tri(cor.1,diag=F)] = cor.1; cor.1 = cor.1+t(cor.1)
        #cor.2[lower.tri(cor.2,diag=F)] = cor.2; cor.2 = cor.2+t(cor.2)
        #rownames(cor.1) <- rownames(cor.2) <- colnames(cor.1) <- colnames(cor.2) <- genes


	name.row <- matrix(rep(genes,length(genes)),length(genes),length(genes))
	name.col <- matrix(rep(genes,length(genes)),length(genes),length(genes),byrow=T)
	name.pairs <- matrix(paste(name.row,name.col,sep=','),length(genes),length(genes))
	rm(list=c('name.row','name.col'))
	name.pairs <- name.pairs[lower.tri(name.pairs,diag=F)]
	names(cor.1) <- names(cor.2) <- name.pairs


        cor.filtered <- list(rth.1 = rth.1, rth.2 = rth.2, cor.filtered.1 = cor.1, cor.filtered.2 = cor.2, genes=genes)
        return(cor.filtered)
}


"rLinkfilter" <- function(exprs.1,exprs.2,cutoff=0.8,r.method=c('pearson','spearman')[1]) {

 	genes <- rownames(exprs.1)
        exprs.1 <- as.matrix(exprs.1)
        exprs.2 <- as.matrix(exprs.2)
        cor.filtered.1 <- cor(t(exprs.1),method=r.method,use="pairwise.complete.obs")
        cor.filtered.2 <- cor(t(exprs.2),method=r.method,use="pairwise.complete.obs")
        cor.filtered.1 <- cor.filtered.1[lower.tri(cor.filtered.1,diag=F)]
        cor.filtered.2 <- cor.filtered.2[lower.tri(cor.filtered.2,diag=F)]
        rm(exprs.1); rm(exprs.2)

        cor.filtered.1[abs(cor.filtered.1)<=cutoff & abs(cor.filtered.2)<=cutoff] <- 0
        cor.filtered.2[abs(cor.filtered.1)<=cutoff & abs(cor.filtered.2)<=cutoff] <- 0

	name.row <- matrix(rep(genes,length(genes)),length(genes),length(genes))
	name.col <- matrix(rep(genes,length(genes)),length(genes),length(genes),byrow=T)
	name.pairs <- matrix(paste(name.row,name.col,sep=','),length(genes),length(genes))
	rm(list=c('name.row','name.col'))
	name.pairs <- name.pairs[lower.tri(name.pairs,diag=F)]
	names(cor.filtered.1) <- names(cor.filtered.2) <- name.pairs

        list(rth.1=cutoff, rth.2=cutoff,cor.filtered.1 = cor.filtered.1, cor.filtered.2 = cor.filtered.2, genes=genes)

}
  ###############################################################################################################
  ## Select part of the correlation pairs, the max(abs(cor.1),abs(cor.2))
  ## exprs.1 a data frame or matrix for condition A, with rows as variables (genes) and columns as samples.  
  ## exprs.2 a data frame or matrix for condition B, with rows as variables (genes) and columns as samples.  
  ## percent percent of links to be reserved. 
  # output: A list with two components of lists: one lists the rth (thresholds of correlation values) for both conditions, the other lists the two matrices of filtered pairwise correlation values.
  ###############################################################################################################
"percentLinkfilter" <-function(exprs.1,exprs.2,cutoff=0.25,r.method=c('pearson','spearman')[1]) {
  	# m <- nrow(exprs.1) # exprs.1, exprs.2 is the expression data for different conditions.
  	n.1 <- ncol(exprs.1)
  	n.2 <- ncol(exprs.2)
  
  	#degree.1 <- n.1-2
  	#degree.2 <- n.2-2
  
  	genes <- rownames(exprs.1)
  	exprs.1 <- as.matrix(exprs.1)
  	exprs.2 <- as.matrix(exprs.2)
  	cor.filtered.1 <- cor(t(exprs.1),method=r.method,use="pairwise.complete.obs")
  	cor.filtered.2 <- cor(t(exprs.2),method=r.method,use="pairwise.complete.obs")
  	cor.filtered.1 <- cor.filtered.1[lower.tri(cor.filtered.1,diag=F)]
	cor.filtered.2 <- cor.filtered.2[lower.tri(cor.filtered.2,diag=F)]
	rm(exprs.1); rm(exprs.2)
	
	#diag(cor.filtered.1) <- 0
  	#diag(cor.filtered.2) <- 0
	cor.1.2.max <- pmax(abs(cor.filtered.1),abs(cor.filtered.2))
	#cor.1.2.max <- cor.1.2.max[lower.tri(cor.1.2.max,diag=F)]
  	#cor.1.2.vector <- cbind(cor.pairwise.1[lower.tri(cor.pairwise.1, diag = FALSE)],cor.pairwise.2[lower.tri(cor.pairwise.2, diag = FALSE)])
  	#cor.1.2.max <- apply(abs(cor.1.2.vector),1,max)  #max of the two cor
  	#cat('passed lower tri extraction \n')
	cor.1.2.max <- sort(cor.1.2.max,decreasing = TRUE);
	#cat('passed max corr operation \n')
  	# cor.1.2.max.sort <- as.matrix(cor.1.2.max.sort)
  	Rth <- cor.1.2.max[as.integer(length(cor.1.2.max)*cutoff)];
  	rm(cor.1.2.max)
  	#cor.filtered.1 <- cor.pairwise.1
  	#cor.filtered.2 <- cor.pairwise.2
	#rm(cor.pairwise.1); rm(cor.pairwise.2)
  	cor.filtered.1[abs(cor.filtered.1)<=Rth & abs(cor.filtered.2)<=Rth] <- 0
  	cor.filtered.2[abs(cor.filtered.1)<=Rth & abs(cor.filtered.2)<=Rth] <- 0
	#cat('passed 0 substitution \n')
  	
	#cor.2 <- cor.1 <- diag(rep(0,length(genes)))
	#cor.1[lower.tri(cor.1,diag=F)] = cor.filtered.1; cor.1 = cor.1+t(cor.1)
	#cor.2[lower.tri(cor.2,diag=F)] = cor.filtered.2; cor.2 = cor.2+t(cor.2)
	#rownames(cor.1) <- rownames(cor.2) <- colnames(cor.1) <- colnames(cor.2) <- genes
	#rm(cor.filtered.1); rm(cor.filtered.2)
	#cat('reform to two matrices\n')

	name.row <- matrix(rep(genes,length(genes)),length(genes),length(genes))
	name.col <- matrix(rep(genes,length(genes)),length(genes),length(genes),byrow=T)
	name.pairs <- matrix(paste(name.row,name.col,sep=','),length(genes),length(genes))
	name.pairs <- name.pairs[lower.tri(name.pairs,diag=F)]
	rm(list=c('name.row','name.col'))
	names(cor.filtered.1) <- names(cor.filtered.2) <- name.pairs

	cor.filtered <- list(rth.1=Rth,rth.2=Rth,cor.filtered.1 = cor.filtered.1, cor.filtered.2 = cor.filtered.2, genes=genes)
	return(cor.filtered)
}
	calc.dC <- function(cor.filtered.1,cor.filtered.2,genes) {
		nzero.vec <- (cor.filtered.1 != 0)|(cor.filtered.2 != 0 )
		nzero.sm <- diag(rep(0,length(genes)))
		nzero.sm[lower.tri(nzero.sm,diag=F)] <- nzero.vec; nzero.sm = nzero.sm+t(nzero.sm)
		number_uniq <- apply(nzero.sm,1,sum)
 
 		squares = (cor.filtered.1-cor.filtered.2)^2
  #	number_uniq = apply(cor.filtered.1!=0 | cor.filtered.2!=0,1,sum)
#  	ss = apply(squares,1,sum)
		squares.sm <- diag(rep(0,length(genes)))
		squares.sm[lower.tri(squares.sm,diag=F)] <- squares; squares.sm = squares.sm+t(squares.sm)
		ss = apply(squares.sm,1,sum)  
		LNED.result = as.vector(matrix(NA,length(genes),1))
  		LNED.result[number_uniq!=0] = sqrt(ss[number_uniq!=0])/sqrt(number_uniq[number_uniq!=0])
  		names(LNED.result) <- genes
		list(dC=LNED.result,length=number_uniq)
	}
"LFC" <- function(exprs,nbins=20,p=0.1,sign) {
 if(sign=='same'){
	exprs.min <- apply(abs(exprs),1,min)
  	exprs.max <- apply(abs(exprs),1,max)
  	exprs.diff <- exprs.max/exprs.min
 }else {
	 		exprs.min <- apply(abs(exprs),1,min)
  			exprs.max <- apply(abs(exprs),1,max)
  			exprs.diff <- exprs.max/exprs.min
  			a <- exprs.max
	 		exprs.max <- exprs.diff
			exprs.diff <- a	
	 	}
  n.tol = length(exprs.max)
	num.bin = ceiling(n.tol/nbins)
	exprs.max.sorted = sort(exprs.max)
	steps = min(exprs.max)
	for (i in 1:nbins) {
		if (i == nbins) {
			steps = c(steps, exprs.max.sorted[n.tol]+1)
		} else {
			steps = c(steps, exprs.max.sorted[i*num.bin])
		}	
	}
	exprs.bin<-rep(1,n.tol);
	for (i in 1:nbins) {
		exprs.bin[exprs.max>=steps[i] & exprs.max<steps[i+1]]<-i
	}
	
	steps.x<-(steps[1:(length(steps)-1)]+steps[2:length(steps)])/2
	# For bin 1 to 2 to 3, each time get the decisive point s x and y coordinates - the gradually elongating vectors exprs.x.sub and exprs.y.sub.
	exprs.x.sub<-exprs.y.sub<-NULL
	for (i in 1:nbins) {
		if (sum(exprs.bin==i)>0) {
			exprs.diff.sub<-exprs.diff[exprs.bin==i]
			diff.rank<-sort(exprs.diff.sub,decreasing=T,index.return=T)$ix
			exprs.x.sub<-c(exprs.x.sub,exprs.max[exprs.bin==i][diff.rank[ceiling(length(exprs.diff.sub)*p)]])
			exprs.y.sub<-c(exprs.y.sub,exprs.diff[exprs.bin==i][diff.rank[ceiling(length(exprs.diff.sub)*p)]])
			
		}
	}
	# important changement: steps.x substitutes for exprs.x.sub.
	mm<-glm(y~x,data=data.frame(y=exprs.y.sub,x=1/steps.x))
	exprs.diff.threshold<-predict(mm,newdata=data.frame(x=1/exprs.max))
	delink<- (exprs.diff>exprs.diff.threshold)
	delink
  }
  
"PartialCor"<-function(DCpathway.disn=DCpathway.disn){
#	DCpathway.disn<-as.matrix(DCpathway.disn)
	rownames(DCpathway.disn)<-DCpathway.disn[,1]
	DCpathway.disn<-DCpathway.disn[,-1]
	DCpathway.disn<-na.omit(DCpathway.disn)
	parcor<-pcor(DCpathway.disn)
	parcor<-parcor$estimate
	gse<-colnames(DCpathway.disn)
	parcor<-parcor[lower.tri(parcor,diag=F)]
	name.row <- matrix(rep(gse,length(gse)),length(gse),length(gse))
	name.col <- matrix(rep(gse,length(gse)),length(gse),length(gse),byrow=T)
	name.pairs <- matrix(paste(name.row,name.col,sep=','),length(gse),length(gse))
	name.pairs <- name.pairs[lower.tri(name.pairs,diag=F)]
	names(parcor)<-name.pairs
	parcor<-as.data.frame(parcor)
	return(parcor)
}	
 
 
"pathway2gene.random"<-function(pathways) {
	pathways <- data.frame(pathways)
	colnames(pathways) <- c('pathID','geneID')
	genes <- unique(as.character(pathways$geneID))
	nPath <- sort(table(pathways$pathID))
	factor.nPath <- factor(nPath,levels=unique(nPath),ordered=T)
	Paths <- by(names(nPath),factor.nPath,paste,sep='')
	Genes <- sapply(levels(factor.nPath),function(x) {sample(genes,x)})
	comb <- mapply(expand.grid,Paths,genes,SIMPLIFY=FALSE)
	Pathways.r <- matrix(unlist(lapply(comb,function(x) unlist(t(x)))),ncol=2,byrow=T)
	Pathways.r		
}
Any scripts or data that you put into this service are public.
DSviaDRM documentation built on May 2, 2019, 5:54 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
DSviaDRM
Exploring Disease Similarity in Terms of Dysfunctional Regulatory Mechanisms

R/code20150507.r
In DSviaDRM: Exploring Disease Similarity in Terms of Dysfunctional Regulatory Mechanisms

Try the DSviaDRM package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

DSviaDRM Exploring Disease Similarity in Terms of Dysfunctional Regulatory Mechanisms

R/code20150507.r In DSviaDRM: Exploring Disease Similarity in Terms of Dysfunctional Regulatory Mechanisms

Try the DSviaDRM package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

DSviaDRM
Exploring Disease Similarity in Terms of Dysfunctional Regulatory Mechanisms

R/code20150507.r
In DSviaDRM: Exploring Disease Similarity in Terms of Dysfunctional Regulatory Mechanisms