R/dendrogramAdjustmentFunctions.R

Defines functions .offDiagonalMatrixSum .minTreeHeight .getBranchMembers chooseTopHubInEachModule chooseOneHubInEachModule orderBranchesUsingHubGenes

Documented in chooseOneHubInEachModule chooseTopHubInEachModule orderBranchesUsingHubGenes

## This file contains several functions which can be used to adjust the dendrogram
#   in ways which keep the dendrogram mathematically identical (ie, branch swapping,
#   branch reflection, etc).  The goal is to biologically optimize the dendrogram.

# ----------------------------------------------------------------------------- #

orderBranchesUsingHubGenes <- function(hierTOM, datExpr=NULL, colorh=NULL, type="signed", 
adj=NULL, iter=NULL, useReflections=FALSE, allowNonoptimalSwaps=FALSE){

# First read in and format all of the variables
	hGenes = hierTOM$labels
	if(is.null(adj)){
		genes = chooseOneHubInEachModule(datExpr, colorh, type=type)
		adj   = adjacency(datExpr[,genes],type=type, power=(2-(type=="unsigned")))
		colnames(adj) <- rownames(adj) <- genes
	}
	genes = rownames(adj)
	if(length(genes)!=length(intersect(genes,hGenes))){
		write("All genes in the adjacency must also be in the gene tree.  Check to make sure","")
		write("that names(hierTOM$labels) is set to the proper gene or probe names and that","")
		write("these correspond to the expression / adjacency gene names.","")
		return(0)
	}
	genes = hGenes[is.element(hGenes,genes)]
	adj   = adj[genes,genes]
	if (is.null(iter)) iter = length(genes)^2
	iters=(1:iter)/iter
	swapAnyway = rep(0,length(iters))  # Quickly decreasing chance of random swap
	if (allowNonoptimalSwaps)  swapAnyway = ((1-iters)^3)/3+0.001
		
# Iterate random swaps in the branch, only accepting the new result
#  if it produces a higher correlation than the old result OR if the 
#  random variable says to swap (which gets less likely each iteration)
	changes=NULL
	for (i in 1:iter){
		swap = 1; 
		if (useReflections) swap = sample(0:1,1)
		gInd = sample(1:length(genes),2)
		g    = genes[gInd]
		if (swap==1) {
			hierTOMnew = swapTwoBranches(hierTOM, g[1], g[2])
		} else hierTOMnew = reflectBranch(hierTOM, g[1], g[2], TRUE)
		oldSum    = .offDiagonalMatrixSum(adj)
		oGenesNew = hGenes[hierTOMnew$order]
		oGenesNew = oGenesNew[oGenesNew%in%genes]
		adjNew    = adj[oGenesNew,oGenesNew]
		newSum    = .offDiagonalMatrixSum(adjNew)
		if ((newSum>oldSum)|((sample(1:1000,1)/1000)<swapAnyway[i])) {
			hierTOM = hierTOMnew
			changes = rbind(changes,c(i,ifelse(swap==1,"Swap","Reflect"),g,oldSum,newSum))
			adj     = adjNew
		}
		write(paste("Interation",i,"of",iter),"")
		collectGarbage()
	}
	
# Perform all of the suggested swappings on the input network.
	
# Output the results
	colnames(changes)=c("Iter.#","Swap?","Gene1","Gene2","OldScore","NewScore")
    out = list(geneTree = hierTOM, changeLog = changes)
	return(out)
}

# ----------------------------------------------------------------------------- #

selectBranch <- function (hierTOM, g1, g2){
## This function selects of all genes in a branch given a gene in the
##  branch (g1) and a gene in a neighboring branch (g2), returning the 
##  indices for genes in the branch in the hierTOM$labels vector 
	
# Convert genes to UNORDERED indices (if given, indices should be ordered)
	if(is.numeric(g1)) g1 = hierTOM$order[g1]
	if(is.numeric(g2)) g2 = hierTOM$order[g2]
	if(!is.numeric(g1)) g1 = which(hierTOM$labels==g1)
	if(!is.numeric(g2)) g2 = which(hierTOM$labels==g2)
	if((length(g1)==0)|(length(g2)==0)|(max(c(g1,g2))>length(hierTOM$labels))){
		write("Input genes are not both legal indices","")
		return(hierTOM);
	}
	
# Now determine which branch is the correct one, and find the genes
	len = length(hierTOM$height)
	tree1 = which(hierTOM$merge==(-g1))%%len
	continue=length(which(hierTOM$merge==tree1))>0
	while(continue){
		nextInd = which(hierTOM$merge==tree1[length(tree1)])%%len
		tree1 = c(tree1,nextInd)
		continue=length(which(hierTOM$merge==nextInd))>0
	}
	
	branchIndex = which(hierTOM$height==.minTreeHeight(hierTOM,g1,g2))
	branch=hierTOM$merge[branchIndex,]
	b1 <- NULL
	if(is.element(branch[1],tree1)){
		b1 = .getBranchMembers(hierTOM,branch[1],b1)
	} else b1 = .getBranchMembers(hierTOM,branch[2],b1)
	collectGarbage()
	return(b1)
}

# ----------------------------------------------------------------------------- #

reflectBranch <- function (hierTOM, g1, g2, both=FALSE){
## This function reverses the ordering of all genes in a branch of the
##  clustering tree defined by the minimal branch possible that contains
##  both g1 and g2 (as either ORDERED index or gene names), or just by
##  the genes in g1
	
	b1 = selectBranch(hierTOM, g1, g2)
	if (both) b1 = c(b1,selectBranch(hierTOM, g2, g1))
	
# Now reorder the hierTOM correctly
	ord = hierTOM$order
	i1 = which(ord%in%b1)
	b=1:(min(i1)-1); 
	if(b[length(b)]<b[1]) b = NULL
	e=(max(i1)+1):length(ord); 
	if((max(i1)+1)>length(ord)) e = NULL
	ord = ord[c(b,i1[order(i1,decreasing=T)],e)]
	hierTOM$order = ord
	return(hierTOM)
}

# ----------------------------------------------------------------------------- #

swapTwoBranches <- function (hierTOM, g1, g2){
## This function re-arranges two branches in a heirarchical clustering tree
##  at the nearest branch point of two given genes (or indices) 
	
# Convert genes to indices (ORDERED AS ON THE PLOT)
	if(is.numeric(g1)) g1 = hierTOM$order[g1]
	if(is.numeric(g2)) g2 = hierTOM$order[g2]
	if(!is.numeric(g1)) g1 = which(hierTOM$labels==g1)
	if(!is.numeric(g2)) g2 = which(hierTOM$labels==g2)
	if((length(g1)==0)|(length(g2)==0)|(max(c(g1,g2))>length(hierTOM$labels))){
		write("Input genes are not both legal indices","")
		return(hierTOM);
	}
	
# Now determine the genes in each branch
	branchIndex = which(hierTOM$height==.minTreeHeight(hierTOM,g1,g2))
	b1 <- b2 <- NULL
	b1 = .getBranchMembers(hierTOM,hierTOM$merge[branchIndex,1],b1)
	b2 = .getBranchMembers(hierTOM,hierTOM$merge[branchIndex,2],b2)
	
# Now reorder the hierTOM correctly
	ord = hierTOM$order
	i1 = which(ord%in%b1)
	i2 = which(ord%in%b2)
	if(min(i1)>min(i2)) {tmp = i1; i1=i2; i2=tmp; rm(tmp)}
	b=1:(min(i1)-1); 
	if(b[length(b)]<b[1]) b = NULL
	e=(max(i2)+1):length(ord); 
	if((max(i2)+1)>length(ord)) e = NULL
	ord = ord[c(b,i2,i1,e)]
	hierTOM$order = ord
	return(hierTOM)
}

# ----------------------------------------------------------------------------- #

chooseOneHubInEachModule <- function(datExpr, colorh, numGenes=100, 
omitColors="grey", power=2, type="signed",...){
## This function returns the gene in each module with the highest connectivity, given
#   a number of randomly selected genes to test.
	
	numGenes = max(round(numGenes),2)
	keep     = NULL
	isIndex  = FALSE
	modules  = names(table(colorh));
	numCols  = table(colorh)
	if(!(is.na(omitColors)[1]))  modules = modules[!is.element(modules,omitColors)]
	if(is.null(colnames(datExpr))){
		colnames(datExpr) = 1:dim(datExpr)[2]
		isIndex = TRUE
	}
	
	for (m in modules){
		num   = min(numGenes,numCols[m])
		inMod = which(is.element(colorh,m)) 
		keep  = c(keep, sample(inMod,num))
	}
	colorh  = colorh[keep]
	datExpr = datExpr[,keep]
	return(chooseTopHubInEachModule(datExpr, colorh, omitColors, power, type,...))
}

# ----------------------------------------------------------------------------- #

chooseTopHubInEachModule <- function(datExpr, colorh, omitColors="grey", 
power=2, type="signed",...){
## This function returns the gene in each module with the highest connectivity.
	
	isIndex = FALSE
	modules = names(table(colorh));
	if(!(is.na(omitColors)[1]))  modules = modules[!is.element(modules,omitColors)]
	if(is.null(colnames(datExpr))){
		colnames(datExpr) = 1:dim(datExpr)[2]
		isIndex = TRUE
	}
	
	hubs = rep(NA,length(modules))
	names(hubs) = modules
	for (m in modules){
		adj = adjacency(datExpr[,colorh==m],power=power,type=type,...)
		hub = which.max(rowSums(adj))
		hubs[m] = colnames(adj)[hub]
	}
	if (isIndex){
		hubs = as.numeric(hubs)
		names(hubs) = modules
	}
	return(hubs)
}

#################################################################################
# Internal functions.............................................................

options(expressions=50000) # Required for .getBranchMembers

.getBranchMembers <- function(hierTOM, ind, members){
# This is a recursive function that gets all the indices of members of
#  a branch in an hClust tree.
	if(ind<0) return(c(members,-ind))
	m1 = hierTOM$merge[ind,1]
	m2 = hierTOM$merge[ind,2]
	if (m1>0) {
		members = .getBranchMembers(hierTOM,m1,members)
	} else members = c(members,-m1)
	if (m2>0) {
		members = .getBranchMembers(hierTOM,m2,members)
	} else members = c(members,-m2)
	return(members)
}

# ----------------------------------------------------------------------------- #

.minTreeHeight <- function(hierTOM,l1,l2) {
## This function finds the minimum height at which two leafs
##  in a hierarchical clustering tree are connected.  l1 and
##  l2 are the UNORDERED indices for the two leafs.
	
## Return 2 (larger than 1, if l1 or l2 is negative). This represents 
##  positions that are off the edge of the tree.
	if((l1<0)|(l2<0)) return(2)
	
## Get the tree for l1
	len = length(hierTOM$height)
	tree1 = which(hierTOM$merge==(-l1))%%len
	continue=length(which(hierTOM$merge==tree1))>0
	while(continue){
		nextInd = which(hierTOM$merge==tree1[length(tree1)])%%len
		tree1 = c(tree1,nextInd)
		continue=length(which(hierTOM$merge==nextInd))>0
	}
	
## Get the tree for l2
	tree2 = which(hierTOM$merge==(-l2))%%len
	continue=length(which(hierTOM$merge==tree2))>0
	while(continue){
		nextInd = which(hierTOM$merge==tree2[length(tree2)])%%len
		tree2 = c(tree2,nextInd)
		continue=length(which(hierTOM$merge==nextInd))>0
	}
	
## Now find the index where the two trees first agree
	minTreeLen = min(c(length(tree1),length(tree2)))
	tree1 = tree1[(length(tree1)-minTreeLen+1):length(tree1)]
	tree2 = tree2[(length(tree2)-minTreeLen+1):length(tree2)]
	treeInd = tree1[min(which(tree1==tree2))]
	
## Now find and return the minimum tree height
	return(hierTOM$height[ifelse(treeInd==0,len,treeInd)])
}

# ----------------------------------------------------------------------------- #

.offDiagonalMatrixSum <- function(adj){
    len = dim(adj)[1]	
	output=sum(diag(adj[1:(len-1),2:len]))
	return(output)	
}

Try the WGCNA package in your browser

Any scripts or data that you put into this service are public.

WGCNA documentation built on April 23, 2022, 1:06 a.m.