## This file contains several functions which can be used to adjust the dendrogram
# in ways which keep the dendrogram mathematically identical (ie, branch swapping,
# branch reflection, etc). The goal is to biologically optimize the dendrogram.
# ----------------------------------------------------------------------------- #
orderBranchesUsingHubGenes <- function(hierTOM, datExpr = NULL, colorh = NULL, type = "signed",
adj = NULL, iter = NULL, useReflections = FALSE, allowNonoptimalSwaps = FALSE) {
# First read in and format all of the variables
hGenes <- hierTOM$labels
if (is.null(adj)) {
genes <- chooseOneHubInEachModule(datExpr, colorh, type = type)
adj <- adjacency(datExpr[, genes], type = type, power = (2 - (type == "unsigned")))
colnames(adj) <- rownames(adj) <- genes
}
genes <- rownames(adj)
if (length(genes) != length(intersect(genes, hGenes))) {
write("All genes in the adjacency must also be in the gene tree. Check to make sure", "")
write("that names(hierTOM$labels) is set to the proper gene or probe names and that", "")
write("these correspond to the expression / adjacency gene names.", "")
return(0)
}
genes <- hGenes[is.element(hGenes, genes)]
adj <- adj[genes, genes]
if (is.null(iter)) iter <- length(genes)^2
iters <- (1:iter) / iter
swapAnyway <- rep(0, length(iters)) # Quickly decreasing chance of random swap
if (allowNonoptimalSwaps) swapAnyway <- ((1 - iters)^3) / 3 + 0.001
# Iterate random swaps in the branch, only accepting the new result
# if it produces a higher correlation than the old result OR if the
# random variable says to swap (which gets less likely each iteration)
changes <- NULL
for (i in 1:iter) {
swap <- 1
if (useReflections) swap <- sample(0:1, 1)
gInd <- sample(1:length(genes), 2)
g <- genes[gInd]
if (swap == 1) {
hierTOMnew <- swapTwoBranches(hierTOM, g[1], g[2])
} else {
hierTOMnew <- reflectBranch(hierTOM, g[1], g[2], TRUE)
}
oldSum <- .offDiagonalMatrixSum(adj)
oGenesNew <- hGenes[hierTOMnew$order]
oGenesNew <- oGenesNew[oGenesNew %in% genes]
adjNew <- adj[oGenesNew, oGenesNew]
newSum <- .offDiagonalMatrixSum(adjNew)
if ((newSum > oldSum) | ((sample(1:1000, 1) / 1000) < swapAnyway[i])) {
hierTOM <- hierTOMnew
changes <- rbind(changes, c(i, ifelse(swap == 1, "Swap", "Reflect"), g, oldSum, newSum))
adj <- adjNew
}
write(paste("Interation", i, "of", iter), "")
collectGarbage()
}
# Perform all of the suggested swappings on the input network.
# Output the results
colnames(changes) <- c("Iter.#", "Swap?", "Gene1", "Gene2", "OldScore", "NewScore")
out <- list(geneTree = hierTOM, changeLog = changes)
return(out)
}
# ----------------------------------------------------------------------------- #
selectBranch <- function(hierTOM, g1, g2) {
## This function selects of all genes in a branch given a gene in the
## branch (g1) and a gene in a neighboring branch (g2), returning the
## indices for genes in the branch in the hierTOM$labels vector
# Convert genes to UNORDERED indices (if given, indices should be ordered)
if (is.numeric(g1)) g1 <- hierTOM$order[g1]
if (is.numeric(g2)) g2 <- hierTOM$order[g2]
if (!is.numeric(g1)) g1 <- which(hierTOM$labels == g1)
if (!is.numeric(g2)) g2 <- which(hierTOM$labels == g2)
if ((length(g1) == 0) | (length(g2) == 0) | (max(c(g1, g2)) > length(hierTOM$labels))) {
write("Input genes are not both legal indices", "")
return(hierTOM)
}
# Now determine which branch is the correct one, and find the genes
len <- length(hierTOM$height)
tree1 <- which(hierTOM$merge == (-g1)) %% len
continue <- length(which(hierTOM$merge == tree1)) > 0
while (continue) {
nextInd <- which(hierTOM$merge == tree1[length(tree1)]) %% len
tree1 <- c(tree1, nextInd)
continue <- length(which(hierTOM$merge == nextInd)) > 0
}
branchIndex <- which(hierTOM$height == .minTreeHeight(hierTOM, g1, g2))
branch <- hierTOM$merge[branchIndex, ]
b1 <- NULL
if (is.element(branch[1], tree1)) {
b1 <- .getBranchMembers(hierTOM, branch[1], b1)
} else {
b1 <- .getBranchMembers(hierTOM, branch[2], b1)
}
collectGarbage()
return(b1)
}
# ----------------------------------------------------------------------------- #
reflectBranch <- function(hierTOM, g1, g2, both = FALSE) {
## This function reverses the ordering of all genes in a branch of the
## clustering tree defined by the minimal branch possible that contains
## both g1 and g2 (as either ORDERED index or gene names), or just by
## the genes in g1
b1 <- selectBranch(hierTOM, g1, g2)
if (both) b1 <- c(b1, selectBranch(hierTOM, g2, g1))
# Now reorder the hierTOM correctly
ord <- hierTOM$order
i1 <- which(ord %in% b1)
b <- 1:(min(i1) - 1)
if (b[length(b)] < b[1]) b <- NULL
e <- (max(i1) + 1):length(ord)
if ((max(i1) + 1) > length(ord)) e <- NULL
ord <- ord[c(b, i1[order(i1, decreasing = T)], e)]
hierTOM$order <- ord
return(hierTOM)
}
# ----------------------------------------------------------------------------- #
swapTwoBranches <- function(hierTOM, g1, g2) {
## This function re-arranges two branches in a heirarchical clustering tree
## at the nearest branch point of two given genes (or indices)
# Convert genes to indices (ORDERED AS ON THE PLOT)
if (is.numeric(g1)) g1 <- hierTOM$order[g1]
if (is.numeric(g2)) g2 <- hierTOM$order[g2]
if (!is.numeric(g1)) g1 <- which(hierTOM$labels == g1)
if (!is.numeric(g2)) g2 <- which(hierTOM$labels == g2)
if ((length(g1) == 0) | (length(g2) == 0) | (max(c(g1, g2)) > length(hierTOM$labels))) {
write("Input genes are not both legal indices", "")
return(hierTOM)
}
# Now determine the genes in each branch
branchIndex <- which(hierTOM$height == .minTreeHeight(hierTOM, g1, g2))
b1 <- b2 <- NULL
b1 <- .getBranchMembers(hierTOM, hierTOM$merge[branchIndex, 1], b1)
b2 <- .getBranchMembers(hierTOM, hierTOM$merge[branchIndex, 2], b2)
# Now reorder the hierTOM correctly
ord <- hierTOM$order
i1 <- which(ord %in% b1)
i2 <- which(ord %in% b2)
if (min(i1) > min(i2)) {
tmp <- i1
i1 <- i2
i2 <- tmp
rm(tmp)
}
b <- 1:(min(i1) - 1)
if (b[length(b)] < b[1]) b <- NULL
e <- (max(i2) + 1):length(ord)
if ((max(i2) + 1) > length(ord)) e <- NULL
ord <- ord[c(b, i2, i1, e)]
hierTOM$order <- ord
return(hierTOM)
}
# ----------------------------------------------------------------------------- #
chooseOneHubInEachModule <- function(datExpr, colorh, numGenes = 100,
omitColors = "grey", power = 2, type = "signed", ...) {
## This function returns the gene in each module with the highest connectivity, given
# a number of randomly selected genes to test.
numGenes <- max(round(numGenes), 2)
keep <- NULL
isIndex <- FALSE
modules <- names(table(colorh))
numCols <- table(colorh)
if (!(is.na(omitColors)[1])) modules <- modules[!is.element(modules, omitColors)]
if (is.null(colnames(datExpr))) {
colnames(datExpr) <- 1:dim(datExpr)[2]
isIndex <- TRUE
}
for (m in modules) {
num <- min(numGenes, numCols[m])
inMod <- which(is.element(colorh, m))
keep <- c(keep, sample(inMod, num))
}
colorh <- colorh[keep]
datExpr <- datExpr[, keep]
return(chooseTopHubInEachModule(datExpr, colorh, omitColors, power, type, ...))
}
# ----------------------------------------------------------------------------- #
chooseTopHubInEachModule <- function(datExpr, colorh, omitColors = "grey",
power = 2, type = "signed", ...) {
## This function returns the gene in each module with the highest connectivity.
isIndex <- FALSE
modules <- names(table(colorh))
if (!(is.na(omitColors)[1])) modules <- modules[!is.element(modules, omitColors)]
if (is.null(colnames(datExpr))) {
colnames(datExpr) <- 1:dim(datExpr)[2]
isIndex <- TRUE
}
hubs <- rep(NA, length(modules))
names(hubs) <- modules
for (m in modules) {
adj <- adjacency(datExpr[, colorh == m], power = power, type = type, ...)
hub <- which.max(rowSums(adj))
hubs[m] <- colnames(adj)[hub]
}
if (isIndex) {
hubs <- as.numeric(hubs)
names(hubs) <- modules
}
return(hubs)
}
#################################################################################
# Internal functions.............................................................
options(expressions = 50000) # Required for .getBranchMembers
.getBranchMembers <- function(hierTOM, ind, members) {
# This is a recursive function that gets all the indices of members of
# a branch in an hClust tree.
if (ind < 0) {
return(c(members, -ind))
}
m1 <- hierTOM$merge[ind, 1]
m2 <- hierTOM$merge[ind, 2]
if (m1 > 0) {
members <- .getBranchMembers(hierTOM, m1, members)
} else {
members <- c(members, -m1)
}
if (m2 > 0) {
members <- .getBranchMembers(hierTOM, m2, members)
} else {
members <- c(members, -m2)
}
return(members)
}
# ----------------------------------------------------------------------------- #
.minTreeHeight <- function(hierTOM, l1, l2) {
## This function finds the minimum height at which two leafs
## in a hierarchical clustering tree are connected. l1 and
## l2 are the UNORDERED indices for the two leafs.
## Return 2 (larger than 1, if l1 or l2 is negative). This represents
## positions that are off the edge of the tree.
if ((l1 < 0) | (l2 < 0)) {
return(2)
}
## Get the tree for l1
len <- length(hierTOM$height)
tree1 <- which(hierTOM$merge == (-l1)) %% len
continue <- length(which(hierTOM$merge == tree1)) > 0
while (continue) {
nextInd <- which(hierTOM$merge == tree1[length(tree1)]) %% len
tree1 <- c(tree1, nextInd)
continue <- length(which(hierTOM$merge == nextInd)) > 0
}
## Get the tree for l2
tree2 <- which(hierTOM$merge == (-l2)) %% len
continue <- length(which(hierTOM$merge == tree2)) > 0
while (continue) {
nextInd <- which(hierTOM$merge == tree2[length(tree2)]) %% len
tree2 <- c(tree2, nextInd)
continue <- length(which(hierTOM$merge == nextInd)) > 0
}
## Now find the index where the two trees first agree
minTreeLen <- min(c(length(tree1), length(tree2)))
tree1 <- tree1[(length(tree1) - minTreeLen + 1):length(tree1)]
tree2 <- tree2[(length(tree2) - minTreeLen + 1):length(tree2)]
treeInd <- tree1[min(which(tree1 == tree2))]
## Now find and return the minimum tree height
return(hierTOM$height[ifelse(treeInd == 0, len, treeInd)])
}
# ----------------------------------------------------------------------------- #
.offDiagonalMatrixSum <- function(adj) {
len <- dim(adj)[1]
output <- sum(diag(adj[1:(len - 1), 2:len]))
return(output)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.