# R/LatentForests-Helpers.R In Lucaweihs/sBIC: Computing the Singular BIC for Multiple Models

#### Documented in generateAllBinaryTrees

# These are a collection of helper functions for the LatentForests class. These
# are still in need of much better documentation.

toEdgeListMat = function(edgeListVec) {
if (is.matrix(edgeListVec)) {
return(edgeListVec)
}
return(t(matrix(edgeListVec, 2, length(edgeListVec) / 2)))
}

toEdgeListVec = function(edgeListMat) {
if (is.vector(edgeListMat)) {
return(edgeListMat)
}
return(as.numeric(t(edgeListMat)))
}

getSubModelSupports = function(m, edges) {
# This function computes possible getSubModelSupports (q-subforests)
# the output is in the {0,1}-vectors indicating which edges are in.
# m = number of leaves
# edges = |E|x2 matrix listing the edges
edges = toEdgeListMat(edges)

ne = nrow(edges)
if (ne == 0) {
return(matrix(numeric(0), ncol = 0, nrow = 1))
}

subs = combinat::hcube(rep(2, ne), 1,-1) # indicators of all possible subsets of E
subms = rep(1, 2 ^ ne)

# Compute degree of each node
nodeDegree = numeric(max(edges))
for (i in 1:nrow(edges)) {
for (j in 1:ncol(edges)) {
nodeDegree[edges[i, j]] = nodeDegree[edges[i, j]] + 1
}
}

maxVertex = max(edges)
if (maxVertex > m) {
for (i in 1:2 ^ ne) {
indicatorMatrix = matrix(c(subs[i,], subs[i,]), ne, 2)
for (j in (m + 1):max(edges)) {
if (sum(indicatorMatrix * edges == j) == 1) {
subms[i] = 0
}
}
}
}

if (sum(subms) == 1) {
return(t(subs[subms == 1, ]))
} else if (ne == 1) {
return(matrix(subs[subms == 1, ]))
} else {
return(subs[subms == 1, ])
}
}

getSubmodelEdges = function(subs, edges) {
#returns the set of edges for a given indicator function
return(toEdgeListMat(edges)[subs == 1, , drop = F])
}

subModelsToDAG = function(subModels) {
numSubModels = nrow(subModels)
if (ncol(subModels) == 0) {
return(igraph::graph.empty(1))
}

lessThanMatrix = matrix(numeric(numSubModels ^ 2), ncol = numSubModels)

for (i in 1:(numSubModels - 1)) {
curSupport = subModels[i, ]
curNumEdges = sum(curSupport)
for (j in (i + 1):numSubModels) {
compSupport = subModels[j, ]
compNumEdges = sum(compSupport)
intersectSum = sum(curSupport * compSupport)
if (intersectSum == curNumEdges) {
lessThanMatrix[i, j] = 1
} else if (intersectSum == compNumEdges) {
lessThanMatrix[j, i] = 1
}
}
}

greaterThanMatrix = 1 - lessThanMatrix
trueLessThanMatrix = lessThanMatrix
for (i in 1:numSubModels) {
for (j in which(lessThanMatrix[i, ] == 1)) {
trueLessThanMatrix[i, ] = trueLessThanMatrix[i, ] * greaterThanMatrix[j, ]
}
}

numEdgesInLattice = sum(trueLessThanMatrix)
edgeMatrix = matrix(numeric(numEdgesInLattice * 2), ncol = 2)

k = 1
for (i in 1:numSubModels) {
for (j in which(trueLessThanMatrix[i, ] == 1)) {
edgeMatrix[k, ] = c(i, j)
k = k + 1
}
}

return(igraph::graph.edgelist(toEdgeListMat(edgeMatrix)))
}

forestDim = function(edgeList, support, numLeaves) {
if (numLeaves == 0) {
return(0)
}
edgeList = toEdgeListMat(edgeList[support == 1,])
g = igraph::graph.edgelist(edgeList, directed = F)
return(numLeaves + nrow(edgeList) - sum(igraph::degree(g) == 2))
}

forestFromTreeEdgesAndSupport = function(tree, edges, support) {
if (is.vector(edges)) {
edges = t(matrix(edges, 2, length(edges) / 2))
}
edgesToDelete = as.vector(t(edges[support == 0, ]))
return(igraph::delete.edges(tree, igraph::get.edge.ids(tree, edgesToDelete)))
}

edgesInRootedDAGOrder = function(edgeList, root = "max") {
if (is.vector(edgeList)) {
edgeList = t(matrix(edgeList, 2, length(edgeList) / 2))
}
if (nrow(edgeList) == 0) {
return(edgeList)
}
if (root == "max") {
root = max(edgeList)
}

g = igraph::graph.empty(max(edgeList), directed = F)
dfsResults = igraph::graph.dfs(g, max(edgeList), father = T, unreachable = T)
dfsFather = as.numeric(dfsResults\$father)

for (i in 1:nrow(edgeList)) {
v1 = edgeList[i, 1]
v2 = edgeList[i, 2]
if (!is.na(dfsFather[v1]) && dfsFather[v1] == v2) {
edgeList[i,] = c(v2, v1)
}
}

return(edgeList)
}

removeDeg2Nodes = function(forest) {
deg2Nodes = which(igraph::degree(forest) == 2)

while (length(deg2Nodes) > 0) {
deg2Node = deg2Nodes[1]
forest = igraph::delete.edges(forest, edgeIdsToDelete)
deg2Nodes = which(igraph::degree(forest) == 2)
}
return(forest)
}

###
# Takes as input two vectors (pBig, pSmall) of the same length n which
# represent partitions of 1,...,n. Here i,j of pBig (pSmall) are
# considered to be in the same partition if pBig[i] == pBig[j]
# (pSmall[i] == pSmall[j]). The function returns true if pSmall is a
# refinement (sub-partition) of bBig.
###
isSubPartition = function(pBig, pSmall) {
bigInds = sort(unique(pBig))
for (bigInd in bigInds) {
smallClustInds = sort(unique(pSmall[pBig == bigInd]))
if (any(smallClustInds %in% pSmall[pBig != bigInd])) {
return(F)
}
}
return(T)
}

###
# Requires that either f1 or f2 is a sub-forest of the other and
# that the leaves are the first numLeaves vertices of the two forests.
###
forestDistance = function(f1, f2, numLeaves) {
if (length(igraph::V(f1)) == 0) {
return(0)
}
leaves = 1:numLeaves
f1 = removeDeg2Nodes(f1)
f2 = removeDeg2Nodes(f2)
f1Clusters = igraph::clusters(f1)\$membership[leaves]
f2Clusters = igraph::clusters(f2)\$membership[leaves]

# There are fewer, bigger, clusters in one forest
forestFlipped = F
if (max(f1Clusters) <= max(f2Clusters)) {
fBig = f1
bigClusters = f1Clusters
fSmall = f2
smallClusters = f2Clusters
} else {
forestFlipped = T
fBig = f2
bigClusters = f2Clusters
fSmall = f1
smallClusters = f1Clusters
}

if (!isSubPartition(bigClusters, smallClusters)) {
return(NA)
}

numBigClusters = length(unique(bigClusters))
numSmallClusters = length(unique(smallClusters))
dist = numSmallClusters - numBigClusters
while (numBigClusters != numSmallClusters) {
edgeList = igraph::get.edgelist(fBig)
didChange = F
for (i in 1:nrow(edgeList)) {
edge = edgeList[i, ]
edgeId = igraph::get.edge.ids(fBig, edge)
newF = removeDeg2Nodes(igraph::delete.edges(fBig, edgeId))
newClusters = igraph::clusters(newF)\$membership[leaves]
if (isSubPartition(newClusters, smallClusters)) {
didChange = T
fBig = newF
bigClusters = newClusters
numBigClusters = length(unique(bigClusters))
break
}
}
if (!didChange) {
return(NA)
}
}

bigForestString = forestToString(fBig, numLeaves)
smallForestString = forestToString(fSmall, numLeaves)
if (bigForestString == smallForestString) {
return(dist * (-1) ^ forestFlipped)
} else {
return(NA)
}
}

###
# Forest with no edges is considered to be at depth 0 while a full
# tree has depth numLeaves-1
###
forestDepth = function(forest, numLeaves) {
numClusters = length(unique(igraph::clusters(forest)\$membership[1:numLeaves]))
return((numLeaves - 1) - (numClusters - 1))
}

# Helper function
isRootedDag = function(g) {
if (!igraph::is.simple(g)) {
return(F)
}
clusts = igraph::clusters(g, mode = "weak")
membership = clusts\$membership
numClusts = clusts\$no

for (i in 1:numClusts) {
sourceFound = F
vertices = which(membership == i)
for (v in vertices) {
if (length(igraph::incident(g, v, "in")) == 0) {
if (sourceFound == T) {
return(F)
}
sourceFound = T
}
}
}
return(T)
}

is.support = function(support) {
return(is.vector(support) && is.atomic(support)
&&
(length(support) == 0 || all(support == 1 || support == 0)))
}

is.atomic.vec = function(vec) {
return(is.atomic(vec) && is.vector(vec))
}

is.edgelist.mat = function(edgeList) {
return(is.matrix(edgeList) &&
(ncol(edgeList) == 2) &&
all(edgeList %% 1 == 0) &&
all(edgeList > 0))
}

is.edgelist.vec = function(edgeList) {
return(
is.vector(edgeList) &&
is.atomic(edgeList) &&
(length(edgeList) %% 2 == 0) &&
all(edgeList %% 1 == 0) &&
all(edgeList > 0)
)
}

is.edgelist.vec.or.mat = function(edgeList) {
return(((is.matrix(edgeList) &&
ncol(edgeList) == 2) ||
(
is.vector(edgeList) &&
is.atomic(edgeList) &&
length(edgeList) %% 2 == 0
)
) &&
all(edgeList %% 1 == 0) &&
all(edgeList > 0))
}

isBinaryEdgelistMat = function(edgeList, numLeaves) {
if (!is.edgelist.mat(edgeList)) {
return(F)
}
g = igraph::graph.edgelist(edgeList, directed = F)
v = length(igraph::V(g))
if (v < numLeaves) {
g = igraph::add.vertices(g, numLeaves - v)
}
return(is.binary.forest(g, numLeaves))
}

isBinaryEdgelistVecOrMat = function(edgeList, numLeaves) {
return(isBinaryEdgelistMat(toEdgeListMat(edgeList), numLeaves))
}

is.pos.int = function(num) {
return(length(num) == 1 &&
(is.numeric(num) || is.integer(num)) && (num %% 1 == 0 && num > 0))
}

is.nonneg.int = function(num) {
return(length(num) == 1 &&
(is.numeric(num) ||
is.integer(num)) && (num %% 1 == 0 && num >= 0))
}

is.data.matrix = function(mat) {
return(is.matrix(mat) && nrow(mat) > 0)
}

is.forest = function(f) {
minSpanTree = igraph::minimum.spanning.tree(f)
return(!igraph::is.directed(f) && length(igraph::E(f)) == length(igraph::E(minSpanTree)))
}

is.binary.forest = function(f, numLeaves) {
if (numLeaves == 0) {
return(length(igraph::V(f)) == 0)
}
degs = igraph::degree(f)
return(
is.forest(f) &&
numLeaves %% 1 == 0 &&
numLeaves > 0 &&
numLeaves <= length(igraph::V(f)) &&
all(degs[1:numLeaves] %in% c(0, 1)) &&
all(degs[-(1:numLeaves)] %in% c(0, 2, 3))
)
}

###
# Creates a unique string representation of the bifucating forest.
##
forestToString = function(forest, numLeaves) {
subTreeMembership = igraph::clusters(forest)\$membership

numSubTrees = max(subTreeMembership)
subTreeVertexList = vector("list", numSubTrees)

subTreeOrder = numeric(numSubTrees)
subTreeContainsLeaf = logical(numSubTrees)
for (i in 1:numSubTrees) {
subTreeVertexList[[i]] = sort(which(subTreeMembership == i))
subTreeOrder[i] = subTreeVertexList[[i]][1]
if (subTreeOrder[i] <= numLeaves) {
subTreeContainsLeaf[i] = T
}
}
subTreePermutation = order(subTreeOrder)

newSubTreeVertexList = vector("list", sum(subTreeContainsLeaf))
k = 1
for (i in subTreePermutation) {
if (subTreeContainsLeaf[i] == T) {
newSubTreeVertexList[[k]] = subTreeVertexList[[i]]
k = k + 1
}
}
subTreeVertexList = newSubTreeVertexList
numSubTrees = length(subTreeVertexList)

ancestors = vector("list", length(igraph::V(forest)))

subTreeStrings = character(numSubTrees)
for (i in 1:numSubTrees) {
subTreeVertices = subTreeVertexList[[i]]

if (length(subTreeVertices) == 1) {
subTreeStrings[i] = paste("(", subTreeVertices[1], ")", sep = "")
} else if (sum(subTreeVertices <= numLeaves) == 2) {
subTreeLeaves = sort(subTreeVertices[which(subTreeVertices <= numLeaves)])
subTreeStrings[i] = paste("((", subTreeLeaves[1], ")(", subTreeLeaves[2], "))", sep =
"")
} else {
# The smallest node in the subtree, note that this
# is the first in subTreeVertices by the above ordering
# We know that this smallest node is a leaf by the above pruning.
smallestObserved = subTreeVertices[1]

last = smallestObserved
current = smallestObservedDegree3Parent
smallestObservedDegree3Parent = curNeighbors[which(curNeighbors != last)]
last = current
}

dfsResult = igraph::graph.dfs(
forest,
root = smallestObservedDegree3Parent,
unreachable = F,
father = T,
order.out = T
)

orderOut = as.numeric(dfsResult\$order.out)
order = as.numeric(dfsResult\$order)
parents = as.numeric(dfsResult\$father)

orderOut = orderOut[!is.na(orderOut)]
order = order[!is.na(order)]

for (node in orderOut) {
if (!is.na(parents[node]) && parents[node] != 0) {
parent = parents[node]
ancestors[[parent]] = sort(c(ancestors[[parent]], ancestors[[node]], node))
}
}

subTreeStrings[i] = treeToString(smallestObservedDegree3Parent,
parents,
ancestors,
numLeaves)
}
}

return(paste(subTreeStrings, collapse = "|"))
}

###
# Helper function, do not call.
###
getSubForestString = function(forest, edgeList, support, numLeaves) {
edgesToDelete = edgeList[support == 0, ]
if (!is.vector(edgesToDelete)) {
edgesToDelete = as.vector(t(edgesToDelete))
}
edgesToDelete = igraph::get.edge.ids(forest, edgesToDelete)
return(forestToString(igraph::delete.edges(forest, edgesToDelete), numLeaves))
}

###
# Helper function, do not call.
###
treeToString = function(root,
parents,
ancestors,
numLeaves) {
if (length(rootNeighbors) == 1 || length(rootNeighbors) == 0) {
if (root <= numLeaves) {
return(paste("(", as.character(root), ")", sep = ""))
} else {
return("")
}
} else if (length(rootNeighbors) == 2) {
print("ERROR: root should never have degree 2.")
return(F)
} else if (length(rootNeighbors) == 3) {
if (!is.na(parents[root]) && parents[root] != 0) {
rootNeighbors = rootNeighbors[rootNeighbors != parents[root]]
}

neighborPermutation = numeric(length(rootNeighbors))
for (i in 1:length(rootNeighbors)) {
neighborPermutation[i] = min(rootNeighbors[i], ancestors[[rootNeighbors[i]]][1])
}
rootNeighbors = rootNeighbors[order(neighborPermutation)]

treeString = character(length(rootNeighbors))
for (i in 1:length(rootNeighbors)) {
newRoot = rootNeighbors[i]
}
treeString[i] = treeToString(newRoot, parents, ancestors, adjList, numLeaves)
}
treeString = treeString[treeString != ""]

if (length(treeString) == 0) {
return("")
} else if (length(treeString) == 1) {
return(treeString[1])
} else {
return(paste("(", paste(treeString, collapse = ""), ")", sep = ""))
}
} else {
print("Error: input tree is not bivariate.")
return(F)
}
}

#' Generate all non-isomorphic binary trees.
#'
#' Generates all non-isomorphic binary trees with a given number of
#' leaves where leaves are considered labeled and inner nodes are
#' unlabeled. Takes as argument the number of leaves for which to produce the
#' binary trees and returns a list of (n-1)x2 matrices where each row
#' corresponds to a edge in the tree. These edge matrices will be in 'directed
#' order,' i.e. will be so that if they are considered to be directed edges
#' then the resulting graph will have exactly one source.
#'
#' @export
#'
#' @param numLeaves the number of leaves
generateAllBinaryTrees = function(numLeaves) {
if (numLeaves <= 1) {
print("Number of leaves must be greater than or equal to 2")
return(list())
}

edgeMatrix = t(c(1, 2))
n = 2
lastGraphList = list(edgeMatrix)
while (n < numLeaves) {
curGraphList = list()
for (edgeMatrix in lastGraphList) {
#increment all non-leaf vertices by 1 so that there is
# room for the next leaf
maxVertex = max(edgeMatrix)
edgeMatrix = edgeMatrix + (edgeMatrix > n)
for (i in 1:dim(edgeMatrix)[1]) {
newEdges = matrix(
c(edgeMatrix[i, 1], maxVertex + 2, edgeMatrix[i, 2], maxVertex + 2),
ncol = 2,
byrow = T
)
newEdgeMatrix = edgeMatrix
newEdgeMatrix[i, ] = c(maxVertex + 2, n + 1)
newEdgeMatrix = rbind(newEdgeMatrix, newEdges)
g = igraph::graph.edgelist(newEdgeMatrix, directed = F)
dfsResults = igraph::graph.dfs(g, max(newEdgeMatrix), father = T)

newEdgeMatrix = newEdgeMatrix * 0

for (j in 1:(length(dfsResults\$order) - 1)) {
node = dfsResults\$order[j + 1]
father = dfsResults\$father[node]
newEdgeMatrix[j, 1] = father
newEdgeMatrix[j, 2] = node
}
curGraphList = c(curGraphList, list(newEdgeMatrix))
}
}
lastGraphList = curGraphList
n = n + 1
}

return(lastGraphList)
}

generateRandomBinaryTree = function(numLeaves, directedOrdering = F) {
if (numLeaves %in% c(0, 1)) {
return(matrix(numeric(0), ncol = 2))
}

edgeMatrix = t(c(1, 2))
n = 2
while (n < numLeaves) {
maxVertex = max(edgeMatrix)
edgeMatrix = edgeMatrix + (edgeMatrix > n)

edgeToSplit = sample(1:(dim(edgeMatrix)[1]), 1)

newEdges = matrix(
c(edgeMatrix[edgeToSplit, 1], maxVertex + 2, edgeMatrix[edgeToSplit, 2], maxVertex + 2),
ncol = 2,
byrow = T
)
newEdgeMatrix = edgeMatrix
newEdgeMatrix[edgeToSplit, ] = c(maxVertex + 2, n + 1)
newEdgeMatrix = rbind(newEdgeMatrix, newEdges)

edgeMatrix = newEdgeMatrix
n = n + 1
}

if (directedOrdering) {
g = igraph::graph.edgelist(edgeMatrix, directed = F)
dfsResults = igraph::graph.dfs(g, max(edgeMatrix), father = T)

edgeMatrix = edgeMatrix * 0

for (j in 1:(length(dfsResults\$order) - 1)) {
node = dfsResults\$order[j + 1]
father = dfsResults\$father[node]
edgeMatrix[j, 1] = father
edgeMatrix[j, 2] = node
}
}

return(edgeMatrix)
}

generateRandomBinaryForest = function(numLeaves, depth) {
if (numLeaves < 0 || depth < 0 || depth > numLeaves - 1) {
stop("Invalid number of leaves or depth")
}
if (numLeaves == 1) {
return(igraph::graph.empty(1, directed = F))
}
edgeList = generateRandomBinaryTree(numLeaves, T)

numVertices = 2 * numLeaves - 2
support = rep(1, nrow(edgeList))
subForestIndicator = support

if ((numLeaves - 1) - depth > 0) {
for (i in 1:((numLeaves - 1) - depth)) {
supports = generateOneStepSubForestSupports(subForestIndicator, edgeList, numLeaves, numVertices)
subForestIndicator = supports[sample(nrow(supports), 1),]
}
}

tree = igraph::graph.edgelist(edgeList, directed = F)
edgesToDelete = edgeList[subForestIndicator == 0, ]
if (!is.vector(edgesToDelete)) {
edgesToDelete = as.vector(t(edgesToDelete))
}
edgesToDelete = igraph::get.edge.ids(tree, edgesToDelete)
return(removeDeg2Nodes(igraph::delete.edges(tree, edgesToDelete)))
}

###
# Generates the subforests F' that are strictly less than the given
# forest F and for which there exists no other subforest Q such that
# F' subset Q subset F (deg 2 nodes have been removed).
#
# Assumes the support comes from a valid minimal subforest F.
###
generateOneStepSubForestSupports = function(support, edgeList, numLeaves, numVertices) {
edgeList = toEdgeListMat(edgeList)

numPossibleSubForests = sum(support)
if (numPossibleSubForests == 0) {
return(numeric(0))
}

subForestSupports = matrix(numeric(numPossibleSubForests * length(support)), ncol =
length(support))
edgesToRemove = which(support != 0)

seenSupports = hash::hash()
k = 1
for (i in 1:numPossibleSubForests) {
newSupport = support
newSupport[edgesToRemove[i]] = 0
newSupport = pruneEdges(newSupport, edgeList, numLeaves, numVertices)
newSupportString = paste(newSupport, collapse = "")
if (!hash::has.key(newSupportString, seenSupports)) {
seenSupports[[newSupportString]] = 1
subForestSupports[k,] = newSupport
k = k + 1
}
}

if (k - 1 == 1) {
return(t(subForestSupports[1:(k - 1),]))
} else {
return(subForestSupports[1:(k - 1),])
}
}

pruneEdges = function(support, edgeList, numLeaves, numVertices) {
if (nrow(edgeList) == 0) {
return(numeric(0))
}
return(pruneEdgesHelper(support, edgeList, numLeaves, numVertices))
}
Lucaweihs/sBIC documentation built on June 3, 2017, 3:34 a.m.