# R/constraints.R In amirms/MultiViz: MultiViz

```#propagate must links to induce new links
#Input: a list of vectors: each denoting a user-defined set of words to appear together
require(igraph)

return(NULL)

edges = c()

break

}

gr <- graph.data.frame(edges, directed=FALSE)

dgr <- decompose.graph(gr)

return (lapply(dgr, function(x) V(x)\$name))

}

#Pre-condition: Assume cannot links are pairwise constraints
require(igraph)

return(NULL)

edges = c()

if (length(l) < 2)
break

for (j in 2:length(l))
edges <- rbind(edges, c(l[j-1], l[j]))

}

gr <- graph.data.frame(edges, directed=FALSE)

#   print(gr)

dgr <- decompose.graph(gr)

#   print(lapply(dgr, function(c) V(c)))

# For each connected component, enumerate all maximal independent sets
# (equivalent to all maximal cliques in the complement graph)
#   allowable = [((cl.subgraph(cc)).complement()).cliques()
#                for cc in clcc]

#    allowables <- lapply(dgr, function(c) maximal.cliques(graph.complementer(gr), subset=V(c)))
#
#
# print(allowables)

# allowables <- lapply(dgr, function(c) independent.vertex.sets(subgraph.edges(gr, E(c), delete.vertices = FALSE), min=2) )

# allowables <- lapply(dgr, function(c) maximal.independent.vertex.sets(subgraph.edges(gr, E(c), delete.vertices = FALSE)) )

allowables <- lapply(dgr, function(c) maximal.cliques(graph.complementer(subgraph.edges(gr, E(c), delete.vertices = FALSE), loops=FALSE)))

# print(allowables)
# stop("allowables")#print(allowables)

allowables <- lapply(allowables, function(s) lapply(s, function(c)  V(subgraph(gr, c))\$name))

print(allowables)

#min=2)), max=2))
#allowables <- lapply(dgr, function(c) maximal.cliques(gr, subset=V(c)))

return(list(clcc =lapply(dgr, function(x) V(x)\$name), allowable = allowables))

}

#     stop(":as")

}

return(constraints)

}

#Pre-condition: constraint words is a subst of the vocabulary
process.pairwise <- function(constr,W,vocab){
require(igraph)

# Unpack pairwise to construct graphs

#construct a edge-less graph
gr <- graph.empty(directed=FALSE) + vertices(vocab)
#set of vertices of gr
vertices <- V(gr)\$name

# Get ML connected components

mlcc <- lapply(mlcc, function(cc) match(cc, vertices))
#   # Invert this mapping
#   if(length(mlcc) > 0):
#     invmlcc = dict(reduce(OP.concat,[[(wi,mli+W) for wi in mlcc[mli]]
#                                      for mli in range(len(mlcc))],[]))
#   else:
#     invmlcc = {}

#   # Invert this mapping
#   invmlcc = list()
#   if (length(mlcc) > 0)
#     for (mli in seq(length(mlcc)))
#       for (wi in mlcc[[mli]])
#         invmlcc[[wi]] <- mli+W
#
#     # between ML components (when applicable)
#   #   newcannotlinks = map(lambda x: tuple(map(lambda y: invmlcc.get(y,y),x)),
#
#   print(mlcc)
#
#                            function(cl) unlist(lapply(cl,
#                                       function(y){
#                                         y <- match(y, vertices)
#                                         if (!is.null(invmlcc[[y]]))
#                                           invmlcc[[y]]
#                                         else
#                                           y
#                                       }))
#                            )
#
#   # Check for conflicting constraints
#   #
#   # if 2 words are connected (transitively or directly) by must-links,
#   # they will be in the same ML connected component
#   #
#   # if 2 words also have a pairwise cannot-link btwn them, then after
#   # the mapping we will have a a pairwise cannot-link between an ML
#   # connected component and *itself*
#
#  #conflicts <- [cl[0] for cl in newcannotlinks if cl[0]==v[1]]
#
#  conflicts <- unlist(lapply(newcannotlinks, function(cl) if (cl[1] == cl[2]) cl[1]))
#
#  conflict.constraints =list()
#
#
# #  print(conflicts)
# #  stop("")
#
#   if(length(conflicts) > 0){
#     # Identify the conflicting constraints
#     print('!!! CONFLICTING CONSTRAINTS !!!')
#     for(conflict in conflicts){
#       print('Following pairs must *and* cannot-link:')
#       mli <- conflict - W
#       mlw <- mlcc[[mli]]
#       vmlw <- vocab[mlw]
#         if((cl[1] %in% vmlw) && (cl[2] %in% vmlw)){
#           # Print words if available, else word indices
# #           if(length(vocab) > 0){
#             conflict <- c(cl[1],cl[2])
#             print(conflict)
#             conflict.constraints[[length(conflict.constraints) + 1]] <- conflict
# #           }
# #           else{
# #             conflict <- c(cl[1],cl[2])
# #             print(conflict)
# #             conflict.constraints[[length(conflict.constraints) + 1]] <- conflict
# #           }
#           }
#     }
#     #raise RuntimeError('Conflicting constraints, see err msg')
#     #stop('Conflicting constraints, see err msg')
#     return(list(conflicts = conflict.constraints))
#   }

# Get CL graph connected components and allowable sets

clcc <- lapply(clinks\$clcc, function(cc) match(cc, vertices))

function(aset) lapply(aset,
function(as) match(as, vertices)))

#FIXME to allow for a list called clinks = list(clcc, allowable)
# return(list(mlcc = mlcc, clcc = clcc, allowable=allowable, vertices = vertices))
return(list(mlcc = mlcc, clcc = clcc, allowable=allowable, conflicts = NULL))
}

buildTree <- function(mlcc,clcc, allowables,W, beta,eta, dt){
#"""
#Given:
#mlcc = must-link clusters of words (List of Vectors)
#clcc = cannot-link clusters of must-link clusters and words (List of Tuples)
#allowable = within each clcc, the allowable combinations of mlcc/words
#W = total number of words (connect extra words directly to root)

#Return Dirichlet Forest data structure
#(Dirichlet Tree plus special multinodes for structure variants)

#PROCEDURE
#1) construct tree structure
#2) populate indices
#3) tuple-ify tree for pass to intLDA
#"""
require(Rcpp)
require(ITM)
require(inline)

DTNode = dt\$DTNode
MLNode = dt\$MLNode
MultiNode = dt\$MultiNode

#return(MLNode)

# Build M-nodes for each mlcc (will have only leaf children)
#MLNode(edges, ichildren, maxind, leafstart, words)

# leaftstart=-1

ml_nodes = lapply(mlcc, function(mi)
new (MLNode, eta*beta*rep(1, length(mi)), list(),integer(0), -1,mi))
#   print("length(mlcc)")
#   print(length(mlcc))
# Build multinodes for each clcc
multinodes = list()
idx = 0

allvars = list()
for (cc in clcc){
idx <- idx + 1
# these are the true output connections of the multinode
# ensure that ML-nodes always appear 1st
#FIXED the indices
#icids = sapply(clink\$clcc, function(ci) ifelse(ci >= W, ci, NULL))
icids = unlist(lapply(cc, function(ci) if(ci > W) ci))
#lcids = sapply(clink\$clcc, function(ci) if (ci < W) return(ci) else c())
lcids = unlist(lapply(cc, function(ci) if (ci <= W) ci))
#fake_words = cicids + lcids
fake_words = c(icids, lcids)
print(fake_words)

#return(list(cc=cc,icids=icids, lcids=lcids, fake_words=fake_words))

# construct a variation for each allowable set
variations = list()

for (aset in allowables[[idx]])
if (length(aset) > 0){

good = aset

#bad = lapply(fake_words, function(fl) if (!(fl %in% good)) return fl else c())
bad = unlist(lapply(fake_words, function(fl) if (!(fl %in% good)) fl))
# scale the edges coming out of the likely internal node

aedges = c()

#       print(ml_nodes)

for (a in good)
if(a > W)
# M-node? --> beta * num leaves
aedges <- c(aedges, beta * ml_nodes[[a-W]]\$numLeaves())
else
# Leaf? --> beta
aedges <- c(aedges, beta)
#       print(list(aset=aset, aedges=aedges))

print(aedges)

# scale the edges coming out of the fakeroot
fedges = c(eta * sum(aedges))

if(b > W)
# M-node? --> beta * num leaves
fedges <- c(fedges, beta * ml_nodes[[b-W]]\$numLeaves())
else
# Leaf? --> beta
fedges <- c(fedges, beta)

print(list(aedges=aedges, fedges=fedges))

# stop("no reason")

# variant subtree
#likely_internal = DTNode(aedges,list(),list(),0)
likely_internal = new (DTNode, aedges,list(),integer(0),1)
#fakeroot = DTNode(fedges, likely_internal, length(good)-1, length(good))
fakeroot = new (DTNode, fedges, list(likely_internal),
#       length(good)-1, length(good))
length(good), length(good)+1)

# fake leafmap permutation for this variant
# fake_wordmap = good + bad
# fake_wordmap is a vector

#the index of first word index - corresponding to index function to python
fake_wordmap <- unique(fake_wordmap)
fake_leafmap = sapply(fake_wordmap, function(wi) which(fake_wordmap==wi)[1])

#       fake_leafmap = unlist(lapply(fake_wordmap, function(wi) match(wi, fake_wordmap)))

# save it
#FIXME An expensive operation
variations[[length(variations)+1]] = list(fakeroot = fakeroot,fake_leafmap = fake_leafmap)
}

allvars[[length(allvars) + 1]] <- variations

#
#     return(variations)
#
# build the multinode
#     ichildren = [ml_nodes[ci-W] for ci in cc if ci >= W]
#     lchildren = [ci for ci in cc if ci < W]
#FIXED the indices for ci and W are fixed
ichildren = list()

for (i in 1:length(cc)){
print(cc[i])
print(W)
if(cc[i] > W)
ichildren[[length(ichildren)+1]] <- ml_nodes[[cc[i]-W]]
}
lchildren = unlist(lapply(cc, function(ci) if (ci <= W) ci))

print(list(ichildren=ichildren, lchildren=lchildren))
multinodes <- append(multinodes,
new (MultiNode, numeric(0),ichildren,integer(0),0,lchildren,variations))

}

# return(allvars)

print(length(multinodes))

# Create empty root node
#root = DTNode(c(),list(),list(),0)
root <- new(DTNode, numeric(0), list(), integer(0) , 0)

# Start connecting things and populating indices
#
cur_ind = 1 # current leaf index
wordmap = c() # maps leaf index --> word index

# MultiNodes connected to root
#
for (multinode in multinodes) {
# Map ML-node children
for (ml_child in multinode\$ichildren){
# Get word indices under this M-node
#wordmap = wordmap + ml_child\$words,
# is it c, cbind
wordmap <- c(wordmap, ml_child\$words)
# Save leafstart index for this M-node
ml_child\$leafstart = cur_ind
# Record maxind in multinode
cur_ind <- cur_ind + length(ml_child\$words)
#       print("printing current index")
#       print(cur_ind)
multinode\$maxind <- c(multinode\$maxind, cur_ind-1)
#       print("printing multinode maxind")
#       print(multinode\$maxind)

}

# Does this multinode connect directly to any leaves?
if(length(multinode\$words) > 0){
# Record leaf start index
multinode\$leafstart = cur_ind
cur_ind <- cur_ind + length(multinode\$words)
# Record word indices
# wordmap = wordmap + multinode.words
wordmap = c(wordmap, multinode\$words)
}

# Put this C-node and maxindex in root
root\$edges <- c(root\$edges, beta*multinode\$numLeaves())
root\$ichildren <- c(root\$ichildren, multinode)
root\$maxind <- c(root\$maxind, cur_ind-1)

}

# return(multinodes)

# ML-nodes connected directly to root
#

print(length(ml_nodes))
for (mln in ml_nodes)
# Leaf start index == None --> has not been assigned yet
if(mln\$leafstart == -1){

#stop("reached here")
# Set leaf start index for ML-node
mln\$leafstart = cur_ind
cur_ind <- cur_ind + mln\$numLeaves()
# Put ML-node and its maxind in root
#       print("mln\$numLeaves()")
#       print(mln\$numLeaves())
root\$edges <- c(root\$edges, beta*mln\$numLeaves())
root\$ichildren <- c(root\$ichildren, mln)
root\$maxind <- c(root\$maxind, cur_ind-1)
# Record word indices under this M-node
#wordmap = wordmap + mln.words
wordmap = c(wordmap, mln\$words)
}

# print(allvars)
# print("wordmap")
# print(wordmap)
# stop("nor eason")
# Finally, rest of words must be connected directly to root
#FIXME Not sure how to interpret this:
#root.edges = array(root.edges + [beta for i in range(W - cur_ind)])
#   print("cur_ind")
#   print(cur_ind)
#   print(rep(beta, W - (cur_ind - 1)))

print(W)
print(beta)
print(cur_ind)

root\$edges = append(root\$edges, rep(beta, W - (cur_ind - 1)))

#   print("printing root edges")
#   print(root\$edges)

root\$leafstart = cur_ind
#wordmap = wordmap + [wi for wi in seq(W) if wi not in wordmap]
#   print("wordmap")
print(wordmap)
wordmap = c(unique(wordmap), unlist(sapply(seq(W), function(wi) if (!(wi %in% wordmap)) wi )))
print(length(wordmap))

# Convert everything to tuples
#root = root\$tupleConvert()

# print(wordmap)

# Recover leafmap by inverting wordmap permutation
#leafmap = [wordmap.index(wi) for wi in range(W)]
#the index of first word index - corresponding to index function to python
leafmap = unlist(sapply(seq(W), function(wi) which(wordmap==wi)[1]))

# print(leafmap)

return(list(root= root,leafmap = leafmap))
}

test.constraints <- function() {
require(Rcpp)
require(inline)

dt = Module("dt", getDynLib("ITM"))
voc = c("a", "b", "c", "d", "e", "f")
#   mlinks = list(c("a", "b", "c"), c("d", "e", "f"))