In adamsardar/stoneTrees: Solve Node Centric Steiner Tree Problems

ompr is a fantastic pacakge, heavily inspired by Julia's JuMP.

library(ompr)
library(ROI.plugin.glpk)
library(ROI.plugin.lpsolve)
library(ompr.roi)

library(igraph)

Working from the manuscript:

Fischetti M, Leitner M, Ljubic I, Luipersbeck M, Monaci M, Resch M, et al. Thinning out Steiner trees. DIMACS. 2015;

terminals = fixedTerminals U potentialTerminals

1.) min sum( nodeInTree[i] * nodeCost[i] ) 2.) nodeInTree[ minNodeSeperators_ij ] >= nodeInTree[i] + nodeInTree[j] -1 3.) nodeInTree[i] = 1 if i in fixedTerminals 4.) nodeInTree in {0,1} (i.e. all binary)

5.) nodeInTree[ A_i ] >= nodeInTree[i] if i in terminals 6.) nodeInTree[i] <= nodeInTree[j] if j in potentialTerminals and i in neighbourhood of j

load("./data/karateGraph.RData")

load("./data/lymphomaGraph.RData")

searchGraph = lymphomaGraph

V(searchGraph)$isTerminal = FALSE
#V(searchGraph)[nodeScore > 5]$isTerminal = TRUE

#searchGraph = condenseSearchGraph(searchGraph)


fixedTerminals = V(searchGraph)[isTerminal]
potentialTerminals =  V(searchGraph)[nodeScore > 0]

terminals = unique(c(fixedTerminals, potentialTerminals))

potentialSteinerNodes =  delete_vertices(searchGraph,
                                          c(fixedTerminals, potentialTerminals))

nodeDT = get.data.frame(searchGraph, what = "vertices") %>% data.table
nodeDT[, .nodeID := .I]

edgeDT = get.data.frame(searchGraph, what = "edges") %>% data.table
edgeDT = rbind(edgeDT[,.(from,to)], edgeDT[,.(from = to, to = from)]) #Undirected network
edgeDT[, .edgeID := .I]

edgeDT[nodeDT, fromNodeID := .nodeID, on = .(from = name)]
edgeDT[nodeDT, toNodeID := .nodeID, on = .(to = name)]

Let's start easy - a minimum steiner tree problem

Constraint 1.)

nodeScores = V(searchGraph)$nodeScore

#nodeScores = rep(-1, vcount(searchGraph))
names(nodeScores) = V(searchGraph)$name

Constraint 3.)

fixedTerminals_variables =
  sparseMatrix(i = as.integer(fixedTerminals),
               j = as.integer(fixedTerminals),
               x = 1,
               dims = c( vcount(searchGraph), vcount(searchGraph)),
       dimnames = list( paste0("fixedTerminalConstraintFor", 1:vcount(searchGraph)),
                        V(searchGraph)$name))

fixedTerminals_variables %<>% .[as.integer(fixedTerminals), ]

fixedTerminals_directions = rep("==", nrow(fixedTerminals_variables))
fixedTerminals_rhs = rep(1, nrow(fixedTerminals_variables))

Constraint 4.) All binary variables (easier to specify in solver)

Constraint 5.)

nodeDegreeInequalities_variables = get.adjacency(searchGraph, sparse = TRUE)
diag(nodeDegreeInequalities_variables) = -2
diag(nodeDegreeInequalities_variables)[as.integer(terminals)] = -1


nodeDegreeInequalities_directions = rep(">=", nrow(nodeDegreeInequalities_variables))
nodeDegreeInequalities_rhs = rep(0, nrow(nodeDegreeInequalities_variables))

Constraint 6.) If any potential Steiner node next to a potential terminal is included, then the potential terminal must also be included.

twoCycle_variables = edgeDT[fromNodeID %in% as.integer(potentialTerminals),
         sparseMatrix(i = c(.edgeID,.edgeID) ,
                      j = c(fromNodeID,toNodeID),
                      x = rep(c(1, -1) , each = length(.edgeID) ),
                      dims = c( max(.edgeID), vcount(searchGraph)),
                      dimnames = list( paste("twoCycleOnEdge", 1:max(.edgeID)) , V(searchGraph)$name))]

twoCycle_variables  %<>% .[ edgeDT[fromNodeID %in% as.integer(potentialTerminals), .edgeID], ]

twoCycle_directions = rep(">=", nrow(twoCycle_variables))
twoCycle_rhs = rep(0, nrow(twoCycle_variables))

Solve it

solution = Rglpk_solve_LP(
    obj = nodeScores,
    mat = rbind(fixedTerminals_variables, nodeDegreeInequalities_variables, twoCycle_variables),
    dir = c(fixedTerminals_directions, nodeDegreeInequalities_directions, twoCycle_directions),
    rhs = c(fixedTerminals_rhs,nodeDegreeInequalities_rhs ,twoCycle_rhs),
    max = TRUE,
    control = list(verbose = TRUE),
    types = "B")

graphOfSolution = induced_subgraph(searchGraph,
                                    V(searchGraph)[which(solution$solution == 1)]) 

is.connected(graphOfSolution)

allTerminalPairs = combn( names(terminals), 2) %>% t %>% as.data.table
setnames(allTerminalPairs, c("T1name","T2name"))

setkey(nodeDT, name)
nodeDT[, inComponent := NA_integer_]

disconnectedComponentList = decompose(graphOfSolution)

allTerminalPairs[nodeDT, T1nodeID := .nodeID, on = .(T1name == name)]
allTerminalPairs[nodeDT, T2nodeID := .nodeID, on = .(T2name == name)]

for(i in 1:length(disconnectedComponentList)){

  nodeDT[V(disconnectedComponentList[[i]])$name, inComponent := i]
}

allTerminalPairs[nodeDT, T1inComponent := inComponent, on = .(T1nodeID = .nodeID)]
allTerminalPairs[nodeDT, T2inComponent := inComponent, on = .(T2nodeID = .nodeID)]

terminalPairsInDifferentComponents = allTerminalPairs[!is.na(T1inComponent) & !is.na(T1inComponent)][T1inComponent != T2inComponent]



connConstraints_variables = minimalNodeSeperator(searchGraph, nodeDT, terminalPairsInDifferentComponents)

connConstraints_directions = rep(">=", nrow(connConstraints_variables))
connConstraints_rhs = rep(-1, nrow(connConstraints_variables))

library(profvis)


conConstraintProfiling =  profvis({

connConstraints_variables = minimalNodeSeperator(searchGraph, nodeDT, terminalPairsInDifferentComponents)
})

solution2 = Rglpk_solve_LP(
    obj = nodeScores,
    mat = rbind(fixedTerminals_variables,
                nodeDegreeInequalities_variables,
                twoCycle_variables,
                connConstraints_variables),
    dir = c(fixedTerminals_directions, 
            nodeDegreeInequalities_directions, 
            twoCycle_directions,
            connConstraints_directions),
    rhs = c(fixedTerminals_rhs,
            nodeDegreeInequalities_rhs,
            twoCycle_rhs,
            connConstraints_rhs),
    max = TRUE,
    control = list(verbose = TRUE),
    types = "B")



graphOfSolution2 = induced_subgraph(searchGraph,
                                    V(searchGraph)[which(solution2$solution == 1)]) 

is.connected(graphOfSolution2)

A function that, when given two node indicies, returns their minimal node seperators

 # nodeClusterMembershipDT = nodeDT
 # fullSearchGraph = searchGraph
 # terminalPairsDT = terminalPairsInDifferentComponents

minimalNodeSeperator = function(fullSearchGraph, nodeClusterMembershipDT, terminalPairsDT){

  # Pre-compute the component surfaces (A(C_i) in paper)
  clusterSurfacesDT = nodeClusterMembershipDT[!is.na(inComponent)] %>%
                merge(edgeDT, by.x = ".nodeID", by.y = "fromNodeID") %>%
                .[,.(componentSurfaceNodeID = .SD[!toNodeID %in% .nodeID, unique(toNodeID)]), by = inComponent]

  allConnectivityConstraints = Matrix(0,
                                       nrow = nrow(terminalPairsDT),
                                       ncol = nrow(nodeClusterMembershipDT),
                                       dimnames = list(NULL, nodeClusterMembershipDT[order(.nodeID), name]))

  graphDiameter = diameter(fullSearchGraph)

  for(r in 1:nrow(terminalPairsDT)){

    terminalPair = terminalPairsDT[r,]

    C_i_componentSurfaceNodeIDs = clusterSurfacesDT[ inComponent ==  terminalPair$T1inComponent, componentSurfaceNodeID]

    graph_omitCi = delete_vertices(fullSearchGraph,
                      V(fullSearchGraph)[ nodeClusterMembershipDT[inComponent ==  terminalPair$T1inComponent,.nodeID] ])

    #Note that this is the graph LESS the Ci componenet - hence we can't precompute upfront.
    # Notice the use of name, not ID
    nodeNamesReachableFromJ = V(make_ego_graph(graph_omitCi,
                               nodes = V(graph_omitCi)[name == terminalPair$T2name],
                               order = graphDiameter)[[1]])$name
    #Refered to as Rj in the paper

    # N = A(Ci) intersect Rj
    minNodeSep_ij_nodeIDs = intersect(C_i_componentSurfaceNodeIDs,
                       nodeClusterMembershipDT[name %in% nodeNamesReachableFromJ, .nodeID])


    allConnectivityConstraints[r, terminalPair$T1nodeID] = -1
    allConnectivityConstraints[r, terminalPair$T2nodeID] = -1

    allConnectivityConstraints[r, minNodeSep_ij_nodeIDs] = 1
  }

  return(allConnectivityConstraints)
}

given two terminal nodes

library(lpSolve)

solution = lp(
    direction = "max",
    objective.in = nodeScores,
    const.mat = as.matrix(rbind(fixedTerminals_variables, nodeDegreeInequalities_variables, twoCycle_variables)),
    const.dir = c(fixedTerminals_directions, nodeDegreeInequalities_directions, twoCycle_directions),
    const.rhs = c(fixedTerminals_rhs,nodeDegreeInequalities_rhs ,twoCycle_rhs),
    all.bin = TRUE,
    num.bin.solns = 10)