#' @export
#' @title Hsu et al. (2009) Algorithm
#'
#' @description It is an implementation of Hsu et al. algorithm to transform a digraph and a
#' known set of forbidden paths, into a new graph that does not allow any forbidden path
#' as part of its solutions.
#'
#' @details This version of the algorithm produce smaller graphs, with less new nodes and arcs.
#'
#' @family Graph Transformation
#'
#' @seealso \url{https://doi.org/10.1007/978-3-642-03095-6_60}
#'
#' @param g The digraph to be transformed, written as a data frame where each row represents a
#' directed arc. The columns must be named \code{from} and \code{to}, and can be of any data
#' type. On each row no cells can be \code{NULL} or \code{NA}.
#' @param f The set of forbidden paths, written as a data frame. Each row represents a path
#' as a sequence of nodes. Each row may be of different size, filling the empty cells with
#' \code{NA}. All nodes involved must be part of \code{g}, and no forbidden path can be of
#' size 2. This is because the latter is thought as an arc that should not exist in
#' the first place.
#' @param cores This algorithm can be run using R's parallel processing functions. This
#' variable represents the number of processing cores you want to assign for the
#' transformation. The default value is one single core.
#' It is suggested to not assign all of your available cores to the function.
#'
#' @return A new graph, generated following Hsu's backward construction, in which no path
#' includes one of the forbidden subpaths. The graph is returned in a data frame format,
#' where each row represents a directed arc, with or without additional attributes (if
#' corresponds). However, regardless of the data type of the original graph, nodes on the
#' new graph are of type character. The new nodes names are generated by incrementally
#' concatenating the nodes on a forbidden path, but split by a pipe character (\code{|}).
#'
#' @importFrom foreach %dopar%
#' @importFrom dplyr %>%
#' @importFrom parallel makeCluster
#' @importFrom doParallel registerDoParallel
#' @importFrom parallel stopCluster
#' @importFrom utils tail
#'
#'
#' @examples
#' # Obtain a graph and its forbidden subpaths
#' graph <- data.frame(from = c("c", "c", "u", "u", "t", "a", "a", "r", "e", "e", "e",
#' "p", "i", "i", "n", "o"),
#' to = c("u", "p", "e", "t", "a", "r", "i", "u", "r", "i", "p",
#' "n", "n", "o", "o", "m"),
#' stringsAsFactors = FALSE)
#' fpaths <- data.frame(X1 = c("u", "p", "a", "a"), X2 = c("t", "n", "i", "r"),
#' X3 = c("a", "o", "n", "u"), X4 = c("r", "m", "o", NA),
#' X5 = c("u", NA, NA, NA), stringsAsFactors = FALSE)
#'
#' # Show the input
#' graph
#' fpaths
#'
#' # Call the function and store the result
#' gStar <- modify_graph_hsu(graph, fpaths)
#' gStar
#'
modify_graph_hsu <- function(g, f, cores = 1L) {
# Transform the graph
g$from <- as.character(g$from)
g$to <- as.character(g$to)
# CHECKS
# Check that the graph starts with from and to column
if(any(colnames(g)[1:2] != c("from", "to"))) {
stop("The first two columns of g must be named 'from' and 'to'")
}
# Check that all nodes that are used in f appear in g, otherwise halt it
if(!.nodesExists(g, f)) {
stop("All nodes used in f must exists in g.")
}
# Clean the forbidden paths
f <- as.data.frame( apply(f, 2, function(x) gsub("^$|^ $", NA, x)), stringsAsFactors = FALSE)
f <- f[apply(f,1,function(x)any(!is.na(x))),]
# Get the number of columns
ncol <- ncol(g)
# Set up the parallel
cluster <- parallel::makeCluster(cores)
doParallel::registerDoParallel(cluster)
# Stop Cluster
on.exit(parallel::stopCluster(cluster))
# Create the first output
firstOutput <- foreach::foreach(startNode = unique(f[,1]), .combine = .comb, .multicombine = TRUE,
.export = ".get_arc_attributes") %dopar% {
# Create the new arcs
tempNewArcs <- g[0,]
tempDeletedArcs <- g[0,1:2]
tempFDelete <- f[0,]
# Create a list of banned arcs and nodes
tempBannedArcs <- data.frame(from = as.character(), to = as.character(), stringsAsFactors = FALSE)
tempBannedNodes <- data.frame(node = as.character(), stringsAsFactors = FALSE)
# Subset all the paths that start with that node
subsetSN <- subset(f, f[,1] == startNode)
# Get all the endings of paths starting with that node, transposed
endings <- t(sapply(
split(subsetSN, seq(nrow(subsetSN))), function(x) tail(x[!is.na(x)], 2) ) )
# For each ending
for(e in nrow(endings)) {
# Get all FP starting in "startNode" and ending in these pair
fps <- subsetSN[sapply(split(subsetSN, seq(nrow(subsetSN))),
function(x) tail(x[!is.na(x)], 2) == endings[e,])[1,]
,]
# SET 1, SUBSET 1
# ---------------------------------------------------------------------
# If there are more than two paths here, it means the FPs on the subset
# are parte of set 1 subset 1 (same start, same end pair).
# Therefore, we remove it from the original list
if(nrow(fps) > 1) {
tempFDelete <- fps
# For all paths
for(i in 1:nrow(fps)) {
# Delete the first arc of the FP in the original graph
# g <- subset(g, from == fps[i,1] & to == fps[i,2])
tempDeletedArcs[nrow(tempDeletedArcs) + 1,] <- c(fps[i,1], fps[i,2])
# Prepare a predecesor node
preNode <- fps[i,1]
# We are looping from the second to the penultimate item on the list.
# The last node is ignored because it is only temporal,
# So by ignoring it, we aim to reduce working time
# Create a flag to check if we continue adding
continue <- TRUE
j <- 2
while(continue & j < (length(fps[i,]) - 1) ) {
# Create the new node name
nodeName <- paste0(as.character(fps[i,])[1:j], collapse = "|")
# Add the link to the predecesor, with all the attributes
if(ncol > 2) {
tempNewArcs[nrow(tempNewArcs) + 1,] <- c(preNode, nodeName, .get_arc_attributes(g, fps[i, j-1], fps[i, j]))
}
# Or without the attributes
else tempNewArcs[nrow(tempNewArcs) + 1,] <- list(preNode, nodeName)
# Ban the next arc
tempBannedArcs[nrow(tempBannedArcs) + 1,] <- c(nodeName, fps[i, j + 1])
# And update the predecesor
preNode <- nodeName
# SET 1 SUBGROUP 1
# ----------------
if(i != 1) {
# In this case, we also need to check if there is a node ending on the next node
newTo <- unique(tempNewArcs$to)[grepl(paste0("\\|", fps[i, j + 1], "$"), unique(tempNewArcs$to))]
# If there is one
if(!identical(newTo, character(0))) {
# Write that arc as well, with its attributes
if(ncol > 2) {
tempNewArcs[nrow(tempNewArcs) + 1,] <- c(nodeName, newTo,
.get_arc_attributes(g, fps[i,j-1], fps[i,j]))
}
# Or without them
else tempNewArcs[nrow(tempNewArcs) + 1,] <- list(nodeName, newTo)
# And mark that we stop looping here
continue <- FALSE
}
}
j <- j + 1
}
# Save the last node as banned
tempBannedNodes[nrow(tempBannedNodes) + 1,1] <- paste0(as.character(fps[i,][which(!is.na(fps[i,]))]),
collapse = "|")
} # End loop of all paths
} # Endif
}
# Return all the values
list(tempNewArcs, tempDeletedArcs, tempBannedArcs, tempBannedNodes, tempFDelete)
}
# Delete forbidden paths
f <- dplyr::setdiff(f, firstOutput[[5]])
# Leave unique only
firstOutput[[1]] <- dplyr::distinct(firstOutput[[1]])
firstOutput[[2]] <- dplyr::distinct(firstOutput[[2]])
firstOutput[[3]] <- dplyr::distinct(firstOutput[[3]])
# STEP 2: CLASSIFY SET 2, AND SET 1 SUBSET 2
# -------------------------------------------------------------
# The remaining forbidden paths are those belonging to this set
secondOutput <- foreach::foreach(k = 1:nrow(f), .combine = .comb, .multicombine = TRUE,
.export = ".get_arc_attributes") %dopar% {
# Create the new arcs
tempNewArcs <- g[0,]
tempDeletedArcs <- g[0,1:2]
# Create a list of banned arcs and nodes
tempBannedArcs <- data.frame(from = as.character(), to = as.character(), stringsAsFactors = FALSE)
tempBannedNodes <- data.frame(node = as.character(), stringsAsFactors = FALSE)
# Remove the first arc of the FP in the original graph
tempDeletedArcs[nrow(tempDeletedArcs) + 1,] <- list(f[k,1], f[k,2])
# Prepare a predecesor node
preNode <- f[k,1]
# Get the cleaned path without NA
cleanfp <- f[k,][which(!is.na(f[k,]))]
# From the second to the last one
# Here we need to go until the last node
continue2 <- TRUE
l <- 2
while(continue2 & l <= length(cleanfp)) {
# Create the new node name
nodeName <- paste0(as.character(cleanfp)[1:l], collapse = "|")
# If this is the second we use the node as-is
if(l == 2) {
# Add the link to the predecesor with its attributes
if(ncol > 2) {
tempNewArcs[nrow(tempNewArcs) + 1,] <- c(preNode, nodeName,
.get_arc_attributes(g, f[k,l-1], f[k,l]))
}
# Or without them
else tempNewArcs[nrow(tempNewArcs) + 1,] <- list(preNode, nodeName)
# Ban the next arc
tempBannedArcs[nrow(tempBannedArcs) + 1,] <- c(nodeName, f[k, l + 1])
# And update the predecesor
preNode <- nodeName
}
else {
# Node name and quality of last
lastNode <- l == length(cleanfp[k,])
lastPart <- ifelse(lastNode | l == 3, nodeName,
paste0(tail(strsplit(nodeName, "\\|")[[1]], 3), collapse = "|"))
# If this is the last node, ban it
if(lastNode)
tempBannedNodes[nrow(tempBannedNodes) + 1,1] <- nodeName
# If there is a banned element
if(lastPart %in% tempBannedNodes | lastPart %in% firstOutput[[4]]$node) {
# If the node has arcs going or comming to it, delete them
tempNewArcs <- setdiff(tempNewArcs, subset(tempNewArcs, tempNewArcs$to == lastPart))
tempNewArcs <- setdiff(tempNewArcs, subset(tempNewArcs, tempNewArcs$from == lastPart))
# Do not continue
continue2 <- FALSE
}
# Otherwise, add it
else {
# Add the link to the predecesor, with its attributes
if(ncol > 2) {
tempNewArcs[nrow(tempNewArcs) + 1,] <- c(preNode, nodeName,
.get_arc_attributes(g, f[k,l-1], f[k,l]))
}
# Or without them
else tempNewArcs[nrow(tempNewArcs) + 1,] <- list(preNode, nodeName)
# Ban the next arc
tempBannedArcs[nrow(tempBannedArcs) + 1,] <- c(nodeName, f[k, l + 1])
# And update the predecesor
preNode <- nodeName
}
}
# Increase the counter
l <- l + 1
}
# Return the results
list(tempNewArcs, tempDeletedArcs, tempBannedArcs, tempBannedNodes)
}
# Merge banned nodes
bannedNodes <- c(firstOutput[[4]]$node, secondOutput[[4]]$node)
# For each of the banned nodes
deletedBannedArcs <- foreach::foreach(bn = bannedNodes, .combine = rbind) %dopar% {
temp <- g[0,]
# Get the length of the banned node
length <- nchar(bn)
# Get the arcs where the from contains the banned node
arcsEvaluation <- as.data.frame(
apply(X = secondOutput[[1]], MARGIN = 1:2,
FUN = function(x) substr(x, nchar(x) - length + 1, nchar(x)) == bn)
)
# Now get the arcs
subset(secondOutput[[1]], arcsEvaluation$from | arcsEvaluation$to)
}
# Now remove this
secondOutput[[1]] <- dplyr::setdiff(secondOutput[[1]], deletedBannedArcs)
rm(deletedBannedArcs)
# Fill the banned arcs
secondOutput[[3]] <- unique(rbind(secondOutput[[3]], firstOutput[[3]]))
# STEP 3: LINK BACK TO THE ORIGINAL GRAPH
# ---------------------------------------
# Now go through the new nodes
newNodes <- c(unique(secondOutput[[1]]$to), unique(firstOutput[[1]]$to))
thirdOutput <- foreach::foreach(nn = newNodes, .combine = rbind, .export = ".get_arc_attributes") %dopar% {
# Create the arcs
newArcs <- g[0,]
# Split by pipe and get the last element
nsName <- gsub(".*\\|(.*)", "\\1", nn)
# Get the outgoing arcs for the original node, that are not banned
# toNodes <- setdiff(subset(g, from == nsName), secondOutput[[3]])$to
toNodes <- setdiff(subset(g[,1:2], from == nsName)$to,
subset(secondOutput[[3]], from == nn | from == nsName)$to )
# For each element
for(toNode in toNodes) {
# Evaluate if there is a node
newTo <- newNodes[grepl(paste0(nsName, "\\|", toNode, "$"), newNodes)]
newTo <- newTo[grepl(paste0("^", nsName), newTo)]
# If there is a value, and the transitive is not banned
if(!identical(newTo, character(0)) & nrow(subset(secondOutput[[3]], from == nn & to == toNode)) == 0) {
# Add it using the new node, if the original is not deleted
if( !any(c(nsName, toNode) %in% secondOutput[[2]]) )
# Add it with its attributes
if(ncol > 2) {
newArcs[nrow(newArcs) + 1, ] <- c(nn, newTo, .get_arc_attributes(g, nsName, toNode))
}
# Or without them
else newArcs[nrow(newArcs) + 1, ] <- list(nn, newTo)
}
# Otherwise, if there is is no match,
# ...and if the value with the original node is not banned
else if(! any(apply(
secondOutput[[3]], 1, function(x) paste(x, collapse="") == paste(c(nn,toNode), collapse="")) ) ) {
# Add the new link, but with the original node and its attributes
if(ncol > 2) {
newArcs[nrow(newArcs) + 1, ] <- c(nn, toNode, .get_arc_attributes(g, nsName, toNode)) }
# Or without them
else newArcs[nrow(newArcs) + 1, ] <- list(nn, toNode)
}
}
# Return the value
newArcs
}
# Remove deleted arcs from here
g <- dplyr::anti_join(g, firstOutput[[2]], by = c("from", "to"))
g <- dplyr::anti_join(g, secondOutput[[2]], by = c("from", "to"))
# Now add the new arcs and return
return(unique(rbind(g, firstOutput[[1]]) %>% rbind(g, secondOutput[[1]]) %>% rbind(thirdOutput)))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.