R/guessMSTRoots.R

Defines functions guessMSTRoots

Documented in guessMSTRoots

#' Guess the roots of a MST
#'
#' Pick nodes to use as the root(s) of an MST using a variety of ad hoc methods.
#'
#' @param g A \link{graph} object containing a MST.
#' All nodes should be named.
#' @param method String specifying the method to use to pick the root.
#'
#' @return A character vector containing the identity of the root for each component in \code{g}.
#'
#' @details
#' When \code{method="degree1"}, an arbitrary node of degree 1 is chosen as the root for each component.
#' This aims to reduce the number of branch events by starting from an already-terminal node.
#'
#' When \code{method="maxstep"}, we pick the node of degree 1 that has the highest average number of steps to reach all other nodes of degree 1.
#' This aims to maximize the number of shared clusters between different paths when traversing \code{g} from the root to the other terminal nodes,
#' under the philosophy that branch events should occur as late as possible.
#' When \code{method="maxlen"}, we instead pick the node of degree 1 that has the highest average distance to reach all other nodes of degree 1.
#' This also considers the distance spanned by each cluster.
#'
#' When \code{method="minstep"}, we pick the node that has the lowest average number of steps to reach all nodes of degree 1.
#' This aims to minimize the number of shared clusters between different paths under the philosophy that branch events should occur as early as possible.
#' When \code{method="minlen"}, we instead pick the node that has the highest average distance to reach all nodes of degree 1.
#'
#' @author Aaron Lun,
#' based on code by Kelly Street
#'
#' @examples
#' library(igraph)
#' edges <- c("A", "B", "B", "C", "C", "D", "C", "E")
#' g <- make_graph(edges, directed=FALSE)
#'
#' guessMSTRoots(g)
#' guessMSTRoots(g, method="maxstep")
#' guessMSTRoots(g, method="minstep")
#'
#' # Works with multiple components.
#' edges2 <- c(edges, "F", "G", "G", "H")
#' g2 <- make_graph(edges2, directed=FALSE)
#' guessMSTRoots(g2)
#'
#' @export
#' @importFrom igraph shortest_paths distances decompose degree V
guessMSTRoots <- function(g, method=c("degree1", "maxstep", "maxlen", "minstep", "minlen")) {
    method <- match.arg(method)

    forest <- decompose(g)
    roots <- vector("list", length(forest))

    for (i in seq_along(forest)) {
        tree <- forest[[i]]
        named <- names(V(tree))
        if (length(named) == 1L) {
            roots[[i]] <- named[1]
            next
        }

        deg <- degree(tree)
        deg1 <- named[deg <= 1L]
        if (method=="degree1") {
            roots[[i]] <- deg1[1]
            next
        }

        if (method %in% c("maxstep", "maxlen")) {
            candidates <- deg1
            FUN <- which.max
        } else {
            candidates <- named
            FUN <- which.min
        }

        averages <- numeric(length(candidates))
        names(averages) <- candidates
        for (j in candidates) {
            ends <- setdiff(deg1, j)
            if (method %in% c("minstep", "maxstep")) {
                paths <- shortest_paths(g, from = j, to = ends, output = 'vpath')$vpath
                averages[j] <- mean(lengths(paths))
            } else {
                dists <- distances(g, v = j, to = ends)
                averages[j] <- mean(dists)
            }
        }

        roots[[i]] <- candidates[FUN(averages)]
    }

    # As character for the edge case of an empty list.
    as.character(unlist(roots))
}
LTLA/TrajectoryUtils documentation built on Feb. 5, 2024, 11:56 a.m.