R/extractTopology.R

Defines functions extractTopology

Documented in extractTopology

#' Extract the non-3D topology of trees in phylo(3D) format
#'
#' \code{extractTopology} - Extracts the topology of a rooted tree in phylo or
#' phylo3D format by ignoring edge subdivisions, i.e. nodes with both in- and
#' out-degree of 1. The nodes have to be enumerated from 1 to \eqn{|V|}
#' (otherwise use enum2_1toV()).\cr
#' The function works for trees in phylo as well as phylo3D format as existing
#' information on the coordinates or edge weights is dropped anyway.\cr
#' Edge lengths in the topology are based on the sum of the edge lengths of
#' the corresponding paths in the original tree.
#'
#'
#' @author Sophie Kersting, Luise Kühn
#'
#' @param tree A rooted tree in (extended) phylo(3D) format
#' (no special node enumeration required, except that nodes are numbered
#' from 1 to |V| = the total number of nodes). There must be at least 2 nodes,
#' i.e. one edge.
#'
#' @return \code{extractTopology(tree)} Tree in phylo format with cladewise
#' node enumeration in which all interior nodes have at least out-degree 2.
#'
#' @export
#' @rdname extractTopology
#' @examples
#' tree <- treeDbalance::example3Dtrees$bean09
#' top <- extractTopology(tree)
extractTopology <- function(tree) {
  if (!inherits(tree, "phylo") && !inherits(tree, "phylo3D")) {
    stop("The input tree must have class phylo or phylo3D.")
  }
  n <- length(tree$tip.label)
  m <- tree$Nnode
  if (!identical(seq(1, (m + n)), as.integer(unique(sort(tree$edge))))) {
    stop("Nodes are labeled with other values than 1,...,|V|.\n")
  }
  CONSIDER_EDGE_LENGTHS <- "edge.length" %in% attributes(tree)$names
  # List all edge-subdividing nodes, i.e. nodes with in- and out-degree 1.
  # Since all nodes have only one ancestor in a directed tree, list all nodes
  # that are ancestor to only one node instead.
  ancs <- getAncs(tree)
  to_remove <- rep(FALSE, n + m)
  to_remove[as.numeric(names(which(table(ancs[1, ]) == 1)))] <- TRUE
  # If the root is among the nodes to remove, check if there are other nodes
  # that have more than one direct descendant. If not, then the whole tree is
  # a path graph and a single edge is returned with a warning.
  is_leaf <- getLeaves(tree)
  inner_remaining <- !is_leaf & !to_remove
  if (sum(inner_remaining) < 1) {
    warning("The tree is a path graph. A single edge is returned.")
    mytop <- list(edge = matrix(c(1, 2),
      byrow = TRUE,
      nrow = 1, ncol = 2
    ), tip.label = c(" "), Nnode = 1)
    if (CONSIDER_EDGE_LENGTHS) {
      mytop$edge.length <- sum(tree$edge.length)
    }
    class(mytop) <- "phylo"
    return(mytop)
  }
  if (sum(to_remove) > 0) {
    # If there are interior nodes with more than one descendant, then we can now
    # incrementally make the tree smaller. We start with the lowest nodes going
    # up through the tree and check if their parents have to be removed. If that
    # is the case we set the edge from parent to current node to (NA, NA) and
    # connect the "grandparent" with the current node (the former incoming edge
    # of the parent).
    node_order <- rev(getNodeDepths(tree)["orderByIncrDepth", ])
    for (curr_node in node_order) {
      parent <- ancs["ancestor", curr_node]
      if (!is.na(parent) && to_remove[parent]) { # not the root & should be removed
        if (is.na(ancs["inc_edge", parent])) { # if no grandparent exists
          tree$edge[ancs["inc_edge", curr_node], ] <- c(NA, NA)
        } else { # if the grandparent exists
          tree$edge[ancs["inc_edge", parent], 2] <-
            tree$edge[ancs["inc_edge", curr_node], 2]
          tree$edge[ancs["inc_edge", curr_node], ] <- c(NA, NA)
          if (CONSIDER_EDGE_LENGTHS) {
            # Set incoming edge of parent (now from grandparent to current node)
            # to sum of both edges.
            tree$edge.length[ancs["inc_edge", parent]] <-
              tree$edge.length[ancs["inc_edge", parent]] +
              tree$edge.length[ancs["inc_edge", curr_node]]
            # Set removed edge length (from parent to current node) to NA.
            tree$edge.length[ancs["inc_edge", curr_node]] <- NA
          }
        }
      }
    }
    # Remove (NA,NA) rows.
    edges_to_keep <- which(!is.na(tree$edge[, 1]))
    tree$edge <- tree$edge[edges_to_keep, ]
    if (CONSIDER_EDGE_LENGTHS) {
      tree$edge.length <- tree$edge.length[edges_to_keep]
    }
    # Relabel the vertices from 1 to |V_new|. The new enumeration has in the
    # i-th position the desired new number of the i-th node.
    curr_labels <- sort(unique(as.vector(tree$edge)))
    new_enum <- rep(NA, n + m)
    for (i in 1:length(curr_labels)) {
      new_enum[curr_labels[i]] <- i
    }
    tree$edge <- matrix(new_enum[tree$edge], byrow = FALSE, ncol = 2)
  }
  # Create a phylo tree and change the node enumeration to cladewise.
  mytop <- list(
    edge = tree$edge, tip.label = tree$tip.label,
    Nnode = tree$Nnode - sum(to_remove)
  )
  if (CONSIDER_EDGE_LENGTHS) {
    mytop$edge.length <- tree$edge.length
  }
  class(mytop) <- "phylo"
  mytop <- enum2cladewise(mytop)
  return(mytop)
}

Try the treeDbalance package in your browser

Any scripts or data that you put into this service are public.

treeDbalance documentation built on Feb. 25, 2026, 1:06 a.m.