R/tree_shape.R

Defines functions NRootedShapes NUnrootedShapes .UnrootedKeys UnrootedTreeKey UnrootedTreeShape UnrootedTreeWithKey UnrootedTreeWithShape RootedTreeWithShape.integer64 RootedTreeWithShape.numeric RootedTreeWithShape .Int64 RootedTreeShape

Documented in NRootedShapes NUnrootedShapes RootedTreeShape RootedTreeWithShape .UnrootedKeys UnrootedTreeKey UnrootedTreeShape UnrootedTreeWithKey UnrootedTreeWithShape

#' @importFrom utils globalVariables
utils::globalVariables(c("nRootedShapes",
                         "nUnrootedShapes",
                         "unrootedKeys"),
                       "TreeTools")

#' Integer representing shape of a tree
#'
#' Returns an integer that uniquely represents the shape of an _n_-tip
#' binary tree, ignoring tip labels.
#'
#' Rooted trees are numbered working up from the root.
#'
#' The root node divides _n_ tips into two subtrees.  The smaller subtree
#' may contain $a = 1, 2, ..., n/2$ tips, leaving $b = n - a$ tips in
#' These options are worked through in turn.
#'
#' For the first shape of the smaller subtree, work through each possible shape
#' for the larger subtree.  Then, move to the next shape of the smaller subtree,
#' and work through each possible shape of the larger subtree.
#'
#' Stop when the desired topology is encountered.
#'
#' Unrooted trees are numbered less elegantly.  Each cherry (i.e. node
#' subtending a pair of tips) is treated in turn.  The subtended tips are
#' removed, and the node treated as the root of a rooted tree.  The number of
#' this rooted tree is then calculated.  The tree is assigned a _key_
#' corresponding to the lowest such value.  The keys of all unrooted tree shapes
#' on _n_ tips are ranked, and the unrooted tree shape is assigned a _number_
#' based on the rank order of its key among all possible keys, counting from
#' zero.
#'
#' If `UnrootedTreeShape()` or `UnrootedTreeKey()` is passed a rooted tree,
#' the position of the root will be ignored.
#'
#' The number of unlabelled binary rooted trees corresponds to the
#' [Wedderburn-Etherington numbers](https://oeis.org/A001190).
#'
#'
#' @template treeParam
#'
#' @return `TreeShape()` returns an integer specifying the shape of a tree,
#' ignoring tip labels.
#'
#' @examples
#' RootedTreeShape(PectinateTree(8))
#' plot(RootedTreeWithShape(0, nTip = 8L))
#'
#' NRootedShapes(8L)
#' # Shapes are numbered from 0 to NRootedShapes(n) - 1.  The maximum shape is:
#' RootedTreeShape(BalancedTree(8))
#'
#' # Unique shapes of unrooted trees:
#' NUnrootedShapes(8L)
#'
#' # Keys of these trees:
#' UnrootedKeys(8L)
#'
#' # A tree may be represented by multiple keys.
#' # For a one-to-one correspondence, use a number instead:
#' unrootedShapes8 <- as.integer(NUnrootedShapes(8L))
#' allShapes <- lapply(seq_len(unrootedShapes8) - 1L,
#'                     UnrootedTreeWithShape, 8L)
#' plot(allShapes[[1]])
#' sapply(allShapes, UnrootedTreeShape)
#' sapply(allShapes, UnrootedTreeKey, asInteger = TRUE) # Key >= number
#'
#' # If numbers larger than 2>31 are required, sapply needs a little help
#' # with 64-bit integers:
#' structure(sapply(allShapes, UnrootedTreeKey), class = "integer64")
#'
#'
#' @seealso Unique number for a labelled tree: [`TreeNumber()`]
#'
#' @template MRS
#' @name TreeShape
#' @export
RootedTreeShape <- function(tree) {
  edge <- tree[["edge"]]
  nTip <- NTip(tree)
  edge <- Postorder(edge)
  .Int64(edge_to_rooted_shape(edge[, 1], edge[, 2], nTip))
}

#' @importFrom bit64 as.integer64
#' @keywords internal
.Int64 <- function(n) {
  n <- as.integer64(n)
  if (length(n) == 2L) {
    n <- n[1] * 2147483647L + n[2]
  }

  # Return:
  n
}

#' @rdname TreeShape
#' @param shape Integer specifying shape of tree, perhaps generated by
#'  `TreeShape()`.
#' @param nTip Integer specifying number of tips.
#' @return `RootedTreeWithShape()` returns a tree of class `phylo`
#' corresponding to the shape provided.  Tips are unlabelled.
#' @export
RootedTreeWithShape <- function(shape, nTip, tipLabels)
  UseMethod("RootedTreeWithShape")

#' @export
RootedTreeWithShape.numeric <- function(shape, nTip,
                                        tipLabels = character(nTip)) {
  structure(list(edge = rooted_shape_to_edge(shape, nTip),
                 Nnode = nTip - 1L,
                 tip.label = tipLabels),
            class = "phylo")
}

#' @export
RootedTreeWithShape.integer64 <- function(shape, nTip,
                                          tipLabels = character(nTip)) {
  if (shape < 0) {
    stop("Shape may not be negative.")
  } else if (shape > 2L^31L - 1L) {
    stop("Shapes this large are not currently implemented. ",
         "Please contact maintainer for help.")
  } else {
    RootedTreeWithShape(as.integer(shape), nTip, tipLabels)
  }
}

#' @rdname TreeShape
#' @template tipLabelsParam
#' @return `UnrootedTreeWithShape()` returns a tree of class `phylo`
#' corresponding to the shape provided.  Tips are unlabelled.
#' @export
UnrootedTreeWithShape <- function(shape, nTip, tipLabels = character(nTip)) {
  if (nTip > 30) {
    stop("Only trees with < 31 leaves are presently handled")
  }
  nShapes <- nUnrootedShapes[nTip]
  if (shape >= as.integer(nShapes)) {
    stop("Shape must be between 0 and ", nShapes - 1L)
  }

  key <- UnrootedKeys(nTip)[shape + 1L]

  UnrootedTreeWithKey(key, nTip, tipLabels)
}

#' @rdname TreeShape
#' @param key Integer specifying the _key_ (not number) of an unrooted tree.
#' @return `UnrootedTreeWithKey()` returns a tree of class `phylo` corresponding
#' to the key provided.  Tips are unlabelled.
#' @export
UnrootedTreeWithKey <- function(key, nTip, tipLabels = character(nTip)) {
  AddRoot <- function(x) {
    x[["root.edge"]] <- 1L
    x
  }
  SingleTaxonTree(tipLabels[1]) + SingleTaxonTree(tipLabels[2]) +
    AddRoot(RootedTreeWithShape(key, nTip - 2L, tipLabels[-c(1, 2)]))
}

#' @rdname TreeShape
#' @export
UnrootedTreeShape <- function(tree) {
  which(UnrootedKeys(NTip(tree)) == UnrootedTreeKey(tree)) - 1L
}

#' @rdname TreeShape
#' @param asInteger Logical specifying whether to coerce the return value to
#' mode `integer`: only possible for values < 2^31.
#' If `FALSE`, values will have class `integer64`.
#' @importFrom bit64 integer64
#' @export
UnrootedTreeKey <- function(tree, asInteger = FALSE) {
  edge <- Postorder(tree[["edge"]])
  nTip <- NTip(tree)
  parent <- edge[, 1]
  child <- edge[, 2]
  nEdge <- length(child)
  unrooted <- nEdge %% 2L
  nodeFirst <- c(rep.int(c(TRUE, FALSE), nEdge / 2L),
                 logical(as.integer(unrooted)))
  nodeSecond <- !nodeFirst
  nodeNumbers <- unique(parent)
  if (unrooted) {
    nodeFirst [nEdge - 0:2] <- FALSE
    nodeSecond[nEdge - 0:2] <- FALSE
    nodeNumbers <- nodeNumbers[-(nTip - 2L)]
  }

  RootedNumber <- function(nodeChildren) {
    RootedTreeShape(Postorder(DropTip(RootTree(tree, nodeChildren[1]), nodeChildren)))
  }

  basalTipEdges <- nEdge - (seq_len(4L - unrooted) - 1L)
  rootCandidate <- if (sum(child[basalTipEdges] <= nTip) == 2) {
    RootedNumber(child[basalTipEdges][child[basalTipEdges] <= nTip])
  } else {
    integer64(0)
  }

  cherryNodes <- nodeNumbers[child[nodeFirst] <= nTip & child[nodeSecond] <= nTip]
  allKeys <- structure(c(vapply(cherryNodes, function(node) {
    RootedNumber(child[parent == node])
  }, integer64(1)), rootCandidate), class = "integer64")

  # Return:
  min(if (asInteger) as.integer(allKeys) else allKeys)
}

#' @rdname TreeShape
#' @keywords internal
#' @importFrom bit64 integer64
#' @export
.UnrootedKeys <- function(nTip) {
  if (nTip > 28L) {
    stop("Too many shapes to calculate with ", nTip, " leaves.")
  } else if (nTip > length(unrootedKeys)) {
    #TODO make efficient - this is horrible!
    # nocov start
    shapes <- as.integer(structure(
      vapply(seq_len(as.integer(NRootedShapes(nTip))) - 1L,
             function(shape) UnrootedTreeKey(RootedTreeWithShape(shape, nTip)),
             integer64(1L)),
      class = "integer64"))
    uniqueShapes <- unique(shapes)
    # nocov end
  } else {
    uniqueShapes <- unrootedKeys[[nTip]]
  }

  # Return:
  sort(uniqueShapes)
}

#' @rdname TreeShape
#' @param \dots Value of `nTip`, to pass to memoized `.UnrootedKeys`.
#' @param envir Unused; passed to [`addMemoization`].
#' @return `UnrootedKeys()` returns a vector of integers corresponding to the
#' keys (not shape numbers) of unrooted tree shapes with `nTip` tips.
#' It is a wrapper to `.UnrootedKeys()`, with memoization, meaning that results
#' once calculated are cached and need not be calculated on future calls to
#' the function.
#' @importFrom R.cache addMemoization
#' @export
UnrootedKeys <- addMemoization(.UnrootedKeys, envir = "package:TreeTools")

#' @rdname TreeShape
#' @return `NUnrootedShapes()` returns an object of class `integer64` specifying
#' the number of unique unrooted tree shapes with `nTip` (< 61) tips.
#' @export
NUnrootedShapes <- function(nTip) {
  if (nTip > 60L) {
    stop("Too many shapes to represent as a 64-bit integer. ",
         "Consult OEIS for value: https://oeis.org/A000672/b000672.txt")
  }
  nUnrootedShapes[nTip]
}

#' @rdname TreeShape
#' @return `NRootedShapes()` returns an object of class `integer64` specifying
#' the number of unique rooted tree shapes with `nTip` (< 56) leaves.
#'
#' @export
NRootedShapes <- function(nTip) {
  if (nTip > 55L) {
    stop("Too many shapes to represent as a 64-bit integer. ",
         "Consult OEIS for value: https://oeis.org/A001190/b001190.txt")
  }
  nRootedShapes[nTip]
}

Try the TreeTools package in your browser

Any scripts or data that you put into this service are public.

TreeTools documentation built on June 22, 2024, 9:27 a.m.