test-transfer_consensus.R
In TreeDist: Calculate and Map Distances Between Phylogenetic Trees

test_that("Identical trees return fully resolved consensus", {
  tree <- as.phylo(0, nTip = 10)
  trees <- structure(rep(list(tree), 10), class = "multiPhylo")

  tc <- TransferConsensus(trees, greedy = "first")
  expect_equal(NSplits(tc), NSplits(tree))

  tc_best <- TransferConsensus(trees, greedy = "best")
  expect_equal(NSplits(tc_best), NSplits(tree))
})

test_that("Star tree returned when no signal", {
  set.seed(6129)
  # Fully random trees with many tips — negligible split overlap
  trees <- structure(lapply(1:30, function(i) TreeTools::RandomTree(50, root = TRUE)),
                     class = "multiPhylo")
  tc <- TransferConsensus(trees, greedy = "first")

  # Should be very unresolved (0 or near-0 splits)
  expect_lte(NSplits(tc), 5)
})

test_that("Transfer consensus is at least as resolved as majority-rule for structured trees",
{
  # Trees from as.phylo with moderate overlap
  trees <- as.phylo(0:29, nTip = 20)
  tc <- TransferConsensus(trees, greedy = "best")
  mr <- Consensus(trees, p = 0.5)

  # Transfer consensus should be at least as resolved
  expect_gte(NSplits(tc), NSplits(mr))
})

test_that("Both greedy strategies produce valid trees", {
  trees <- as.phylo(1:15, nTip = 12)

  tc_first <- TransferConsensus(trees, greedy = "first")
  tc_best  <- TransferConsensus(trees, greedy = "best")

  expect_s3_class(tc_first, "phylo")
  expect_s3_class(tc_best, "phylo")
  expect_equal(sort(TipLabels(tc_first)), sort(TipLabels(trees[[1]])))
  expect_equal(sort(TipLabels(tc_best)), sort(TipLabels(trees[[1]])))
})

test_that("scale = FALSE (unscaled) works", {
  trees <- as.phylo(1:10, nTip = 10)

  tc_scaled <- TransferConsensus(trees, scale = TRUE)
  tc_unscaled <- TransferConsensus(trees, scale = FALSE)

  expect_s3_class(tc_scaled, "phylo")
  expect_s3_class(tc_unscaled, "phylo")
})

test_that("init = 'majority' works", {
  trees <- as.phylo(0:19, nTip = 15)

  tc_empty <- TransferConsensus(trees, init = "empty")
  tc_maj   <- TransferConsensus(trees, init = "majority")

  expect_s3_class(tc_empty, "phylo")
  expect_s3_class(tc_maj, "phylo")
  # Both should produce reasonable trees
  expect_gte(NSplits(tc_empty), 1)
  expect_gte(NSplits(tc_maj), 1)
})

test_that("Error on bad input", {
  expect_error(TransferConsensus(list(TreeTools::BalancedTree(5))), "multiPhylo")
  expect_error(TransferConsensus(
    structure(list(TreeTools::BalancedTree(5)), class = "multiPhylo")),
    "at least 2")

  # Fewer than 4 tips → star tree
  tiny <- structure(rep(list(ape::read.tree(text = "(a,b,c);")), 3),
                    class = "multiPhylo")
  tc_tiny <- TransferConsensus(tiny)
  expect_equal(NSplits(tc_tiny), 0)

  # tc_profile validation
  expect_error(TreeDist:::tc_profile(list(TreeTools::BalancedTree(5))),
               "multiPhylo")
  expect_error(TreeDist:::tc_profile(
    structure(list(TreeTools::BalancedTree(5)), class = "multiPhylo")),
    "at least 2")
  expect_error(TreeDist:::tc_profile(tiny), "at least 4 tips")
})

test_that("Two-tree consensus returns a valid tree", {
  trees <- as.phylo(1:2, nTip = 8)
  tc <- TransferConsensus(trees)
  expect_s3_class(tc, "phylo")
  expect_equal(length(TipLabels(tc)), 8)
})


# =========================================================================
# C++ edge cases (transfer_consensus.cpp coverage)
# =========================================================================

test_that("TransferConsensus returns star tree for all-star input", {
  star <- StarTree(8)
  trees <- structure(rep(list(star), 3), class = "multiPhylo")
  tc <- TransferConsensus(trees)
  expect_s3_class(tc, "phylo")
  expect_equal(NSplits(tc), 0)
})

test_that("TransferConsensus covers all parameter combinations", {
  trees <- as.phylo(0:9, nTip = 10)

  # greedy="first", init="majority", scale=FALSE
  tc <- TransferConsensus(trees, greedy = "first", init = "majority",
                          scale = FALSE)
  expect_s3_class(tc, "phylo")

  # greedy="best", init="majority", scale=FALSE
  tc2 <- TransferConsensus(trees, greedy = "best", init = "majority",
                           scale = FALSE)
  expect_s3_class(tc2, "phylo")

  # greedy="first", init="empty", scale=FALSE
  tc3 <- TransferConsensus(trees, greedy = "first", init = "empty",
                           scale = FALSE)
  expect_s3_class(tc3, "phylo")
})

test_that("Greedy remove path fires with conflicting majority splits", {
  # Two groups of trees with very different topologies: majority-init seeds
  # splits from the larger group, some of which the greedy then removes.
  t1 <- as.phylo(0, nTip = 12)
  t2 <- as.phylo(100000, nTip = 12)
  trees <- structure(c(rep(list(t1), 6), rep(list(t2), 5)),
                     class = "multiPhylo")

  # greedy="best" exercises do_remove + find_second in GreedyState
  tc_best_s <- TransferConsensus(trees, greedy = "best", init = "majority",
                                 scale = TRUE)
  expect_s3_class(tc_best_s, "phylo")

  tc_best_u <- TransferConsensus(trees, greedy = "best", init = "majority",
                                 scale = FALSE)
  expect_s3_class(tc_best_u, "phylo")

  # greedy="first" exercises the same remove path via greedy_first

  tc_first_s <- TransferConsensus(trees, greedy = "first", init = "majority",
                                  scale = TRUE)
  expect_s3_class(tc_first_s, "phylo")

  tc_first_u <- TransferConsensus(trees, greedy = "first", init = "majority",
                                  scale = FALSE)
  expect_s3_class(tc_first_u, "phylo")
})

test_that("Greedy exercises diverse topologies (extra C++ path coverage)", {
  # Random trees with many tips — sparse split overlap
  set.seed(5123)
  rand_trees <- structure(
    lapply(1:30, function(i) TreeTools::RandomTree(10, root = TRUE)),
    class = "multiPhylo")
  # Diverse indexed trees
  diverse_trees <- as.phylo(seq(0, 500, by = 25), nTip = 20)
  # Three conflicting groups
  mixed_trees <- structure(c(
    rep(list(as.phylo(0, 8)), 5),
    rep(list(as.phylo(300, 8)), 5),
    rep(list(as.phylo(9999, 8)), 3)
  ), class = "multiPhylo")

  for (trees in list(rand_trees, diverse_trees, mixed_trees)) {
    for (gr in c("best", "first")) {
      for (init in c("empty", "majority")) {
        for (sc in c(TRUE, FALSE)) {
          tc <- TransferConsensus(trees, greedy = gr, init = init, scale = sc)
          expect_s3_class(tc, "phylo")
        }
      }
    }
  }
})

test_that("tc_profile() runs without error", {
  trees <- as.phylo(0:4, nTip = 8)

  # Greedy best, empty init
  res <- TreeDist:::tc_profile(trees, scale = TRUE, greedy = "best",
                               init = "empty", n_iter = 1L)
  expect_length(res, 5)
  expect_true(all(res >= 0))
  expect_equal(names(res),
               c("pool_splits", "transfer_dist_mat", "compute_td",
                 "compat_mat", "greedy"))

  # Greedy first, majority init, unscaled
  res2 <- TreeDist:::tc_profile(trees, scale = FALSE, greedy = "first",
                                init = "majority")
  expect_length(res2, 5)
  expect_true(all(res2 >= 0))

  # Multiple iterations
  res3 <- TreeDist:::tc_profile(trees, n_iter = 3L)
  expect_length(res3, 5)

  # Greedy best, majority init
  res4 <- TreeDist:::tc_profile(trees, init = "majority")
  expect_length(res4, 5)
})


# =========================================================================
# R-level internal helper functions (transfer_consensus.R lines 94-551)
# =========================================================================

test_that(".FlipRaw() flips bits and masks correctly", {
  # 8 tips (1 byte, all bits used, mask = 0xFF)
  expect_equal(TreeDist:::.FlipRaw(as.raw(0x07), 8), as.raw(0xf8))

  # 5 tips (1 byte, 5 bits, mask = 0x1F)
  expect_equal(TreeDist:::.FlipRaw(as.raw(0x07), 5), as.raw(0x18))

  # Multi-byte: 10 tips (2 bytes, last byte 2 bits, mask = 0x03)
  result <- TreeDist:::.FlipRaw(as.raw(c(0x00, 0x01)), 10)
  expect_equal(result, as.raw(c(0xff, 0x02)))
})

test_that(".TransferDistMat() computes pairwise transfer distances", {
  splitMat <- matrix(c(
    TRUE, TRUE, FALSE, FALSE, FALSE, FALSE,
    TRUE, TRUE, TRUE, FALSE, FALSE, FALSE,
    FALSE, FALSE, FALSE, TRUE, TRUE, TRUE
  ), nrow = 3, byrow = TRUE)

  DIST <- TreeDist:::.TransferDistMat(splitMat, 6)
  expect_equal(dim(DIST), c(3, 3))
  expect_equal(diag(DIST), c(0, 0, 0))
  expect_true(isSymmetric(DIST))
  # {1,2} vs {1,2,3}: hamming = 1
  expect_equal(DIST[1, 2], 1)
  # {1,2,3} vs {4,5,6}: complements → transfer = 0
  expect_equal(DIST[2, 3], 0)

  # Single split
  DIST1 <- TreeDist:::.TransferDistMat(
    matrix(c(TRUE, TRUE, FALSE, FALSE), nrow = 1), 4)
  expect_equal(dim(DIST1), c(1, 1))
  expect_equal(DIST1[1, 1], 0)
})

test_that(".CompatMat() checks bipartition compatibility", {
  splitMat <- matrix(c(
    TRUE, TRUE, FALSE, FALSE, FALSE, FALSE,
    FALSE, FALSE, TRUE, TRUE, FALSE, FALSE,
    TRUE, FALSE, TRUE, FALSE, FALSE, FALSE
  ), nrow = 3, byrow = TRUE)

  compat <- TreeDist:::.CompatMat(splitMat, 6)
  expect_equal(dim(compat), c(3, 3))
  expect_true(compat[1, 2])    # {1,2} and {3,4}: disjoint → compatible
  expect_false(compat[1, 3])   # {1,2} and {1,3}: all 4 intersections → incompatible
  expect_true(isSymmetric(compat))
})

test_that(".ComputeTD() works for both scale modes", {
  DIST <- matrix(c(0, 1, 3, 1, 0, 2, 3, 2, 0), 3, 3)
  lightSide <- c(3, 2, 3)
  sentDist <- lightSide - 1L
  treeMembers <- list(c(1, 2), c(2, 3))

  td_s <- TreeDist:::.ComputeTD(DIST, sentDist, treeMembers, lightSide, 2, TRUE)
  td_u <- TreeDist:::.ComputeTD(DIST, sentDist, treeMembers, lightSide, 2, FALSE)
  expect_length(td_s, 3)
  expect_length(td_u, 3)
  expect_true(all(td_s >= 0))
  expect_true(all(td_u >= 0))

  # Single member per tree
  td_single <- TreeDist:::.ComputeTD(
    DIST, sentDist, list(c(1), c(3)), lightSide, 2, TRUE)
  expect_length(td_single, 3)
})

test_that(".IsCompat() handles edge cases", {
  compat <- matrix(TRUE, 3, 3)
  compat[1, 3] <- compat[3, 1] <- FALSE

  # Empty included → always compatible
  expect_true(TreeDist:::.IsCompat(1, c(FALSE, FALSE, FALSE), compat, 10))
  # Too many included (>= nTip - 3) → never compatible
  expect_false(TreeDist:::.IsCompat(1, c(TRUE, TRUE, TRUE), compat, 6))
  # Compatible with included set
  expect_true(TreeDist:::.IsCompat(1, c(FALSE, TRUE, FALSE), compat, 10))
  # Incompatible with an included split
  expect_false(TreeDist:::.IsCompat(1, c(FALSE, FALSE, TRUE), compat, 10))
})

test_that(".Dist() returns correct distance", {
  DIST <- matrix(c(0, 2, 3, 2, 0, 4, 3, 4, 0), 3, 3)
  sentDist <- c(5, 6, 7)

  expect_equal(TreeDist:::.Dist(1, 2, DIST, sentDist), 2)
  expect_equal(TreeDist:::.Dist(1, NA, DIST, sentDist), 5)
  expect_equal(TreeDist:::.Dist(3, NA, DIST, sentDist), 7)
})

test_that(".DiagDist() vectorizes correctly", {
  DIST <- matrix(c(0, 2, 3, 2, 0, 4, 3, 4, 0), 3, 3)
  sentDist <- c(5, 6, 7)

  expect_equal(
    TreeDist:::.DiagDist(c(2, NA, 1), DIST, sentDist),
    c(DIST[1, 2], 6, DIST[3, 1])
  )
  expect_equal(TreeDist:::.DiagDist(c(NA, NA, NA), DIST, sentDist), sentDist)
  expect_equal(
    TreeDist:::.DiagDist(c(2, 3, 1), DIST, sentDist),
    c(DIST[1, 2], DIST[2, 3], DIST[3, 1])
  )
})

test_that(".FindSecond() works for both scale modes and edge cases", {
  DIST <- matrix(c(0, 1, 3, 1, 0, 2, 3, 2, 0), 3, 3)
  pMinus1 <- c(4, 4, 4)

  # Unscaled: exclude matchIdx, find best
  expect_equal(
    TreeDist:::.FindSecond(1, 2, c(1, 2, 3), DIST, pMinus1, FALSE), 1)

  # matchIdx = NA → search all
  expect_equal(
    TreeDist:::.FindSecond(1, NA_integer_, c(2, 3), DIST, pMinus1, FALSE), 2)

  # No candidates
  expect_true(is.na(
    TreeDist:::.FindSecond(1, 2, c(2), DIST, pMinus1, FALSE)))

  # All exceed threshold (unscaled)
  DIST_far <- matrix(c(0, 5, 5, 5, 0, 5, 5, 5, 0), 3, 3)
  expect_true(is.na(
    TreeDist:::.FindSecond(1, NA_integer_, c(2, 3), DIST_far, c(2, 2, 2), FALSE)
  ))

  # Scaled: normal case
  expect_equal(
    TreeDist:::.FindSecond(1, NA_integer_, c(2, 3), DIST, pMinus1, TRUE), 2)

  # Scaled: exceeds threshold
  expect_true(is.na(
    TreeDist:::.FindSecond(1, NA_integer_, c(2), DIST_far, c(2, 2, 2), TRUE)
  ))
})

test_that(".DoAdd() updates match state correctly", {
  DIST <- matrix(c(0, 1, 3, 1, 0, 2, 3, 2, 0), 3, 3)
  sentDist <- c(4, 4, 4)

  st <- new.env(parent = emptyenv())
  st$MATCH <- rep(NA_integer_, 3)
  st$MATCH2 <- rep(NA_integer_, 3)
  st$incl <- rep(FALSE, 3)

  # First add: all splits match the newly added split
  TreeDist:::.DoAdd(2, st, DIST, sentDist)
  expect_true(st$incl[2])
  expect_equal(st$MATCH[1], 2)
  expect_equal(st$MATCH[2], 2)
  expect_equal(st$MATCH[3], 2)

  # Second add: closer splits update, old match demotes to second
  TreeDist:::.DoAdd(1, st, DIST, sentDist)
  expect_equal(st$MATCH[1], 1)    # self-match closer
  expect_equal(st$MATCH2[1], 2)   # old match becomes second
  expect_equal(st$MATCH[2], 2)    # split 2 still closest to itself
  expect_equal(st$MATCH2[2], 1)   # split 1 becomes second
})

test_that(".DoRemove() handles sentinel promotion and affected2", {
  DIST <- matrix(c(0, 1, 3, 1, 0, 2, 3, 2, 0), 3, 3)
  sentDist <- c(4, 4, 4)
  lightSide <- c(5, 5, 5)

  # Case 1: sentinel promotion (MATCH2 = NA, remove MATCH, no other included)
  st <- new.env(parent = emptyenv())
  st$MATCH <- c(2L, NA_integer_, 2L)
  st$MATCH2 <- c(NA_integer_, NA_integer_, NA_integer_)
  st$incl <- c(FALSE, TRUE, FALSE)

  TreeDist:::.DoRemove(2, st, DIST, sentDist, lightSide, FALSE)
  expect_true(is.na(st$MATCH[1]))
  expect_true(is.na(st$MATCH[3]))

  # Case 2: sentinel promotion with rescan finding another match
  st2 <- new.env(parent = emptyenv())
  st2$MATCH <- c(2L, 2L, NA_integer_)
  st2$MATCH2 <- c(NA_integer_, NA_integer_, NA_integer_)
  st2$incl <- c(TRUE, TRUE, FALSE)

  TreeDist:::.DoRemove(2, st2, DIST, sentDist, lightSide, FALSE)
  expect_equal(st2$MATCH[1], 1L)  # rescanned, found split 1

  # Case 3: affected2 path (MATCH2 == removed, MATCH != removed)
  st3 <- new.env(parent = emptyenv())
  st3$MATCH <- c(1L, 1L, 1L)
  st3$MATCH2 <- c(2L, 2L, NA_integer_)
  st3$incl <- c(TRUE, TRUE, FALSE)

  TreeDist:::.DoRemove(2, st3, DIST, sentDist, lightSide, FALSE)
  expect_equal(st3$MATCH, c(1L, 1L, 1L))  # unchanged
  expect_true(is.na(st3$MATCH2[1]))
  expect_true(is.na(st3$MATCH2[2]))

  # Case 4: scaled mode
  st4 <- new.env(parent = emptyenv())
  st4$MATCH <- c(2L, NA_integer_, NA_integer_)
  st4$MATCH2 <- c(NA_integer_, NA_integer_, NA_integer_)
  st4$incl <- c(TRUE, TRUE, FALSE)

  TreeDist:::.DoRemove(2, st4, DIST, sentDist, lightSide, TRUE)
  expect_equal(st4$MATCH[1], 1L)
})

test_that(".AddBenefitVec() computes add benefits", {
  DIST <- matrix(c(0, 1, 3, 1, 0, 2, 3, 2, 0), 3, 3)
  lightSide <- c(3, 3, 3)
  sentDist <- lightSide - 1L
  counts <- c(2, 3, 1)
  TD <- c(0.5, 0.3, 0.8)

  st <- new.env(parent = emptyenv())
  st$MATCH <- rep(NA_integer_, 3)
  st$MATCH2 <- rep(NA_integer_, 3)
  st$incl <- rep(FALSE, 3)

  ben_s <- TreeDist:::.AddBenefitVec(
    c(1, 2, 3), st, DIST, sentDist, TD, counts, lightSide, scale = TRUE)
  expect_length(ben_s, 3)

  ben_u <- TreeDist:::.AddBenefitVec(
    c(1, 2, 3), st, DIST, sentDist, TD, counts, lightSide, scale = FALSE)
  expect_length(ben_u, 3)
})

test_that(".RemoveBenefitVec() computes remove benefits", {
  DIST <- matrix(c(0, 1, 3, 1, 0, 2, 3, 2, 0), 3, 3)
  lightSide <- c(3, 3, 3)
  sentDist <- lightSide - 1L
  counts <- c(2, 3, 1)
  TD <- c(0.5, 0.3, 0.8)

  st <- new.env(parent = emptyenv())
  st$MATCH <- c(2L, 1L, 2L)
  st$MATCH2 <- c(NA_integer_, NA_integer_, NA_integer_)
  st$incl <- c(TRUE, TRUE, FALSE)

  ben_s <- TreeDist:::.RemoveBenefitVec(
    c(1, 2), st, DIST, sentDist, TD, counts, lightSide, scale = TRUE)
  expect_length(ben_s, 2)

  ben_u <- TreeDist:::.RemoveBenefitVec(
    c(1, 2), st, DIST, sentDist, TD, counts, lightSide, scale = FALSE)
  expect_length(ben_u, 2)

  # No affected splits (MATCH doesn't reference candidate)
  st2 <- new.env(parent = emptyenv())
  st2$MATCH <- c(1L, 1L, 1L)
  st2$MATCH2 <- rep(NA_integer_, 3)
  st2$incl <- c(TRUE, TRUE, FALSE)

  ben_none <- TreeDist:::.RemoveBenefitVec(
    c(2), st2, DIST, sentDist, TD, counts, lightSide, scale = TRUE)
  # Only TD contribution, no fn_cost (no affected splits)
  expect_equal(ben_none, TD[2])
})

test_that(".PoolSplits() pools and deduplicates splits", {
  trees <- as.phylo(0:4, nTip = 8)
  tipLabels <- TipLabels(trees[[1]])
  pool <- TreeDist:::.PoolSplits(trees, tipLabels)

  expect_true(is.matrix(pool$splits))
  expect_true(is.matrix(pool$rawSplits))
  expect_equal(ncol(pool$splits), length(tipLabels))
  expect_equal(length(pool$counts), nrow(pool$splits))
  expect_equal(length(pool$lightSide), nrow(pool$splits))
  expect_equal(length(pool$treeMembers), length(trees))
  expect_true(all(pool$counts >= 1))
  expect_true(all(pool$lightSide >= 1))
})

test_that(".SplitsToPhylo() converts splits to tree or star", {
  trees <- as.phylo(0:2, nTip = 8)
  tipLabels <- TipLabels(trees[[1]])
  pool <- TreeDist:::.PoolSplits(trees, tipLabels)

  # Some splits included
  included <- rep(FALSE, nrow(pool$rawSplits))
  included[1:min(2, nrow(pool$rawSplits))] <- TRUE
  tr <- TreeDist:::.SplitsToPhylo(pool$rawSplits, included, tipLabels, 8)
  expect_s3_class(tr, "phylo")
  expect_equal(sort(TipLabels(tr)), sort(tipLabels))

  # No splits → star tree
  tr_star <- TreeDist:::.SplitsToPhylo(
    pool$rawSplits, rep(FALSE, nrow(pool$rawSplits)), tipLabels, 8)
  expect_s3_class(tr_star, "phylo")
  expect_equal(NSplits(tr_star), 0)
})

test_that(".InitMatches() initializes match state from included splits", {
  # DIST: included = {1,2}; split 3 close to split 2 (dist 1 < thresh 2)
  DIST <- matrix(c(
    0, 1, 3, 5,
    1, 0, 1, 4,
    3, 1, 0, 1,
    5, 4, 1, 0
  ), 4, 4, byrow = TRUE)
  lightSide <- c(3, 3, 3, 3)
  sentDist <- lightSide - 1L  # all 2

  # Scaled mode
  st <- new.env(parent = emptyenv())
  st$MATCH <- rep(NA_integer_, 4)
  st$MATCH2 <- rep(NA_integer_, 4)
  st$incl <- c(TRUE, TRUE, FALSE, FALSE)

  TreeDist:::.InitMatches(st, DIST, sentDist, lightSide, scale = TRUE)
  # Split 1: self-match (dist 0), second = split 2 (dist 1)
  expect_equal(st$MATCH[1], 1)
  expect_equal(st$MATCH2[1], 2)
  # Split 3: best among included is split 2 (dist 1), 1/2 < 1 → matched
  expect_equal(st$MATCH[3], 2)
  # Split 4: best among included is split 2 (dist 4), 4/2 >= 1 → skipped
  expect_true(is.na(st$MATCH[4]))

  # Unscaled mode
  st2 <- new.env(parent = emptyenv())
  st2$MATCH <- rep(NA_integer_, 4)
  st2$MATCH2 <- rep(NA_integer_, 4)
  st2$incl <- c(TRUE, TRUE, FALSE, FALSE)

  TreeDist:::.InitMatches(st2, DIST, sentDist, lightSide, scale = FALSE)
  expect_equal(st2$MATCH[1], 1)
  expect_equal(st2$MATCH[3], 2)

  # Empty included set → no-op
  st3 <- new.env(parent = emptyenv())
  st3$MATCH <- rep(NA_integer_, 4)
  st3$MATCH2 <- rep(NA_integer_, 4)
  st3$incl <- rep(FALSE, 4)

  TreeDist:::.InitMatches(st3, DIST, sentDist, lightSide, scale = TRUE)
  expect_true(all(is.na(st3$MATCH)))
})


# =========================================================================
# Full R-level greedy pipeline integration tests
# =========================================================================

test_that("R-level GreedyBest pipeline produces valid consensus (scaled)", {
  trees <- as.phylo(0:9, nTip = 8)
  tipLabels <- TipLabels(trees[[1]])
  nTip <- length(tipLabels)
  nTree <- length(trees)

  pool <- TreeDist:::.PoolSplits(trees, tipLabels)
  nSplits <- nrow(pool$splits)
  DIST <- TreeDist:::.TransferDistMat(pool$splits, nTip)
  sentDist <- pool$lightSide - 1L
  TD <- TreeDist:::.ComputeTD(DIST, sentDist, pool$treeMembers, pool$lightSide,
                               nTree, scale = TRUE)
  compat <- TreeDist:::.CompatMat(pool$splits, nTip)
  sortOrd <- order(-pool$counts, seq_len(nSplits))

  st <- new.env(parent = emptyenv())
  st$MATCH <- rep(NA_integer_, nSplits)
  st$MATCH2 <- rep(NA_integer_, nSplits)
  st$incl <- rep(FALSE, nSplits)

  TreeDist:::.GreedyBest(st, DIST, sentDist, TD, pool$counts, pool$lightSide,
                          compat, sortOrd, scale = TRUE, nSplits, nTip)
  expect_true(any(st$incl))

  tr <- TreeDist:::.SplitsToPhylo(pool$rawSplits, st$incl, tipLabels, nTip)
  expect_s3_class(tr, "phylo")
})

test_that("R-level GreedyFirst pipeline produces valid consensus (unscaled)", {
  trees <- as.phylo(0:9, nTip = 8)
  tipLabels <- TipLabels(trees[[1]])
  nTip <- length(tipLabels)
  nTree <- length(trees)

  pool <- TreeDist:::.PoolSplits(trees, tipLabels)
  nSplits <- nrow(pool$splits)
  DIST <- TreeDist:::.TransferDistMat(pool$splits, nTip)
  sentDist <- pool$lightSide - 1L
  TD <- TreeDist:::.ComputeTD(DIST, sentDist, pool$treeMembers, pool$lightSide,
                               nTree, scale = FALSE)
  compat <- TreeDist:::.CompatMat(pool$splits, nTip)
  sortOrd <- order(-pool$counts, seq_len(nSplits))

  st <- new.env(parent = emptyenv())
  st$MATCH <- rep(NA_integer_, nSplits)
  st$MATCH2 <- rep(NA_integer_, nSplits)
  st$incl <- rep(FALSE, nSplits)

  TreeDist:::.GreedyFirst(st, DIST, sentDist, TD, pool$counts, pool$lightSide,
                           compat, sortOrd, scale = FALSE, nSplits, nTip)
  expect_true(any(st$incl))

  tr <- TreeDist:::.SplitsToPhylo(pool$rawSplits, st$incl, tipLabels, nTip)
  expect_s3_class(tr, "phylo")
})

test_that("R-level GreedyBest with majority init exercises InitMatches", {
  trees <- as.phylo(0:14, nTip = 10)
  tipLabels <- TipLabels(trees[[1]])
  nTip <- length(tipLabels)
  nTree <- length(trees)

  pool <- TreeDist:::.PoolSplits(trees, tipLabels)
  nSplits <- nrow(pool$splits)
  DIST <- TreeDist:::.TransferDistMat(pool$splits, nTip)
  sentDist <- pool$lightSide - 1L
  TD <- TreeDist:::.ComputeTD(DIST, sentDist, pool$treeMembers, pool$lightSide,
                               nTree, scale = TRUE)
  compat <- TreeDist:::.CompatMat(pool$splits, nTip)
  sortOrd <- order(-pool$counts, seq_len(nSplits))

  st <- new.env(parent = emptyenv())
  st$MATCH <- rep(NA_integer_, nSplits)
  st$MATCH2 <- rep(NA_integer_, nSplits)

  # Majority init: include splits present in > half of trees
  st$incl <- pool$counts > (nTree / 2)
  if (any(st$incl)) {
    TreeDist:::.InitMatches(st, DIST, sentDist, pool$lightSide, scale = TRUE)
  }

  TreeDist:::.GreedyBest(st, DIST, sentDist, TD, pool$counts, pool$lightSide,
                          compat, sortOrd, scale = TRUE, nSplits, nTip)

  tr <- TreeDist:::.SplitsToPhylo(pool$rawSplits, st$incl, tipLabels, nTip)
  expect_s3_class(tr, "phylo")
})

test_that("R-level GreedyFirst with majority init (unscaled)", {
  trees <- as.phylo(0:14, nTip = 10)
  tipLabels <- TipLabels(trees[[1]])
  nTip <- length(tipLabels)
  nTree <- length(trees)

  pool <- TreeDist:::.PoolSplits(trees, tipLabels)
  nSplits <- nrow(pool$splits)
  DIST <- TreeDist:::.TransferDistMat(pool$splits, nTip)
  sentDist <- pool$lightSide - 1L
  TD <- TreeDist:::.ComputeTD(DIST, sentDist, pool$treeMembers, pool$lightSide,
                               nTree, scale = FALSE)
  compat <- TreeDist:::.CompatMat(pool$splits, nTip)
  sortOrd <- order(-pool$counts, seq_len(nSplits))

  st <- new.env(parent = emptyenv())
  st$MATCH <- rep(NA_integer_, nSplits)
  st$MATCH2 <- rep(NA_integer_, nSplits)
  st$incl <- pool$counts > (nTree / 2)
  if (any(st$incl)) {
    TreeDist:::.InitMatches(st, DIST, sentDist, pool$lightSide, scale = FALSE)
  }

  TreeDist:::.GreedyFirst(st, DIST, sentDist, TD, pool$counts, pool$lightSide,
                           compat, sortOrd, scale = FALSE, nSplits, nTip)

  tr <- TreeDist:::.SplitsToPhylo(pool$rawSplits, st$incl, tipLabels, nTip)
  expect_s3_class(tr, "phylo")
})

test_that("R greedy pipeline matches C++ consensus", {
  trees <- as.phylo(0:9, nTip = 8)
  tipLabels <- TipLabels(trees[[1]])
  nTip <- length(tipLabels)
  nTree <- length(trees)

  # C++ result
  tc_cpp <- TransferConsensus(trees, greedy = "best", scale = TRUE)

  # R-level result
  pool <- TreeDist:::.PoolSplits(trees, tipLabels)
  nSplits <- nrow(pool$splits)
  DIST <- TreeDist:::.TransferDistMat(pool$splits, nTip)
  sentDist <- pool$lightSide - 1L
  TD <- TreeDist:::.ComputeTD(DIST, sentDist, pool$treeMembers, pool$lightSide,
                               nTree, scale = TRUE)
  compat <- TreeDist:::.CompatMat(pool$splits, nTip)
  sortOrd <- order(-pool$counts, seq_len(nSplits))

  st <- new.env(parent = emptyenv())
  st$MATCH <- rep(NA_integer_, nSplits)
  st$MATCH2 <- rep(NA_integer_, nSplits)
  st$incl <- rep(FALSE, nSplits)

  TreeDist:::.GreedyBest(st, DIST, sentDist, TD, pool$counts, pool$lightSide,
                          compat, sortOrd, scale = TRUE, nSplits, nTip)

  tc_r <- TreeDist:::.SplitsToPhylo(pool$rawSplits, st$incl, tipLabels, nTip)

  # Both should produce resolved trees with same tip labels
  expect_s3_class(tc_r, "phylo")
  expect_equal(sort(TipLabels(tc_r)), sort(TipLabels(tc_cpp)))
  # Resolution should be similar (not necessarily identical due to
  # possible differences in canonicalization/sort-order between R and C++)
  expect_gte(NSplits(tc_r), 1)
})
Any scripts or data that you put into this service are public.
TreeDist documentation built on June 10, 2026, 5:06 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
TreeDist
Calculate and Map Distances Between Phylogenetic Trees

tests/testthat/test-transfer_consensus.R
In TreeDist: Calculate and Map Distances Between Phylogenetic Trees

Try the TreeDist package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

TreeDist Calculate and Map Distances Between Phylogenetic Trees

tests/testthat/test-transfer_consensus.R In TreeDist: Calculate and Map Distances Between Phylogenetic Trees

Try the TreeDist package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

TreeDist
Calculate and Map Distances Between Phylogenetic Trees

tests/testthat/test-transfer_consensus.R
In TreeDist: Calculate and Map Distances Between Phylogenetic Trees