test-batch_coverage.R
In TreeDist: Calculate and Map Distances Between Phylogenetic Trees

## Coverage tests for batch C++ functions, fast paths, and cross-pairs.
##
## These tests target code paths introduced during the OpenMP / sort+merge /
## CostMatrix-pooling optimisation cycle that are not exercised by the
## existing test suite.

library("TreeTools", quietly = TRUE)

# Shared fixtures ----
tips20 <- paste0("t", seq_len(20))
tA <- ape::as.phylo(0:4, tipLabels = tips20)
tB <- ape::as.phylo(5:9, tipLabels = tips20)
trees20 <- ape::as.phylo(0:9, tipLabels = tips20)

tips8 <- paste0("t", seq_len(8))
tA8 <- ape::as.phylo(0:2, tipLabels = tips8)
tB8 <- ape::as.phylo(3:5, tipLabels = tips8)


# DifferentPhylogeneticInfo ----

test_that("DPI all-pairs fast path agrees with per-pair", {
  batch <- DifferentPhylogeneticInfo(trees20)
  m <- as.matrix(batch)
  expect_equal(m[2, 1],
               DifferentPhylogeneticInfo(trees20[[1]], trees20[[2]]),
               tolerance = 1e-10)
  expect_equal(m[5, 3],
               DifferentPhylogeneticInfo(trees20[[3]], trees20[[5]]),
               tolerance = 1e-10)
})

test_that("DPI cross-pairs fast path agrees with per-pair", {
  cross <- DifferentPhylogeneticInfo(tA, tB)
  expect_equal(dim(cross), c(5L, 5L))
  expect_equal(cross[1, 1],
               DifferentPhylogeneticInfo(tA[[1]], tB[[1]]),
               tolerance = 1e-10)
  expect_equal(cross[3, 2],
               DifferentPhylogeneticInfo(tA[[3]], tB[[2]]),
               tolerance = 1e-10)
})


# MatchingSplitInfoDistance ----

test_that("MSID all-pairs fast path agrees with per-pair", {
  batch <- MatchingSplitInfoDistance(trees20)
  m <- as.matrix(batch)
  expect_equal(m[2, 1],
               MatchingSplitInfoDistance(trees20[[1]], trees20[[2]]),
               tolerance = 1e-10)
  expect_equal(m[5, 3],
               MatchingSplitInfoDistance(trees20[[3]], trees20[[5]]),
               tolerance = 1e-10)
})

test_that("MSID cross-pairs fast path agrees with per-pair", {
  cross <- MatchingSplitInfoDistance(tA, tB)
  expect_equal(dim(cross), c(5L, 5L))
  expect_equal(cross[1, 1],
               MatchingSplitInfoDistance(tA[[1]], tB[[1]]),
               tolerance = 1e-10)
  expect_equal(cross[3, 2],
               MatchingSplitInfoDistance(tA[[3]], tB[[2]]),
               tolerance = 1e-10)
})


# InfoRobinsonFoulds (distance mode) ----

test_that("IRF all-pairs fast path agrees with per-pair", {
  batch <- InfoRobinsonFoulds(trees20)
  m <- as.matrix(batch)
  expect_equal(m[2, 1],
               InfoRobinsonFoulds(trees20[[1]], trees20[[2]]),
               tolerance = 1e-10)
  expect_equal(m[5, 3],
               InfoRobinsonFoulds(trees20[[3]], trees20[[5]]),
               tolerance = 1e-10)
})

test_that("IRF cross-pairs fast path agrees with per-pair", {
  cross <- InfoRobinsonFoulds(tA, tB)
  expect_equal(dim(cross), c(5L, 5L))
  expect_equal(cross[1, 1],
               InfoRobinsonFoulds(tA[[1]], tB[[1]]),
               tolerance = 1e-10)
  expect_equal(cross[3, 2],
               InfoRobinsonFoulds(tA[[3]], tB[[2]]),
               tolerance = 1e-10)
})


# ClusteringInfoDistance (all-pairs already tested; add cross-pairs variety) ----

test_that("CID cross-pairs with different-sized collections", {
  tA3 <- ape::as.phylo(0:2, tipLabels = tips20)
  tB7 <- ape::as.phylo(3:9, tipLabels = tips20)
  cross <- ClusteringInfoDistance(tA3, tB7)
  expect_equal(dim(cross), c(3L, 7L))
  expect_equal(cross[2, 5],
               ClusteringInfoDistance(tA3[[2]], tB7[[5]]),
               tolerance = 1e-10)
})


# MSD cross-pairs via CalculateTreeDistance → .SplitDistanceManyMany ----

test_that("MSD cross-pairs batch agrees with per-pair", {
  cross <- MatchingSplitDistance(tA8, tB8)
  expect_equal(dim(cross), c(3L, 3L))
  expect_equal(cross[1, 1],
               MatchingSplitDistance(tA8[[1]], tB8[[1]]),
               tolerance = 1e-10)
  expect_equal(cross[2, 3],
               MatchingSplitDistance(tA8[[2]], tB8[[3]]),
               tolerance = 1e-10)
})


# Nye / Jaccard cross-pairs via CalculateTreeDistance → .SplitDistanceManyMany ----

test_that("NyeSimilarity cross-pairs batch agrees with per-pair", {
  cross <- NyeSimilarity(tA8, tB8)
  expect_equal(dim(cross), c(3L, 3L))
  expect_equal(cross[1, 1],
               NyeSimilarity(tA8[[1]], tB8[[1]]),
               tolerance = 1e-10)
})

test_that("JRF cross-pairs batch with k and allowConflict", {
  # Default k=1, allowConflict=TRUE
  cross1 <- JaccardRobinsonFoulds(tA8, tB8)
  expect_equal(dim(cross1), c(3L, 3L))
  expect_equal(cross1[2, 1],
               JaccardRobinsonFoulds(tA8[[2]], tB8[[1]]),
               tolerance = 1e-10)

  # k = 2
  cross_k2 <- JaccardRobinsonFoulds(tA8, tB8, k = 2)
  expect_equal(cross_k2[1, 1],
               JaccardRobinsonFoulds(tA8[[1]], tB8[[1]], k = 2),
               tolerance = 1e-10)

  # allowConflict = FALSE
  cross_nc <- JaccardRobinsonFoulds(tA8, tB8, allowConflict = FALSE)
  expect_equal(cross_nc[1, 1],
               JaccardRobinsonFoulds(tA8[[1]], tB8[[1]],
                                     allowConflict = FALSE),
               tolerance = 1e-10)

  # k = Inf
  cross_inf <- JaccardRobinsonFoulds(tA8, tB8, k = Inf)
  expect_equal(cross_inf[2, 2],
               JaccardRobinsonFoulds(tA8[[2]], tB8[[2]], k = Inf),
               tolerance = 1e-10)
})


# MSI cross-pairs via CalculateTreeDistance → .SplitDistanceManyMany ----

test_that("MSI cross-pairs batch agrees with per-pair", {
  cross <- MatchingSplitInfo(tA8, tB8)
  expect_equal(dim(cross), c(3L, 3L))
  expect_equal(cross[1, 1],
               MatchingSplitInfo(tA8[[1]], tB8[[1]]),
               tolerance = 1e-10)
})

# SPI cross-pairs via CalculateTreeDistance → .SplitDistanceManyMany ----

test_that("SPI cross-pairs batch agrees with per-pair", {
  cross <- SharedPhylogeneticInfo(tA8, tB8)
  expect_equal(dim(cross), c(3L, 3L))
  expect_equal(cross[1, 1],
               SharedPhylogeneticInfo(tA8[[1]], tB8[[1]]),
               tolerance = 1e-10)
})


# C++ batch entropy/info functions ----

test_that("cpp_clustering_entropy_batch matches R ClusteringEntropy", {
  trees <- ape::as.phylo(0:4, tipLabels = tips20)
  splits_list <- as.Splits(trees)
  nTip <- length(tips20)
  batch <- TreeDist:::cpp_clustering_entropy_batch(splits_list, as.integer(nTip))
  r_ref <- vapply(trees, ClusteringEntropy, double(1))
  expect_equal(batch, unname(r_ref), tolerance = 1e-12)
})

test_that("cpp_splitwise_info_batch matches R SplitwiseInfo", {
  trees <- ape::as.phylo(0:4, tipLabels = tips20)
  splits_list <- as.Splits(trees)
  nTip <- length(tips20)
  batch <- TreeDist:::cpp_splitwise_info_batch(splits_list, as.integer(nTip))
  r_ref <- vapply(trees, SplitwiseInfo, double(1))
  expect_equal(batch, unname(r_ref), tolerance = 1e-8)
})

test_that("cpp_clustering_entropy_batch: polytomy trees", {
  # Trees with a single non-trivial split (polytomy)
  poly1 <- ape::read.tree(text = "((t1,t2,t3),(t4,t5,t6));")
  poly2 <- ape::read.tree(text = "((t1,t2),(t3,t4,t5,t6));")
  trees <- structure(list(poly1, poly2), class = "multiPhylo")
  splits_list <- as.Splits(trees)
  nTip <- 6L
  batch <- TreeDist:::cpp_clustering_entropy_batch(splits_list, nTip)
  r_ref <- vapply(trees, ClusteringEntropy, double(1))
  expect_equal(batch, unname(r_ref), tolerance = 1e-12)
})

test_that("cpp_splitwise_info_batch: polytomy trees", {
  poly1 <- ape::read.tree(text = "((t1,t2,t3),(t4,t5,t6));")
  poly2 <- ape::read.tree(text = "((t1,t2),(t3,t4,t5,t6));")
  trees <- structure(list(poly1, poly2), class = "multiPhylo")
  splits_list <- as.Splits(trees)
  nTip <- 6L
  batch <- TreeDist:::cpp_splitwise_info_batch(splits_list, nTip)
  r_ref <- vapply(trees, SplitwiseInfo, double(1))
  expect_equal(batch, unname(r_ref), tolerance = 1e-8)
})


# KendallColijn batch paths ----

test_that("KC all-pairs uses pair_diff_euclidean", {
  trees <- ape::as.phylo(0:4, tipLabels = tips8)
  kc <- KendallColijn(trees)
  expect_s3_class(kc, "dist")
  expect_equal(attr(kc, "Size"), 5L)
  m <- as.matrix(kc)
  expect_equal(m[2, 1], KendallColijn(trees[[1]], trees[[2]]),
               tolerance = 1e-10)
  expect_equal(m[4, 3], KendallColijn(trees[[3]], trees[[4]]),
               tolerance = 1e-10)
})

test_that("KC cross-pairs uses vec_diff_euclidean", {
  cross <- KendallColijn(tA8, tB8)
  expect_equal(dim(cross), c(3L, 3L))
  expect_equal(cross[1, 1], KendallColijn(tA8[[1]], tB8[[1]]),
               tolerance = 1e-10)
  expect_equal(cross[2, 3], KendallColijn(tA8[[2]], tB8[[3]]),
               tolerance = 1e-10)
})

test_that("KC all-pairs with SplitVector", {
  trees <- ape::as.phylo(0:3, tipLabels = tips8)
  kc <- KendallColijn(trees, Vector = SplitVector)
  expect_s3_class(kc, "dist")
  m <- as.matrix(kc)
  expect_equal(m[2, 1],
               KendallColijn(trees[[1]], trees[[2]], Vector = SplitVector),
               tolerance = 1e-10)
})

test_that("KC cross-pairs with SplitVector", {
  cross <- KendallColijn(tA8, tB8, Vector = SplitVector)
  expect_equal(dim(cross), c(3L, 3L))
  expect_equal(cross[1, 1],
               KendallColijn(tA8[[1]], tB8[[1]], Vector = SplitVector),
               tolerance = 1e-10)
})


# Large-tree cross-pairs (multi-bin >64 tips) ----

test_that("Cross-pairs with >64-tip trees exercise multi-bin path", {
  skip_on_cran()
  tips100 <- paste0("t", seq_len(100))
  tA100 <- ape::as.phylo(0:2, tipLabels = tips100)
  tB100 <- ape::as.phylo(3:5, tipLabels = tips100)

  # CID cross-pairs
  cid <- ClusteringInfoDistance(tA100, tB100)
  expect_equal(dim(cid), c(3L, 3L))
  expect_equal(cid[1, 1],
               ClusteringInfoDistance(tA100[[1]], tB100[[1]]),
               tolerance = 1e-10)

  # MSD cross-pairs
  msd <- MatchingSplitDistance(tA100, tB100)
  expect_equal(dim(msd), c(3L, 3L))
  expect_equal(msd[1, 1],
               MatchingSplitDistance(tA100[[1]], tB100[[1]]),
               tolerance = 1e-10)

  # DPI cross-pairs
  dpi <- DifferentPhylogeneticInfo(tA100, tB100)
  expect_equal(dim(dpi), c(3L, 3L))
  expect_equal(dpi[1, 1],
               DifferentPhylogeneticInfo(tA100[[1]], tB100[[1]]),
               tolerance = 1e-10)
})

test_that("All-pairs entropy batch with >64-tip trees", {
  skip_on_cran()
  tips100 <- paste0("t", seq_len(100))
  trees100 <- ape::as.phylo(0:2, tipLabels = tips100)
  splits_list <- as.Splits(trees100)

  ce <- TreeDist:::cpp_clustering_entropy_batch(splits_list, 100L)
  ce_ref <- vapply(trees100, ClusteringEntropy, double(1))
  expect_equal(ce, unname(ce_ref), tolerance = 1e-12)

  si <- TreeDist:::cpp_splitwise_info_batch(splits_list, 100L)
  si_ref <- vapply(trees100, SplitwiseInfo, double(1))
  expect_equal(si, unname(si_ref), tolerance = 1e-8)
})


# SPI/MSI all-pairs batch dispatch via .SplitDistanceAllPairs ----

test_that("SPI all-pairs batch agrees with per-pair", {
  batch <- SharedPhylogeneticInfo(trees20)
  m <- as.matrix(batch)
  expect_equal(m[2, 1],
               SharedPhylogeneticInfo(trees20[[1]], trees20[[2]]),
               tolerance = 1e-10)
})

test_that("MSI all-pairs batch agrees with per-pair", {
  batch <- MatchingSplitInfo(trees20)
  m <- as.matrix(batch)
  expect_equal(m[2, 1],
               MatchingSplitInfo(trees20[[1]], trees20[[2]]),
               tolerance = 1e-10)
})


# Nye all-pairs batch dispatch ----

test_that("Nye all-pairs batch agrees with per-pair", {
  batch <- NyeSimilarity(trees20)
  m <- as.matrix(batch)
  expect_equal(m[2, 1],
               NyeSimilarity(trees20[[1]], trees20[[2]]),
               tolerance = 1e-10)
})


# Regression test for issue #162 ----
# MutualClusteringInfo matching was not globally optimal for larger trees,
# causing ClusteringInfoDistance to disagree with manual calculation.
# Fixed in v2.11.0 (PR #163).  This test verifies the fix holds in the
# batch path as well as the per-pair path.

test_that("Issue #162: CID by-hand matches function for 33-taxon trees", {
  tree1 <- ape::read.tree(text = "(B,A,((AG,AF),((((C,(E,D)),((F,G),H)),((K,I),J)),((Q,R),((((AE,(AC,AD)),(AB,(N,(P,O)))),(((Y,(Z,AA)),(W,X)),(V,(T,U)))),((M,S),L))))));")
  tree2 <- ape::read.tree(text = "(B,A,((AG,AF),((((C,(E,D)),((F,G),H)),(((Q,R),((((AE,(AC,AD)),AB),(N,(P,O))),((Y,(Z,AA)),((V,(T,U)),(W,X))))),((M,L),S))),((K,I),J))));")

  # Per-pair path: reported matching score must equal MCI value

  mci <- MutualClusteringInfo(tree1, tree2, reportMatching = TRUE)
  expect_equal(sum(attr(mci, "matchedScores")), mci[[1]], tolerance = 1e-10)

  # Per-pair path: CID by hand must equal CID function
  h <- ClusteringEntropy(tree1) + ClusteringEntropy(tree2)
  d_fn <- ClusteringInfoDistance(tree1, tree2, normalize = TRUE)
  d_hand <- (h - 2 * mci[[1]]) / h
  expect_equal(d_fn, d_hand, tolerance = 1e-10)
})

# RobinsonFoulds cross-pairs (Day 1985 ClusterTable batch) ----

test_that("RF cross-pairs fast path agrees with per-pair", {
  cross <- RobinsonFoulds(tA, tB)
  expect_equal(dim(cross), c(5L, 5L))
  expect_equal(cross[1, 1],
               RobinsonFoulds(tA[[1]], tB[[1]]),
               tolerance = 0)
  expect_equal(cross[3, 2],
               RobinsonFoulds(tA[[3]], tB[[2]]),
               tolerance = 0)
  expect_equal(cross[5, 5],
               RobinsonFoulds(tA[[5]], tB[[5]]),
               tolerance = 0)
})

test_that("RF cross-pairs similarity mode agrees with per-pair", {
  cross_sim <- RobinsonFoulds(tA, tB, similarity = TRUE)
  expect_equal(cross_sim[2, 3],
               RobinsonFoulds(tA[[2]], tB[[3]], similarity = TRUE),
               tolerance = 0)
})

test_that("RF cross-pairs normalization agrees with per-pair", {
  cross_norm <- RobinsonFoulds(tA, tB, normalize = TRUE)
  expect_equal(cross_norm[1, 4],
               RobinsonFoulds(tA[[1]], tB[[4]], normalize = TRUE),
               tolerance = 1e-10)
})

test_that("RF cross-pairs handles single-tree inputs via fallback", {
  # Single tree1 → falls back to CalculateTreeDistance
  d <- RobinsonFoulds(tA[[1]], tB)
  expect_equal(length(d), 5L)
  expect_equal(d[1], RobinsonFoulds(tA[[1]], tB[[1]]), tolerance = 0)
})

test_that("RF cross-pairs all-pairs matches cross-pairs diagonal", {
  all_dist <- as.matrix(RobinsonFoulds(trees20))
  # Cross-pairs of trees20 with itself should match all-pairs
  cross <- RobinsonFoulds(trees20, trees20)
  expect_equal(dim(cross), c(10L, 10L))
  for (i in 1:10) {
    for (j in 1:10) {
      expect_equal(cross[i, j], all_dist[i, j], tolerance = 0,
                   info = paste0("i=", i, " j=", j))
    }
  }
})

test_that("RF batch paths use heap allocation for large trees (>8192 tips)", {
  skip_on_cran()
  n <- 8193L
  tr <- list(TreeTools::RandomTree(n), TreeTools::RandomTree(n))
  class(tr) <- "multiPhylo"

  # All-pairs (hits S_heap resize in robinson_foulds_all_pairs)
  ap <- RobinsonFoulds(tr)
  expect_length(ap, 1L)
  expect_gte(ap, 0)

  # Cross-pairs (hits S_heap resize in robinson_foulds_cross_pairs)
  cp <- RobinsonFoulds(tr[1], tr[2])
  expect_equal(cp[1, 1], as.matrix(ap)[1, 2])
})

test_that("Issue #162: batch path agrees with per-pair for 33-taxon trees", {
  tree1 <- ape::read.tree(text = "(B,A,((AG,AF),((((C,(E,D)),((F,G),H)),((K,I),J)),((Q,R),((((AE,(AC,AD)),(AB,(N,(P,O)))),(((Y,(Z,AA)),(W,X)),(V,(T,U)))),((M,S),L))))));")
  tree2 <- ape::read.tree(text = "(B,A,((AG,AF),((((C,(E,D)),((F,G),H)),(((Q,R),((((AE,(AC,AD)),AB),(N,(P,O))),((Y,(Z,AA)),((V,(T,U)),(W,X))))),((M,L),S))),((K,I),J))));")
  tree3 <- PectinateTree(tree1[["tip.label"]])
  trees <- structure(list(tree1, tree2, tree3), class = "multiPhylo")

  # Batch all-pairs via fast path
  batch_cid <- ClusteringInfoDistance(trees)
  m <- as.matrix(batch_cid)

  # All three pairwise CIDs must match per-pair computation
  expect_equal(m[2, 1], ClusteringInfoDistance(tree1, tree2), tolerance = 1e-10)
  expect_equal(m[3, 1], ClusteringInfoDistance(tree1, tree3), tolerance = 1e-10)
  expect_equal(m[3, 2], ClusteringInfoDistance(tree2, tree3), tolerance = 1e-10)
})
Any scripts or data that you put into this service are public.
TreeDist documentation built on June 10, 2026, 5:06 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
TreeDist
Calculate and Map Distances Between Phylogenetic Trees

tests/testthat/test-batch_coverage.R
In TreeDist: Calculate and Map Distances Between Phylogenetic Trees

Try the TreeDist package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

TreeDist Calculate and Map Distances Between Phylogenetic Trees

tests/testthat/test-batch_coverage.R In TreeDist: Calculate and Map Distances Between Phylogenetic Trees

Try the TreeDist package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

TreeDist
Calculate and Map Distances Between Phylogenetic Trees

tests/testthat/test-batch_coverage.R
In TreeDist: Calculate and Map Distances Between Phylogenetic Trees