library('TreeTools', quietly = TRUE, warn.conflicts = FALSE)
library('TreeDist')
library('TreeDistData')
# Sample-size constant; it is not referenced again in the visible part of this
# script.
nSample <- 100L
# Shape-effect data from TreeDistData; PlotMethod() reads one element of this
# object at a time via `se[[method]]`.
se <- TreeDistData::shapeEffect
# Method identifiers are taken from the names of the elements of `se`.
seMethods <- names(se)
# Number of methods for which shape-effect data are available.
nMethod <- length(seMethods)
# Measure how well the shape pair of two trees predicts their distance under
# one method, optionally drawing the result.
#
# Reads `se[[method]]` from the enclosing environment and treats each of its
# elements as the distances observed for one shape pair. A linear model
# `distance ~ shapePair` is fitted, with shape pair as a categorical predictor.
#
# Arguments:
#   method  Name (or index) of the element of `se` to analyse.
#   doPlot  If TRUE, draw two panels on the current device: a label panel
#           showing the method abbreviation and the adjusted r-squared, then
#           a plot of distance against shape pair.
#
# Returns: numeric vector of length two, c(r.squared, adj.r.squared).
PlotMethod <- function (method, doPlot = TRUE) {
  methodEffect <- se[[method]]
  dat <- data.frame(
    # One factor level per element of `methodEffect`, repeated once for each
    # distance it contains. lengths() always yields an integer vector, unlike
    # sapply(), whose return type depends on its input.
    shapePair = rep(as.factor(seq_along(methodEffect)),
                    lengths(methodEffect)),
    distance = unlist(methodEffect))
  reg <- summary(lm(distance ~ shapePair, data = dat))

  if (doPlot) {
    # Panel 1: empty canvas carrying the method label and adjusted r-squared.
    plot(type = 'n', 0, 0, xlim = c(-1, 1), ylim = c(-1, 1), axes = FALSE)
    text(0, 0.5, TreeDistData::tdAbbrevs[method], col = TreeDistCol(method))
    text(0, -0.5, signif(reg$adj.r.squared, 3))

    # Panel 2: distance by shape pair (factor vs numeric columns of `dat`).
    plot(dat, border = TreeDistCol(method), axes = FALSE)
  }
  c(reg$r.squared, reg$adj.r.squared)
}

There are four unrooted tree shapes on eight leaves:

# Draw the unrooted eight-leaf tree shapes numbered 0 to 3 side by side in a
# single row. The previous graphical parameters are kept in `origPar` so they
# can be restored later; the plot return values are captured in `XX` only to
# keep them from being printed.
origPar <- par(mfrow = c(1, 4), mar = rep(0.3, 4))
XX <- lapply(0:3, function (shape) plot(UnrootedTreeWithShape(shape, 8)))

To evaluate the extent to which trees are ranked as more or less different based on their shape, I measured how well tree distance could be predicted from the shapes of the two input trees alone.

# Shape effect per method: row 1 of `r` holds r-squared, row 2 the adjusted
# r-squared, as returned by PlotMethod().
par(xpd = NA)  # do not clip annotations to the plot region
r <- vapply(seMethods, PlotMethod, FUN.VALUE = double(2), doPlot = FALSE)
r2 <- r[1, ]

# One cross per method; room is left below zero for the method labels.
plot(r2, pch = 3, axes = FALSE, xlab = "", ylab = "",
     ylim = c(-0.5, max(r2)))

# Value labels (as percentages), rotated to read upwards from each point.
text(x = seq_along(seMethods), y = r2,
     labels = paste(" ", round(r2 * 100, 1), "%"),
     srt = 90, pos = 4, offset = 0,
     col = TreeDistCol(seMethods))

# Method abbreviations, rotated and hanging below the baseline.
text(x = seq_along(seMethods), y = -0.12,
     labels = TreeDistData::tdAbbrevs[seMethods],
     srt = 90, pos = 2, offset = 0,
     col = TreeDistCol(seMethods))

# Hand-placed axes: a vertical scale and an unlabelled baseline at y = 0.
axis(side = 2, at = seq(0, 0.6, by = 0.1), pos = -0.6)
axis(side = 1, at = c(0, length(seMethods)) + 0.5, pos = 0, labels = FALSE)
# Alternative axis title, kept for reference:
#mtext(expression('log'[10]*'(r'^2*')'), 2, 2.5)
mtext(expression("r"^2), side = 2, line = 2.5)

One aspect of tree shape is balance, which can be quantified using the Total Cophenetic Index (TCI). For 10 000 random pairs of 25-leaf trees, I calculated the tree distance under each metric, and the difference in TCI between the trees. A high correlation between TCI and a tree distance indicates that a metric allocates a low distance to trees with a similar degree of balance, regardless of the relationship information they contain.

# Balance effect per method, with missing entries removed.
sBal <- TreeDistData::balance25 # generated in balance.R
sBal <- sBal[!is.na(sBal)]
sBalMethods <- names(sBal)
nMethod <- length(sBalMethods)
par(xpd = NA)  # do not clip annotations to the plot region

# One cross per method; room is left below zero for the method labels.
plot(sBal, pch = 3, axes = FALSE, xlab = "", ylab = "",
     ylim = c(-0.5, max(sBal, na.rm = TRUE)))

# Value labels (as percentages), rotated to read upwards from each point.
text(x = seq_along(sBalMethods), y = sBal,
     labels = paste(" ", round(sBal * 100, 1), "%"),
     srt = 90, pos = 4, offset = 0,
     col = TreeDistCol(sBalMethods))

# Method abbreviations, rotated and hanging below the baseline.
text(x = seq_along(sBalMethods), y = -0.12,
     labels = TreeDistData::tdAbbrevs[sBalMethods],
     srt = 90, pos = 2, offset = 0,
     col = TreeDistCol(sBalMethods))

# Hand-placed axes: a vertical scale and an unlabelled baseline at y = 0.
axis(side = 2, at = seq(0, 0.6, by = 0.1), pos = -0.6)
axis(side = 1, at = c(0, length(sBalMethods)) + 0.5, pos = 0, labels = FALSE)
# Alternative axis title, kept for reference:
#mtext(expression('log'[10]*'(r'^2*')'), 2, 2.5)
mtext(expression("r"^2), side = 2, line = 2.5)

# Summary table: one row per shape-effect method, showing the shape-effect
# r-squared beside the balance value for the same method (NA where `sBal` has
# no entry of that name).
precision <- 4L
result <- cbind(round(r2, precision),
                round(sBal[names(r2)], precision))
rownames(result) <- TreeDistData::tdMdAbbrevs[rownames(result)]
colnames(result) <- c("8-leaf tree shape / r²", "TCI / r²")

.TDDTable(DT::datatable, result)
# Eight-colour palette; entries 1 to 4 colour the tree shapes numbered 0 to 3
# (indexed as shape + 1).
cbPalette8 <- c("#000000", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2",
                "#D55E00", "#CC79A7")

# Figure layout, eleven columns wide:
#   * two header rows: a blank corner cell (panel 1) spanning both rows, next
#     to twenty tree thumbnails (panels 2-21), ten per row;
#   * one row per method: a narrow label cell followed by a single plot that
#     spans the ten thumbnail columns.
# The body is sized from `seMethods`, the methods actually drawn below. The
# global `nMethod` is reassigned to the number of balance methods earlier in
# the script, so using it here could allocate too few panels.
header <- cbind(c(1, 1), matrix(2:21, ncol = 10, byrow = TRUE))
body <- matrix(rep(21 + seq_len(length(seMethods) * 2),
                   rep(c(1, 10), length(seMethods))),
               byrow = TRUE, ncol = 11)
layout(rbind(header, body), widths = c(2, rep(1, 10)))
par(mar = rep(0, 4L))

# Panel 1: blank corner cell.
plot(type = 'n', 0, 0, xlim = c(-1, 1), ylim = c(-1, 1), axes = FALSE)

# Shape pairs, column by column: the first header row shows `shape1`, the
# second shows `shape2`.
shape1 <- c(0,0,0,0, 1,1,1, 2,2, 3)
shape2 <- c(0,1,2,3, 1,2,3, 2,3, 3)

# Panels 2-21: one colour-coded, numbered thumbnail per shape.
xx <- lapply(c(shape1, shape2), function (shape) {
  plot(UnrootedTreeWithShape(shape, nTip = 8),
       edge.color = cbPalette8[shape + 1L])
  text(1, 9, shape + 1L, col = cbPalette8[shape + 1L])
})

# Remaining panels: PlotMethod() draws a label panel and a distance plot for
# each method. The return values are captured only to suppress printing.
XX <- lapply(seMethods, PlotMethod)
# Restore the graphical parameters saved in `origPar`.
par(origPar)


ms609/TreeDistData documentation built on May 21, 2021, 6:53 a.m.