# Packages used throughout this analysis.
library('TreeTools', quietly = TRUE, warn.conflicts = FALSE)
library('TreeDist')
library('TreeDistData')

# Pre-computed shape-effect results: a list with one entry per distance
# method, indexed by method name.
se <- TreeDistData::shapeEffect
seMethods <- names(se)
nMethod <- length(seMethods)

# NOTE(review): nSample is not used in the visible part of this file;
# presumably a sample size consumed elsewhere -- confirm.
nSample <- 100L
# Quantify, and optionally plot, how well the shapes of two trees alone
# predict the distance that one method assigns to them.
#
# Arguments:
#   method  Name of an entry in the global list `se`.  That entry holds one
#           vector of distances per pair of tree shapes.
#   doPlot  If TRUE, draw two panels on the current device: a label panel
#           showing the method's abbreviation and the adjusted r-squared,
#           followed by a plot of distance against shape pair.
#
# A linear model `distance ~ shapePair` is fitted, with the shape pair treated
# as a factor.
#
# Returns a length-two numeric vector: c(r-squared, adjusted r-squared).
# Stops with an error if `se` has no entry for `method`.
PlotMethod <- function (method, doPlot = TRUE) {
  methodEffect <- se[[method]]
  if (is.null(methodEffect)) {
    stop("No shape-effect data for method '", method, "'", call. = FALSE)
  }

  dat <- data.frame(
    # One factor level per shape pair, repeated once per observed distance.
    # lengths() always returns an integer vector, unlike sapply(x, length).
    shapePair = rep(as.factor(seq_along(methodEffect)), lengths(methodEffect)),
    distance = unlist(methodEffect))
  reg <- summary(lm(distance ~ shapePair, data = dat))

  if (doPlot) {
    # Blank panel used purely as a canvas for the text labels.
    plot(type = 'n', 0, 0, xlim = c(-1, 1), ylim = c(-1, 1), axes = FALSE)
    text(0, 0.5, TreeDistData::tdAbbrevs[method], col = TreeDistCol(method))
    text(0, -0.5, signif(reg$adj.r.squared, 3))
    # Factor x, numeric y: plot() on this data frame draws one box per pair.
    plot(dat, border = TreeDistCol(method), axes = FALSE)
  }

  c(reg$r.squared, reg$adj.r.squared)
}
There are four unrooted tree shapes on eight leaves:
# Four narrow-margined panels in a single row; par() returns the previous
# values of the settings it changes, kept here so they can be restored later.
origPar <- par(mfrow = c(1, 4), mar = rep(0.3, 4))

# Draw the eight-leaf tree for each shape index 0 to 3.  XX only captures the
# list returned by lapply() so that it is not printed.
XX <- lapply(0:3, function (shape) plot(UnrootedTreeWithShape(shape, 8)))
To assess whether metrics rank trees as more or less different because of their shape, I measured the extent to which tree distance could be predicted from the shapes of the two input trees alone.
# Clip drawing to the device region rather than the plot region, so that
# labels can be placed outside the axes.
par(xpd = NA)

# PlotMethod() returns c(r-squared, adjusted r-squared), so `r` has one column
# per method; its first row holds the unadjusted r-squared values.
r <- vapply(seMethods, PlotMethod, FUN.VALUE = double(2), doPlot = FALSE)
r2 <- r[1, ]

# One cross per method; the space below zero is reserved for method labels.
plot(r2, pch = 3, axes = FALSE, xlab = '', ylab = '',
     ylim = c(-0.5, max(r2)))

# Percentage label, rotated to read upwards from each point.
text(x = seq_along(seMethods), y = r2,
     labels = paste(' ', round(r2 * 100, 1), '%'),
     col = TreeDistCol(seMethods), srt = 90, pos = 4, offset = 0)

# Method abbreviation, rotated and hanging below the baseline.
text(x = seq_along(seMethods), y = -0.12,
     labels = TreeDistData::tdAbbrevs[seMethods],
     col = TreeDistCol(seMethods), srt = 90, pos = 2, offset = 0)

axis(side = 2, at = seq(from = 0, to = 0.6, by = 0.1), pos = -0.6)
# Unlabelled baseline spanning all methods.
axis(side = 1, at = c(0, length(seMethods)) + 0.5, pos = 0, labels = FALSE)

# Alternative axis label, kept for reference:
#mtext(expression('log'[10]*'(r'^2*')'), 2, 2.5)
mtext(text = expression('r'^2), side = 2, line = 2.5)
One aspect of tree shape is balance, which can be quantified using the Total Cophenetic Index (TCI). For 10 000 random pairs of 25-leaf trees, I calculated the tree distance under each metric, and the difference in TCI between the trees. A high correlation between TCI and a tree distance indicates that a metric allocates a low distance to trees with a similar degree of balance, regardless of the relationship information they contain.
# Per-method values relating tree distance to difference in balance.
sBal <- TreeDistData::balance25 # generated in balance.R
# Drop methods for which no value is available.
sBal <- sBal[!is.na(sBal)]
sBalMethods <- names(sBal)
nMethod <- length(sBalMethods)

# Clip drawing to the device region rather than the plot region, so that
# labels can be placed outside the axes.
par(xpd = NA)

# One cross per method; the space below zero is reserved for method labels.
plot(sBal, pch = 3, axes = FALSE, xlab = '', ylab = '',
     ylim = c(-0.5, max(sBal, na.rm = TRUE)))

# Percentage label, rotated to read upwards from each point.
text(x = seq_along(sBalMethods), y = sBal,
     labels = paste(' ', round(sBal * 100, 1), '%'),
     col = TreeDistCol(sBalMethods), srt = 90, pos = 4, offset = 0)

# Method abbreviation, rotated and hanging below the baseline.
text(x = seq_along(sBalMethods), y = -0.12,
     labels = TreeDistData::tdAbbrevs[sBalMethods],
     col = TreeDistCol(sBalMethods), srt = 90, pos = 2, offset = 0)

axis(side = 2, at = seq(from = 0, to = 0.6, by = 0.1), pos = -0.6)
# Unlabelled baseline spanning all methods.
axis(side = 1, at = c(0, length(sBalMethods)) + 0.5, pos = 0, labels = FALSE)

# Alternative axis label, kept for reference:
#mtext(expression('log'[10]*'(r'^2*')'), 2, 2.5)
mtext(text = expression('r'^2), side = 2, line = 2.5)
# Number of decimal places shown in the summary table.
precision <- 4L

# One row per method in r2; the second column looks the same methods up in
# sBal, giving NA where sBal has no entry for a method.
result <- round(cbind(r2, sBal[names(r2)]), digits = precision)

# Replace method identifiers and column names with display labels.
dimnames(result) <- list(
  TreeDistData::tdMdAbbrevs[rownames(result)],
  c('8-leaf tree shape / r²', 'TCI / r²'))

.TDDTable(DT::datatable, result)
# Eight-colour palette; entry (shape + 1) is used for the tree of that shape.
cbPalette8 <- c("#000000", "#E69F00", "#56B4E9", "#009E73",
                "#F0E442", "#0072B2", "#D55E00", "#CC79A7")

# Header: panel 1 spans both rows of the first (double-width) column; panels
# 2-21 form a 2 x 10 grid holding the two trees of each of the ten shape pairs.
header <- cbind(c(1, 1), matrix(2:21, ncol = 10, byrow = TRUE))

# Body: one row per method, made of a label panel followed by a panel that
# spans the ten shape-pair columns.  PlotMethod() draws exactly these two
# panels per call.  The row count is derived from seMethods here, because the
# panels below are drawn per entry of seMethods and the global nMethod may
# have been reassigned to a different length since it was first set.
nMethod <- length(seMethods)
body <- matrix(rep(21 + seq_len(nMethod * 2), rep(c(1, 10), nMethod)),
               byrow = TRUE, ncol = 11)

layout(rbind(header, body), widths = c(2, rep(1, 10)))
par(mar = rep(0, 4L))

# Panel 1 is left blank.
plot(type = 'n', 0, 0, xlim = c(-1, 1), ylim = c(-1, 1), axes = FALSE)

# The ten unordered pairs of the four shapes: column i of the header shows
# shape1[i] in the top row and shape2[i] in the bottom row.
shape1 <- c(0,0,0,0, 1,1,1, 2,2, 3)
shape2 <- c(0,1,2,3, 1,2,3, 2,3, 3)

# Draw each tree in its shape's colour and label it with its 1-based shape
# number.  The result is assigned only so that it is not printed.
xx <- lapply(c(shape1, shape2), function (shape) {
  plot(UnrootedTreeWithShape(shape, nTip = 8),
       edge.color = cbPalette8[shape + 1L])
  text(1, 9, shape + 1L, col = cbPalette8[shape + 1L])
})

# Fill the body: two panels per method.  Assigned only to suppress printing.
XX <- lapply(seMethods, PlotMethod)

# Restore the graphical parameters saved in origPar.
par(origPar)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.