library('phangorn') library('TreeDistData') origPar <- par() data("bullseyeMorphScores", package = 'TreeDistData') data("bullMoDiScores", package = 'TreeDistData') data("tdMethods", package = 'TreeDistData') data("tdAbbrevs", package = 'TreeDistData') lwd <- 1L semitransparency <- '44' SpearmanRho <- function (y) { x <- seq_along(y) r <- length(y) d <- sum((x - rank(y)) ^ 2) # Return: rho <- 1 - (6 * d / (r * (r ^ 2 - 1))) # CI calculation: https://stats.stackexchange.com/questions/18887 # 1.06 from 10.1093/biomet/64.3.645 # ci <- tanh(atanh(rho) + (c(upper = 1, lower = -1) * 1.06 / sqrt(r-3))) # c(rho = rho, ci) } Spear <- function (scoreList) { vapply(scoreList, function (allScores) { allScores[, c('mast', 'masti'), ] <- -allScores[, c('mast', 'masti'), ] rhos <- apply(allScores, 3L, function (scores) { apply(scores, 2, SpearmanRho) }) rbind(mean = rowMeans(rhos), 'Std. Err.' = apply(rhos, 1, sd) / sqrt(dim(rhos)[2])) }, matrix(0, nrow = 2, ncol = dim(scoreList[[1]])[2])) } # Note the surprising performance of rf Successes <- function (scores, gap) { rowSums(scores[(10 - gap):1, ] < scores[10:(gap + 1), ]) } BinomCI <- function (scores, nScores, gap) { vapply(Successes(scores, gap), binom::binom.lrt, n = nScores, binom::binom.lrt(1,2)) } MethodLines <- function (method, allScores, gap = 1L, nScores = dim(allScores)[3]) { if (!is.na(TreeDistCol(method))) { ci <- BinomCI(allScores[, method, ], nScores = nScores, gap = gap) datasets <- as.integer(colnames(ci)) lines(datasets, as.double(ci['mean', ]), lwd = lwd, lty = 1, col = TreeDistCol(method)) pointOrder <- seq_along(datasets) polygon(datasets[c(pointOrder, rev(pointOrder))], c(ci['upper', ], rev(ci['lower', ])), col = paste0(TreeDistCol(method), semitransparency), border = NA) } } MethodLegend <- function (methods) { legend('topleft', rep('', length(methods)), col = TreeDistCol(methods, 88), lwd = 4, lty = 1, bty='n') legend('topleft', tdAbbrevs[methods], col = TreeDistCol(methods), lwd = lwd, lty = 1, bty='n') } PlotRanks <- function (scoreList, methods, plotTitle = '', yLab = "Spearman's rho") { rhos <- Spear(scoreList) tipCounts <- dimnames(rhos)[[3]] nTip <- as.integer(substr(tipCounts, 0, nchar(tipCounts) - 7L)) plot(0, 0, type = 'n', xlim = range(nTip), ylim = c(0.3, 1), xlab = if(plotTitle == '') 'Number of leaves' else '', ylab = yLab, axes = FALSE) title(plotTitle, font.main = 1, line = 1) axis(1, c(5, 10, 20, 50)) axis(2) lapply(methods, function (method) { if (!is.na(TreeDistCol(method))) { rhoMeans <- rhos['mean', method, ] rhoSE <- rhos['Std. Err.', method, ] lines(nTip, rhoMeans, lwd = lwd, col = TreeDistCol(method)) polygon(c(nTip, rev(nTip)), c(rhoMeans + rhoSE, rev(rhoMeans - rhoSE)), col = paste0(TreeDistCol(method), semitransparency), border = NA ) } }) invisible() } YLabel <- function (lastLegend) { ifelse(lastLegend %% 5, '', 'Success rate') } RowTitle <- function (words, font = 2, cex = 1) { oPar <- par(mar = rep(0, 4), cex = cex) plot(0, 0, type='n', axes=FALSE, xlab='', ylab=NA) text(0, 0, words, font = font) par(oPar) }
PlotFunc <- function() { layout(matrix(c(rep(1, 5), t(cbind(matrix(2:9, 2), c(19, 21))), rep(10, 5), t(cbind(matrix(11:18, 2), c(20, 22)))), byrow = T, nrow = 6), heights = c(1, 6, 7, 1, 6, 7)) marTop <- c(1, 2.5, 1.5, 0) marBtm <- c(3, 2.5, 1.5, 0) par(mar = marTop, oma = c(0, 0, 0, 0.3), xpd = NA, mgp = c(1.5, 0.5, 0)) lastLegend <- 0 RowTitle('Subsampling experiment') gap <- 2L firstMethods <- c('cid', 'pid', 'msid', 'qd', 'icrf', 'jnc2', 'jco4', 'nye', 'rf') # NB: All NNI estimates plot very close together. # MAST * MASTI are indistinguishable secondMethods <- c('cid', 'ms', 'path', 'kc', 'es', 'mast', 'tbr_u', 'tbr_l', 'spr', 'nni_l') notPlotted <- colnames(bullseyeMorphScores[[1]])[ !colnames(bullseyeMorphScores[[1]]) %in% c(firstMethods, secondMethods)] #cat(notPlotted) for (nTip in names(bullseyeMorphScores)) { par(mar = marTop) plot(0, 0, type='n', xlim = c((1L + gap) * 200, 2000), ylim = c(0, 1), axes = FALSE, xlab = '', #xlab = 'Size of larger dataset (bp)', ylab = YLabel(lastLegend)) title(paste0('(', letters[lastLegend <- lastLegend + 1L], ') ', nTip), font.main = 1, line = 1) axis(1) axis(2) allScores <- bullseyeMorphScores[[nTip]] allScores[, c('mast', 'masti'), ] <- -allScores[, c('mast', 'masti'), ] lapply(firstMethods, MethodLines, allScores = allScores, gap = gap) -> XX if (nTip == '5 leaves') MethodLegend(firstMethods) if (nTip == '10 leaves') legend('topleft', c('Mean', 'Std. Err.'), col=paste0('#000000', c('', semitransparency)), lwd = c(1, 4), lty = 1, bty='n', inset=c(0, 0)) par(mar = marBtm) plot(0, 0, type='n', xlim = c((1L + gap) * 200, 2000), ylim = c(0, 1), axes = FALSE, xlab = 'Size of larger dataset (bp)', ylab = YLabel(lastLegend - 1L)) axis(1) axis(2) lapply(secondMethods, MethodLines, allScores = allScores, gap = gap) -> XX if (nTip == '5 leaves') MethodLegend(secondMethods) } lastLegend <- lastLegend + 1L # For Accuracy panels RowTitle('Miscoding experiment') gap <- 1L for (nTip in names(bullMoDiScores)) { par(mar = marTop) plot(0, 0, type = 'n', xlim = c(as.integer(rownames(bullMoDiScores[[1]])[10 - gap]), 0), #xlab = '% errors in better dataset', xlab = '', axes = FALSE, ylim = c(0, 1), ylab = YLabel(lastLegend)) title(paste0('(', letters[lastLegend <- lastLegend + 1L], ') ', nTip), font.main = 1, line = 1) axis(1) axis(2) allScores <- bullMoDiScores[[nTip]] allScores[, c('mast', 'masti'), ] <- -allScores[, c('mast', 'masti'), ] lapply(firstMethods, MethodLines, allScores = allScores, gap = gap) -> XX par(mar = marBtm) plot(0, 0, type = 'n', xlim = c(as.integer(rownames(bullMoDiScores[[1]])[10 - gap]), 0), xlab = '% errors in better dataset', axes = FALSE, ylim = c(0, 1), ylab = YLabel(lastLegend - 1L)) axis(1) axis(2) lapply(secondMethods, MethodLines, allScores = allScores, gap = gap) -> XX } #RowTitle('Accuracy of ranking') #RowTitle('(e) Subsampling experiment', 0, 0.8) #RowTitle('(j) Miscoding experiment', 0, 0.8) #marTop <- c(1, 2.5, 1.5, 0) par(mar = c(1, 4.5, 1.5, 0)) PlotRanks(bullseyeMorphScores, firstMethods, '(e) Ranking accuracy .') PlotRanks(bullMoDiScores, firstMethods, '(j) Ranking accuracy .') #marBtm <- c(3, 2.5, 1.5, 0) par(mar = c(3, 4.5, 1.5, 0)) PlotRanks(bullseyeMorphScores, secondMethods, '') PlotRanks(bullMoDiScores, secondMethods, '') } PlotFunc() suppressWarnings(par(origPar))
Figure 3. Results of 'bullseye' test. (a–d), success rate for each tree distance metric when comparing pairs of datasets differing in size by 400 base pairs; (f–i), success rates when comparing pairs of datasets differing by 2% in proportion of erroneous tokens; (e, j), accuracy of each method in ranking trees inferred from progressively degraded datasets according to the degree of dataset degradation.
bA <- bullseyeMorphScores[['50 leaves']]['1400', , ] bB <- bullseyeMorphScores[['50 leaves']]['1000', , ] bC <- bullMoDiScores[['50 leaves']]['4', , ] bD <- bullMoDiScores[['50 leaves']]['8', , ] invert <- c('mast', 'masti') bA[invert, ] <- -bA[invert, ] bB[invert, ] <- -bB[invert, ] bC[invert, ] <- -bC[invert, ] bD[invert, ] <- -bD[invert, ] ret <- cbind(subsampling50 = rowSums(bA < bB), miscoding50 = rowSums(bC < bD), subsamplingAccuracy = round(Spear(bullseyeMorphScores)['mean', , '50 leaves'], 3), miscodingAccuracy = round(Spear(bullMoDiScores)['mean', , '50 leaves'], 3) ) # nni_t contains NAs, which are not ignored, so values are misleading ret <- ret[!rownames(ret) %in% 'nni_t', ] dimnames(ret) <- list(tdMdAbbrevs[rownames(ret)], c("Subsampling: Success rate (50\U00A0leaves)", "Miscoding: Success rate (50\U00A0leaves)", "Subsampling: Accuracy (\U03C1)", "Miscoding: Accuracy (\U03C1)")) .TDDTable(DT::datatable, ret)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.