The following document presents some results from testing the new ape::read.tree function from the ape package version r packageVersion("ape")
. The first test consists basically on reading a tree with a singleton, which is a new feature of the package. The second tests compares the performance in terms of time that ape::read.tree takes relative to rncl::read_newick_phylo which has show to be significantly faster.
The data used here is the PANTHER database version 11.1 (which you can get here). You can download a more recent version from the pantherdb website.
We use the microbenchmark R package together with the ape and rncl R packages
library(microbenchmark) knitr::opts_chunk$set(echo=TRUE, message = FALSE, warning = FALSE)
# Listing panther trees tree.files <- list.files( "~/PANTHER11.1/books/", pattern = "tree.tree", recursive=TRUE, full.names = TRUE)
This is a tree that I know for sure that has a singleton. Previous version of ape (before 4.1-0.14) returned with an error.
a_tree_with_singleton <- " ((((((((((((((AN14:0.000,AN15:0.000):0.000[&&NHX:Ev=0>1:S=Homo-Pan:ID=AN13],(AN17:0.000,AN18:0.000):0.000[&&NHX:Ev=0>1:S=Murinae:ID=AN16]):0.000[&&NHX:Ev=0>1:S=Euarchontoglires:ID=AN12],(AN20:0.000,AN21:0.000):0.000[&&NHX:Ev=0>1:S=Laurasiatheria:ID=AN19]):0.000[&&NHX:Ev=0>1:S=Eutheria:ID=AN11],AN22:0.004):0.003[&&NHX:Ev=0>1:S=Mammalia:ID=AN10],AN23:0.000):0.014[&&NHX:Ev=0>1:S=Amniota:ID=AN9],AN24:0.017):0.021[&&NHX:Ev=0>1:S=Tetrapoda:ID=AN8],((AN27:0.066,AN28:0.052):0.010[&&NHX:Ev=1>0:ID=AN26],(AN30:0.031,AN31:0.014):0.003[&&NHX:Ev=1>0:ID=AN29]):0.021[&&NHX:Ev=0>1:S=Teleostei:ID=AN25]):0.147[&&NHX:Ev=0>1:S=Osteichthyes:ID=AN7],AN32:0.194):0.104[&&NHX:Ev=0>1:S=Deuterostomia:ID=AN6],((AN35:0.020,AN36:0.038):0.416[&&NHX:Ev=0>1:S=Caenorhabditis:ID=AN34],((AN39:0.119,AN40:0.131):0.154[&&NHX:Ev=0>1:S=Insecta:ID=AN38],AN41:0.317):0.104[&&NHX:Ev=0>1:S=Arthropoda:ID=AN37]):0.118[&&NHX:Ev=0>1:S=Ecdysozoa:ID=AN33],AN42:0.381):0.594[&&NHX:Ev=0>1:S=Bilateria:ID=AN5],AN43:2.000):0.414[&&NHX:Ev=0>1:S=Eumetazoa:ID=AN4],((((((((AN52:0.484,AN53:0.427):0.418[&&NHX:Ev=0>1:S=Saccharomycetaceae:ID=AN51],AN54:0.733):0.320[&&NHX:Ev=0>1:S=Saccharomycetaceae-Candida:ID=AN50],AN55:0.808):0.290[&&NHX:Ev=0>1:S=Saccharomycetales:ID=AN49],(AN57:0.793,((AN60:0.206,AN61:0.186):0.143[&&NHX:Ev=0>1:S=Sordariomyceta:ID=AN59],AN62:0.331):0.332[&&NHX:Ev=0>1:S=Sordariomycetes-Leotiomycetes:ID=AN58]):0.467[&&NHX:Ev=0>1:S=Pezizomycotina:ID=AN56]):0.432[&&NHX:Ev=0>1:S=Pezizomycotina-Saccharomycotina:ID=AN48],AN63:0.864):0.223[&&NHX:Ev=0>1:S=Ascomycota:ID=AN47],(AN65:0.613,AN66:0.930,AN67:0.755):0.332[&&NHX:Ev=0>1:S=Basidiomycota:ID=AN64]):0.618[&&NHX:Ev=0>1:S=Dikarya:ID=AN46],((((((AN74:0.337,AN75:0.406):0.298[&&NHX:Ev=0>1:S=Saccharomycetaceae:ID=AN73],AN76:0.578):0.338[&&NHX:Ev=0>1:S=Saccharomycetaceae-Candida:ID=AN72],AN77:0.567):0.304[&&NHX:Ev=0>1:S=Saccharomycetales:ID=AN71],(AN79:0.379,((AN82:0.398,AN83:0.403):0.191[&&NHX:Ev=0>1:S=Sordariomyceta:ID=AN81],AN84:0.366):0.140[&&NHX:Ev=0>1:S=Sordariomycetes-Leotiomycetes:ID=AN80]):0.457[&&NHX:Ev=0>1:S=Pezizomycotina:ID=AN78]):0.331[&&NHX:Ev=0>1:S=Pezizomycotina-Saccharomycotina:ID=AN70],AN85:0.737):0.128[&&NHX:Ev=0>1:S=Ascomycota:ID=AN69],(AN87:0.599,AN88:0.698):0.161[&&NHX:Ev=0>1:S=Basidiomycota:ID=AN86]):0.398[&&NHX:Ev=0>1:S=Dikarya:ID=AN68]):0.117[&&NHX:Ev=1>0:ID=AN45],(AN90:0.984,AN91:0.893):0.158[&&NHX:Ev=1>0:ID=AN89]):0.262[&&NHX:Ev=0>1:S=Fungi:ID=AN44]):0.394[&&NHX:Ev=0>1:S=Opisthokonts:ID=AN3],(AN93:0.457,AN94:0.462):0.860[&&NHX:Ev=0>1:S=Dictyostelium:ID=AN92]):0.301[&&NHX:Ev=0>1:S=Unikonts:ID=AN2],AN95:1.440,AN96:2.000):2.000[&&NHX:Ev=0>1:S=Eukaryota:ID=AN1])[&&NHX:Ev=1>0:ID=AN0]; " # Reading using the ape 4.1-0.14 ----------------------------------------------- ans_ape <- ape::read.tree(text=a_tree_with_singleton) # Reading using rncl 0.8.2 ----------------------------------------------------- # For some weird reason, rncl::read_newick_phylo prints a progress bar when # the document is been knitted. tmptree <- tempfile() cat(a_tree_with_singleton, file=tmptree) invisible(capture.output( ans_rncl <- rncl::read_newick_phylo(tmptree) )) file.remove(tmptree) # APE should have an extra node ans_ape ans_rncl # Which is a singleton! ape::has.singles(ans_ape) ape::has.singles(ans_rncl)
For this benchmark, we will read all r length(tree.files)
phylogenetic trees available in PANTHER 11.1
# A function to (partially) read PANTHER trees read_panther <- function(x, f) { x <- readLines(x, n=1L) tmptree <- tempfile() cat(x, file=tmptree) ans <- f(tmptree) file.remove(tmptree) ans } # Checking how it works with 100 samples set.seed(1984) n <- length(tree.files) # 2000 tree.samples <- sample(tree.files, n) library(parallel) cl <- makeForkCluster(10) ans <- parLapply(cl, tree.samples, function(tree) { microbenchmark( ape = read_panther(tree, ape::read.tree), rncl = read_panther(tree, rncl::read_newick_phylo), times = 1, unit = "ms" ) }) stopCluster(cl) tab <- lapply(ans, function(x) { # A bad way of reshaping the data... with(x, data.frame( ape = time[expr == "ape"], nrcl = time[expr == "rncl"] )) }) tab <- do.call(rbind, tab)
oldpar <- par(no.readonly = TRUE) par(las = 2) boxplot(tab/1e6, log = "y", ylab = "Time in Milliseconds (log scale)", main = "Distribution of reading times reading\nall the trees from PANTHER 11.1", sub = sprintf("Each boxplot represents elapsed times on reading %i trees.", length(tree.files)) ) par(oldpar)
While ape
seems to be faster, it is not significantly faster than rncl
.
devtools::session_info()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.