R/dist.topo.R
In ape: Analyses of Phylogenetics and Evolution

Documented in boot.phylo .compressTipLabel consensus dist.topo ONEwise plot.prop.part postprocess.prop.part print.prop.part prop.clades prop.part SHORTwise summary.prop.part

## dist.topo.R (2023-09-08)

##      Topological Distances, Tree Bipartitions,
##   Consensus Trees, and Bootstrapping Phylogenies

## Copyright 2005-2023 Emmanuel Paradis, 2016-2021 Klaus Schliep

## This file is part of the R-package `ape'.
## See the file ../COPYING for licensing issues.

## Collect the trees passed through '...' into a single list.
## If exactly one argument is given and it is not of class "phylo" (e.g., a
## list of trees or a "multiPhylo" object), this argument itself is returned;
## otherwise the list of all arguments is returned.
.getTreesFromDotdotdot <- function(...)
{
    args <- list(...)
    if (length(args) != 1) return(args)
    first <- args[[1]]
    if (inherits(first, "phylo")) args else first
}

## Pairwise topological distances among trees.
##
## x, y:     trees; if 'y' is not NULL it is appended to 'x' with c().
## method:   "PH85" (number of bipartitions not shared by two trees) or
##           "score" (branch score distance, needs branch lengths).
## mc.cores: if > 1, the bipartitions and the pairwise comparisons are
##           computed with mclapply().
##
## Returns an object of class "dist" with the extra attribute "method".
## With a single tree, an empty "dist" object of size 1 is returned.
dist.topo <- function(x, y = NULL, method = "PH85", mc.cores = 1)
{
    method <- match.arg(method, c("PH85", "score"))
    if (!is.null(y)) x <- c(x, y)
    testroot <- any(is.rooted(x))
    n <- length(x) # number of trees
    nms <- names(x)
    if (is.null(nms)) nms <- paste0("tree", seq_len(n))
    ## index of the first tree of each pair; it is empty if there are less
    ## than two trees (1:(n - 1) would count backwards in this case)
    first <- seq_len(max(n - 1, 0))

    if (method == "PH85") {
        if (testroot)
            warning("Some trees were rooted: topological distances may be spurious.")

        x <- .compressTipLabel(x)
        ntip <- length(attr(x, "TipLabel"))
        nnode <- sapply(x, Nnode)

        ## returns the bipartitions of a tree coded as character strings
        ## (one string for each bipartition)
        foo <- function(phy, ntip) {
            phy <- reorder(phy, "postorder")
            pp <- bipartition2(phy$edge, ntip)
            attr(pp, "labels") <- phy$tip.label
            ans <- SHORTwise(pp)
            sapply(ans, paste, collapse = "\r")
        }

        if (mc.cores > 1) {
            x <- mclapply(x, foo, ntip = ntip, mc.cores = mc.cores)
            ## distances between tree 'i' and trees i+1, ..., n
            bar <- function(i) {
                y <- x[[i]]
                m1 <- nnode[i]
                res_sub <- numeric(n - i)
                for (j in (i + 1):n) {
                    z <- x[[j]]
                    res_sub[j - i] <- m1 + nnode[j] - 2 * sum(z %in% y)
                }
                res_sub
            }
            res_list <- mclapply(first, bar, mc.cores = mc.cores)
            res <- unlist(res_list)
        } else {
            x <- lapply(x, foo, ntip = ntip)
            k <- 0L
            res <- numeric(n * (n - 1) /2)
            for (i in first) {
                y <- x[[i]]
                m1 <- nnode[i]
                for (j in (i + 1):n) {
                    z <- x[[j]]
                    k <- k + 1L
                    res[k] <- m1 + nnode[j] - 2 * sum(z %in% y)
                }
            }
        }
    } else { # method == "score"
        NTIP <- Ntip(x)
        x <- unroot(x)

        ## returns the bipartitions of a tree as a list of sorted vectors
        ## of tip labels
        fooscore <- function(phy) {
            if (is.null(phy$edge.length))
                stop("trees must have branch lengths for the branch score distance.")
            ntip <- length(phy$tip.label)
            phy <- reorder.phylo(phy, "postorder")
            bp <- bipartition2(phy$edge, ntip)
            lapply(bp, function(x) sort(phy$tip.label[x]))
        }

        if (mc.cores > 1) {
            BP <- mclapply(x, fooscore, mc.cores = mc.cores)
            ## distances between tree 'i' and trees i+1, ..., n
            bar <- function(i) {
                tr <- x[[i]]
                bp <- BP[[i]]
                nx <- NTIP[i]
                res_sub <- numeric(n - i)
                for (j in (i + 1):n)
                    res_sub[j - i] <- .dist.topo.score(tr, x[[j]], nx, NTIP[j], bp, BP[[j]])
                res_sub
            }
            res_list <- mclapply(first, bar, mc.cores = mc.cores)
            res <- unlist(res_list)
        } else {
            BP <- lapply(x, fooscore)
            k <- 0L
            res <- numeric(n * (n - 1) /2)
            for (i in first) {
                tr <- x[[i]]
                bp <- BP[[i]]
                nx <- NTIP[i]
                for (j in (i + 1):n) {
                    k <- k + 1L
                    res[k] <- .dist.topo.score(tr, x[[j]], nx, NTIP[j], bp, BP[[j]])
                }
            }
        }
    }

    ## unlist() of an empty list is NULL and attributes cannot be set on NULL
    if (is.null(res)) res <- numeric(0)

    attr(res, "Size") <- n
    attr(res, "Labels") <- nms
    attr(res, "Diag") <- attr(res, "Upper") <- FALSE
    attr(res, "method") <- method
    class(res) <- "dist"
    res
}

## Branch score distance between two trees.
##
## x, y:     the two trees (with $edge, $edge.length and $tip.label).
## nx, ny:   their numbers of tips.
## bp1, bp2: their bipartitions as lists of sorted vectors of tip labels;
##           the i-th element is taken to be the clade below node n + i,
##           and the first element is skipped in the comparisons.
##
## The distance is the square root of the sum of the squared differences
## of the lengths of the internal branches; a branch found in only one tree
## contributes its squared length.
.dist.topo.score <- function(x, y, nx, ny, bp1, bp2)
{
    ## ny <- length(y$tip.label) # fix by Otto Cordero
    ## fix by Tim Wallstrom:
    ## complements of the bipartitions of 'y' (a split may be coded by
    ## either of its two sides)
    bp2.comp <- lapply(bp2, function(a) sort(y$tip.label[is.na(match(y$tip.label, a))]))
    ## End
    q1 <- length(bp1)
    q2 <- length(bp2)

    xe2 <- x$edge[, 2]
    ye2 <- y$edge[, 2]
    xel <- x$edge.length
    yel <- y$edge.length

    dT <- 0
    found2 <- logical(q2)
    found2[1] <- TRUE # the first bipartition is never compared
    ## seq_len(q)[-1] is empty if a tree has a single bipartition (e.g., a
    ## star tree) whereas 2:q would count backwards (2, 1)
    for (i in seq_len(q1)[-1]) {
        matched <- FALSE
        for (j in seq_len(q2)[-1]) {
            if (identical(bp1[[i]], bp2[[j]]) || identical(bp1[[i]], bp2.comp[[j]])) {
                dT <- dT + (xel[which(xe2 == nx + i)] - yel[which(ye2 == ny + j)])^2
                matched <- found2[j] <- TRUE
                break
            }
        }
        ## branch of 'x' without equivalent in 'y'
        if (!matched) dT <- dT + (xel[which(xe2 == nx + i)])^2
    }
    ## branches of 'y' without equivalent in 'x'
    if (!all(found2))
        dT <- dT + sum((yel[ye2 %in% (ny + which(!found2))])^2)
    sqrt(dT)
}

## Compress the tip labels of a list of trees.
##
## x:   a list of objects of class "phylo" possibly with no class.
## ref: the reference vector of tip labels; by default the labels of the
##      first tree.
##
## The tips of each tree are renumbered (second column of $edge) so that
## they follow the order of 'ref', and $tip.label is removed from each tree.
## The value is of class "multiPhylo" with 'ref' stored in the attribute
## "TipLabel". If 'x' already has this attribute, it is returned unchanged.
## An error is returned if the trees do not have the same set of (unique)
## tip labels.
.compressTipLabel <- function(x, ref = NULL)
{
    ## 'x' is a list of objects of class "phylo" possibly with no class
    if (!is.null(attr(x, "TipLabel"))) return(x)
    if (is.null(ref)) ref <- x[[1]]$tip.label
    n <- length(ref)
    if (anyDuplicated(ref))
        stop("some tip labels are duplicated in tree no. 1")

    ## serious improvement by Joseph W. Brown!
    relabel <- function (y) {
        label <- y$tip.label
        if (!identical(label, ref)) {
            if (length(label) != n)
                stop("one tree has a different number of tips")
            ilab <- match(label, ref)
            if (anyNA(ilab))
                stop("one tree has different tip labels")
            ## with duplicated labels, two tips would get the same number
            ## and the tree would be silently corrupted
            if (anyDuplicated(ilab))
                stop("one tree has duplicated tip labels")
            ## rows of 'edge' of the terminal branches, in tip order
            ie <- match(seq_len(n), y$edge[, 2])
            y$edge[ie, 2] <- ilab
        }
        y$tip.label <- NULL
        y
    }
    x <- unclass(x) # another killer improvement by Tucson's hackathon (1/2/2013)
    x <- lapply(x, relabel)
    attr(x, "TipLabel") <- ref
    class(x) <- "multiPhylo"
    x
}

## Bipartitions (clades) observed in a series of trees.
##
## ...:          the trees: either several objects of class "phylo", or a
##               single list of trees.
## check.labels: whether to match the tip labels among trees (done with
##               .compressTipLabel); ignored if there is a single tree.
##
## Returns the value of prop_part2() with the tip labels of the first tree
## stored in the attribute "labels".
prop.part <- function(..., check.labels = TRUE)
{
    obj <- .getTreesFromDotdotdot(...)
    ntree <- length(obj)
    ## 1:ntree would give c(1, 0) and a cryptic subscript error
    if (ntree == 0) stop("no tree was provided")
    if (ntree == 1) check.labels <- FALSE
    if (check.labels) obj <- .compressTipLabel(obj) # fix by Klaus Schliep (2011-02-21)
    class(obj) <- NULL # fix by Klaus Schliep (2014-03-06)
    for (i in seq_len(ntree)) storage.mode(obj[[i]]$Nnode) <- "integer"
    class(obj) <- "multiPhylo"
    obj <- reorder(obj, "postorder")
# the following line should not be necessary any more
#    obj <- .uncompressTipLabel(obj) # fix a bug (2010-11-18)
    nTips <- length(obj[[1]]$tip.label)
    clades <- prop_part2(obj, nTips)
    attr(clades, "labels") <- obj[[1]]$tip.label
    clades
}

## Print method for objects of class "prop.part".
## If the object has the attribute "labels", the numbered list of labels is
## printed first, followed by an empty line. Then each partition is printed
## after its frequency (attribute "number"). Nothing is printed for an empty
## object. Returns NULL invisibly.
print.prop.part <- function(x, ...)
{
    labs <- attr(x, "labels")
    if (!is.null(labs)) {
        for (i in seq_along(labs))
            cat(i, ": ", labs[i], "\n", sep = "")
        cat("\n")
    }
    nb <- attr(x, "number")
    ## seq_along() so that an empty object does not fail on x[[1]]
    for (i in seq_along(x)) {
        cat("==>", nb[i], "time(s):")
        print(x[[i]], quote = FALSE)
    }
    invisible(NULL)
}

## Summary method for objects of class "prop.part": returns the attribute
## "number" of the object (NULL if it has none).
summary.prop.part <- function(object, ...)
{
    attr(object, "number")
}

## Plot method for objects of class "prop.part".
## The top panel shows the frequency of each partition (attribute "number")
## as vertical bars coloured with 'barcol'; the bottom panel is an image
## with one column for each partition and one row for each tip, where the
## tips included in a partition are coloured with 'col'. 'leftmar' is the
## left margin of the top panel and '...' is passed to plot().
## The object must have the attribute "labels".
plot.prop.part <- function(x, barcol = "blue", leftmar = 4, col = "red", ...)
{
    tiplabs <- attr(x, "labels")
    if (is.null(tiplabs))
      stop("cannot plot this partition object; see ?prop.part for details.")
    npart <- length(x)
    ntip <- length(tiplabs)
    parts <- seq_len(npart)
    tips <- 1:ntip

    ## two panels on top of each other, the bottom one three times taller
    layout(matrix(1:2, 2, 1), heights = c(1, 3))
    par(mar = c(0.1, leftmar, 0.1, 0.1))

    ## top panel: frequencies
    plot(parts - 0.5, attr(x, "number"), type = "h", col = barcol,
         xlim = c(0, npart), xaxs = "i", xlab = "", ylab = "Frequency",
         xaxt = "n", bty = "n", ...)

    ## bottom panel: membership matrix (partitions as rows, tips as columns)
    member <- matrix(0L, npart, ntip)
    for (k in parts) member[k, x[[k]]] <- 1L
    image.default(parts, tips, member, col = c("white", col),
                  xlab = "", ylab = "", yaxt = "n")
    mtext(tiplabs, side = 2, at = tips, las = 1)
}

### by Klaus (2016-03-23):
## Number of times the clades (or the splits if 'rooted = FALSE') of 'phy'
## are observed in a series of trees.
##
## phy:    the reference tree.
## ...:    the trees to be compared with 'phy'; used only if 'part' is NULL.
## part:   an object of class "prop.part" to be used instead of '...'.
## rooted: if FALSE, the partitions are compared as splits (both 'phy' and
##         'part' are processed with SHORTwise).
##
## Returns a numeric vector of length phy$Nnode with NA for the partitions
## of 'phy' which are not found.
prop.clades <- function(phy, ..., part = NULL, rooted = FALSE)
{
    if (is.null(part)) {
        trees <- .getTreesFromDotdotdot(...)
        ## avoid double counting of edges if trees are rooted
        if (!rooted) trees <- lapply(trees, unroot)
        part <- prop.part(trees, check.labels = TRUE)
    }

    ## renumber the tips of 'phy' so that they match the labels of 'part'
    ref <- attr(part, "labels")
    if (!identical(phy$tip.label, ref)) {
        newnum <- match(phy$tip.label, ref)
        tiprow <- match(seq_len(Ntip(phy)), phy$edge[, 2])
        phy$edge[tiprow, 2] <- newnum
        phy$tip.label <- ref
    }

    bp <- prop.part(phy)
    if (!rooted) {
        ## avoid messing up the order and length if phy is rooted in some cases
        bp <- SHORTwise(bp)
        part <- postprocess.prop.part(part, "SHORTwise")
    }

    where <- match(bp, part)
    found <- which(!is.na(where))
    freq <- attr(part, "number")
    res <- rep(NA_real_, phy$Nnode)
    res[found] <- freq[where[found]]
    res
}

## Bootstrap of a phylogeny.
##
## phy:      the tree estimated from the original data.
## x:        the data with two dimensions; the columns are resampled.
## FUN:      the function applied to each bootstrap sample to get a tree.
## B:        the number of bootstrap replicates.
## block:    the number of consecutive columns resampled together. Only the
##           complete blocks are used: the trailing columns which do not
##           make a complete block are ignored.
## trees:    whether to return the bootstrap trees.
## quiet:    whether to suppress the messages on the progress.
## rooted:   whether to count clades (TRUE) or bipartitions (FALSE).
## jumble:   whether to permute the rows of each bootstrap sample.
## mc.cores: if different from 1, the replicates are run with mclapply().
##
## Returns the numbers of times each node of 'phy' is found in the
## bootstrap trees, or a list with this vector ('BP') and the trees
## ('trees') if 'trees = TRUE'.
boot.phylo <-
    function(phy, x, FUN, B = 100, block = 1,
             trees = FALSE, quiet = FALSE,
             rooted = is.rooted(phy), jumble = TRUE,
             mc.cores = 1)
{
    if (is.null(dim(x)) || length(dim(x)) != 2)
        stop("the data 'x' must have two dimensions (e.g., a matrix or a data frame)")

    if (anyDuplicated(rownames(x)))
        stop("some labels are duplicated in the data: you won't be able to analyse tree bipartitions")

    boot.tree <- vector("list", B)
    y <- nc <- ncol(x)
    nr <- nrow(x)

    if (nr < 4 && !trees) {
        warning("not enough rows in 'x' to compute bootstrap values.\nSet 'trees = TRUE' if you want to get the bootstrap trees")
        return(integer())
    }

    if (block > 1) {
        if (block > nc)
            stop("'block' is larger than the number of columns in 'x'")
        ## 'b' are the last columns of the complete blocks; the first
        ## columns are derived from them so that both vectors always have
        ## the same length (otherwise mapply() would recycle the shortest
        ## and create a wrong block)
        b <- seq(block, nc, block)
        a <- b - block + 1
        y <- mapply(":", a, b, SIMPLIFY = FALSE)
        getBootstrapIndices <- function() unlist(sample(y, replace = TRUE))
    } else getBootstrapIndices <- function() sample.int(y, replace = TRUE)

    if (!quiet) {
        prefix <- "\rRunning bootstraps:      "
        suffix <- paste("/", B)
        updateProgress <- function(i) cat(prefix, i, suffix)
    }

    if (mc.cores == 1) {
        for (i in seq_len(B)) {
            boot.samp <- x[, getBootstrapIndices()]
            if (jumble) boot.samp <- boot.samp[sample.int(nr), ]
            boot.tree[[i]] <- FUN(boot.samp)
            if (!quiet && !(i %% 100)) updateProgress(i)
        }
    } else {
        if (!quiet) cat("Running parallel bootstraps...")
        foo <- function(i) {
            boot.samp <- x[, getBootstrapIndices()]
            if (jumble) boot.samp <- boot.samp[sample.int(nr), ]
            FUN(boot.samp)
        }
        boot.tree <- mclapply(seq_len(B), foo, mc.cores = mc.cores)
        if (!quiet) cat(" done.")
    }

    if (nr < 4 && trees)
        return(list(BP = integer(), trees = boot.tree))

    if (!quiet) cat("\nCalculating bootstrap values...")

    ## sort labels after mixed them up
    if (jumble) {
        boot.tree <- .compressTipLabel(boot.tree, ref = phy$tip.label)
        boot.tree <- .uncompressTipLabel(boot.tree)
        boot.tree <- unclass(boot.tree) # otherwise countBipartitions crashes
    }
    class(boot.tree) <- "multiPhylo"
    if (rooted) {
        pp <- prop.part(boot.tree)
        ans <- prop.clades(phy, part = pp, rooted = rooted)
    } else {
        phy <- reorder(phy, "postorder")
        ints <- phy$edge[, 2] > Ntip(phy)
        ans <- countBipartitions(phy, boot.tree)
        ans <- c(NA_integer_, ans[order(phy$edge[ints, 2])])
    }

    if (!quiet) cat(" done.\n")

    if (trees) ans <- list(BP = ans, trees = boot.tree)
    ans
}

### The next function transforms an object of class "prop.part" so
### that the vectors which are identical in terms of splits are aggregated.
### For instance if n = 5 tips, 1:2 and 3:5 actually represent the same
### split though they are different clades. The aggregation is done
### arbitrarily.
### The call to SHORTwise() ensures that all splits are the shortest ones.
### The call to ONEwise() ensures that all splits include the first tip.
### (rewritten by Klaus)
postprocess.prop.part <- function(x, method = "ONEwise")
{
    freq <- attr(x, "number")
    tiplabs <- attr(x, "labels")

    ## the method is matched case-insensitively
    method <- match.arg(toupper(method), c("ONEWISE", "SHORTWISE"))
    recode <- if (method == "ONEWISE") ONEwise else SHORTwise

    x <- recode(x)
    dup <- duplicated(x)

    if (any(dup)) {
        ## add the frequency of each duplicate to the frequency of its
        ## first occurrence, then remove the duplicates
        keep <- match(x[dup], x)
        extra <- which(dup)
        for (k in seq_along(extra)) {
            target <- keep[k]
            freq[target] <- freq[target] + freq[extra[k]]
        }
        x <- x[!dup]
        freq <- freq[!dup]
    }

    attr(x, "number") <- freq
    attr(x, "labels") <- tiplabs
    class(x) <- "prop.part"
    x
}

### This function changes an object of class "prop.part" so that they
### all include the first tip. For instance if n = 5 tips, 3:5 is
### changed to 1:2.
ONEwise <- function(x)
{
    ## the tips are numbered from 1 to the number of labels
    alltips <- seq_len(length(attr(x, "labels")))
    empty <- lengths(x) == 0
    ## an empty partition is replaced by the set of all tips
    if (any(empty)) x[empty] <- list(alltips)
    for (k in which(!empty)) {
        part <- x[[k]]
        ## a partition which does not start with the first tip is replaced
        ## by its complement
        if (part[1] != 1) x[[k]] <- alltips[-part]
    }
    x
}

### This function changes an object of class "prop.part" so that each
### split is coded by the shorter part of the partition.
### For instance if n = 5 tips, 1:3 is changed to 4:5. If n is even (e.g.,
### n = 6) and both parts have n/2 tips, the part which includes the first
### tip is kept, similarly to ONEwise.
SHORTwise <- function(x) {
    ## using the attribute "labels" should also work for splits objects
    ## from phangorn
    ntip <- length(attr(x, "labels"))
    alltips <- seq_len(ntip)
    size <- lengths(x)
    half <- ntip / 2
    ## only the partitions with at least half of the tips may be changed
    for (k in which(size >= half)) {
        part <- x[[k]]
        ## take the complement if the partition is the longer side or, in
        ## case of a tie, if it does not start with the first tip
        if (size[k] > half || part[1] != 1)
            x[[k]] <- alltips[-part]
    }
    x
}

## Consensus tree from a series of trees.
##
## ...:          the trees (see .getTreesFromDotdotdot).
## p:            the partitions observed in less than 'p * ntree' trees are
##               dropped (p = 1 keeps only the partitions found in all trees).
## check.labels: whether to match the tip labels among trees with
##               .compressTipLabel(); not done if the trees already have
##               the attribute "TipLabel".
## rooted:       if FALSE, the trees are first passed to root(obj, 1) and
##               the partitions are processed as splits with SHORTwise.
##
## Returns an object of class "phylo" without branch lengths and whose
## node labels are the values returned by prop.clades() divided by the
## number of trees.
consensus <- function(..., p = 1, check.labels = TRUE, rooted = FALSE)
{
    ## Recursive function building the matrix 'edge' of the final tree.
    ## It modifies 'edge', 'pos' (the next row of 'edge' to fill) and
    ## 'nextnode' (the last node number used) in the calling environment.
    foo <- function(ic, node) {
        ## ic: index of 'pp'
        ## node: node number in the final tree
        pool <- pp[[ic]]
        if (ic < m) {
            ## look among the following partitions (they are not larger
            ## since 'pp' is sorted by decreasing size) for those which
            ## are completely included in 'pool'
            for (j in (ic + 1):m) {
                wh <- match(pp[[j]], pool)
                if (!any(is.na(wh))) {
                    ## pp[[j]] is a subclade of 'node': create a new node
                    ## below 'node', remove its tips from 'pool', and
                    ## build this subclade recursively
                    edge[pos, 1] <<- node
                    pool <- pool[-wh]
                    edge[pos, 2] <<- nextnode <<- nextnode + 1L
                    pos <<- pos + 1L
                    foo(j, nextnode)
                }
            }
        }
        ## the tips remaining in 'pool' are connected directly to 'node'
        size <- length(pool)
        if (size) {
            ind <- pos:(pos + size - 1)
            edge[ind, 1] <<- node
            edge[ind, 2] <<- pool
            pos <<- pos + size
        }
    }

    obj <- .getTreesFromDotdotdot(...)

    if (!is.null(attr(obj, "TipLabel")))
        labels <- attr(obj, "TipLabel")
    else {
        labels <- obj[[1]]$tip.label
        if (check.labels) obj <- .compressTipLabel(obj)
    }
    ## NOTE(review): presumably roots all trees on the first tip -- confirm
    ## with the documentation of root()
    if(!rooted) obj <- root(obj, 1)

    ntree <- length(obj)
    ## Get all observed partitions and their frequencies:
    pp <- prop.part(obj, check.labels = FALSE)
    if (!rooted) {
        pp <- postprocess.prop.part(pp, "SHORTwise")
        ## the first partition is set to the set of all tips
        pp[[1]] <- seq_along(labels)
    }
    ## Drop the partitions whose frequency is less than 'p':
    if (p == 0.5) p <- 0.5000001 # avoid incompatible splits
    bs <- attr(pp, "number")
    sel <- bs >= p * ntree
    pp <- pp[sel]
    bs <- bs[sel]
    lens <- lengths(pp)
    ## drop the partitions with a single tip
    if (length(drop <- which(lens == 1))) {
        pp <- pp[-drop]
        lens <- lens[-drop]
    }
    ## Get the order of the remaining partitions by decreasing size:
    ind <- order(lens, decreasing = TRUE)
    pp <- pp[ind]
    bs <- bs[ind]
    n <- length(labels) # number of tips
    m <- length(pp) # number of partitions kept = number of nodes
    edge <- matrix(0L, n + m - 1, 2)
    if (m == 1) {
        ## a single partition: all tips are connected to the root
        edge[, 1] <- n + 1L
        edge[, 2] <- 1:n
    } else {
        nextnode <- n + 1L
        pos <- 1L
        foo(1, nextnode)
    }
    res <- structure(list(edge = edge, tip.label = labels,
                          Nnode = m), class = "phylo")
    res <- reorder(res)
    ## node labels: values from prop.clades() divided by the number of trees
    node.label <- prop.clades(res, obj, rooted=rooted)/ntree
    res$node.label <- node.label
    res
}

Any scripts or data that you put into this service are public.

ape documentation built on April 3, 2025, 7:53 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

ape
Analyses of Phylogenetics and Evolution

R/dist.topo.R
In ape: Analyses of Phylogenetics and Evolution

Defines functions consensus SHORTwise ONEwise postprocess.prop.part boot.phylo prop.clades plot.prop.part summary.prop.part print.prop.part prop.part .compressTipLabel .dist.topo.score dist.topo .getTreesFromDotdotdot

Documented in boot.phylo .compressTipLabel consensus dist.topo ONEwise plot.prop.part postprocess.prop.part print.prop.part prop.clades prop.part SHORTwise summary.prop.part

Try the ape package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

ape Analyses of Phylogenetics and Evolution

R/dist.topo.R In ape: Analyses of Phylogenetics and Evolution

Defines functions consensus SHORTwise ONEwise postprocess.prop.part boot.phylo prop.clades plot.prop.part summary.prop.part print.prop.part prop.part .compressTipLabel .dist.topo.score dist.topo .getTreesFromDotdotdot

Documented in boot.phylo .compressTipLabel consensus dist.topo ONEwise plot.prop.part postprocess.prop.part print.prop.part prop.clades prop.part SHORTwise summary.prop.part

Try the ape package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

ape
Analyses of Phylogenetics and Evolution

R/dist.topo.R
In ape: Analyses of Phylogenetics and Evolution