#' Phylogeny generating
#'
#' Generates a phylogeny from a \code{sim} object containing speciation and
#' extinction times, parent and status information (see \code{?sim}). Returns a
#' \code{phylo} object containing information on the phylogeny, following an
#' "evolutionary Hennigian" (sensu Ezard et al 2011) format (i.e., a
#' bifurcating tree). Takes an optional argument encoding fossil occurrences to
#' return a sampled ancestor tree (see references). This tree consists of the
#' original tree, plus the fossil occurrences added as branches of length
#' \code{0} branching off of the corresponding species at the time of
#' occurrence. Such trees can be used, as is or with small modifications, as
#' starting trees in phylogenetic inference software that make use of the
#' fossilized birth-death model. Returns \code{NA} and sends a warning if the
#' simulation has only one lineage or if more than one species has \code{NA}
#' as parent (i.e. there is no single common ancestor in the simulation). In
#' the latter case, please use \code{find.lineages} first.
#'
#' @param sim A \code{sim} object, containing extinction times, speciation
#' times, parent, and status information for each species in the simulation.
#' See \code{?sim}.
#'
#' @param fossils A data frame with a \code{"Species"} column and a
#' \code{SampT} column, usually an output of the \code{sample.clade}
#' function. Species names must contain only one number each, corresponding
#' to the order of the \code{sim} vectors. Note that we require it to have a
#' \code{SampT} column, i.e. fossils must have an exact age. This assumption
#' might be relaxed in the future.
#'
#' @param saFormat A string indicating which form sampled ancestors should take
#' in the tree. If set to \code{"branch"} (default), SAs are returned as
#' 0-length branches. If set to \code{"node"}, they are returned as degree-2
#' nodes. Note that some software prefer the former (e.g. Beast) and some the
#' latter (e.g. RevBayes). The code for making 0-length branches become nodes
#' was written by Joshua A. Justison.
#'
#' @param returnTrueExt A logical indicating whether to include in tree the
#' tips representing the true extinction time of extinct species. If set to
#' \code{FALSE}, the returned tree will include those tips. If \code{TRUE}
#' (default), they will be dropped and instead the last sampled fossil of
#' a given species will be the last sampled tip of that species. Note that if
#' a species was not sampled it will then not appear in the tree. If no fossils
#' have been added to the tree with the parameter \code{fossils}, this will
#' have the same effect as the ape function \code{drop.fossil}, returning an
#' ultrametric tree. Note that if this is set to \code{FALSE}, the
#' \code{root.time} and \code{root.edge} arguments will not be accurate,
#' depending on which species are dropped. The user is encouraged to use the
#' \code{ape} package to correct these problems, as shown in an example below.
#'
#' @param returnRootTime Logical indicating if phylo should have information
#' regarding \code{root.time}. If set to \code{NULL} (default), returned
#' phylogenies will not have \code{root.time} if there is at least one extant
#' lineage in the sim object. If there are only extinct lineages in the
#' \code{sim} object and it is set to \code{NULL}, \code{root.time} will be
#' returned. If set to \code{FALSE} or \code{TRUE}, \code{root.time} will be
#' removed or forced into the phylo object, respectively. In this case, we
#' highly recommend users to read about the behavior of some functions (such as
#' APE's \code{axisPhylo}) when this argument is forced.
#'
#' @details When \code{root.time} is added to a phylogeny, packages such as APE
#' can change their interpretation of the information in the \code{phylo}
#' object. For instance, a completely extinct phylogeny might be interpreted as
#' extant if there is no info about \code{root.time}. This might create
#' misleading interpretations even with simple functions such as
#' \code{ape::axisPhylo}. \code{make.phylo} tries to accommodate different
#' evo/paleo practices in its default value for \code{returnRootTime} by
#' automatically attributing \code{root.time} when the \code{sim} object is
#' extinct. We encourage careful inspection of output if users force
#' \code{make.phylo} to use a specific behavior, especially when using
#' phylogenies generated by this function as input in functions from other
#' packages. For extinct phylogenies, it might usually be important to
#' explicitly provide information that the edge is indeed a relevant part of
#' the phylogeny (for instance adding \code{root.edge = TRUE} when plotting a
#' phylogeny with \code{root.time} information with \code{ape::plot.phylo}. An
#' example below provides a visualization of this issue.
#'
#' @return A \code{phylo} object from the APE package. Tip labels are numbered
#' following the order of species in the \code{sim} object. If fossil
#' occurrence data was supplied, the tree will include fossil occurrences as
#' tips with branch length \code{0}, bifurcating at its sampling time from the
#' corresponding species' edge (i.e. a sampled ancestor tree). Note that to
#' obtain a true sampled ancestor (SA) tree, one must perform the last step of
#' deleting tips that are not either extant or fossil occurrences (i.e. the
#' tips at true time of extinction).
#'
#' Note this package does not depend on APE (Paradis et al, 2004) since it is
#' never used inside its functions, but it is suggested since one might want to
#' manipulate the phylogenies generated by this function. Furthermore, a
#' limited version of the \code{drop.tip} function from APE has been copied for
#' use in this function (namely, due to the parameter \code{returnTrueExt}).
#' Likewise, a limited version of \code{collapse.singles} and
#' \code{node.depth.edgelength} were also copied to support those features. One
#' does not need to have APE installed for the function to use that code, but
#' the authors wished to do their due diligence by crediting the package and
#' its maintainers.
#'
#' @author Matheus Januario and Bruno do Rosario Petrucci
#'
#' @references
#'
#' Ezard, T. H., Pearson, P. N., Aze, T., & Purvis, A. (2012). The meaning of
#' birth and death (in macroevolutionary birth-death models). Biology letters,
#' 8(1), 139-142.
#'
#' Paradis, E., Claude, J., Strimmer, & K. (2004). APE: Analyses of
#' Phylogenetics and Evolution in R language. Bioinformatics, 20(2), 289-290.
#'
#' Heath, T. A., Huelsenbeck, J. P., & Stadler, T. (2014). The fossilized
#' birth–death process for coherent calibration of divergence-time estimates.
#' Proceedings of the National Academy of Sciences, 111(29), E2957-E2966.
#'
#' @examples
#'
#' ###
#' # we can start with a simple phylogeny
#'
#' # set a simulation seed
#' set.seed(1)
#'
#' # simulate a BD process with constant rates
#' sim <- bd.sim(n0 = 1, lambda = 0.3, mu = 0.1, tMax = 10,
#' nExtant = c(2, Inf))
#'
#' # make the phylogeny
#' phy <- make.phylo(sim)
#'
#' # plot it
#' if (requireNamespace("ape", quietly = TRUE)) {
#' # store old par settings
#' oldPar <- par(no.readonly = TRUE)
#'
#' # change par to show phylogenies
#' par(mfrow = c(1, 2))
#'
#' ape::plot.phylo(phy)
#'
#' # we can also plot only the molecular phylogeny
#' ape::plot.phylo(ape::drop.fossil(phy))
#'
#' # reset par
#' par(oldPar)
#' }
#'
#' ###
#' # this works for sim generated with any of the scenarios in bd.sim
#'
#' # set seed
#' set.seed(1)
#'
#' # simulate
#' sim <- bd.sim(n0 = 1, lambda = function(t) 0.2 + 0.01*t,
#' mu = function(t) 0.03 + 0.015*t, tMax = 10,
#' nExtant = c(2, Inf))
#'
#' # make the phylogeny
#' phy <- make.phylo(sim)
#'
#' # plot it
#' if (requireNamespace("ape", quietly = TRUE)) {
#' # store old par settings
#' oldPar <- par(no.readonly = TRUE)
#'
#' # change par to show phylogenies
#' par(mfrow = c(1, 2))
#'
#' # plot phylogeny
#' ape::plot.phylo(phy)
#' ape::axisPhylo()
#'
#' # we can also plot only the molecular phylogeny
#' ape::plot.phylo(ape::drop.fossil(phy))
#' ape::axisPhylo()
#'
#' # reset par
#' par(oldPar)
#' }
#'
#' ###
#' # we can use the fossils argument to generate a sample ancestors tree
#'
#' # set seed
#' set.seed(1)
#'
#' # simulate a simple birth-death process
#' sim <- bd.sim(n0 = 1, lambda = 0.2, mu = 0.05, tMax = 10,
#' nExtant = c(2, Inf))
#'
#' # make the traditional phylogeny
#' phy <- make.phylo(sim)
#'
#' # sample fossils
#' fossils <- sample.clade(sim, 0.1, 10)
#'
#' # make the sampled ancestor tree
#' fbdPhy <- make.phylo(sim, fossils)
#'
#' # plot them
#' if (requireNamespace("ape", quietly = TRUE)) {
#' # store old par settings
#' oldPar <- par(no.readonly = TRUE)
#'
#' # visualize longevities and fossil occurrences
#' draw.sim(sim, fossils = fossils)
#'
#' # change par to show phylogenies
#' par(mfrow = c(1, 2))
#'
#' # phylogeny
#' ape::plot.phylo(phy, main = "Phylogenetic tree")
#' ape::axisPhylo()
#'
#' # sampled ancestor tree
#' ape::plot.phylo(fbdPhy, main = "Sampled Ancestor tree")
#' ape::axisPhylo()
#'
#' # reset par
#' par(oldPar)
#' }
#'
#' ###
#' # we can instead have the sampled ancestors as degree-2 nodes
#'
#' # set seed
#' set.seed(1)
#'
#' # simulate a simple birth-death process
#' sim <- bd.sim(n0 = 1, lambda = 0.2, mu = 0.05, tMax = 10,
#' nExtant = c(2, Inf))
#'
#' # make the traditional phylogeny
#' phy <- make.phylo(sim)
#'
#' # sample fossils
#' fossils <- sample.clade(sim, 0.1, 10)
#'
#' # make the sampled ancestor tree
#' fbdPhy <- make.phylo(sim, fossils, saFormat = "node")
#'
#' # plot them
#' if (requireNamespace("ape", quietly = TRUE)) {
#' # store old par settings
#' oldPar <- par(no.readonly = TRUE)
#'
#' # visualize longevities and fossil occurrences
#' draw.sim(sim, fossils = fossils)
#'
#' # change par to show phylogenies
#' par(mfrow = c(1, 2))
#'
#' # phylogeny
#' ape::plot.phylo(phy, main = "Phylogenetic tree")
#' ape::axisPhylo()
#'
#' # sampled ancestor tree, need show.node.label parameter to see SAs
#' ape::plot.phylo(fbdPhy, main = "Sampled Ancestor tree",
#' show.node.label = TRUE)
#' ape::axisPhylo()
#'
#' # reset par
#' par(oldPar)
#' }
#'
#' ###
#' # we can use the returnTrueExt argument to delete the extinct tips and
#' # have the last sampled fossil of a species as the fossil tip instead
#'
#' # set seed
#' set.seed(5)
#'
#' # simulate a simple birth-death process
#' sim <- bd.sim(n0 = 1, lambda = 0.2, mu = 0.05, tMax = 10,
#' nExtant = c(2, Inf))
#'
#' # make the traditional phylogeny
#' phy <- make.phylo(sim)
#'
#' # sample fossils
#' fossils <- sample.clade(sim, 0.5, 10)
#'
#' # make the sampled ancestor tree
#' fbdPhy <- make.phylo(sim, fossils, saFormat = "node", returnTrueExt = FALSE)
#' # returnTrueExt = FALSE means the extinct tips will be removed,
#' # so we will only see the last sampled fossil (see tree below)
#'
#' # plot them
#' if (requireNamespace("ape", quietly = TRUE)) {
#' # store old par settings
#' oldPar <- par(no.readonly = TRUE)
#'
#' # visualize longevities and fossil occurrences
#' draw.sim(sim, fossils = fossils)
#'
#' # change par to show phylogenies
#' par(mfrow = c(1, 2))
#'
#' # phylogeny
#' ape::plot.phylo(phy, main = "Phylogenetic tree")
#' ape::axisPhylo()
#'
#' # sampled ancestor tree, need show.node.label parameter to see SAs
#' ape::plot.phylo(fbdPhy, main = "Sampled Ancestor tree",
#' show.node.label = TRUE)
#' ape::axisPhylo()
#' # note how t1.3 is an extinct tip now, as opposed to t1, since
#' # we would not know the exact extinction time for t1,
#' # rather just see the last sampled fossil
#'
#' # reset par
#' par(oldPar)
#' }
#'
#' ###
#' # suppose in the last example, t2 went extinct and left no fossils
#' # this might lead to problems with the root.time object
#'
#' # set seed
#' set.seed(5)
#'
#' # simulate a simple birth-death process
#' sim <- bd.sim(n0 = 1, lambda = 0.2, mu = 0.05, tMax = 10,
#' nExtant = c(2, Inf))
#'
#' # make the traditional phylogeny
#' phy <- make.phylo(sim)
#'
#' # sample fossils
#' fossils <- sample.clade(sim, 0.5, 10)
#'
#' # make it so t2 is extinct
#' sim$TE[2] <- 9
#' sim$EXTANT[2] <- FALSE
#'
#' # take out fossils of t2
#' fossils <- fossils[-which(fossils$Species == "t2"), ]
#'
#' # make the sampled ancestor tree
#' fbdPhy <- make.phylo(sim, fossils, saFormat = "node", returnTrueExt = FALSE)
#' # returnTrueExt = FALSE means the extinct tips will be removed,
#' # so we will only see the last sampled fossil (see tree below)
#'
#' # plot them
#' if (requireNamespace("ape", quietly = TRUE)) {
#' # store old par settings
#' oldPar <- par(no.readonly = TRUE)
#'
#' # visualize longevities and fossil occurrences
#' draw.sim(sim, fossils = fossils)
#'
#' # change par to show phylogenies
#' par(mfrow = c(1, 2))
#'
#' # phylogeny
#' ape::plot.phylo(phy, main = "Phylogenetic tree")
#' ape::axisPhylo()
#'
#' # sampled ancestor tree, need show.node.label parameter to see SAs
#' ape::plot.phylo(fbdPhy, main = "Sampled Ancestor tree",
#' show.node.label = TRUE)
#' ape::axisPhylo()
#' # note how t2 is gone, since it went extinct and left no fossils
#'
#' # this made it so the length of the tree + the root edge
#' # does not equal the origin time of the simulation anymore
#' max(ape::node.depth.edgelength(fbdPhy)) + fbdPhy$root.edge
#' # it should equal 10
#'
#' # to correct it, we need to set the root edge again
#' fbdPhy$root.edge <- 10 - max(ape::node.depth.edgelength(fbdPhy))
#' # this is necessary because ape does not automatically fix the root.edge
#' # when species are dropped, and analyes using phylogenies + fossils
#' # frequently condition on the origin of the process
#'
#' # reset par
#' par(oldPar)
#' }
#'
#' ###
#' # finally, we can test the usage of returnRootTime
#'
#' # set seed
#' set.seed(1)
#'
#' # simulate a simple birth-death process with more than one
#' # species and completely extinct:
#' sim <- bd.sim(n0 = 1, lambda = 0.5, mu = 0.5, tMax = 10, nExtant = c(0, 0))
#'
#' # make a phylogeny using default values
#' phy <- make.phylo(sim)
#'
#' # force phylo to not have root.time info
#' phy_rootless <- make.phylo(sim, returnRootTime = FALSE)
#'
#' # plot them
#' if (requireNamespace("ape", quietly = TRUE)) {
#' # store old par settings
#' oldPar <- par(no.readonly = TRUE)
#'
#' # change par to show phylogenies
#' par(mfrow = c(1, 3))
#'
#' # if we use the default value, axisPhylo works as intended
#' ape::plot.phylo(phy, root.edge = TRUE, main = "root.time default value")
#' ape::axisPhylo()
#'
#' # note that without root.edge, we have incorrect times,
#' # as APE assumes tMax was the time of first speciation
#' ape::plot.phylo(phy, main = "root.edge not passed to plot.phylo")
#' ape::axisPhylo()
#'
#' # if we force root.time to be FALSE, APE assumes the tree is
#' # ultrametric, which leads to an incorrect time axis
#' ape::plot.phylo(phy_rootless, main = "root.time forced as FALSE")
#' ape::axisPhylo()
#' # note time scale in axis
#'
#' # reset par
#' par(oldPar)
#' }
#'
#' @name make.phylo
#' @rdname make.phylo
#' @export
make.phylo <- function(sim, fossils = NULL, saFormat = "branch",
returnTrueExt = TRUE, returnRootTime = NULL) {
# check that sim is a valid sim object
if (!is.sim(sim)) {
stop("Invalid argument, must be a sim object. See ?sim")
}
# simulations with just one species do not have a phylogeny
if (length(sim$TE) < 2) {
stop("There is no phylogeny for a simulation with only one lineage")
}
# simulations with more than one starting species have multiple phylogenies
if (sum(is.na(sim$PAR)) > 1) {
stop("Multiple starting species. Use function find.lineages")
}
# saFormat must be either branch or node
if (saFormat != "branch" && saFormat != "node") {
stop("saFormat must either be set to branch or node, indicating whether
sampled ancestors should be returned as 0-length branches or
degree-2 nodes")
}
# should not have returnTrueExt = FALSE and no fossils
if (!returnTrueExt && is.null(fossils)) {
stop("returnTrueExt = FALSE without fossils is the same as
applying ape::drop.fossil, so do that instead")
}
# make a backup of sim to use later
backupSim <- sim
# if fossils are provided, make a sampled ancestor tree instead
if (!is.null(fossils)) {
if (nrow(fossils) == 0) {
stop("Please insert a data frame containig fossil data.
See ?make.phylo for more information.")
}
# make Species field of fossils numeric
fossils$Species <- as.numeric(gsub('.([0-9]+)*','\\1', fossils$Species))
# get a list of sample species
sampledSpecies <- unique(c(which(sim$EXTANT), fossils$Species))
# if any of them are not in the sim range, error
if (any(!(sampledSpecies %in% 1:length(sim$TE)))) {
stop("Sampled species must all be in sim")
}
# start the vector with names
names <- paste0("t", 1:length(sim$TS))
sampledNames <- paste0("t", which(sim$EXTANT))
# fossil count for each species
count <- 1
# previous fossil species starting count
prevFossil <- 0
# changes to make this part easier
sim$PAR[is.na(sim$PAR)] <- sim$TE[sim$EXTANT] <- 0
# data frame with current and new species numbers
numbers <- data.frame(cur = 1:length(sim$TS), orig = 1:length(sim$TS))
# need this to have correct naming conventions
# for each fossil occurrence
for (i in 1:nrow(fossils)) {
# change count as needed
if (fossils[i, ]$Species == prevFossil) {
count <- count + 1
} else {
count <- 1
prevFossil <- fossils[i, ]$Species
}
# take fossil sampling time
sampT <- fossils[i, ]$SampT
# and species number
nSp <- fossils[i, ]$Species
# if sampT is out of the time nSp was alive, error
if ((sampT > sim$TS[nSp]) || (sampT < sim$TE[nSp])) {
stop("Fossil occurrences must fall during corresponding species' period")
}
# first we need to find the position of the fossil on sim
# get daughters of nSp
daug <- sim$PAR == nSp
# if nSp does not have daughters
if (sum(daug) == 0) {
pos <- max(which(sim$PAR < nSp)) + 1
} else if (sampT > max(sim$TS[daug])) {
# if sampT is higher than the age of all other daughters of nSp,
# make it the first daughter
pos <- min(which(daug))
} else {
# if there are daughters with a higher age, it will be younger than those
pos <- which(sim$TS == min(sim$TS[daug][sim$TS[daug] > sampT])) + 1
}
# before and after this position
before <- if (pos == 0) 0 else 1:(pos - 1)
after <- if (pos > length(sim$PAR)) 0 else pos:length(sim$PAR)
# get parents that need to be changes
changePAR <- sim$PAR >= pos
previousPars <- sim$PAR[changePAR]
numbers[numbers$cur >= pos, 1] <- numbers[numbers$cur >= pos, 1] + 1
# change corresponding parents
sim$PAR[changePAR] <- sim$PAR[changePAR] + 1
# add new parent to the vector
sim$PAR <- c(sim$PAR[before], nSp, sim$PAR[after])
# and change the easy sim elements
sim$TS <- c(sim$TS[before], sampT, sim$TS[after])
sim$TE <- c(sim$TE[before], sampT, sim$TE[after])
sim$EXTANT <- c(sim$EXTANT[before], FALSE, sim$EXTANT[after])
# find original species number
nSpOrig <- numbers[numbers$cur == nSp, 2]
# number of fossils for this species
nFossilsSp <- sum(fossils$Species == numbers[numbers[, 2] == nSpOrig, 1])
# numerical name
numName <- nSpOrig + count/10^ceiling(log(nFossilsSp + 1, 10))
# add new name
newName <- paste0("t", numName, ifelse(count %% 10 == 0, "0", ""))
names <- c(names[before], newName, names[after])
# add to sampled names
sampledNames <- c(sampledNames, newName)
# change fossil species names as needed
fossils$Species <- ifelse(fossils$Species < pos,
fossils$Species,
fossils$Species + 1)
}
# change back
sim$PAR[sim$PAR == 0] <- sim$TE[sim$EXTANT] <- NA
}
# construct the phylogeny
# make TE sensible
sim$TE[sim$EXTANT] <- 0
# aux function
all.dir.daughter <- function(lin, x) {
# all.dir.daughters returns the name of each direct daughter species
# x = a simulation from paleobuddy
# lin = a numeric specyfing the name of a lineage
return(which(x$PAR == lin))
}
# current node
curNode <- length(sim$TE) + 1
# create the edge matrix
edge <- matrix(nrow = 1, ncol = 2, data = c(curNode, NA))
# lineages which the function already put in the phylogeny
passed <- vector()
# current lineage
i <- 2
# lineages which the function still has to solve (at least)
lins <- c(1, 2)
# internal variable to help control the node function
jump <- 0
# number of nodes in the phylogeny
nNode <- length(sim$TE) - 1
# vector storing the node corresponding to each birth
birthsNode <- rep(NA, times = length(sim$TE))
birthsNode[2] <- curNode
# needed for debugging
counter <- 0
# while some tip does not have a place in the phylogeny
while (length(lins) > 0) {
# find daughters
dau <- all.dir.daughter(lin = i, x = sim)
dau <- dau[!(dau %in% passed)]
# if lineage has daughters
if (is.numeric(dau) & length(dau) > 0) {
# if a whole clade has very recently been put in the phylogeny
if (jump == 1) {
curNode <- max(edge) + 1
# append it to the edge matrix
if (is.na(edge[nrow(edge), 2])) {
# if there is no edge there currently
edge[nrow(edge), 2] <- curNode
} else {
# if there is
edge <- rbind(edge,
matrix(nrow = 1, ncol = 2,
data = c(prevNode, curNode)))
}
# update birthsNode
birthsNode[dau[1]] <- curNode
# update jump
jump <- 0
# if the current lineage is a non-monophyletic branch
} else {
# update curNode
curNode <- curNode + 1
# append to edge matrix, as above
if (is.na(edge[nrow(edge), 2])) {
edge[nrow(edge), 2] <- curNode
} else {
edge <- rbind(edge,
matrix(nrow = 1, ncol = 2,
data = c(curNode - 1, curNode)))
}
# update birthsNode
birthsNode[dau[1]] <- curNode
}
# update edge
edge <- rbind(edge,
matrix(nrow = 1, ncol = 2, data = c(curNode, NA)))
# update lineage list and current lineage
lins <- c(lins, dau[1])
i <- lins[length(lins)]
}
# if lineage has no daughters
if (is.numeric(dau) & length(dau) == 0) {
# append lineage to the edge matrix
if (is.na(edge[nrow(edge), 2])) {
# if there is no edge there currently
edge[nrow(edge), 2] <- i
}
else {
# if there is
edge <- rbind(edge,
matrix(nrow = 1, ncol = 2,
data = c(max(
edge[!(duplicated(edge[,1]) |
duplicated(edge[,1], fromLast = TRUE)), 1]), i)))
}
# we put the lineage on the phylogeny
passed <- c(passed, i)
# update lineage list and current lineage
lins <- lins[-length(lins)]
i <- lins[length(lins)]
}
# this means that the function reached the end of the lineage of the curNode
if (sum(edge[, 1] %in% curNode) > 1) {
# the warning here only "affects" a a condition which is never satisfied
# (jump when there is previous opened edge).
suppressWarnings(
{prevNode <-
max(edge[!(duplicated(edge[, 1]) |
duplicated(edge[, 1], fromLast = TRUE)), 1])})
# update jump
jump <- 1
}
# registering bugs (if any)
counter <- counter + 1
# if the function ran for too long
if (counter > 10*dim(edge)[1]) {
return("The function is lost and seems that it will not find a phylogeny.
Please report this error and provide this simulation for debugging")}
}
# calculating edge length
edgeLength <- vector()
for (i in 1:nrow(edge)) {
# make auxiliary variables
aux1 <- edge[i, 1]
aux2 <- edge[i, 2]
# if the branch is a tip
if (aux2 <= length(sim$TE)) {
# calculate length
edgeLength[i] <- sim$TS[which(birthsNode == aux1)] - sim$TE[aux2]
} else {
# calculate length
edgeLength[i] <- sim$TS[which(birthsNode == aux1)] -
sim$TS[which(birthsNode == aux2)]
}
}
# Tyding all together to create the phylo object
phy <- list(tip.label = paste0("t", 1:length(sim$TE)),
edge = edge,
edge.length = edgeLength,
Nnode = nNode,
root.edge = sim$TS[1] - sim$TS[2])
# if user does not force root.time
if (is.null(returnRootTime)) {
# if there are no extinct species, set root time to
# origin of the simulation so phylo axis are not wrong
if (sum(sim$EXTANT) < 1) {
phy$root.time <- sim$TS[1]
}
} else {
# otherwise, return root time if user asked for it
if (returnRootTime) {
phy$root.time <- sim$TS[1]
}
}
phy$node.label <- seq(from = length(sim$TE) + 1,
to = length(sim$TE) + nNode)
# if fossils are provided
if(!is.null(fossils)){
# alter names to label occurrences
phy$tip.label <- names
}
class(phy) <- "phylo"
# collect tip names of extinction time tips
extinctTips <- paste0("t", which(!backupSim$EXTANT))
# if there are fossils, collapse fossils if saFormat is node, and
# then check to see if there was
if (!is.null(fossils)) {
# drop true extinction time tips if the parameter is set
if (!returnTrueExt) phy <- dropTipPB(phy, extinctTips)
# make 0-length branches degree-2 nodes if saFormat is "node"
if (saFormat == "node") {
# number of tips in the original tree
nTips <- length(phy$tip.label)
# indices to delete - edges with length 0
delInd <- phy$edge.length == 0
# tips to delete
deletedTips<- phy$edge[delInd, 2]
# get the labels of the deleted tips
internalLabels <- phy$tip.label[deletedTips]
# node numbers for the tips we are deleting
nodeNumbers <- phy$edge[delInd, 1]
# drop tips, but keep them as nodes
phy <- dropTipPB(phy, deletedTips, collapse.singles = FALSE)
# reset node labels
phy$node.label <- rep("", rep(phy$Nnode))
# add previous deleted tips' labels to the new nodes
phy$node.label[nodeNumbers - nTips] <- internalLabels
}
# correct root.edge
# if (max(node.depth.edgelength.pb(phy)) + phy$root.edge != max(sim$TS)) {
# # if the MRCA is a speciation event
# if (max(node.depth.edgelength.pb(phy)) %in% backupSim$TS) {
# # get species that was born
# spBorn <- which(backupSim$TS == max(node.depth.edgelength.pb(phy)))
#
# # get its parent
# spPar <- backupSim$PAR[spBorn]
#
# # get fossils for parent species
# fossilsSpPar <- fossils[fossils$Species == paste0("t", spPar), ]
#
# # check if spPar has any fossils older than spBorn
# if (nrow(fossilsSpPar) > 0 &&
# any(fossilsSpPar$SampT > backupSim$TS[spBorn])) {
# # root edge will be the difference between
# # the speciation time and oldest fossil
# phy$root.edge <- backupSim$TS[spPar] - max(fossilsSpPar$SampT)
# } else {
# # if not, root edge is just the time between speciations
# phy$root.edge <- backupSim$TS[spPar] - backupSim$TS[spBorn]
# }
# } else {
# # if not, it is a fossil, find which species'
# spFossil <- fossils$Species[which(fossils$SampT ==
# max(node.depth.edgelength.pb(phy)))]
#
# # root edge then is the difference between speciation and fossilization
# phy$root.edge <- backupSim$TS[spFossil] -
# max(node.depth.edgelength.pb(phy))
# }
# }
}
return(phy)
}
dropTipPB <- function(phy, tip, root.edge = 0, collapse.singles = TRUE)
{
### if (!inherits(phy, "phylo"))
### stop('object "phy" is not of class "phylo"')
Ntip <- length(phy$tip.label)
if (is.character(tip))
tip <- which(phy$tip.label %in% tip)
out.of.range <- tip > Ntip
if (any(out.of.range)) {
warning("some tip numbers were larger than the number of tips: they were ignored")
tip <- tip[!out.of.range]
}
if (!length(tip)) return(phy)
if (length(tip) == Ntip) {
warning("drop all tips of the tree: returning NULL")
return(NULL)
}
wbl <- !is.null(phy$edge.length)
if (length(tip) == Ntip - 1) {
i <- which(phy$edge[, 2] == (1:Ntip)[-tip])
res <- list(edge = matrix(2:1, 1, 2),
tip.label = phy$tip.label[phy$edge[i, 2]],
Nnode = 1L)
class(res) <- "phylo"
if (wbl) res$edge.length <- phy$edge.length[i]
if (!is.null(phy$node.label))
res$node.label <- phy$node.label[phy$edge[i, 1] - Ntip]
return(res)
}
phy <- reorder(phy)
NEWROOT <- ROOT <- Ntip + 1
Nnode <- phy$Nnode
Nedge <- dim(phy$edge)[1]
edge1 <- phy$edge[, 1] # local copies
edge2 <- phy$edge[, 2] #
keep <- !logical(Nedge)
## delete the terminal edges given by `tip':
keep[match(tip, edge2)] <- FALSE
ints <- edge2 > Ntip
## delete the internal edges that do not have anymore
## descendants (ie, they are in the 2nd col of `edge' but
## not in the 1st one)
repeat {
sel <- !(edge2 %in% edge1[keep]) & ints & keep
if (!sum(sel)) break
keep[sel] <- FALSE
}
if (root.edge && wbl) {
degree <- tabulate(edge1[keep])
if (degree[ROOT] == 1) {
j <- integer(0) # will store the indices of the edges below the new root
repeat {
i <- which(edge1 == NEWROOT & keep)
j <- c(i, j)
NEWROOT <- edge2[i]
## degree <- tabulate(edge1[keep]) # utile ?
if (degree[NEWROOT] > 1) break
}
keep[j] <- FALSE
## if (length(j) > root.edge) j <- 1:root.edge
j <- j[1:root.edge]
NewRootEdge <- sum(phy$edge.length[j])
if (length(j) < root.edge && !is.null(phy$root.edge))
NewRootEdge <- NewRootEdge + phy$root.edge
phy$root.edge <- NewRootEdge
}
}
##if (!root.edge) phy$root.edge <- NULL # moved above (2021-09-29)
## drop the edges
phy$edge <- phy$edge[keep, ]
if (wbl) phy$edge.length <- phy$edge.length[keep]
## find the new terminal edges
TERMS <- !(phy$edge[, 2] %in% phy$edge[, 1])
## get the old No. of the nodes and tips that become tips:
oldNo.ofNewTips <- phy$edge[TERMS, 2]
n <- length(oldNo.ofNewTips) # the new number of tips in the tree
## the tips may not be sorted in increasing order in the
## 2nd col of edge, so no need to reorder $tip.label
phy$edge[TERMS, 2] <- rank(phy$edge[TERMS, 2])
## fix by Thomas Sibley (2017-10-28):
if (length(tip)) phy$tip.label <- phy$tip.label[-tip]
phy$Nnode <- dim(phy$edge)[1] - n + 1L # update phy$Nnode
## The block below renumbers the nodes so that they conform
## to the "phylo" format
newNb <- integer(Ntip + Nnode)
newNb[NEWROOT] <- n + 1L
sndcol <- phy$edge[, 2] > n
newNb[sort(phy$edge[sndcol, 2])] <- (n + 2):(n + phy$Nnode)
phy$edge[sndcol, 2] <- newNb[phy$edge[sndcol, 2]]
phy$edge[, 1] <- newNb[phy$edge[, 1]]
storage.mode(phy$edge) <- "integer"
if (!is.null(phy$node.label)) # update node.label if needed
phy$node.label <- phy$node.label[which(newNb > 0) - Ntip]
if (collapse.singles) phy <- collapseSinglesPB(phy)
phy
}
collapseSinglesPB <- function(tree, root.edge = FALSE)
{
n <- length(tree$tip.label)
tree <- reorder(tree) # this works now
e1 <- tree$edge[, 1]
e2 <- tree$edge[, 2]
tab <- tabulate(e1)
if (all(tab[-c(1:n)] > 1)) return(tree) # tips are zero
if (is.null(tree$edge.length)) {
root.edge <- FALSE
wbl <- FALSE
} else {
wbl <- TRUE
el <- tree$edge.length
}
if (root.edge) ROOTEDGE <- 0
## start with the root node:
ROOT <- n + 1L
while (tab[ROOT] == 1) {
i <- which(e1 == ROOT)
ROOT <- e2[i]
if (wbl) {
if (root.edge) ROOTEDGE <- ROOTEDGE + el[i]
el <- el[-i]
}
e1 <- e1[-i]
e2 <- e2[-i]
}
singles <- which(tabulate(e1) == 1)
if (length(singles) > 0) {
ii <- sort(match(singles, e1), decreasing = TRUE)
jj <- match(e1[ii], e2)
for (i in 1:length(singles)) {
e2[jj[i]] <- e2[ii[i]]
if (wbl) el[jj[i]] <- el[jj[i]] + el[ii[i]]
}
e1 <- e1[-ii]
e2 <- e2[-ii]
if (wbl) el <- el[-ii]
}
Nnode <- length(e1) - n + 1L
oldnodes <- unique(e1)
if (!is.null(tree$node.label))
tree$node.label <- tree$node.label[oldnodes - n]
newNb <- integer(max(oldnodes))
newNb[ROOT] <- n + 1L
sndcol <- e2 > n
e2[sndcol] <- newNb[e2[sndcol]] <- n + 2:Nnode
e1 <- newNb[e1]
tree$edge <- cbind(e1, e2, deparse.level = 0)
tree$Nnode <- Nnode
if (wbl) {
if (root.edge) tree$root.edge <- ROOTEDGE
tree$edge.length <- el
}
tree
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.