#' Bind a tip to a phylogeny
#'
#' Graft a tip to a phylogeny at location specified.
#'
#' @param tree A phylogeny, with class of "phylo".
#' @param where Location where to insert the tip. It can be either tip label or node label, but must be characters. If the location does not have a name, assign it first.
#' @param tip_label Name of the new tip inserted.
#' @param frac The fraction of branch length, must be between 0 and 1. This only applies when location is a tip or `new_node_above = TRUE`. The distance from the new inserted node to the location (a node or a tip) is the branch length of the location * (1 - frac).
#' @param new_node_above Whether to insert the new node above when the location is a node? Default is `FALSE`, which will attach the new tip to the location node.
#' @param node_label Name of the new node created. This only applies when location is a tip or `new_node_above = TRUE`.
#' @param return_tree Whether to return a phylogeny with class "phylo?" Default is `TRUE`. Otherwise, it will return a data frame.
#' @param tree_tbl A tibble version of the tree, optional.
#' @param node_heights A named numeric vector of node hieghts of the tree, generated by [ape::branching.times()]. It is also optional if `tree` is specified; but required if `tree_tbl` is specified.
#' @param use_castor Whether to use package `castor` to get the phylogeny at a node; it is faster than `tidytree::offspring` to figure out what are the tip offsprings at a node.
#' @param sequential Whether to add the tip with sequential node number in the edge matrix. For example, if we want to bind a tip to a clade and the node number of the tips of this clade is from 101 to 150. We can set the node id of the new tip to 151 and push all the remaining node id to 1 after their current values. This will require us to find out the node ids of all tips that are descents of the node where we want to bind the new tip to, and it can be time costly. Yet I am still not sure whether this is necessary. Normally, the node ids of the `phylo` class are sequential. Therefore, the default value here is `TRUE`. If set to `FALSE`, we can just assign the id of the new tip to Ntip + 1 to save time. In addition, we probably don't need to order the node column of the edge matrix every time.
#' @return Either a phylogeny or a data frame, which can be then converted to a phylogeny later.
#' @examples
#' \dontrun{
#' library(rtrees)
#' bind_tip(tree_plant_otl, "N70407", tip_label = "test_sp")
#' tree_plant_otl_df = tidytree::as_tibble(tree_plant_otl)
#' node_heights = ape::branching.times(tree_plant_otl)
#' bind_tip(tree_tbl = tree_plant_otl_df, where = "N70407",
#' tip_label = "test_sp", node_heights = node_heights)
#' }
#' @export
#'
bind_tip = function(tree = NULL, where, tip_label,
frac = 0.5, new_node_above = FALSE,
node_label = NULL, return_tree = TRUE,
tree_tbl = NULL, node_heights = NULL,
use_castor = TRUE, sequential = TRUE){
if(frac > 1 | frac < 0) stop("frac must be between 0 and 1.")
if(is.null(tree) & is.null(tree_tbl)) stop("No input tree, please assign one.")
if(!is.null(tree_tbl) & is.null(node_heights)) stop("Please specify node_heights.")
if(!is.null(tree_tbl) & new_node_above & is.null(node_label))
stop("new_node_above is TRUE but no node_label.")
if(!is.null(tree_tbl) & !new_node_above & !is.null(node_label))
stop("new_node_above is FALSE but with node_label.")
if(!is.null(tree)){
# make sure node labels are assigned and unique
if(is.null(tree$node.label) || any(duplicated(tree$node.label)))
tree$node.label = paste0("N", 1:ape::Nnode(tree))
tree = ape::makeLabel(tree, tips = FALSE, nodes = TRUE)
tree_tbl = tidytree::as_tibble(tree)
node_heights = ape::branching.times(tree)
}
if(is.null(node_label)) node_label = paste0("node", length(node_heights) + 1L)
if(!"is_tip" %in% names(tree_tbl)){
tree_tbl$is_tip = !(tree_tbl$node %fin% tree_tbl$parent)
}
tree_tbl_node = tree_tbl[tree_tbl$label == where, ] # original node
# tree_tbl_node$isTip = !tree_tbl_node$node %fin% tree_tbl$parent
# tree_tbl_node$isTip = tree_tbl_node$is_tip
node_orig = tree_tbl_node$node # original node number of target location
parent_orig = as.integer(tree_tbl_node$parent)
at_root = tree_tbl_node$parent == tree_tbl_node$node
node_height = node_heights[where]
# tree_tbl$isTip = !(tree_tbl$node %fin% tree_tbl$parent)
# setkey(tree_tbl, label)
# tree_tbl_node = tree_tbl[label == where, ]
if(!tree_tbl_node$is_tip & !at_root){
if(sequential){
if(use_castor){
where_offspring = tree_tbl[tree_tbl$label %fin%
castor::get_subtree_at_node(as_tree(tree_tbl), node = where)$subtree$tip.label, ]
max_offspring = max(where_offspring$node)
} else {
where_offspring = tidytree::offspring(tree_tbl, .node = where)
max_offspring = max(where_offspring$node[where_offspring$is_tip])
}
} else {
# # ??? is it safe to remove the above chunk of code???
# # then the node number of the inserted tip will be n_tip + 1
# # would this be a problem??
max_offspring = sum(tree_tbl$is_tip)
}
}
# tree_tbl = copy(tree_tbl) # to hold results
# tree_tbl = as.data.table(tree_tbl)
tree_tbl_2 = as.data.table(tree_tbl)
# tree_tbl_2[, isTip := !node %fin% parent]
if(!tree_tbl_node$is_tip) { # the target is a node
if(at_root & new_node_above) message("New tip cannot be inserted above the root; attached instead.")
if(new_node_above & !at_root){ # insert above the target location node
# add node first, push the numbers of nodes if they are after the inserted one
# including the node_orig
tree_tbl_2[parent >= node_orig, parent := parent + 1L]
tree_tbl_2[node >= node_orig, node := node + 1L]
# tree_tbl$parent[tree_tbl$parent >= node_orig] = tree_tbl$parent[tree_tbl$parent >= node_orig] + 1L
# tree_tbl$node[tree_tbl$node >= node_orig] = tree_tbl$node[tree_tbl$node >= node_orig] + 1L
# insert the new node
tree_tbl_new = data.table(parent = parent_orig,
node = node_orig, # the new node takes the node number of the original one
branch.length = tree_tbl_node$branch.length * frac,
label = node_label,
is_tip = FALSE)
tree_tbl_2 = rbindlist(list(tree_tbl_2, tree_tbl_new), use.names = TRUE)
# update original node, which will have the inserted node (which takes the original node number) as its parent
tree_tbl_2$parent[tree_tbl_2$label == where] = node_orig
# tree_tbl_2$node[tree_tbl_2$label == node_label]
tree_tbl_2$branch.length[tree_tbl_2$label == where] =
tree_tbl_node$branch.length * (1 - frac)
# add tip
tree_tbl_2[, parent := parent + 1L] # all nodes will be added 1
# tree_tbl$parent = tree_tbl$parent + 1L # all nodes will be added 1
tree_tbl_2[node > max_offspring, node := node + 1L] # all tips behind will be added 1
# tree_tbl$node = data.table::fifelse(tree_tbl$node > max_offspring, tree_tbl$node + 1L, tree_tbl$node)
tree_tbl_new = data.table(parent = node_orig + 1L, # added 1
node = max_offspring + 1L,
branch.length = tree_tbl_node$branch.length * (1 - frac) +
node_height, # the original node branch length (updated) and its node height
label = tip_label,
is_tip = TRUE)
tree_tbl_2 = rbindlist(list(tree_tbl_2, tree_tbl_new), use.names = TRUE)
} else { # attach to a target node; no need to create a new one
if(at_root){ # root
tree_tbl_new = data.table(parent = node_orig + 1L, node = 1L,
branch.length = node_height, label = tip_label,
is_tip = TRUE)
tree_tbl_2[, parent := parent + 1L]
tree_tbl_2[, node := node + 1L]
tree_tbl_2 = rbindlist(list(tree_tbl_2, tree_tbl_new))
} else { # an internal node
tree_tbl_2[, parent := parent + 1L]
tree_tbl_2[node > max_offspring, node := node + 1L]
tree_tbl_new = data.table(parent = node_orig + 1L, # added 1 above
node = max_offspring + 1L,
branch.length = node_height,
label = tip_label,
is_tip = TRUE)
tree_tbl_2 = rbindlist(list(tree_tbl_2, tree_tbl_new), use.names = TRUE)
}
}
} else { # the target is a tip
# add node first, push the numbers of nodes if they are after the inserted one
tree_tbl_2[parent > parent_orig, parent := parent + 1L]
tree_tbl_2[node > parent_orig, node := node + 1L]
# tree_tbl$parent[tree_tbl$parent > parent_orig] = tree_tbl$parent[tree_tbl$parent > parent_orig] + 1L
# tree_tbl$node[tree_tbl$node > parent_orig] = tree_tbl$node[tree_tbl$node > parent_orig] + 1L
# insert the new node
tree_tbl_new = data.table(parent = parent_orig, node = parent_orig + 1L,
branch.length = tree_tbl_node$branch.length * frac,
label = node_label,
is_tip = FALSE)
tree_tbl_2 = rbindlist(list(tree_tbl_2, tree_tbl_new), use.names = TRUE)
# update orginal node, which will have the inserted node as its parent
tree_tbl_2$parent[tree_tbl_2$label == where] = parent_orig + 1L
tree_tbl_2$branch.length[tree_tbl_2$label == where] =
tree_tbl_node$branch.length * (1 - frac)
# add tip
tree_tbl_2[, parent := parent + 1L] # all nodes will be added 1
tree_tbl_2[node > node_orig, node := node + 1L]
# tree_tbl$node = data.table::fifelse(tree_tbl$node > node_orig, tree_tbl$node + 1L, tree_tbl$node)
tree_tbl_new = data.table(parent = parent_orig + 2L, # one new node and one new tip
node = node_orig + 1L,
branch.length = tree_tbl_node$branch.length * (1 - frac),
label = tip_label,
is_tip = TRUE)
tree_tbl_2 = rbindlist(list(tree_tbl_2, tree_tbl_new), use.names = TRUE)
# # another way
# tree_tbl_above = tree_tbl[1:node_orig, ]
# tree_tbl_below = tree_tbl[-(1:node_orig), ]
# # add tip
# tree_tbl_above$parent = tree_tbl_above$parent + 1
# tree_tbl_below$parent = tree_tbl_below$parent + 1
# tree_tbl_below$node = tree_tbl_below$node + 1
# tree_tbl_new = tibble::tibble(parent = tree_tbl_above$parent[tree_tbl_above$label == where],
# node = node_orig + 1,
# branch.length = tree_tbl_above$branch.length[tree_tbl_above$label == where],
# label = tip_label)
# tree_tbl = dplyr::bind_rows(tree_tbl_above, tree_tbl_new, tree_tbl_below)
# # add node
# loc_in_df2 = which(tree_tbl$node == tree_tbl_new$parent)
# tree_tbl$parent = data.table::fifelse(tree_tbl$parent > tree_tbl_new$parent, tree_tbl$parent + 1, tree_tbl$parent)
# tree_tbl$parent[tree_tbl$label %fin% c(where, tip_label)] = tree_tbl_new$parent + 1
# tree_tbl$node = data.table::fifelse(tree_tbl$node > tree_tbl_new$parent, tree_tbl$node + 1, tree_tbl$node)
# tree_tbl = dplyr::bind_rows(tree_tbl,
# tibble::tibble(parent = tree_tbl_new$parent,
# node = tree_tbl_new$parent + 1,
# branch.length = tree_tbl_new$branch.length * frac,
# label = "")) %>%
# dplyr::arrange(node)
# tree_tbl$branch.length[tree_tbl$label %fin% c(where, tip_label)] = tree_tbl_new$branch.length * (1 - frac)
}
# tree_tbl = dplyr::arrange(tree_tbl, node)
# cat("tree_tbl:", class(tree_tbl))
# cat("tree_tbl:", class(tree_tbl))
# tree_tbl = tree_tbl[order(tree_tbl$node),]
# if(sequential) setkey(tree_tbl_2, node)
# tree_tbl_2 = tree_tbl_2[order(tree_tbl_2$node), ] # order node
# tree_tbl = tree_tbl[order(node),] # order node
tree_tbl_2 = tibble::as_tibble(tree_tbl_2)
if(!inherits(tree_tbl_2, "tbl_tree")) class(tree_tbl_2) = c("tbl_tree", class(tree_tbl_2))
if(return_tree) return(as_tree(tree_tbl_2))
tree_tbl_2
}
#' Bind a tip to a phylogeny (data frame version)
#'
#' Graft a tip to a phylogeny at location specified.
#'
#' @param tree A phylogeny, with class of "phylo".
#' @param where Location where to insert the tip. It can be either tip label or node label, but must be characters. If the location does not have a name, assign it first.
#' @param tip_label Name of the new tip inserted.
#' @param frac The fraction of branch length, must be between 0 and 1. This only applies when location is a tip or `new_node_above = TRUE`. The distance from the new inserted node to the location (a node or a tip) is the branch length of the location * (1 - frac).
#' @param new_node_above Whether to insert the new node above when the location is a node? Default is `FALSE`, which will attach the new tip to the location node.
#' @param node_label Name of the new node created. This only applies when location is a tip or `new_node_above = TRUE`.
#' @param return_tree Whether to return a phylogeny with class "phylo?" Default is `TRUE`. Otherwise, it will return a data frame.
#' @param tree_tbl A tibble version of the tree, optional.
#' @param node_heights A named numeric vector of node hieghts of the tree, generated by [ape::branching.times()]. It is also optional if `tree` is specified; but required if `tree_tbl` is specified.
#' @param use_castor Whether to use package `castor` to get the phylogeny at a node; it is faster than `tidytree::offspring` to figure out what are the tip offsprings at a node.
#' @return Either a phylogeny or a data frame, which can be then converted to a phylogeny later.
#' @examples
#' \dontrun{
#' library(rtrees)
#' bind_tip(tree_plant_otl, "N70407", tip_label = "test_sp")
#' tree_plant_otl_df = tidytree::as_tibble(tree_plant_otl)
#' node_heights = ape::branching.times(tree_plant_otl)
#' bind_tip(tree_tbl = tree_plant_otl_df, where = "N70407",
#' tip_label = "test_sp", node_heights = node_heights)
#' }
#'
bind_tip_df = function(tree = NULL, where, tip_label,
frac = 0.5, new_node_above = FALSE,
node_label = NULL, return_tree = TRUE,
tree_tbl = NULL, node_heights = NULL, use_castor = FALSE){
if(frac > 1 | frac < 0) stop("frac must be between 0 and 1.")
if(is.null(tree) & is.null(tree_tbl)) stop("No input tree, please assign one.")
if(!is.null(tree_tbl) & is.null(node_heights)) stop("Please specify node_heights.")
if(!is.null(tree)){
# make sure node labels are assigned and unique
if(is.null(tree$node.label) || any(duplicated(tree$node.label)))
tree$node.label = paste0("N", 1:ape::Nnode(tree))
tree = ape::makeLabel(tree, tips = FALSE, nodes = TRUE)
tree_tbl = tidytree::as_tibble(tree)
node_heights = ape::branching.times(tree)
}
if(is.null(node_label)) node_label = paste0("node", length(node_heights) + 1L)
tree_tbl$isTip = !(tree_tbl$node %fin% tree_tbl$parent)
if(!"is_tip" %in% names(tree_tbl)){
tree_tbl$is_tip = tree_tbl$isTip
}
tree_tbl_node = tree_tbl[tree_tbl$label == where, ] # original node
if(!tree_tbl_node$isTip){
if(use_castor){
where_offspring = tree_tbl[tree_tbl$label %fin%
castor::get_subtree_at_node(as_tree_isTip(tree_tbl), node = where)$subtree$tip.label, ]
max_offspring = max(where_offspring$node)
} else {
where_offspring = tidytree::offspring(tree_tbl, .node = where)
max_offspring = max(where_offspring$node[where_offspring$isTip])
}
node_height = node_heights[where]
}
node_orig = tree_tbl_node$node # original node number of target location
at_root = tree_tbl_node$parent == tree_tbl_node$node
tree_tbl_2 = tree_tbl # to hold results
if(!tree_tbl_node$isTip) { # the target is a node
if(at_root & new_node_above) message("New tip cannot be inserted above the root; attached instead.")
if(new_node_above & !at_root){ # insert above the target location node
# add node first, push the numbers of nodes if they are after the inserted one
tree_tbl_2$parent[tree_tbl_2$parent >= node_orig] = tree_tbl_2$parent[tree_tbl_2$parent >= node_orig] + 1L
tree_tbl_2$node[tree_tbl_2$node >= node_orig] = tree_tbl_2$node[tree_tbl_2$node >= node_orig] + 1L
# insert the new node
tree_tbl_2 = tibble::add_row(tree_tbl_2,
parent = tree_tbl_2$parent[tree_tbl_2$label == where],
node = node_orig,
branch.length = tree_tbl_node$branch.length * frac,
label = node_label,
is_tip = FALSE, isTip = FALSE)
# add tip
tree_tbl_2$parent = tree_tbl_2$parent + 1L # all nodes will be added 1
tree_tbl_2$node = data.table::fifelse(tree_tbl_2$node > max_offspring, tree_tbl_2$node + 1L, tree_tbl_2$node)
# update orginal node, which will have the inserted node as its parent
tree_tbl_2$parent[tree_tbl_2$label == where] =
tree_tbl_2$node[tree_tbl_2$label == node_label]
tree_tbl_2$branch.length[tree_tbl_2$label == where] =
tree_tbl_node$branch.length * (1 - frac)
tree_tbl_2 = tibble::add_row(tree_tbl_2,
parent = tree_tbl_2$node[tree_tbl_2$label == node_label],
node = max_offspring + 1L,
branch.length = tree_tbl_2$branch.length[tree_tbl_2$label == where] +
node_height,
label = tip_label,
is_tip = TRUE, isTip = TRUE)
} else { # attach to a target node; no need to create a new one
if(at_root){ # root
tree_tbl_new = tibble::tibble(parent = node_orig + 1L, node = 1L,
branch.length = node_height, label = tip_label,
is_tip = TRUE, isTip = TRUE)
tree_tbl$parent = tree_tbl$parent + 1L
tree_tbl$node = tree_tbl$node + 1L
tree_tbl_2 = dplyr::bind_rows(tree_tbl_new, tree_tbl)
} else { # an internal node
# tree_tbl_2 = tree_tbl
tree_tbl_2$parent = tree_tbl_2$parent + 1L # all nodes will be added 1
tree_tbl_2$node = data.table::fifelse(tree_tbl_2$node > max_offspring, tree_tbl_2$node + 1L, tree_tbl_2$node)
tree_tbl_2 = tibble::add_row(tree_tbl_2,
parent = tree_tbl_2$node[tree_tbl_2$label == where],
node = max_offspring + 1L,
branch.length = node_height,
label = tip_label,
is_tip = TRUE, isTip = TRUE)
}
}
} else { # the target is a tip
parent_orig = tree_tbl_node$parent
# add node first, push the numbers of nodes if they are after the inserted one
tree_tbl_2$parent[tree_tbl_2$parent > parent_orig] = tree_tbl_2$parent[tree_tbl_2$parent > parent_orig] + 1L
tree_tbl_2$node[tree_tbl_2$node > parent_orig] = tree_tbl_2$node[tree_tbl_2$node > parent_orig] + 1L
# insert the new node
tree_tbl_2 = tibble::add_row(tree_tbl_2,
parent = parent_orig, node = parent_orig + 1L,
branch.length = tree_tbl_node$branch.length * frac,
label = node_label,
is_tip = FALSE, isTip = FALSE)
# add tip
tree_tbl_2$parent = tree_tbl_2$parent + 1L # all nodes will be added 1
tree_tbl_2$node = data.table::fifelse(tree_tbl_2$node > node_orig, tree_tbl_2$node + 1L, tree_tbl_2$node)
tree_tbl_2 = tibble::add_row(tree_tbl_2,
parent = tree_tbl_2$node[tree_tbl_2$label == node_label],
node = node_orig + 1L,
branch.length = tree_tbl_node$branch.length * (1 - frac),
label = tip_label,
is_tip = TRUE, isTip = TRUE)
# update orginal node, which will have the inserted node as its parent
tree_tbl_2$parent[tree_tbl_2$label == where] =
tree_tbl_2$node[tree_tbl_2$label == node_label]
tree_tbl_2$branch.length[tree_tbl_2$label == where] =
tree_tbl_2$branch.length[tree_tbl_2$label == tip_label]
# # another way
# tree_tbl_above = tree_tbl[1:node_orig, ]
# tree_tbl_below = tree_tbl[-(1:node_orig), ]
# # add tip
# tree_tbl_above$parent = tree_tbl_above$parent + 1
# tree_tbl_below$parent = tree_tbl_below$parent + 1
# tree_tbl_below$node = tree_tbl_below$node + 1
# tree_tbl_new = tibble::tibble(parent = tree_tbl_above$parent[tree_tbl_above$label == where],
# node = node_orig + 1,
# branch.length = tree_tbl_above$branch.length[tree_tbl_above$label == where],
# label = tip_label)
# tree_tbl_2 = dplyr::bind_rows(tree_tbl_above, tree_tbl_new, tree_tbl_below)
# # add node
# loc_in_df2 = which(tree_tbl_2$node == tree_tbl_new$parent)
# tree_tbl_2$parent = data.table::fifelse(tree_tbl_2$parent > tree_tbl_new$parent, tree_tbl_2$parent + 1, tree_tbl_2$parent)
# tree_tbl_2$parent[tree_tbl_2$label %fin% c(where, tip_label)] = tree_tbl_new$parent + 1
# tree_tbl_2$node = data.table::fifelse(tree_tbl_2$node > tree_tbl_new$parent, tree_tbl_2$node + 1, tree_tbl_2$node)
# tree_tbl_2 = dplyr::bind_rows(tree_tbl_2,
# tibble::tibble(parent = tree_tbl_new$parent,
# node = tree_tbl_new$parent + 1,
# branch.length = tree_tbl_new$branch.length * frac,
# label = "")) %>%
# dplyr::arrange(node)
# tree_tbl_2$branch.length[tree_tbl_2$label %fin% c(where, tip_label)] = tree_tbl_new$branch.length * (1 - frac)
}
# tree_tbl_2 = dplyr::arrange(tree_tbl_2, node)
tree_tbl_2 = tree_tbl_2[order(tree_tbl_2$node), ]
if(!inherits(tree_tbl_2, "tbl_tree")) class(tree_tbl_2) = c("tbl_tree", class(tree_tbl_2))
if(return_tree) return(tidytree::as.phylo(tree_tbl_2))
tree_tbl_2$isTip = NULL
tree_tbl_2
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.