R/a.R

Defines functions rmNdmtrx addNdmtrx updateSlts pstMnp .cTreeObjs .cMenToAny .cMenToMan .cMenToMen rmOtherSlt setNdsOther setNdOther setNdsID setNdID setNdsSpn setNdSpn setAge setPD setTxnyms .findClade .safeFromJSON taxaResolve searchTxnyms loadTreeMan saveTreeMan .readTrmn readTrmn writeTrmn .readTree readTree .writeTree writeTree .newNd .getTreeAgeFrmMtrx .getNdsPDFrmMtrx .getNdsPridsFrmMtrx .getNdsPtidsFrmMtrx .getNdsKidsFrmMtrx .getNdsPrdstsFrmMtrx .getNdmtrxFrmLst .getTinds .getPrinds .getSltSpns .getTreeAgeFrmLst .getNdPDFrmLst .getNdKidsFrmLst .getNdPtidsFrmLst .getNdPrdstsFrmLst .getNdPridsFrmLst .getNdSstrFrmLst .getNdsPrdstsFrmLst .getNdsKidsFrmLst .getNdsPDFrmLst .getNdsPridsFrmLst .getNdsPtidsFrmLst .preCtest pinTips rmClade addClade addTip rmTips rmNodes ultrTree getAge getSubtree getNdsFrmTxnyms getCnnctdNds getUnqNds getOtgrp getPath getPrnt getLvng getDcsd .livingOrDesceased isUltrmtrc getBiprts getNdsPtids getNdsPrids getSpnsAge getNdsAge getNdsKids getNdsSlt getNdsPrdst getNdsPD getNdsSstr getNdsLng getNdSstr getNdPD getNdSlt getNdPrdst getNdKids getNdPtids getNdPrids getSpnAge getNdAge getNdLng twoer .cnstrctTree unblncdTree blncdTree randTree checkTreeMen checkNdlst fastCheckTreeMan calcDstMtrx calcPrtFrPrp calcFrPrp calcPhyDv calcDstRF calcDstBLD calcOvrlp calcDstTrp calcNdsBlnc calcNdBlnc

Documented in addClade addNdmtrx addTip blncdTree calcDstBLD calcDstMtrx calcDstRF calcDstTrp calcFrPrp calcNdBlnc calcNdsBlnc calcOvrlp calcPhyDv calcPrtFrPrp checkNdlst checkTreeMen fastCheckTreeMan getAge getBiprts getCnnctdNds getDcsd getLvng getNdAge getNdKids getNdLng getNdPD getNdPrdst getNdPrids getNdPtids getNdsAge getNdsFrmTxnyms getNdsKids getNdsLng getNdSlt getNdsPD getNdsPrdst getNdsPrids getNdsPtids getNdsSlt getNdsSstr getNdSstr getOtgrp getPath getPrnt getSpnAge getSpnsAge getSubtree getUnqNds isUltrmtrc loadTreeMan pinTips pstMnp randTree readTree readTrmn rmClade rmNdmtrx rmNodes rmOtherSlt rmTips saveTreeMan searchTxnyms setAge setNdID setNdOther setNdsID setNdsOther setNdSpn setNdsSpn setPD setTxnyms taxaResolve twoer ultrTree unblncdTree updateSlts writeTree writeTrmn

# treeman package -------------------------------
#' @name calcNdBlnc
#' @title Calculate the balance of a node
#' @description Returns the balance of a node.
#' @details Balance is calculated as the absolute difference between the number of descendents
#' of the two bifurcating edges of a node and the expected value for a balanced tree.
#' \code{NA} is returned if the node is polytomous or a tip.
#' @param tree \code{TreeMan} object
#' @param id node id
#' @seealso
#' \code{\link{calcNdsBlnc}},
#' \url{https://github.com/DomBennett/treeman/wiki/calc-methods}
#' @export
#' @examples
#'
#' tree <- randTree(10)
#' calcNdBlnc(tree, id = tree["root"]) # root balance
calcNdBlnc <- function(tree, id) {
  ntot <- length(getNdKids(tree, id))
  ptids <- tree@ndlst[[id]][["ptid"]]
  if (length(ptids) > 2) {
    return(NA)
  }
  ptid <- ptids[1]
  nprt <- length(getNdKids(tree, ptid))
  if (nprt == 0) {
    nprt <- 1
  }
  abs((ntot / 2) - nprt)
}

#' @name calcNdsBlnc
#' @title Calculate the balances of all nodes
#' @description Returns the absolute differences in number of descendants for bifurcating
#' branches of every node
#' @details Runs \code{calcNdBlnc()} across all node IDs. \code{NA} is returned if the
#' node is polytomous. Parallelizable.
#' @param tree \code{TreeMan} object
#' @param ids node ids
#' @param parallel logical, make parallel?
#' @param progress name of the progress bar to use, see \code{\link{create_progress_bar}}
#' @seealso
#' \code{\link{calcNdBlnc}},
#' \url{https://github.com/DomBennett/treeman/wiki/calc-methods}
#' @export
#' @examples
#'
#' tree <- randTree(10)
#' calcNdsBlnc(tree, ids = tree["nds"])
calcNdsBlnc <- function(tree, ids, parallel = FALSE, progress = "none") {
  l_data <- data.frame(id = ids, stringsAsFactors = FALSE)
  plyr::mdply(
    .data = l_data, .fun = calcNdBlnc, tree = tree,
    .parallel = parallel, .progress = progress
  )[, 2]
}

#' @name calcDstTrp
#' @title Calculate the triplet distance between two trees
#' @description Returns the triplet distance between two trees.
#' @details The triplet distance is calculated as the sum of different outgroups among
#' every triplet of tips between the two trees. Normalisation is performed by dividing
#' the resulting number by the total number of triplets shared between the two trees.
#' The triplet distance is calculated only for shared tips between the two trees. Parallelizable.
#' @param tree_1 \code{TreeMan} object
#' @param tree_2 \code{TreeMan} object
#' @param nrmlsd Boolean, should returned value be between 0 and 1? Default, FALSE.
#' @param parallel logical, make parallel?
#' @param progress name of the progress bar to use, see \code{\link{create_progress_bar}}
#' @references
#' Critchlow DE, Pearl DK, Qian C. (1996) The Triples Distance for rooted bifurcating phylogenetic trees.
#' Systematic Biology, 45, 323-34.
#' @seealso
#' \code{\link{calcDstBLD}}, \code{\link{calcDstRF}}
#' \url{https://github.com/DomBennett/treeman/wiki/calc-methods}
#' @export
#' @examples
#'
#' tree_1 <- randTree(10)
#' tree_2 <- randTree(10)
#' calcDstTrp(tree_1, tree_2)
calcDstTrp <- function(tree_1, tree_2, nrmlsd = FALSE,
                       parallel = FALSE, progress = "none") {
  .count <- function(i) {
    o1 <- getOtgrp(tree_1, cmbs[, i])
    o2 <- getOtgrp(tree_2, cmbs[, i])
    if (length(o1) != length(o2) || o1 != o2) {
      cntr <- 1
    } else {
      cntr <- 0
    }
    cntr
  }
  shrd <- tree_1@tips[tree_1@tips %in% tree_2@tips]
  cmbs <- combn(shrd, 3)
  l_data <- data.frame(i = 1:ncol(cmbs), stringsAsFactors = FALSE)
  res <- plyr::mdply(.data = l_data, .count, .parallel = parallel, .progress = progress)
  cntr <- sum(res[, 2])
  if (nrmlsd) {
    cntr <- cntr / ncol(cmbs)
  }
  cntr
}

#' @name calcOvrlp
#' @title Calculate phylogenetic overlap
#' @description Returns the sum of branch lengths represented by ids_1 and ids_2 for a tree.
#' @details Use this to calculate the sum of branch lengths that are represented between two
#' communities. This measure is also known as the unique fraction. It can be used to measure
#' concepts of phylogenetic turnover. Parallelizable.
#' @param tree \code{TreeMan} object
#' @param ids_1 tip ids of community 1
#' @param ids_2 tip ids of community 2
#' @param nrmlsd Boolean, should returned value be between 0 and 1? Default, FALSE.
#' @param parallel logical, make parallel?
#' @param progress name of the progress bar to use, see \code{\link{create_progress_bar}}
#' @references
#' Lozupone, C., & Knight, R. (2005). UniFrac: a new phylogenetic method for comparing
#' microbial communities. Applied and Environmental Microbiology, 71(12), 8228-35.
#' @seealso
#' \code{\link{calcPhyDv}}
#' \url{https://github.com/DomBennett/treeman/wiki/calc-methods}
#' @export
#' @examples
#'
#' tree <- randTree(10)
#' ids_1 <- sample(tree["tips"], 5)
#' ids_2 <- sample(tree["tips"], 5)
#' calcOvrlp(tree, ids_1, ids_2)
calcOvrlp <- function(tree, ids_1, ids_2, nrmlsd = FALSE,
                      parallel = FALSE, progress = "none") {
  if (progress != "none") {
    cat("Part 1/3 ....\n")
  }
  spns <- getNdsSlt(tree,
    slt_nm = "spn", ids = tree@all,
    parallel = parallel, progress = progress
  )
  names(spns) <- tree@all
  if (progress != "none") {
    cat("Part 2/3 ....\n")
  }
  ids_1 <- c(unique(unlist(getNdsPrids(tree,
    ids = ids_1,
    parallel = parallel,
    progress = progress
  ))), ids_1)
  if (progress != "none") {
    cat("Part 3/3 ....\n")
  }
  ids_2 <- c(unique(unlist(getNdsPrids(tree,
    ids = ids_2,
    parallel = parallel,
    progress = progress
  ))), ids_2)
  ovrlp <- sum(spns[ids_2[ids_2 %in% ids_1]])
  if (nrmlsd) {
    ovrlp <- ovrlp / tree@pd
  }
  ovrlp
}

#' @name calcDstBLD
#' @title Calculate the BLD between two trees
#' @description Returns the branch length distance between two trees.
#' @details BLD is the Robinson-Foulds distance weighted by branch length. Instead of summing
#' the differences in partitions between the two trees, the metric takes the square root
#' of the squared difference in branch lengths. Parallelizable.
#' @param tree_1 \code{TreeMan} object
#' @param tree_2 \code{TreeMan} object
#' @param nrmlsd Boolean, should returned value be between 0 and 1? Default, FALSE.
#' @param parallel logical, make parallel?
#' @param progress name of the progress bar to use, see \code{\link{create_progress_bar}}
#' @references
#' Kuhner, M. K. and Felsenstein, J. (1994) Simulation comparison of phylogeny
#' algorithms under equal and unequal evolutionary rates. Molecular Biology and
#' Evolution, 11, 459-468.
#' @seealso
#' \code{\link{calcDstTrp}}, \code{\link{calcDstRF}}
#' \url{https://github.com/DomBennett/treeman/wiki/calc-methods}
#' @export
#' @examples
#'
#' tree_1 <- randTree(10)
#' tree_2 <- randTree(10)
#' calcDstBLD(tree_1, tree_2)
calcDstBLD <- function(tree_1, tree_2, nrmlsd = FALSE,
                       parallel = FALSE, progress = "none") {
  n1 <- tree_1@nds[!tree_1@nds == tree_1@root]
  n2 <- tree_2@nds[!tree_2@nds == tree_2@root]
  if (progress != "none") {
    cat("Part 1/2 ....\n")
  }
  c1 <- getNdsKids(tree_1, n1, parallel = parallel, progress = progress)
  if (progress != "none") {
    cat("Part 2/2 ....\n")
  }
  c2 <- getNdsKids(tree_2, n2, parallel = parallel, progress = progress)
  s1 <- getNdsSlt(tree_1, slt_nm = "spn", ids = n1)
  s2 <- getNdsSlt(tree_2, slt_nm = "spn", ids = n2)
  d1 <- s2[match(c1, c2)]
  d1[which(is.na(d1))] <- 0
  d1 <- s1 - d1
  d2 <- s1[match(c2, c1)]
  d2[which(is.na(d2))] <- 0
  d2 <- s2 - d2
  d <- sqrt(sum(c(d1^2, d2^2)))
  if (nrmlsd) {
    max_d <- sqrt(sum(c(s1^2, s2^2)))
    d <- d / max_d
  }
  d
}

#' @name calcDstRF
#' @title Calculate the Robinson-Foulds distance between two trees
#' @description Returns the Robinson-Foulds distance between two trees.
#' @details RF distance is calculated as the sum of partitions in one tree that are
#' not shared by the other. The maximum number of split differences is the total number
#' of nodes in both trees (excluding the roots). Trees are assumed to be bifurcating,
#' this is not tested. The metric is calculated as if trees are unrooted. Parallelizable.
#' @param tree_1 \code{TreeMan} object
#' @param tree_2 \code{TreeMan} object
#' @param nrmlsd Boolean, should returned value be between 0 and 1? Default, FALSE.
#' @references
#' Robinson, D. R.; Foulds, L. R. (1981). "Comparison of phylogenetic trees".
#' Mathematical Biosciences 53: 131-147.
#' @seealso
#' \code{\link{calcDstBLD}}, \code{\link{calcDstTrp}}
#' \url{https://github.com/DomBennett/treeman/wiki/calc-methods}
#' @export
#' @examples
#'
#' tree_1 <- randTree(10)
#' tree_2 <- randTree(10)
#' calcDstRF(tree_1, tree_2)
calcDstRF <- function(tree_1, tree_2, nrmlsd = FALSE) {
  shared_tips <- c(tree_1@tips, tree_2@tips)
  shared_tips <- shared_tips[duplicated(shared_tips)]
  b1 <- getBiprts(
    tree = tree_1, tips = shared_tips, universal = TRUE,
    root = FALSE
  )
  b2 <- getBiprts(
    tree = tree_2, tips = shared_tips, universal = TRUE,
    root = FALSE
  )
  # count unique paritions and sum to calc RFD
  d <- sum(!b1 %in% b2) + sum(!b2 %in% b1)
  if (nrmlsd) {
    max_d <- (length(b1) + length(b2))
    d <- d / max_d
  }
  d
}

#' @name calcPhyDv
#' @title Calculate phylogenetic diversity
#' @description Returns the phylogenetic diversity of a tree for the tips specified.
#' @details Faith's phylogenetic diversity is calculated as the sum of all connected
#' branches for specified tips in a tree. It can be used to investigate how biodviersity
#' as measured by the phylogeny changes. Parallelizable.
#' The function uses \code{getCnntdNds()}.
#' @param tree \code{TreeMan} object
#' @param tids tip ids
#' @param parallel logical, make parallel?
#' @param progress name of the progress bar to use, see \code{\link{create_progress_bar}}
#' @references
#' Faith, D. (1992). Conservation evaluation and phylogenetic diversity.
#'  Biological Conservation, 61, 1-10.
#' @seealso
#' \code{\link{calcFrPrp}}, \code{\link{calcOvrlp}}, \code{\link{getCnnctdNds}},
#' \url{https://github.com/DomBennett/treeman/wiki/calc-methods}
#' @export
#' @examples
#'
#' tree <- randTree(10)
#' calcPhyDv(tree, tree["tips"])
calcPhyDv <- function(tree, tids,
                      parallel = FALSE, progress = "none") {
  prids <- getCnnctdNds(tree, tids)
  spns <- getNdsSlt(tree,
    slt_nm = "spn", ids = prids,
    parallel = parallel, progress = progress
  )
  sum(spns)
}

#' @name calcFrPrp
#' @title Calculate evolutionary distinctness
#' @description Returns the evolutationary distinctness of ids using the fair proportion metric.
#' @details The fair proportion metric calculates the evolutionary distinctness of tips
#' in a tree through summing the total amount of branch length each tip represents, where
#' each branch in the tree is evenly divided between all descendants. Parallelizable.
#' @param tree \code{TreeMan} object
#' @param tids tip IDs
#' @param progress name of the progress bar to use, see \code{\link{create_progress_bar}}
#' @references
#' Isaac, N.J.B., Turvey, S.T., Collen, B., Waterman, C. and Baillie, J.E.M. (2007).
#'  Mammals on the EDGE: conservation priorities based on threat and phylogeny. PLoS ONE, 2, e296.
#' @seealso
#' \code{\link{calcPhyDv}}, \code{\link{calcPrtFrPrp}},
#' \url{https://github.com/DomBennett/treeman/wiki/calc-methods}
#' @export
#' @examples
#'
#' tree <- randTree(10)
#' calcFrPrp(tree, tree["tips"])
calcFrPrp <- function(tree, tids, progress = "none") {
  .calc <- function(i) {
    id <- tree@all[i]
    spn <- getNdSlt(tree, "spn", id)
    if (id %in% tree@tips) {
      spn_shres[i, id] <<- spn
    } else {
      kids <- getNdKids(tree, id)
      spn_shre <- spn / length(kids)
      spn_shres[i, kids] <<- spn_shre
    }
  }
  spn_shres <- matrix(0, ncol = tree@ntips, nrow = tree@nall)
  colnames(spn_shres) <- tree@tips
  plyr::m_ply(
    .data = data.frame(i = 1:tree@nall), .fun = .calc,
    .progress = progress
  )
  colSums(spn_shres[, tids])
}

#' @name calcPrtFrPrp
#' @title Calculate evolutionary distinctness for part of tree
#' @description Returns the evolutationary distinctness of ids using the fair proportion metric.
#' @details Extension of \code{calcFrPrp()} but with ignore argument.
#' Use \code{ignr} to ignore certain tips from calculation. For example, if any of tips
#' are extinct you may wish to ignore these.
#' @param tree \code{TreeMan} object
#' @param tids tip IDs
#' @param ignr tips to ignore in calculation
#' @param progress name of the progress bar to use, see \code{\link{create_progress_bar}}
#' @references
#' Isaac, N.J.B., Turvey, S.T., Collen, B., Waterman, C. and Baillie, J.E.M. (2007).
#'  Mammals on the EDGE: conservation priorities based on threat and phylogeny. PLoS ONE, 2, e296.
#' @seealso
#' \code{\link{calcFrPrp}}
#' \url{https://github.com/DomBennett/treeman/wiki/calc-methods}
#' @export
#' @examples
#'
#' tree <- randTree(10)
#' calcPrtFrPrp(tree, c("t1", "t3"), ignr = "t2")
calcPrtFrPrp <- function(tree, tids, ignr = NULL, progress = "none") {
  .calc <- function(i) {
    id <- allnds[i]
    spn <- getNdSlt(tree, "spn", id)
    if (id %in% tips) {
      spn_shres[i, id] <<- spn
    } else {
      kids <- getNdKids(tree, id)
      kids <- kids[!kids %in% ignr]
      if (length(kids) > 0) {
        spn_shre <- spn / length(kids)
        spn_shres[i, kids] <<- spn_shre
      }
    }
  }
  tips <- tree@tips[!tree@tips %in% ignr]
  allnds <- tree@all[!tree@all %in% ignr]
  spn_shres <- matrix(0, ncol = length(tips), nrow = length(allnds))
  colnames(spn_shres) <- tips
  plyr::m_ply(
    .data = data.frame(i = 1:length(allnds)), .fun = .calc,
    .progress = progress
  )
  colSums(spn_shres[, tids])
}

#' @name calcDstMtrx
#' @title Calculate the distance matrix
#' @description Returns a distance matrix for specified ids of a tree.
#' @details The distance between every id in the tree is calculated by summing the
#' lengths of the branches that connect them. This can be useful for testing the distances
#' between trees, checking for evoltuionary isolated tips etc. Parallelizable.
#' @param tree \code{TreeMan} object
#' @param ids IDs of nodes/tips
#' @param parallel logical, make parallel?
#' @param progress name of the progress bar to use, see \code{\link{create_progress_bar}}
#' @seealso
#' \code{\link{calcDstBLD}}, \code{\link{calcDstRF}}, \code{\link{calcDstTrp}}
#' \url{https://github.com/DomBennett/treeman/wiki/calc-methods}
#' @export
#' @examples
#' # checking the distance between two trees
#'
#' tree_1 <- randTree(10)
#' tree_2 <- randTree(10)
#' dmat1 <- calcDstMtrx(tree_1, tree_1["tips"])
#' dmat2 <- calcDstMtrx(tree_2, tree_2["tips"])
#' mdl <- cor.test(x = dmat1, y = dmat2)
#' as.numeric(1 - mdl$estimate) # 1 - Pearson's r
calcDstMtrx <- function(tree, ids, parallel = FALSE,
                        progress = "none") {
  .getDist <- function(id_1, id_2) {
    if (id_1 == id_2) {
      return(0)
    }
    path <- getPath(tree, from = id_1, to = id_2)
    path_spns <- unlist(lapply(tree@ndlst[path], function(n) n[["spn"]]))
    sum(path_spns)
  }
  cmbs <- expand.grid(ids, ids, stringsAsFactors = FALSE)
  colnames(cmbs) <- c("id_1", "id_2")
  res <- plyr::mdply(
    .data = cmbs, .fun = .getDist,
    .parallel = parallel, .progress = progress
  )
  res <- matrix(res[, 3], ncol = length(ids))
  colnames(res) <- rownames(res) <- ids
  res
}

#' @name fastCheckTreeMan
#' @title Check if tree is correct, fast!
#' @description Return T/F if tree is a true \code{TreeMan} object
#' @details Whenever a tree is first initiated this check is used.
#' For more detailed checking use \code{checkNdlst}.
#' @param object \code{TreeMan} object
#' @seealso
#' \code{\link{checkNdlst}}, \code{\link{checkTreeMen}}
#' @export
fastCheckTreeMan <- function(object) {
  kwn_ids <- names(object@ndlst)
  ids <- unlist(sapply(object@ndlst, function(x) x[c("id", "ptid", "prid")]))
  test <- all(ids %in% kwn_ids) & object@root %in% kwn_ids
  # check hierarchy through prinds
  prinds <- object@prinds
  if (length(prinds) > 0) {
    # only root node should equal its index
    prind_test <- sum(prinds == 1:length(prinds)) == 1
    # all internal nodes should occur more than once (twice for bifurcating trees)
    prind_test <- all(table(prinds) > 1) & prind_test
    test <- test & prind_test
  }
  test
}

#' @name checkNdlst
#' @title Check if ndlst is correct
#' @description Return T/F fpr \code{ndlst} consistency
#' @details Tests whether each node in tree points to valid other node IDs. Also
#' ensures `spn` and `root` are correct. Reports nodes that have errors.
#' @param ndlst \code{ndlst}
#' @param root root ID
#' @seealso
#' \code{\link{fastCheckTreeMan}}, \code{\link{checkTreeMen}}
#' @export
#' @examples
#'
#' tree <- randTree(100)
#' (checkNdlst(tree@ndlst, tree@root))
checkNdlst <- function(ndlst, root) {
  .check <- function(nd) {
    # must have id
    test_id <- is.character(nd[["id"]]) & "id" %in% names(nd)
    # id must contain no special characters
    test_spcl_chrs <- test_id && !grepl("[^0-9a-zA-Z_]", nd[["id"]])
    # txnyms
    test_txnym <- TRUE
    if ("txnym" %in% names(nd)) {
      test_txnym <- is.character(nd[["txnym"]])
      for (txnym in nd[["txnym"]]) {
        test_txnym <- test_txnym && !grepl("[^0-9a-zA-Z_]", txnym)
      }
    }
    # must have either prid/ptid or both
    test_slts <- ("ptid" %in% names(nd) | "prid" %in% names(nd))
    test_valid_nd <- nd[["id"]] %in% nds # nd id must be known
    # prid and ptids must be known
    test_prid <- is.character(nd[["prid"]]) & nd[["prid"]] %in% nds
    test_ptid <- is.character(nd[["ptid"]]) > 0 & all(nd[["ptid"]] %in% nds)
    # spns must be 0 or more
    test_spn <- is.numeric(nd[["spn"]]) && nd[["spn"]] >= 0
    # test self-reference
    test_sr <- !nd[["prid"]] %in% nd[["ptid"]]
    # test root is never a ptid, proxy for circularity
    test_circ <- !rid %in% nd[["ptid"]]
    # only root is self-referential
    test_root <- rid != nd[["id"]] |
      (rid == nd[["id"]] & rid == nd[["prid"]])
    bool <- test_id & test_valid_nd & test_prid &
      test_ptid & test_sr & test_circ & test_slts &
      test_spcl_chrs & test_txnym
    if (length(bool) > 0 && bool) {
      return(TRUE)
    }
    FALSE
  }
  nds <- names(ndlst)
  if (any(duplicated(nds))) {
    dups <- nds[duplicated(nds)]
    dups <- unique(dups)
    msg <- "These node IDs are duplicated:\n"
    if (length(dups) > 1) {
      for (i in 1:length(dups) - 1) {
        msg <- paste0(msg, dups[i], ", ")
      }
    }
    msg <- paste0(msg, dups[length(dups)], "\n")
    cat(msg)
    return(FALSE)
  }
  rid <- root
  nd_checks <- vapply(ndlst, .check, logical(1))
  if (!all(nd_checks)) {
    msg <- "These nodes are invalid:\n"
    bad <- which(!nd_checks)
    if (length(bad) > 1) {
      for (i in bad[-length(bad)]) {
        msg <- paste0(msg, nds[i], ", ")
      }
    }
    msg <- paste0(msg, nds[bad[length(bad)]], "\n")
    cat(msg, "\n")
    return(FALSE)
  }
  if (!rid %in% nds) {
    msg <- paste0("Root node `", rid, "` not in ndlst\n")
    cat(msg, "\n")
    return(FALSE)
  }
  TRUE
}

#' @name checkTreeMen
#' @title Check if trees are correct
#' @description Return T/F if trees is a true \code{TreeMen} object
#' @details Tests whether all trees in object are \code{TreeMan} objects
#' @param object \code{TreeMen} object
#' @seealso
#' \code{\link{checkNdlst}}
#' @export
checkTreeMen <- function(object) {
  .check <- function(i) {
    if (class(object@treelst[[i]])[1] != "TreeMan") {
      invlds <<- c(i, invlds)
    }
    NULL
  }
  invlds <- NULL
  mapply(.check, i = 1:length(object@treelst))
  if (length(invlds) > 0) {
    for (i in invlds) {
      cat("[", i, "] in treelst is not a TreeMan object\n", sep = "")
    }
    return(FALSE)
  }
  TRUE
}

#' phylo class
#'
#' @name phylo-class
#' @aliases phylo
#'
#' @exportClass phylo
setOldClass("phylo")

#' multiPhylo class
#'
#' @name multiPhylo-class
#' @aliases multiPhylo
#'
#' @exportClass multiPhylo
setOldClass("multiPhylo")



#' @name randTree
#' @title Generate a random tree
#' @description Returns a random \code{TreeMan} tree with \code{n}
#' tips.
#' @details Equivalent to \code{ape}'s \code{rtree()} but returns a
#' \code{TreeMan} tree. Tree is always rooted and bifurcating.
#' @param n number of tips, integer, must be 3 or greater
#' @param wndmtrx T/F add node matrix? Default FALSE.
#' @param parallel T/F run in parallel? Default FALSE.
#' @seealso
#' \code{\link{TreeMan-class}}, \code{\link{blncdTree}},
#' \code{\link{unblncdTree}}
#' @export
#' @examples
#'
#' tree <- randTree(5)
randTree <- function(n, wndmtrx = FALSE, parallel = FALSE) {
  # Return a random tree based on a broken-stick model
  .randomPrinds <- function(n) {
    pool <- rep((1:(n - 1)), each = 2)
    res <- rep(NA, length(pool) + 1)
    res[1] <- 1
    for (i in 2:length(res)) {
      pssbls <- which(i > pool)
      if (length(pssbls) == 1) {
        i_pool <- pssbls
      } else {
        i_pool <- sample(pssbls, 1)
      }
      res[i] <- pool[i_pool]
      pool[i_pool] <- NA
    }
    res
  }
  if (n < 3) {
    stop("`n` is too small")
  }
  prinds <- .randomPrinds(n)
  .cnstrctTree(n, prinds,
    wndmtrx = wndmtrx,
    parallel = parallel
  )
}

#' @name blncdTree
#' @title Generate a balanced tree
#' @description Returns a balanced \code{TreeMan} tree with \code{n}
#' tips.
#' @details Equivalent to \code{ape}'s \code{stree(type='balanced')} but returns a
#' \code{TreeMan} tree. Tree is always rooted and bifurcating.
#' @param n number of tips, integer, must be 3 or greater
#' @param wndmtrx T/F add node matrix? Default FALSE.
#' @param parallel T/F run in parallel? Default FALSE.
#' @seealso
#' \code{\link{TreeMan-class}}, \code{\link{randTree}},
#' \code{\link{unblncdTree}}
#' @export
#' @examples
#'
#' tree <- blncdTree(5)
blncdTree <- function(n, wndmtrx = FALSE, parallel = FALSE) {
  if (n < 3) {
    stop("`n` is too small")
  }
  prinds <- c(1, rep((1:(n - 1)), each = 2))
  .cnstrctTree(n, prinds,
    wndmtrx = wndmtrx,
    parallel = parallel
  )
}

#' @name unblncdTree
#' @title Generate an unbalanced tree
#' @description Returns an unbalanced \code{TreeMan} tree with \code{n}
#' tips.
#' @details Equivalent to \code{ape}'s \code{stree(type='left')} but returns a
#' \code{TreeMan} tree. Tree is always rooted and bifurcating.
#' @param n number of tips, integer, must be 3 or greater
#' @param wndmtrx T/F add node matrix? Default FALSE.
#' @param parallel T/F run in parallel? Default FALSE.
#' @seealso
#' \code{\link{TreeMan-class}}, \code{\link{randTree}},
#' \code{\link{blncdTree}}
#' @export
#' @examples
#'
#' tree <- unblncdTree(5)
unblncdTree <- function(n, wndmtrx = FALSE, parallel = FALSE) {
  if (n < 3) {
    stop("`n` is too small")
  }
  prinds <- c(1, 1:(n - 1), 1:(n - 1))
  .cnstrctTree(n, prinds,
    wndmtrx = wndmtrx,
    parallel = parallel
  )
}

.cnstrctTree <- function(n, prinds, wndmtrx, parallel) {
  .add <- function(i) {
    nd <- vector("list", length = 4)
    names(nd) <- c("id", "ptid", "prid", "spn")
    nd[["id"]] <- ids[i]
    nd[["spn"]] <- spns[i]
    nd[["prid"]] <- ids[prinds[i]]
    nd[["ptid"]] <- ptids[ptnds_pool == i]
    nd
  }
  nnds <- length(prinds)
  # random numbers for spans
  spns <- c(0, runif(nnds - 1, 0, 1))
  ids <- rep(NA, nnds)
  tinds <- which(!1:nnds %in% prinds)
  ids[tinds] <- paste0("t", 1:n)
  ids[1:nnds %in% prinds] <- paste0("n", 1:(n - 1))
  ptnds_pool <- prinds[-1]
  ptids <- ids[-1]
  ndlst <- plyr::mlply(.data = 1:nnds, .fun = .add, .parallel = parallel)
  attr(ndlst, "split_labels") <-
    attr(ndlst, "split_type") <- NULL
  names(ndlst) <- ids
  tree <- new("TreeMan",
    ndlst = ndlst, root = "n1", wtxnyms = FALSE,
    ndmtrx = NULL, prinds = prinds, tinds = tinds
  )
  tree <- updateSlts(tree)
  if (wndmtrx) {
    tree <- addNdmtrx(tree)
  }
  tree
}

#' @name twoer
#' @title Generate a tree of two tips
#' @description Returns a \code{TreeMan} tree with two tips and a root.
#' @details Useful for building larger trees with \code{addClade()}.
#' Note, a node matrix cannot be added to a tree of two tips.
#' @param tids tip IDs
#' @param spns tip spans
#' @param rid root ID
#' @param root_spn root span
#' @seealso
#' \code{\link{TreeMan-class}}, \code{\link{randTree}}
#' @export
#' @examples
#'
#' tree <- twoer()
twoer <- function(tids = c("t1", "t2"), spns = c(1, 1),
                  rid = "root", root_spn = 0) {
  ndlst <- list()
  ndlst[[rid]] <- list(
    "id" = rid, "prid" = rid,
    "ptid" = tids[1:2], "spn" = root_spn
  )
  ndlst[[tids[1]]] <- list(
    "id" = tids[[1]], "prid" = rid,
    "ptid" = NULL, "spn" = spns[1]
  )
  ndlst[[tids[2]]] <- list(
    "id" = tids[[2]], "prid" = rid,
    "ptid" = NULL, "spn" = spns[2]
  )
  prinds <- c(1, 1, 1)
  tinds <- c(2, 3)
  tree <- new("TreeMan",
    ndlst = ndlst, root = "root", wtxnyms = FALSE,
    ndmtrx = NULL, prinds = prinds, tinds = tinds
  )
  updateSlts(tree)
}

#' @name getNdLng
#' @title Get lineage
#' @description Return unique taxonomic names for connecting \code{id} to root.
#' @details Returns a vector.
#' @param tree \code{TreeMan} object
#' @param id node id
#' @seealso
#' \code{\link{getNdsLng}}, \code{\link{getNdsFrmTxnyms}},
#' \url{https://github.com/DomBennett/treeman/wiki/get-methods}
#' @export
#' @examples
#'
#' data(mammals)
#' # return human lineage
#' getNdLng(mammals, id = "Homo_sapiens")
getNdLng <- function(tree, id) {
  .get <- function(txnym, ...) {
    lng <<- c(txnym, lng)
  }
  prids <- getNdPrids(tree, id)
  lng <- NULL
  plyr::m_ply(tree@ndlst[prids], .fun = .get)
  unique(lng)
}

#' @name getNdAge
#' @title Get age
#' @description Return the age for \code{id}. Requires the known age of the tree to be provided.
#' @details Returns a numeric.
#' @param tree \code{TreeMan} object
#' @param id node id
#' @param tree_age numeric value of known age of tree
#' @seealso
#' \code{\link{getNdsAge}},
#' \code{\link{getSpnAge}},
#' \code{\link{getSpnsAge}},
#' \code{\link{getPrnt}}, \code{\link{getAge}}
#' \url{https://github.com/DomBennett/treeman/wiki/get-methods}
#' @export
#' @examples
#'
#' data(mammals)
#' # when did apes emerge?
#' # get parent id for all apes
#' prnt_id <- getPrnt(mammals, ids = c("Homo_sapiens", "Hylobates_concolor"))
#' # mammal_age <- getAge(mammals)  # ~166.2, needs to be performed when tree is not up-to-date
#' getNdAge(mammals, id = prnt_id, tree_age = 166.2)
getNdAge <- function(tree, id, tree_age) {
  tree_age - .getNdPrdstsFrmLst(tree@ndlst, tree@prinds, id = id)
}

#' @name getSpnAge
#' @title Get age range
#' @description Return start and end ages for \code{id} from when it first appears to when it splits
#' @details Returns a dataframe.
#' @param tree \code{TreeMan} object
#' @param id node id
#' @param tree_age numeric value of known age of tree
#' @seealso
#' \code{\link{getNdAge}},
#' \code{\link{getNdsAge}},
#' \code{\link{getSpnsAge}},
#' \url{https://github.com/DomBennett/treeman/wiki/get-methods}
#' @export
#' @examples
#'
#' data(mammals)
#' # mammal_age <- getAge(mammals)  # ~166.2, needs to be performed when tree is not up-to-date
#' getSpnAge(mammals, id = "Homo_sapiens", tree_age = 166.2)
getSpnAge <- function(tree, id, tree_age) {
  end <- .getNdPrdstsFrmLst(tree@ndlst, tree@prinds, id = id)
  start <- end - tree@ndlst[[id]][["spn"]]
  end <- tree_age - end
  start <- tree_age - start
  data.frame(spn = id, start, end)
}

#' @name getNdPrids
#' @title Get pre-nodes to root
#' @description Return node ids for connecting \code{id} to root.
#' @details Returns a vector. IDs are returned order from node ID to root.
#' @param tree \code{TreeMan} object
#' @param id node id
#' @seealso
#' \code{\link{getNdsPrids}},
#' \code{\link{getNdPtids}},
#' \code{\link{getNdsPtids}},
#' \url{https://github.com/DomBennett/treeman/wiki/get-methods}
#' @export
#' @examples
#'
#' tree <- randTree(10)
#' # get all nodes to root
#' getNdPrids(tree, id = "t1")
getNdPrids <- function(tree, id) {
  .getNdPridsFrmLst(tree@ndlst, prinds = tree@prinds, id = id)
}

#' @name getNdPtids
#' @title Get post-nodes to tips
#' @description Return node ids for connecting \code{id} to kids.
#' @details Returns a vector.
#' @param tree \code{TreeMan} object
#' @param id node id
#' @seealso
#' \code{\link{getNdsPtids}},
#' \code{\link{getNdPrids}},
#' \code{\link{getNdsPrids}},
#' \url{https://github.com/DomBennett/treeman/wiki/get-methods}
#' @export
#' @examples
#'
#' tree <- randTree(10)
#' # get all nodes from root to tip
#' getNdPtids(tree, id = "n1")
# reduce dependence on the recursive, by getting prenodes
# tip ids to id
getNdPtids <- function(tree, id) {
  .getNdPtidsFrmLst(tree@ndlst, tree@prinds, id = id)
}

#' @name getNdKids
#' @title Get children IDs
#' @description Return the node ids of all tips that descend from node.
#' @details Returns a vector
#' @param tree \code{TreeMan} object
#' @param id node id
#' @seealso
#' \code{\link{getNdsKids}},
#' \url{https://github.com/DomBennett/treeman/wiki/get-methods}
#' @export
#' @examples
#'
#' tree <- randTree(10)
#' # everyone descends from root
#' getNdKids(tree, id = tree["root"])
getNdKids <- function(tree, id) {
  .getNdKidsFrmLst(tree@ndlst, prinds = tree@prinds, tinds = tree@tinds, id = id)
}

#' @name getNdPrdst
#' @title Get pre-distance
#' @description Return root to tip distance (prdst) for \code{id}
#' @details Sums the lengths of all branches from \code{id} to root.
#' @param tree \code{TreeMan} object
#' @param id node id
#' @seealso
#' \code{\link{getNdsPrdst}},
#' \url{https://github.com/DomBennett/treeman/wiki/get-methods}
#' @export
#' @examples
#'
#' tree <- randTree(10)
#' getNdPrdst(tree, id = "t1") # return the distance to root from t1
getNdPrdst <- function(tree, id) {
  .getNdPrdstsFrmLst(tree@ndlst, tree@prinds, id = id)
}

#' @name getNdSlt
#' @title Get a node slot
#' @description Returns the value of named slot.
#' @details Returned object depends on name, either character, vector or numeric.
#' Default node slots are: id, spn, prid, ptid and txnym. If slot is empty, returns NA.
#' @param tree \code{TreeMan} object
#' @param slt_nm slot name
#' @param id node id
#' @seealso
#' \code{\link{getNdsSlt}},
#' \url{https://github.com/DomBennett/treeman/wiki/get-methods}
#' @export
#' @examples
#'
#' tree <- randTree(10)
#' getNdSlt(tree, slt_nm = "spn", id = "t1") # return span of t1
getNdSlt <- function(tree, slt_nm, id) {
  res <- tree@ndlst[[id]][[slt_nm]]
  if (is.null(res)) {
    res <- NA
  }
  res
}

#' @name getNdPD
#' @title Get phylogenetic diversity of node
#' @description Return summed value of all descending spns
#' @details Sums the lengths of all descending branches from a node.
#' @param tree \code{TreeMan} object
#' @param id node id
#' @seealso
#' \code{\link{getNdsPD}},
#' \url{https://github.com/DomBennett/treeman/wiki/get-methods}
#' @export
#' @examples
#'
#' tree <- randTree(10)
#' getNdPD(tree, id = "n1") # return PD of n1 which in this case is for the whole tree
getNdPD <- function(tree, id) {
  .getNdPDFrmLst(tree@ndlst, prinds = tree@prinds, id = id)
}

#' @name getNdSstr
#' @title Get sister id
#' @description Returns the id of the sister(s) of node id given.
#' @details An error is raised if there is no sister (e.g. for the root).
#'  There can be more than one sister if tree is polytomous.
#' @param tree \code{TreeMan} object
#' @param id node id
#' @seealso
#' \code{\link{getNdsSstr}},
#' \url{https://github.com/DomBennett/treeman/wiki/get-methods}
#' @export
#' @examples
#'
#' tree <- randTree(10)
#' getNdSstr(tree, id = "t1")
getNdSstr <- function(tree, id) {
  if (id == tree@root) {
    return(NULL)
  }
  .getNdSstrFrmLst(tree@ndlst, id)
}

#' @name getNdsLng
#' @title Get lineage for multiple nodes
#' @description Return unique taxonyms for connecting \code{ids} to root.
#' @details Returns a list, parallelizable.
#' @param tree \code{TreeMan} object
#' @param ids vector of node ids
#' @param parallel logical, make parallel?
#' @param progress name of the progress bar to use, see \code{\link{create_progress_bar}}
#' @seealso
#' \code{\link{getNdLng}}, \code{\link{getNdsFrmTxnyms}},
#' \url{https://github.com/DomBennett/treeman/wiki/get-methods}
#' @export
#' @examples
#'
#' data(mammals)
#' # return human and gorilla lineages
#' getNdsLng(mammals, id = c("Homo_sapiens", "Gorilla_gorilla"))
getNdsLng <- function(tree, ids, parallel = FALSE, progress = "none") {
  l_data <- data.frame(id = ids, stringsAsFactors = FALSE)
  out <- plyr::mlply(
    .data = l_data, .fun = getNdLng, tree = tree,
    .parallel = parallel, .progress = progress
  )
  names(out) <- attr(out, "split_labels")[, 1]
  res <- out[1:length(out)]
  res
}

#' @name getNdsSstr
#' @title Get sister id
#' @description Returns the ids of the sister(s) of nd ids given.
#' @details An error is raised if there is no sister (e.g. for the root).
#'  There can be more than one sister if tree is polytomous. Parallelizable.
#' @param tree \code{TreeMan} object
#' @param ids nd ids
#' @param parallel logical, make parallel?
#' @param progress name of the progress bar to use, see \code{\link{create_progress_bar}}
#' @seealso
#' \code{\link{getNdSstr}},
#' \url{https://github.com/DomBennett/treeman/wiki/get-methods}
#' @export
#' @examples
#'
#' tree <- randTree(10)
#' getNdsSstr(tree, ids = tree["tips"])
getNdsSstr <- function(tree, ids, parallel = FALSE, progress = "none") {
  l_data <- data.frame(id = ids, stringsAsFactors = FALSE)
  res <- plyr::mdply(
    .data = l_data, .fun = .getNdSstrFrmLst, ndlst = tree@ndlst,
    .parallel = parallel, .progress = progress
  )
  res[, 2]
}

#' @name getNdsPD
#' @title Get phylogenetic diversities of nodes
#' @description Return summed value of all descending spns
#' @details Sums the lengths of all descending branches from a node.
#' @param tree \code{TreeMan} object
#' @param ids vector of node ids
#' @param parallel logical, make parallel?
#' @param progress name of the progress bar to use, see \code{\link{create_progress_bar}}
#' @seealso
#' \code{\link{getNdPD}},
#' \url{https://github.com/DomBennett/treeman/wiki/get-methods}
#' @export
#' @examples
#'
#' tree <- randTree(10)
#' getNdsPD(tree, ids = tree["all"]) # return PD of all ids
getNdsPD <- function(tree, ids, parallel = FALSE, progress = "none") {
  if (!is.null(tree@ndmtrx) & length(ids) > 1) {
    all_ids <- tree@all
    spns <- .getSltSpns(tree@ndlst)
    res <- .getNdsPDFrmMtrx(
      tree@ndmtrx, all_ids, ids, spns,
      parallel, progress
    )
  } else {
    res <- .getNdsPDFrmLst(tree@ndlst,
      prinds = tree@prinds,
      ids = ids, parallel = parallel, progress = progress
    )
  }
  res
}

#' @name getNdsPrdst
#' @title Get pre-distances
#' @description Return root to tip distances (prdst) for \code{ids}
#' @details Sums the lengths of all branches from \code{ids} to root.
#' @param tree \code{TreeMan} object
#' @param ids vector of node ids
#' @param parallel logical, make parallel?
#' @param progress name of the progress bar to use, see \code{\link{create_progress_bar}}
#' @seealso
#' \code{\link{getNdPrdst}},
#' \url{https://github.com/DomBennett/treeman/wiki/get-methods}
#' @export
#' @examples
#'
#' tree <- randTree(10)
#' getNdsPrdst(tree, ids = tree["tips"]) # return prdsts for all tips
getNdsPrdst <- function(tree, ids, parallel = FALSE, progress = "none") {
  if (!is.null(tree@ndmtrx) & length(ids) > 1) {
    all_ids <- tree@all
    spns <- .getSltSpns(tree@ndlst)
    res <- .getNdsPrdstsFrmMtrx(
      tree@ndmtrx, all_ids, ids, spns,
      parallel, progress
    )
  } else {
    res <- .getNdsPrdstsFrmLst(tree@ndlst,
      prinds = tree@prinds,
      ids = ids, parallel, progress
    )
  }
  res
}

#' @name getNdsSlt
#' @title Get a node slot for multiple nodes
#' @description Returns the values of named slot as a vector for atomic values, else list.
#' @details Returned object depends on name, either character, vector or numeric. Parallelizable.
#' Default node slots are: id, spn, prid, ptid and txnym.
#' @param tree \code{TreeMan} object
#' @param slt_nm slot name
#' @param ids vector of node ids
#' @param parallel logical, make parallel?
#' @param progress name of the progress bar to use, see \code{\link{create_progress_bar}}
#' @seealso
#' \code{\link{getNdSlt}},
#' \url{https://github.com/DomBennett/treeman/wiki/get-methods}
#' @export
#' @examples
#'
#' tree <- randTree(10)
#' getNdsSlt(tree, slt_nm = "spn", ids = tree["tips"]) # return spans of all tips
getNdsSlt <- function(tree, slt_nm, ids, parallel = FALSE, progress = "none") {
  .get <- function(i) {
    getNdSlt(tree, slt_nm, ids[i])
  }
  l_data <- data.frame(i = 1:length(ids), stringsAsFactors = FALSE)
  res <- plyr::mlply(
    .data = l_data, .fun = .get, .parallel = parallel,
    .progress = progress
  )
  res <- res[1:length(res)]
  if (all(vapply(res, length, integer(1)) == 1)) {
    res <- unlist(res, recursive = FALSE)
  }
  names(res) <- ids
  res
}

#' @name getNdsKids
#' @title Get children IDs for multiple nodes
#' @description Return the node ids of all tips that descend from each node in \code{ids}.
#' @details Returns a list, parallelizable.
#' @param tree \code{TreeMan} object
#' @param ids vector of node ids
#' @param parallel logical, make parallel?
#' @param progress name of the progress bar to use, see \code{\link{create_progress_bar}}
#' @seealso
#' \code{\link{getNdKids}},
#' \url{https://github.com/DomBennett/treeman/wiki/get-methods}
#' @export
#' @examples
#'
#' tree <- randTree(10)
#' getNdsKids(tree, id = tree["nds"])
getNdsKids <- function(tree, ids, parallel = FALSE,
                       progress = "none") {
  if (!is.null(tree@ndmtrx) & length(ids) > 1) {
    res <- .getNdsKidsFrmMtrx(
      tree@ndmtrx, tree@all,
      ids, tree@tips, parallel, progress
    )
  } else {
    res <- .getNdsKidsFrmLst(tree@ndlst,
      ids = ids,
      prinds = tree@prinds, tinds = tree@tinds,
      parallel = parallel, progress = progress
    )
  }
  res
}

#' @name getNdsAge
#' @title Get ages for multiple nodes
#' @description Return the age for \code{ids}.
#' @details Returns a vector, parallelizable.
#' @param tree \code{TreeMan} object
#' @param ids vector of node ids
#' @param tree_age numeric value of known age of tree
#' @param parallel logical, make parallel?
#' @param progress name of the progress bar to use, see \code{\link{create_progress_bar}}
#' @seealso
#' \code{\link{getNdAge}},
#' \code{\link{getSpnAge}},
#' \code{\link{getSpnsAge}},
#' \url{https://github.com/DomBennett/treeman/wiki/get-methods}
#' @export
#' @examples
#'
#' tree <- randTree(10)
#' getNdsAge(tree, ids = tree["nds"], tree_age = getAge(tree))
getNdsAge <- function(tree, ids, tree_age,
                      parallel = FALSE,
                      progress = "none") {
  if (!is.null(tree@ndmtrx) & length(ids) > 1) {
    spns <- .getSltSpns(tree@ndlst)
    res <- .getNdsPrdstsFrmMtrx(
      tree@ndmtrx, tree@all,
      ids, spns, parallel, progress
    )
    res <- tree_age - res
  } else {
    res <- .getNdsPrdstsFrmLst(tree@ndlst,
      ids = ids,
      prinds = tree@prinds,
      parallel = parallel,
      progress = progress
    )
    res <- tree_age - res
  }
  res
}

#' @name getSpnsAge
#' @title Get age ranges for multiple nodes
#' @description Return start and end ages for \code{ids} from
#' when they first appear to when they split
#' @details Returns a dataframe, parallelizable.
#' @param tree \code{TreeMan} object
#' @param ids vector of node ids
#' @param tree_age numeric value of known age of tree
#' @param parallel logical, make parallel?
#' @param progress name of the progress bar to use, see \code{\link{create_progress_bar}}
#' @seealso
#' \code{\link{getNdAge}},
#' \code{\link{getNdsAge}},
#' \code{\link{getSpnAge}},
#' \url{https://github.com/DomBennett/treeman/wiki/get-methods}
#' @export
#' @examples
#'
#' tree <- randTree(10)
#' # all nodes but root
#' ids <- tree["nds"][tree["nds"] != tree["root"]]
#' getSpnsAge(tree, ids = ids, tree_age = getAge(tree))
getSpnsAge <- function(tree, ids, tree_age,
                       parallel = FALSE, progress = "none") {
  if (!is.null(tree@ndmtrx) & length(ids) > 1) {
    spns <- .getSltSpns(tree@ndlst)
    end <- .getNdsPrdstsFrmMtrx(
      tree@ndmtrx, tree@all,
      ids, spns, parallel, progress
    )
  } else {
    end <- .getNdsPrdstsFrmLst(tree@ndlst,
      ids = ids,
      prinds = tree@prinds,
      parallel = parallel,
      progress = progress
    )
  }
  spns <- getNdsSlt(tree, slt_nm = "spn", ids = ids, parallel)
  start <- end - spns
  end <- tree_age - end
  start <- tree_age - start
  data.frame(spn = ids, start, end, row.names = NULL)
}

#' @name getNdsPrids
#' @title Get pre-nodes for multiple nodes
#' @description Return node ids for connecting \code{id} to root.
#' @details Returns a list, parallizable. The function will work faster
#' if \code{ordrd} is FALSE.
#' @param tree \code{TreeMan} object
#' @param ids vector of node ids
#' @param ordrd logical, ensure returned prids are ordered ID to root
#' @param parallel logical, make parallel?
#' @param progress name of the progress bar to use, see \code{\link{create_progress_bar}}
#' @seealso
#' \code{\link{getNdPrids}},
#' \code{\link{getNdPtids}},
#' \code{\link{getNdsPtids}},
#' \url{https://github.com/DomBennett/treeman/wiki/get-methods}
#' @export
#' @examples
#'
#' tree <- randTree(10)
#' getNdsPrids(tree, ids = tree["tips"])
getNdsPrids <- function(tree, ids, ordrd = FALSE,
                        parallel = FALSE, progress = "none") {
  if (!is.null(tree@ndmtrx) & length(ids) > 1 & !ordrd) {
    res <- .getNdsPridsFrmMtrx(
      tree@ndmtrx, tree@all,
      ids, parallel, progress
    )
  } else {
    res <- .getNdsPridsFrmLst(tree@ndlst,
      ids = ids,
      prinds = tree@prinds, parallel = parallel,
      progress = progress
    )
  }
  res
}

#' @name getNdsPtids
#' @title Get post-nodes to tips for multiple nodes
#' @description Return node ids for connecting \code{ids} to kids.
#' @details Returns a list, parallizable.
#' @param tree \code{TreeMan} object
#' @param ids vector of node ids
#' @param parallel logical, make parallel?
#' @param progress name of the progress bar to use, see \code{\link{create_progress_bar}}
#' @seealso
#' \code{\link{getNdPtids}},
#' \code{\link{getNdPrids}},
#' \code{\link{getNdsPrids}},
#' \url{https://github.com/DomBennett/treeman/wiki/get-methods}
#' @export
#' @examples
#'
#' tree <- randTree(10)
#' # get all nodes to tip for all nodes
#' getNdsPtids(tree, ids = tree["nds"])
getNdsPtids <- function(tree, ids, parallel = FALSE, progress = "none") {
  if (!is.null(tree@ndmtrx) & length(ids) > 1) {
    res <- .getNdsPtidsFrmMtrx(
      tree@ndmtrx, tree@all,
      ids, parallel, progress
    )
  } else {
    res <- .getNdsPtidsFrmLst(tree@ndlst,
      ids = ids,
      prinds = tree@prinds, parallel = parallel,
      progress = progress
    )
  }
  res
}

# bipartitions
#' @name getBiprts
#' @title Get the sets of labels for each bipartition in tree
#' @description Returns a list of tip IDs for each branch in the tree. Options
#' allow the user to act as if the root is not present and to use a universal
#' code for comparing between trees.
#' @details Setting \code{root} to FALSE will ignore the bipartitions created by
#' the root. Setting \code{universal} to TRUE will return a vector of 0s and 1s,
#' not a list of tips. These codes will always begin with 1, and will allow for
#' the comparison of splits between trees as they do not have "chiralty", so to
#' speak.
#' @param tree \code{TreeMan} object
#' @param tips vector of tips IDs to use for bipartitions
#' @param root Include the root for the bipartitions? Default TRUE.
#' @param universal Create a code for comparing between trees
#' @seealso
#' \code{\link{calcDstRF}}
#' @export
#' @examples
#'
#' tree <- randTree(10)
#' # get all of the tip IDs for each branch in the rooted tree
#' (getBiprts(tree))
#' # ignore the root and get bipartitions for unrooted tree
#' (getBiprts(tree, root = FALSE))
#' # use the universal code for comparing splits between trees
#' (getBiprts(tree, root = FALSE, universal = TRUE))
getBiprts <- function(tree, tips = tree@tips, root = TRUE, universal = FALSE) {
  kids <- getNdsKids(tree = tree, ids = tree@nds)
  res <- lapply(X = kids, FUN = function(x) tips %in% x)
  if (!root) {
    n <- vapply(X = res, FUN = sum, FUN.VALUE = integer(1))
    # drop splits consisting of all tips or just 1
    res <- res[n < (length(tips) - 1) & n > 1]
  }
  if (universal) {
    res <- unname(vapply(X = res, FUN = function(x) {
      if (!x[[1]]) {
        x <- !x
      }
      paste0(as.integer(x), collapse = "")
    }, FUN.VALUE = character(1)))
    res <- unique(res)
  } else {
    res <- lapply(X = res, FUN = function(x) {
      list(tree@tips[x], tree@tips[!x])
    })
  }
  res
}

# ULTRAMETRIC
#' @name isUltrmtrc
#' @title Is tree ultrametric?
#' @description Return TRUE if all tips end at 0, else FALSE.
#' @details Returns a boolean. This function works in the background
#' for the \code{['ultr']} slot in a \code{TreeMan} object.
#' @param tree \code{TreeMan} object
#' @param tol zero tolerance
#' @seealso
#' \code{\link{getLvng}}, \code{\link{getDcsd}}
#' @export
#' @examples
#'
#' tree <- randTree(10)
#' (isUltrmtrc(tree))
isUltrmtrc <- function(tree, tol = 1e-8) {
  dead <- .livingOrDesceased(tree, tol = tol, bool = FALSE)
  if (length(dead) > 0) {
    return(FALSE)
  }
  TRUE
}

# EXTINCT/EXTANT
.livingOrDesceased <- function(tree, tol = 1e-8, bool) {
  if (!is.null(tree@ndmtrx)) {
    spns <- getNdsSlt(tree, "spn", names(tree@ndlst))
    tip_prdsts <- .getNdsPrdstsFrmMtrx(tree@ndmtrx, tree@all,
      tree@tips, spns,
      parallel = FALSE,
      progress = "none"
    )
  } else {
    tip_prdsts <- .getNdsPrdstsFrmLst(tree@ndlst, tree@tips,
      tree@prinds,
      parallel = FALSE,
      progress = "none"
    )
  }
  age <- max(tip_prdsts)
  extant_is <- (age - tip_prdsts) <= tol
  living <- names(extant_is)[extant_is]
  deceased <- tree@tips[!tree@tips %in% living]
  if (bool) {
    return(living)
  }
  deceased
}

#' @name getDcsd
#' @title Get extinct tips from a tree
#' @description Return all extinct tip \code{ID}s.
#' @details Returns a vector.
#' @param tree \code{TreeMan} object
#' @param tol zero tolerance
#' @seealso
#' \code{\link{getLvng}}, \code{\link{isUltrmtrc}},
#' \url{https://github.com/DomBennett/treeman/wiki/get-methods}
#' @export
#' @examples
#'
#' tree <- randTree(10)
#' (getDcsd(tree))
getDcsd <- function(tree, tol = 1e-8) {
  .livingOrDesceased(tree = tree, tol = tol, bool = FALSE)
}

#' @name getLvng
#' @title Get extant tips from a tree
#' @description Return all extant tip \code{ID}s.
#' @details Returns a vector.
#' @param tree \code{TreeMan} object
#' @param tol zero tolerance
#' @seealso
#' \code{\link{getDcsd}}, \code{\link{isUltrmtrc}},
#' \url{https://github.com/DomBennett/treeman/wiki/get-methods}
#' @export
#' @examples
#'
#' tree <- randTree(10)
#' (getLvng(tree))
getLvng <- function(tree, tol = 1e-8) {
  .livingOrDesceased(tree = tree, tol = tol, bool = TRUE)
}

# SINGLE ND
# TODO: bring outgroup, parent and path into terminological line with getNd(s)

#' @name getPrnt
#' @title Get parent
#' @description Return parental (most recent common ancestor) node id for \code{ids}.
#' @details Returns a character.
#' @param tree \code{TreeMan} object
#' @param ids vector of node ids
#' @seealso
#' \code{\link{getSubtree}},
#' \url{https://github.com/DomBennett/treeman/wiki/get-methods}
#' @export
#' @examples
#'
#' data(mammals)
#' # choosing ids from the two main branches of apes allows to find the parent for all apes
#' ape_id <- getPrnt(mammals, ids = c("Homo_sapiens", "Hylobates_concolor"))
getPrnt <- function(tree, ids) {
  # using ndlst guarrantees order
  prids <- .getNdsPridsFrmLst(tree@ndlst,
    ids = ids, prinds = tree@prinds,
    parallel = FALSE, progress = "none"
  )
  rf <- prids[[1]]
  mn_rnk <- 0
  for (n in prids[-1]) {
    rnk <- min(match(n, rf), na.rm = TRUE)
    if (rnk > mn_rnk) mn_rnk <- rnk
  }
  rf[mn_rnk]
}

#' @name getPath
#' @title Get path between nodes
#' @description Return node ids for connecting \code{from} to \code{to}.
#' @details Returns a vector, first id is \code{from} to \code{to}.
#' @param tree \code{TreeMan} object
#' @param from starting node id
#' @param to ending node id
#' @seealso
#' \url{https://github.com/DomBennett/treeman/wiki/get-methods}
#' @export
#' @examples
#'
#' data(mammals)
#' # what's the phylogenetic distance from humans to gorillas?
#' ape_id <- getPrnt(mammals, ids = c("Homo_sapiens", "Hylobates_concolor"))
#' pth <- getPath(mammals, from = "Homo_sapiens", to = "Gorilla_gorilla")
#' sum(getNdsSlt(mammals, ids = pth, slt_nm = "spn"))
getPath <- function(tree, from, to) {
  pre_1 <- c(from, getNdPrids(tree, from))
  pre_2 <- c(to, getNdPrids(tree, to))
  parent <- pre_1[which(pre_1 %in% pre_2)[1]]
  path_1 <- pre_1[!pre_1 %in% pre_2]
  path_2 <- pre_2[!pre_2 %in% pre_1]
  path_2 <- path_2[length(path_2):1]
  c(path_1, parent, path_2)
}

#' @name getOtgrp
#' @title Get outgroup
#' @description Return the outgroup based on a tree and a vector of IDs.
#' @details Returns a id, character. If there are multiple possible outgroups, returns NULL.
#' @param tree \code{TreeMan} object
#' @param ids vector of node ids
#' @seealso
#' \url{https://github.com/DomBennett/treeman/wiki/get-methods}
#' @export
#' @examples
#'
#' data(mammals)
#' # orangutan is an outgroup wrt humans and chimps
#' getOtgrp(mammals, ids = c("Homo_sapiens", "Pan_troglodytes", "Pongo_pygmaeus"))
getOtgrp <- function(tree, ids) {
  .cntr <- function(id) {
    kids <- getNdKids(tree, id)
    sum(ids %in% kids)
  }
  prnt <- getPrnt(tree, ids)
  ptids <- tree@ndlst[[prnt]][["ptid"]]
  cnts <- vapply(ptids, .cntr, integer(1))
  outnd <- names(cnts)[which.min(cnts)]
  kids <- getNdKids(tree, outnd)
  if (length(kids) == 0) {
    return(outnd)
  }
  outgroup <- ids[ids %in% kids]
  if (length(outgroup) > 1) {
    return(NULL)
  }
  outgroup
}

# SPECIAL

#' @name getUnqNds
#' @title Get unique nodes represented by tips
#' @description Return a list of IDs for any node that are represented by tip IDs given.
#' @details Returns a vector.
#' @param tree \code{TreeMan} object
#' @param tids vector of tip IDs
#' @seealso
#' \code{\link{getCnnctdNds}}, \code{\link{calcFrPrp}},
#' \code{\link{calcPhyDv}}
#' @export
#' @examples
#'
#' tree <- randTree(10)
#' unqnds <- getUnqNds(tree, c("t1", "t2"))
getUnqNds <- function(tree, tids) {
  rmng <- tree@tips[!tree@tips %in% tids]
  ignr <- c(unique(unlist(getNdsPrids(tree, tids))), tids)
  rmng <- c(unique(unlist(getNdsPrids(tree, rmng))), rmng)
  ignr[!ignr %in% rmng]
}

#' @name getCnnctdNds
#' @title Get all nodes connected by given tips
#' @description Return a vector of IDs of all nodes that are connected to tip IDs given.
#' @details Returns a vector. This function is the basis for \code{calcPhyDv()}, it determines
#' the unique set of nodes connected for a set of tips.
#' @param tree \code{TreeMan} object
#' @param tids vector of tip IDs
#' @seealso
#' \code{\link{getUnqNds}}, \code{\link{calcFrPrp}},
#' \code{\link{calcPhyDv}}
#' @export
#' @examples
#'
#' tree <- randTree(10)
#' cnntdnds <- getCnnctdNds(tree, c("t1", "t2"))
getCnnctdNds <- function(tree, tids) {
  prids <- c(
    unlist(getNdsPrids(tree, tids)),
    tids
  )
  counts <- table(prids)
  names(counts)[counts < length(tids)]
}

#' @name getNdsFrmTxnyms
#' @title Get IDs for nodes represented txnyms
#' @description Return a list of IDs for any node that contains the given txnyms.
#' @details Returns a list. Txnyms must be spelt correctly.
#' @param tree \code{TreeMan} object
#' @param txnyms vector of taxonomic group names
#' @seealso
#' \code{\link{taxaResolve}}, \code{\link{setTxnyms}}, \code{\link{searchTxnyms}},
#' \code{\link{getNdsLng}}, \code{\link{getNdLng}}
#' @export
#' @examples
#'
#' data(mammals)
#' # what ID represents the apes?
#' getNdsFrmTxnyms(mammals, "Hominoidea")
getNdsFrmTxnyms <- function(tree, txnyms) {
  # get nd id(s) for taxonyms
  .get <- function(id, txnym, ...) {
    for (t in txnyms) {
      if (t %in% txnym) {
        res[[t]] <<- c(res[[t]], id)
      }
    }
  }
  res <- list()
  plyr::m_ply(tree@ndlst, .fun = .get)
  res
}

#' @name getSubtree
#' @title Get subtree
#' @description Return tree descending from \code{id}.
#' @details Returns a \code{TreeMan}, parallelizable. \code{id} must be an internal node.
#' @param tree \code{TreeMan} object
#' @param id node id
#' @seealso
#' \code{\link{getPrnt}}, \code{\link{addClade}},
#' \url{https://github.com/DomBennett/treeman/wiki/get-methods}
#' @export
#' @examples
#'
#' data(mammals)
#' # get tree of apes
#' ape_id <- getPrnt(mammals, ids = c("Homo_sapiens", "Hylobates_concolor"))
#' apes <- getSubtree(mammals, id = ape_id)
#' summary(apes)
getSubtree <- function(tree, id) {
  if (!id %in% tree@nds) {
    stop("`id` is not an internal node")
  }
  ids <- c(id, getNdPtids(tree, id))
  ndlst <- tree@ndlst[ids]
  ndlst[[id]][["prid"]] <- id
  ndlst[[id]][["spn"]] <- 0
  new_tree <- new("TreeMan",
    ndlst = ndlst, root = id,
    ndmtrx = NULL
  )
  new_tree <- pstMnp(new_tree)
  new_tree <- updateSlts(new_tree)
  new_tree
}

# TREE FUNCTIONS

#' @name getAge
#' @title Get age of tree
#' @description Returns age, numeric, of tree
#' @details Calculates the age of a tree, determined as the maximum tip to root
#' distance.
#' @param tree \code{TreeMan} object
#' @param parallel logical, make parallel?
#' @seealso
#' \code{\link{updateSlts}},
#' \url{https://github.com/DomBennett/treeman/wiki/get-methods}
#' @export
#' @examples
#'
#' tree <- randTree(10)
#' (getAge(tree))
getAge <- function(tree, parallel = FALSE) {
  tids <- tree@tips
  if (!is.null(tree@ndmtrx)) {
    all_ids <- tree@all
    spns <- .getSltSpns(tree@ndlst)
    res <- .getTreeAgeFrmMtrx(tree@ndmtrx, all_ids, tids, spns, parallel)
  } else {
    res <- .getTreeAgeFrmLst(tree@ndlst,
      prinds = tree@prinds,
      tids = tids, parallel
    )
  }
  res
}


#' @name ultrTree
#' @title Make tree ultrametric
#' @description Returns a tree with all tips ending at time 0
#' @details Re-calculates the branch lengths in the tree so that all
#' tips are brought to the same time point: all species are extant.
#' @param tree \code{TreeMan} object
#' @seealso
#' \url{https://github.com/DomBennett/treeman/wiki/manip-methods}
#' @export
#' @examples
#'
#' tree <- randTree(10)
#' (getDcsd(tree)) # list all extinct tips
#' tree <- ultrTree(tree)
#' (getDcsd(tree)) # list all extinct tips
ultrTree <- function(tree) {
  # bring tips to maximum possible length
  tip_dpths <- vapply(getNdsPrids(tree, tree@tips), length, integer(1))
  tip_spns <- (tree["ntips"] - tip_dpths)
  nd_spns <- rep(1, tree@nnds)
  names(nd_spns) <- tree@nds
  spns <- c(tip_spns, nd_spns)
  tree <- setNdsSpn(tree, ids = names(spns), vals = spns)
  updateSlts(tree)
}

#' @name rmNodes
#' @title Remove nodes from a tree
#' @description Returns a tree with a node ID(s) removed
#' @details Removes nodes in a tree. Joins the nodes following to
#' the nodes preceding the node to be removed. Creates polytomies.
#' Warning: do not use this function to remove tip nodes, this create a
#' corrupted tree.
#' @param tree \code{TreeMan} object
#' @param nids internal node IDs
#' @param progress name of the progress bar to use, see \code{\link{create_progress_bar}}
#' @seealso
#' \code{\link{addTip}}, \code{\link{rmTips}},
#'  \url{https://github.com/DomBennett/treeman/wiki/manip-methods}
#' @export
#' @examples
#'
#' tree <- randTree(10)
#' tree <- rmNodes(tree, "n3")
#' summary(tree) # tree is now polytmous
rmNodes <- function(tree, nids, progress = "none") {
  .rmNode <- function(nid) {
    ptids <- ndlst[[nid]][["ptid"]]
    prid <- ndlst[[nid]][["prid"]]
    if (tree@wspn) {
      for (ptid in ptids) {
        ndlst[[ptid]][["spn"]] <-
          ndlst[[ptid]][["spn"]] +
          ndlst[[nid]][["spn"]]
      }
    }
    for (ptid in ptids) {
      ndlst[[ptid]][["prid"]] <- prid
    }
    new_ptids <- ndlst[[prid]][["ptid"]]
    new_ptids <- new_ptids[new_ptids != nid]
    new_ptids <- c(new_ptids, ptids)
    ndlst[[prid]][["ptid"]] <- new_ptids
    ndlst <<- ndlst[names(ndlst) != nid]
  }
  if (tree@root %in% nids) {
    stop("Cannot remove root.")
  }
  ndlst <- tree@ndlst
  plyr::m_ply(.data = nids, .fun = .rmNode, .progress = progress)
  bool <- tree@all %in% names(ndlst)
  tree@ndlst <- ndlst
  tree <- pstMnp(tree)
  tree <- updateSlts(tree)
  if (!is.null(tree@ndmtrx)) {
    tree@ndmtrx <- bigmemory::as.big.matrix(tree@ndmtrx[bool, bool])
  }
  tree
}

#' @name rmTips
#' @title Remove tips from a tree
#' @description Returns a tree with a tip ID(s) removed
#' @details Removes tips in a tree. Set drp_intrnl to FALSE to convert
#' internal nodes into new tips. Warning: do not use this function to remove
#' internal nodes, this create a corrupted tree.
#' @param tree \code{TreeMan} object
#' @param tids tip IDs
#' @param drp_intrnl Boolean, drop internal branches, default FALSE
#' @param progress name of the progress bar to use, see \code{\link{create_progress_bar}}
#' @seealso
#' \code{\link{addTip}}, \code{\link{rmNodes}},
#'  \url{https://github.com/DomBennett/treeman/wiki/manip-methods}
#' @export
#' @examples
#'
#' tree <- randTree(10)
#' tree <- rmTips(tree, "t1")
#' summary(tree)
#' # running the function using an internal
#' # node will create a corrupted tree
#' tree <- rmTips(tree, "n3")
#' # run summary() to make sure a change has
#' # not created a corruption
#' # summary(tree)
rmTips <- function(tree, tids, drp_intrnl = TRUE, progress = "none") {
  # internal
  .rmTip <- function(tid) {
    # get sister IDs
    sids <- .getNdSstrFrmLst(ndlst, tid)
    # get prid
    prid <- ndlst[[tid]][["prid"]][[1]]
    # remove tid
    ndlst <<- ndlst[names(ndlst) != tid]
    ndlst[[prid]][["ptid"]] <<-
      ndlst[[prid]][["ptid"]][ndlst[[prid]][["ptid"]] != tid]
    # remove prnd if specified and not polytomous
    if (drp_intrnl & length(sids) == 1) {
      ptid <- ndlst[[prid]][["ptid"]][[1]]
      ndlst[[ptid]][["spn"]] <<- ndlst[[prid]][["spn"]] +
        ndlst[[ptid]][["spn"]]
      if (prid != rid) {
        gprid <- ndlst[[prid]][["prid"]][[1]]
        ndlst[[ptid]][["prid"]] <<- gprid
        g_ptids <- ndlst[[gprid]][["ptid"]]
        g_ptids <- g_ptids[g_ptids != prid]
        ndlst[[gprid]][["ptid"]] <<- c(g_ptids, ptid)
      } else {
        # if prid to be dropped is root, set root to ptid
        rid <<- ptid
        ndlst[[ptid]][["prid"]] <<- ptid
      }
      ndlst <<- ndlst[names(ndlst) != prid]
    }
  }
  ndlst <- tree@ndlst
  rid <- tree@root
  plyr::m_ply(.data = tids, .fun = .rmTip, .progress = progress)
  bool <- tree@all %in% names(ndlst)
  tree@ndlst <- ndlst
  tree@root <- rid
  tree <- pstMnp(tree)
  tree <- updateSlts(tree)
  if (!is.null(tree@ndmtrx)) {
    tree@ndmtrx <- bigmemory::as.big.matrix(tree@ndmtrx[bool, bool])
  }
  tree
}

#' @name addTip
#' @title Add tip to a tree
#' @description Returns a tree with a new tip ID added
#' @details User must provide new tip ID, the ID of the node
#' which will become the new tip's sister, and new branch lengths.
#' The tip ID must only contain letters numbers and underscores.
#' Optionally, user can specify the IDs for the new parental internal nodes.
#' Ensure that the \code{strt_age} is greater than the \code{end_age}, and that
#' the \code{strt_age} falls within the age span of the sister ID. Otherwise, negative
#' spns may be produced leading to an error.
#' Note, returned tree will not have a node matrix.
#' Note, providing negative end ages will increase the age of the tree.
#' @param tree \code{TreeMan} object
#' @param tid tip ID
#' @param sid ID of node that will become new tip sisters
#' @param strt_age timepoint at which new tips first appear in the tree
#' @param end_age timepoint at which new tips end appear in the tree, default 0.
#' @param tree_age age of tree
#' @param pid parent ID (default is 'p_' + tid)
#' @seealso
#' \code{\link{rmTips}},
#' \url{https://github.com/DomBennett/treeman/wiki/manip-methods}
#' @export
#' @examples
#'
#' tree <- randTree(10)
#' tree_age <- getAge(tree)
#' possible_ages <- getSpnAge(tree, "t1", tree_age)
#' start_age <- runif(1, possible_ages[["end"]], possible_ages[["start"]])
#' end_age <- possible_ages[["end"]]
#' tree <- addTip(tree,
#'   tid = "t11", sid = "t1", strt_age = start_age,
#'   end_age = end_age, tree_age = tree_age
#' )
#' summary(tree)
addTip <- function(tree, tid, sid, strt_age = NULL,
                   end_age = 0, tree_age = NULL,
                   pid = paste0("p_", tid)) {
  if (!is.numeric(strt_age) & tree@wspn) {
    stop("Valid strt_age not provided")
  }
  ndlst <- tree@ndlst
  # terminology
  # snd, sid -- old sister node and id
  # tnd, tid -- new tip node and id
  # pnd, pid -- new parent node and id
  # gpnd, gpid -- grand parent (prid of old sister)
  if (grepl("[^a-zA-Z_0-9]", tid)) {
    stop(paste0("Unsuitable characters in tid [", tid, "]"))
  }
  # init new nodes
  tnd <- list("id" = tid, "prid" = pid, "ptid" = character(), "spn" = 0)
  snd <- ndlst[[sid]]
  gpid <- snd[["prid"]][[1]]
  gpnd <- ndlst[[gpid]]
  pnd <- list("id" = pid, "spn" = 0)
  # update ptid
  gpnd[["ptid"]] <- gpnd[["ptid"]][!gpnd[["ptid"]] %in% snd[["id"]]]
  gpnd[["ptid"]] <- c(gpnd[["ptid"]], pnd[["id"]])
  pnd[["ptid"]] <- c(tid, sid)
  # set prid
  pnd[["prid"]] <- snd[["prid"]]
  snd[["prid"]] <- pid
  # add to ndlst
  ndlst[[tid]] <- tnd
  ndlst[[pid]] <- pnd
  ndlst[[sid]] <- snd
  ndlst[[gpid]] <- gpnd
  if (tree@wspn) {
    sspn <- ndlst[[sid]][["spn"]]
    prid <- ndlst[[tid]][["prid"]]
    gprid <- ndlst[[prid]][["prid"]]
    # calc
    gp_age <- getNdAge(tree, gprid, tree_age)
    tspn <- strt_age - end_age
    pspn <- gp_age - strt_age
    new_spsn <- abs(sspn - pspn)
    if (tspn < 0 | pspn < 0 | new_spsn < 0) {
      stop("Invalid ages given: negative spns")
    }
    ndlst[[tid]][["spn"]] <- tspn
    ndlst[[prid]][["spn"]] <- pspn
    ndlst[[sid]][["spn"]] <- new_spsn
  }
  tree@ndlst <- ndlst
  tree <- pstMnp(tree)
  tree <- rmNdmtrx(tree)
  tree <- updateSlts(tree)
  tree
}

#' @name addClade
#' @title Add clade to tree
#' @description Returns a tree with added clade
#' @details Add a \code{TreeMan} object to an existing \code{TreeMan}
#' object by specifying an ID at which to attach. If the id specified
#' is an internal node, then the original clade descending from that
#' node will be replaced. Before running, ensure no IDs are shared
#' between the \code{tree} and the \code{clade}, except for the IDs in the clade
#' of that tree that will be replaced.
#' Note, returned tree will not have a node matrix.
#' @param tree \code{TreeMan} object
#' @param id tip/node ID in tree to which the clade will be added
#' @param clade \code{TreeMan} object
#' @seealso
#' \code{\link{rmClade}}, \code{\link{getSubtree}},
#' \url{https://github.com/DomBennett/treeman/wiki/manip-methods}
#' @export
#' @examples
#'
#' t1 <- randTree(100)
#' # extract a clade
#' cld <- getSubtree(t1, "n2")
#' # remove the same clade
#' t2 <- rmClade(t1, "n2")
#' # add the clade again
#' t3 <- addClade(t2, "n2", cld)
#' # t1 and t3 should be the same
#' # note there is no need to remove a clade before adding
#' t3 <- addClade(t1, "n2", cld) # same tree
addClade <- function(tree, id, clade) {
  if (!id %in% tree@tips) {
    tree <- rmClade(tree, id)
  }
  cld_ids <- names(clade@ndlst)[names(clade@ndlst) != clade@root]
  if (any(names(tree@ndlst) %in% cld_ids) &
    any(cld_ids %in% names(tree@ndlst))) {
    stop(
      "IDs in `clade` exist in parts of `tree` not to be replaced.",
      " Consider checking for duplicates or renaming IDs in either `tree` or `clade`"
    )
  }
  cld_ptids <- clade@ndlst[[clade@root]][["ptid"]]
  for (cld_ptid in cld_ptids) {
    clade@ndlst[[cld_ptid]][["prid"]] <- id
  }
  cld_ndlst <- clade@ndlst[names(clade@ndlst) != clade@root]
  tree@ndlst[[id]][["ptid"]] <- cld_ptids
  tree@ndlst <- c(tree@ndlst, cld_ndlst)
  tree <- pstMnp(tree)
  tree <- rmNdmtrx(tree)
  updateSlts(tree)
}

#' @name rmClade
#' @title Remove a clade from a tree
#' @description Returns a tree with a clade removed
#' @details Inverse function of \code{getSubtree()}. Takes a tree
#' and removes a clade based on an internal node specified. Node
#' is specified with \code{id}, all descending nodes and tips are removed.
#' The resulting tree will replace the missing clade with a tip of \code{id}.
#' @param tree \code{TreeMan} object
#' @param id node ID parent of clade to be removed
#' @seealso
#' \code{\link{addClade}}, \code{\link{getSubtree}}, \code{\link{rmTips}}
#' \url{https://github.com/DomBennett/treeman/wiki/manip-methods}
#' @export
#' @examples
#'
#' t1 <- randTree(100)
#' # remove a clade
#' t2 <- rmClade(t1, "n2")
#' summary(t1)
#' summary(t2)
rmClade <- function(tree, id) {
  ptids <- getNdPtids(tree, id)
  bool <- !tree@all %in% ptids
  tree@ndlst <- tree@ndlst[!names(tree@ndlst) %in% ptids]
  tree@ndlst[[id]][["ptid"]] <- character()
  tree <- pstMnp(tree)
  if (!is.null(tree@ndmtrx)) {
    tree@ndmtrx <- bigmemory::as.big.matrix(tree@ndmtrx[bool, bool])
  }
  updateSlts(tree)
}

#' @name pinTips
#' @title Pin tips to a tree
#' @description Returns a tree with new tips added based on given lineages and time points
#' @details User must provide a vector of new tip IDs, a list of the ranked lineages
#' for these IDs (in ascending order) and a vector of end time points for each new ID
#' (0s for extant tips). The function expects the given tree to be taxonomically informed;
#' the \code{txnym} slot for every node should have a taxonomic label. The function takes
#' the lineage and tries to randomly add the new tip at the lowest point in the taxonomic rank
#' before the end time point. Note, returned tree will not have a node matrix.
#' @param tree \code{TreeMan} object
#' @param tids new tip ids
#' @param lngs list of vectors of the lineages of each tid (ordered high to low rank)
#' @param end_ages end time points for each tid
#' @param tree_age age of tree
#' @seealso
#' \code{\link{addTip}}, \code{\link{rmTips}},
#' \url{https://github.com/DomBennett/treeman/wiki/manip-methods}
#' @export
#' @examples
#' # see https://github.com/DomBennett/treeman/wiki/Pinning-tips for a detailed example
pinTips <- function(tree, tids, lngs, end_ages, tree_age) {
  .getPtntls <- function(lng, end) {
    sccs <- FALSE
    # loop backwards through taxonomy
    # genus --> family --> ....
    for (i in length(lng):1) {
      pull <- txnyms %in% lng[i]
      if (sum(pull) == 0) {
        next
      }
      ptntls <- names(txnyms)[pull]
      if (length(ptntls) == 1) {
        prnt <- ptntls
      } else {
        # assumes monophylly
        prnt <- ptntls[which.max(spn_data[ptntls, "end"])]
      }
      ptntls <- c(prnt, getNdPtids(tree, prnt))
      ptntls <- ptntls[ptntls != rid]
      pull <- spn_data[ptntls, "start"] > end
      if (any(pull)) {
        ptntls <- ptntls[pull]
        sccs <- TRUE
        break
      }
    }
    if (sccs) {
      return(ptntls)
    }
    NULL
  }
  .getPTxnym <- function(tip_txnym, sid) {
    gp_txnym <- txnyms[[getNdSlt(tree, "prid", sid)]]
    s_txnym <- txnyms[[sid]]
    if (s_txnym == tip_txnym) {
      pid_txnym <- tip_txnym
    } else {
      pid_txnym <- gp_txnym
    }
    pid_txnym
  }
  .pin <- function(i) {
    tid <- tids[i]
    end <- end_ages[i]
    lng <- lngs[[i]]
    ptntls <- .getPtntls(lng, end)
    if (is.null(ptntls)) {
      message(paste0("[", tid, "] could not be added"))
      return(NULL)
    }
    rngs <- spn_data[ptntls, , drop = FALSE]
    rngs[rngs[, "end"] <= end, "end"] <- end
    # pinning is based on branch length
    # this is not a model, it just ensures
    # taxonomically matching branch lengths
    # of the tree have equal chance.
    prbs <- rngs[, "start"] - rngs[, "end"]
    if (sum(prbs) < 10 - 8) {
      # cannot add where no branch is available, prbs eqv. to branch length
      message(paste0("[", tid, "] could not be added"))
      return(NULL)
    }
    sid <- as.vector(sample(ptntls, prob = prbs, size = 1))
    start <- runif(min = rngs[sid, "end"], max = rngs[sid, "start"], n = 1)
    # taxnomy of tip and parent tip based grandparent
    tip_txnym <- lng[length(lng)]
    pid_txnym <- .getPTxnym(tip_txnym, sid)
    pid <- paste0("p_", tid, sep = "")
    # add tip
    tree <- addTip(tree,
      tid = tid, sid = sid, strt_age = start,
      end_age = end, pid = pid, tree_age = tree_age
    )
    tree@ndlst[[tid]][["txnym"]] <- tip_txnym
    tree@ndlst[[pid]][["txnym"]] <- pid_txnym
    # add to spn_data
    tid_spn <- getSpnAge(tree, tid, tree_age)
    spn_data[tid, "start"] <<- tid_spn[, "start"]
    spn_data[tid, "end"] <<- tid_spn[, "end"]
    pid_spn <- getSpnAge(tree, pid, tree_age)
    spn_data[pid, "start"] <<- pid_spn[, "start"]
    spn_data[pid, "end"] <<- pid_spn[, "end"]
    sid_spn <- getSpnAge(tree, sid, tree_age)
    spn_data[sid, "start"] <<- sid_spn[, "start"]
    spn_data[sid, "end"] <<- sid_spn[, "end"]
    # add to txnyms list
    txnyms[[tid]] <<- tip_txnym
    txnyms[[pid]] <<- pid_txnym
    # push tree out
    tree <<- tree
  }
  .testLngs <- function(lng) {
    for (l in lng) {
      if (grepl("[^a-zA-Z_0-9]", l)) {
        stop(paste0("Unsuitable characters in [", l, "]"))
      }
    }
    NULL
  }
  if (!tree@wtxnyms) {
    stop("tree has no txnyms")
  }
  if (any(end_ages < 0)) {
    warning("One or more end ages are less than zero, this will change the age of tree.")
  }
  # make sure lineages are right
  mapply(.testLngs, lngs)
  # generate taxonomy and span data
  txnyms <- getNdsSlt(tree, "txnym", tree@all)
  txnyms <- c(txnyms, rep(NA, length(tids) * 2))
  names(txnyms) <- c(names(tree@ndlst), tids, paste0("p_", tids))
  spn_data <- matrix(NA,
    nrow = (length(tids) * 2) + tree@nall,
    ncol = 2
  )
  colnames(spn_data) <- c("start", "end")
  tmp_spn_data <- getSpnsAge(tree, tree@all, tree_age)
  rownames(spn_data) <- c(tree@all, tids, paste0("p_", tids))
  spn_data[tree@all, "start"] <- tmp_spn_data[["start"]]
  spn_data[tree@all, "end"] <- tmp_spn_data[["end"]]
  rm(tmp_spn_data)
  rid <- tree@root
  # add oldest to youngest
  ordrd <- order(end_ages, decreasing = TRUE)
  plyr::m_ply(ordrd, .pin)
  tree
}

# prevent sudden crashes by stopping incorrect arguments going to C code
.preCtest <- function(...) {
  argg <- c(as.list(environment()), list(...))
  bool <- FALSE
  for (arg in argg) {
    if (!is.numeric(arg) | length(arg) < 1) {
      bool <- TRUE
    }
  }
  if (bool) {
    stop("1 or more arguments are of the wrong type")
  }
  NULL
}

# MULTIPLE NDS

.getNdsPtidsFrmLst <- function(ndlst, ids, prinds, parallel, progress) {
  l_data <- data.frame(id = ids, stringsAsFactors = FALSE)
  out <- plyr::mlply(
    .data = l_data, .fun = .getNdPtidsFrmLst, ndlst = ndlst,
    prinds = prinds, .parallel = parallel, .progress = progress
  )
  names(out) <- attr(out, "split_labels")[, 1]
  res <- out[1:length(out)]
  res
}

.getNdsPridsFrmLst <- function(ndlst, ids, prinds,
                               parallel, progress) {
  l_data <- data.frame(id = ids, stringsAsFactors = FALSE)
  out <- plyr::mlply(
    .data = l_data, .fun = .getNdPridsFrmLst, ndlst = ndlst,
    prinds = prinds, .parallel = parallel, .progress = progress
  )
  names(out) <- attr(out, "split_labels")[, 1]
  res <- out[1:length(out)]
  res
}

.getNdsPDFrmLst <- function(ndlst, ids, prinds,
                            parallel, progress) {
  l_data <- data.frame(id = ids, stringsAsFactors = FALSE)
  out <- plyr::mdply(
    .data = l_data, .fun = .getNdPDFrmLst, prinds = prinds,
    ndlst = ndlst, .parallel = parallel, .progress = progress
  )
  res <- out[, 2]
  names(res) <- out[, 1]
  res
}

.getNdsKidsFrmLst <- function(ndlst, ids, prinds, tinds,
                              parallel, progress) {
  l_data <- data.frame(id = ids, stringsAsFactors = FALSE)
  res <- plyr::mlply(
    .data = l_data, .fun = .getNdKidsFrmLst,
    ndlst = ndlst, prinds = prinds,
    tinds = tinds, .parallel = parallel, .progress = progress
  )
  names(res) <- ids
  res[1:length(res)]
}

.getNdsPrdstsFrmLst <- function(ndlst, ids, prinds,
                                parallel, progress) {
  l_data <- data.frame(id = ids, stringsAsFactors = FALSE)
  out <- plyr::mdply(
    .data = l_data, .fun = .getNdPrdstsFrmLst, prinds = prinds,
    ndlst = ndlst, .parallel = parallel, .progress = progress
  )
  res <- out[, 2]
  names(res) <- out[, 1]
  res
}

# SINGLE ND

.getNdSstrFrmLst <- function(ndlst, id) {
  prid <- ndlst[[id]][["prid"]][[1]]
  ptids <- ndlst[[prid]][["ptid"]]
  ptids[ptids != id]
}

#' @useDynLib phylotaR cGetNdPrids
.getNdPridsFrmLst <- function(ndlst, prinds, id) {
  prid <- ndlst[[id]][["prid"]]
  nids <- names(ndlst)
  prind <- which(nids == prid)
  .preCtest(prind, prinds)
  res <- .Call("cGetNdPrids",
    PACKAGE = "phylotaR",
    as.integer(prind),
    as.integer(prinds)
  )
  nids[res]
}

.getNdPrdstsFrmLst <- function(ndlst, prinds, id) {
  prids <- .getNdPridsFrmLst(ndlst, prinds, id)
  sum(vapply(ndlst[prids], function(x) x[["spn"]], numeric(1))) +
    ndlst[[id]][["spn"]]
}

#' @useDynLib phylotaR cGetNdPtids
.getNdPtidsFrmLst <- function(ndlst, prinds, id) {
  nids <- names(ndlst)
  id <- which(nids == id)
  .preCtest(id, prinds)
  res <- .Call("cGetNdPtids",
    PACKAGE = "phylotaR",
    as.integer(id),
    as.integer(prinds)
  )
  nids[which(res > 0)]
}

.getNdKidsFrmLst <- function(ndlst, prinds, tinds, id) {
  ptids <- .getNdPtidsFrmLst(ndlst, prinds, id)
  tids <- names(ndlst)[tinds]
  ptids[ptids %in% tids]
}

.getNdPDFrmLst <- function(ndlst, prinds, id) {
  ptids <- .getNdPtidsFrmLst(ndlst, prinds, id)
  if (length(ptids) > 0) {
    res <- sum(vapply(ndlst[ptids], function(x) x[["spn"]], numeric(1)))
  } else {
    res <- 0
  }
  res
}

# TREE FUNCTIONS

.getTreeAgeFrmLst <- function(ndlst, prinds, tids, parallel) {
  prdsts <- .getNdsPrdstsFrmLst(ndlst,
    ids = tids, prinds = prinds,
    parallel = parallel, progress = "none"
  )
  max(prdsts)
}

# SLOT

.getSltSpns <- function(ndlst) {
  .get <- function(x) {
    x[["spn"]]
  }
  res <- vapply(ndlst, .get, numeric(1))
  names(res) <- NULL
  res
}

# INDS

.getPrinds <- function(ndlst) {
  # pre-node index
  .get <- function(x) {
    x[["prid"]]
  }
  res <- vapply(ndlst, .get, character(1))
  match(res, names(ndlst))
}

.getTinds <- function(ndlst) {
  # tip-node index
  .get <- function(x) {
    length(x[["ptid"]]) == 0
  }
  res <- vapply(ndlst, .get, logical(1))
  names(res) <- NULL
  which(res)
}

# SPECIAL

#' @useDynLib phylotaR cGetNdmtrx
.getNdmtrxFrmLst <- function(ndlst, shared = FALSE, ...) {
  # return matrix of 01s for ids that descend from
  message(
    "Note, trees with `ndmtrx` cannot be saved and loaded using `save()` or `savehistory()`.",
    " Loading from these files may cause unusual behaviour."
  )
  prids <- vapply(ndlst, function(x) x[["prid"]], character(1))
  nids <- names(prids)
  prids <- match(prids, nids)
  qry_ids <- 1:length(nids)
  .preCtest(length(nids), qry_ids, prids)
  res <- .Call("cGetNdmtrx",
    PACKAGE = "phylotaR",
    as.integer(length(nids)),
    as.integer(qry_ids),
    as.integer(prids)
  )
  res <- bigmemory::as.big.matrix(res, shared = shared, ...)
  res
}
# Attemp for making getNdsMat run in parallel
# ... actually made it slower
# ntids <- length(tids)
# n <- foreach::getDoParWorkers()
# nparts <- ntids %/% n
# parts <- c(seq(1, ntids - 1, nparts), ntids + 1)
# res <- foreach (i=2:length(parts), .combine="cbind") %dopar% {
#   tids <- tids[parts[i-1]:(parts[i] - 1)]
#   res <- .Call("cGetNdsMat", PACKAGE="phylotaR",
#                as.integer(length(nids)),
#                as.integer(tids),
#                as.integer(prids))
#   res
# }


# MULTIPLE NDS
.getNdsPrdstsFrmMtrx <- function(ndmtrx, all_ids, ids, spns,
                                 parallel, progress) {
  .get <- function(x) {
    sum(spns[as.logical(x)])
  }
  res <- plyr::adply(ndmtrx[, all_ids %in% ids],
    .margins = 2,
    .fun = .get, .parallel = parallel, .progress = progress
  )[, 2]
  res <- res + spns[all_ids %in% ids]
  names(res) <- all_ids[all_ids %in% ids]
  res
}

.getNdsKidsFrmMtrx <- function(ndmtrx, all_ids, ids, tids,
                               parallel, progress) {
  .get <- function(x) {
    all_ids[as.logical(x) & all_ids %in% tids]
  }
  res <- plyr::alply(ndmtrx[all_ids %in% ids, ],
    .margins = 1,
    .fun = .get, .parallel = parallel, .progress = progress
  )
  names(res) <- all_ids[all_ids %in% ids]
  res <- res[1:length(res)]
  res
}

.getNdsPtidsFrmMtrx <- function(ndmtrx, all_ids, ids,
                                parallel, progress) {
  .get <- function(x) {
    all_ids[as.logical(x)]
  }
  res <- plyr::alply(ndmtrx[all_ids %in% ids, ],
    .margins = 1,
    .fun = .get, .parallel = parallel, .progress = progress
  )
  names(res) <- all_ids[all_ids %in% ids]
  res <- res[1:length(res)]
  res
}

.getNdsPridsFrmMtrx <- function(ndmtrx, all_ids, ids,
                                parallel, progress) {
  .get <- function(x) {
    all_ids[as.logical(x)]
  }
  res <- plyr::alply(ndmtrx[, all_ids %in% ids],
    .margins = 2,
    .fun = .get, .parallel = parallel, .progress = progress
  )
  names(res) <- all_ids[all_ids %in% ids]
  res <- res[1:length(res)]
  res
}

.getNdsPDFrmMtrx <- function(ndmtrx, all_ids, ids, spns,
                             parallel, progress) {
  .get <- function(x) {
    sum(spns[as.logical(x)])
  }
  res <- plyr::adply(ndmtrx[all_ids %in% ids, ],
    .margins = 1,
    .fun = .get, .parallel = parallel, .progress = progress
  )[, 2]
  names(res) <- all_ids[all_ids %in% ids]
  res <- res[1:length(res)]
  res
}

# TREE

.getTreeAgeFrmMtrx <- function(ndmtrx, all_ids, tids, spns, parallel) {
  res <- .getNdsPrdstsFrmMtrx(ndmtrx, all_ids, tids, spns,
    parallel = parallel, progress = "none"
  )
  max(res)
}

.newNd <- function(tree, id) {
  nd <- tree@ndlst[[id]]
  if (!tree@wspn) {
    spn <- pd <- prdst <- numeric()
  } else {
    spn <- nd[["spn"]]
    pd <- .getNdPDFrmLst(tree@ndlst,
      prinds = tree@prinds,
      id = id
    )
    prdst <- .getNdPrdstsFrmLst(tree@ndlst,
      prinds = tree@prinds,
      id = id
    )
  }
  if (is.null(nd[["txnym"]])) {
    txnym <- vector()
  } else {
    txnym <- nd[["txnym"]]
  }
  kids <- .getNdKidsFrmLst(tree@ndlst, prinds = tree@prinds, id = id, tinds = tree@tinds)
  new("Node",
    id = nd[["id"]], spn = spn, prid = as.character(nd[["prid"]][1]),
    ptid = as.character(nd[["ptid"]]), kids = as.character(kids),
    nkids = length(kids), pd = pd, txnym = txnym, prdst = prdst,
    root = tree@root == nd[["id"]], tip = length(nd[["ptid"]]) == 0
  )
}

#' @name Node-class
#' @aliases Node-method
#' @param x \code{Node} object
#' @param object \code{Node} object
#' @param i slot name
#' @param j missing
#' @param ... missing
#' @param drop missing
#' @title Node-class
#' @description The \code{Node} is an S4 class used for displaying node information.
#' It is only generated when a user implements the \code{[[]]} on a tree. Information
#' is only accurate if tree has been updated with \code{updateTree()}.
#' @slot id unique ID for node in tree['ndlst']
#' @slot spn length of preceding branch
#' @slot prid parent node ID
#' @slot ptid child node ID
#' @slot kids descending tip IDs
#' @slot nkids number of descending tip IDs
#' @slot txnym list of associated taxonyms
#' @slot pd total branch length represented by node
#' @slot prdst total branch length of connected prids
#' @slot root T/F root node?
#' @slot tip T/F tip node?
#' @exportClass Node
#' @seealso
#' \code{\link{TreeMan-class}}, \code{\link{TreeMen-class}}
setClass("Node",
  representation = representation(
    id = "character", # unique ID for node in tree@nodelist
    spn = "numeric", # length of preceding branch
    prid = "character", # parent node ID
    ptid = "vector", # child node IDs
    kids = "vector", # descending tip IDs
    nkids = "numeric", # number of descending tips
    txnym = "vector", # list of associated taxonyms
    pd = "numeric", # total branch length represented by node
    prdst = "numeric", # total branch length of connected pres
    root = "logical", # T/F root node?
    tip = "logical"
  ) # T/F tip node?
)
#' @rdname Node-class
#' @exportMethod as.character
setMethod(
  "as.character", c("x" = "Node"),
  function(x) {
    paste0("Node Obj. [ID=", x@id, "]")
  }
)
#' @rdname Node-class
#' @exportMethod show
setMethod(
  "show", "Node",
  function(object) {
    cat(summary(object))
  }
)
#' @rdname Node-class
#' @exportMethod print
setMethod(
  "print", "Node",
  function(x) {
    print(summary(x))
  }
)
#' @rdname Node-class
#' @exportMethod summary
setMethod(
  "summary", c("object" = "Node"),
  function(object) {
    if (object@root) {
      msg <- paste0("Node (root node):\n")
    } else if (object@tip) {
      msg <- paste0("Node (tip node):\n")
    } else {
      msg <- paste0("Node (internal node):\n")
    }
    msg <- paste0(msg, '  + ID: \"', object@id, '\"\n')
    if (length(object@txnym) > 0) {
      msg <- paste0(msg, '  + txnym: \"', paste0(object@txnym, collapse = '\", \"'), '\"\n')
    }
    if (!object@root) {
      msg <- paste0(msg, '  + prid: \"', object@prid, '\"\n')
    }
    if (!object@tip) {
      msg <- paste0(msg, '  + ptid: \"', paste0(object@ptid, collapse = '\", \"'), '\"\n')
      msg <- paste0(msg, "  + nkids: ", length(object@kids), "\n")
    }
    if (length(object@spn) > 0) {
      if (!object@root) {
        msg <- paste0(msg, "  + spn: ", signif(object@spn, 2), "\n")
      }
      msg <- paste0(msg, "  + predist: ", signif(object@prdst, 2), "\n")
      msg <- paste0(msg, "  + pd: ", signif(object@pd, 2), "\n")
    }
    cat(msg)
  }
)

#' @rdname Node-class
#' @exportMethod [
setMethod(
  "[", c("Node", "character", "missing", "missing"),
  function(x, i, j, ..., drop = TRUE) {
    if (!i %in% slotNames(x)) {
      stop(paste0(i, "  not in node"))
    }
    slot(x, i)
  }
)

#' @name writeTree
#' @title Write a Newick tree
#' @description Creates a Newick tree from a \code{TreeMan} object.
#' @details The \code{ndLabels} argument can be used to add a user defined node label in
#' the Newick tree. It should take only 1 argument, \code{nd}, the node represented as a list.
#' It should only return a single character value that can be added to a newick string.
#' @param tree \code{TreeMan} object
#' @param file file path
#' @param append T/F append tree to already existing file
#' @param ndLabels node label function
#' @param parallel logical, make parallel?
#' @param progress name of the progress bar to use, see \code{\link{create_progress_bar}}
#' @seealso
#' \url{https://en.wikipedia.org/wiki/Newick_format},
#' \code{\link{readTree}}, \code{\link{randTree}},
#' \code{\link{readTrmn}}, \code{\link{writeTrmn}},
#' \code{\link{saveTreeMan}}, \code{\link{loadTreeMan}}
#' @export
#' @examples
#'
#' tree <- randTree(10)
#' # write out the tree with node labels as IDs
#' ndLabels <- function(n) {
#'   n[["id"]]
#' }
#' writeTree(tree, file = "example.tre", ndLabels = ndLabels)
#' file.remove("example.tre")
writeTree <- function(tree, file, append = FALSE, ndLabels = function(nd) {
                        return(NULL)
                      }, parallel = FALSE, progress = "none") {
  if (is(tree) == "TreeMen") {
    plyr::m_ply(tree@treelst,
      .fun = .writeTree, file = file,
      append = TRUE, ndLabels = ndLabels,
      .progress = progress, .parallel = parallel
    )
  } else if (is(tree) == "TreeMan") {
    .writeTree(tree, file, append, ndLabels)
  } else {
    stop("`tree` must be TreeMan or TreeMen")
  }
  NULL
}

.writeTree <- function(tree, file, append, ndLabels) {
  tipBytip <- function(i) {
    kids <- getNdKids(tree, prid)
    ids <- c(kids, prid, ndlst[[prid]][["prid"]])
    id <<- ids[!ids %in% deja_vues][1]
    deja_vues[i] <<- id
    spn <- ndlst[[id]][["spn"]]
    if (id %in% tids) {
      prids <- getNdPrids(tree, id)
      dpth <- which(prids == prid) - 1
      prid <<- ndlst[[id]][["prid"]]
      tpstr <- paste0(id, ":", spn)
      if (dpth > 0) {
        brckts <- paste0(rep("(", dpth), collapse = "")
        trstr <<- paste0(trstr, ",", brckts, tpstr)
      } else {
        trstr <<- paste0(trstr, ",", tpstr)
      }
    } else {
      prid <<- ndlst[[id]][["prid"]]
      ndlbl <- ndLabels(ndlst[[id]])
      trstr <<- paste0(trstr, ")", ndlbl, ":", spn)
    }
    NULL
  }
  # start with first tip
  # loop through tree structure adding tip by tip to string
  # unpack
  ndlst <- tree@ndlst
  tids <- tree@tips
  nids <- tree@nds
  rid <- tree@root
  # add first tip
  id <- tids[1]
  trstr <- ""
  deja_vues <- rep(NA, length(ndlst))
  deja_vues[1] <- id
  spn <- ndlst[[id]][["spn"]]
  dpth <- length(getNdPrids(tree, id))
  prid <- ndlst[[id]][["prid"]]
  tpstr <- paste0(id, ":", spn)
  trstr <- paste0(rep("(", dpth), collapse = "")
  trstr <- paste0(trstr, tpstr)
  # loop through nodes
  plyr::m_ply(2:(length(ndlst) - 1), .fun = tipBytip)
  ndlbl <- ndLabels(ndlst[[rid]])
  spn <- ndlst[[rid]][["spn"]]
  trstr <- paste0(trstr, ")", ndlbl, ":", spn, ";")
  write.table(
    x = trstr, file = file, quote = FALSE, row.names = FALSE,
    col.names = FALSE, append = append
  )
}

#' @name readTree
#' @title Read a Newick tree
#' @description Return a \code{TreeMan} or \code{TreeMen} object from a Newick treefile
#' @details Read a single or multiple trees from a file, or a text string. Parallelizable
#' when reading multiple trees.
#' The function will add any internal node labels in the Newick tree as a user-defined data slots.
#' The name of this slot is defined with the \code{spcl_slt_nm}.
#' These data can be accessed/manipulated with the \code{`getNdsSlt()`} function.
#' Trees are always read as rooted. (Unrooted trees have polytomous root nodes.)
#' @param file file path
#' @param text Newick character string
#' @param spcl_slt_nm name of special slot for internal node labels, default 'Unknown'.
#' @param wndmtrx T/F add node matrix? Default FALSE.
#' @param parallel logical, make parallel?
#' @param progress name of the progress bar to use, see \code{\link{create_progress_bar}}
#' @seealso
#' \url{https://en.wikipedia.org/wiki/Newick_format},
#' \code{\link{addNdmtrx}}, \code{\link{writeTree}},
#' \code{\link{randTree}}, \code{\link{readTrmn}}, \code{\link{writeTrmn}},
#' \code{\link{saveTreeMan}}, \code{\link{loadTreeMan}}
#' @export
#' @examples
#'
#' # tree string with internal node labels as bootstrap results
#' tree <- readTree(
#'   text = "((A:1.0,B:1.0)0.9:1.0,(C:1.0,D:1.0)0.8:1.0)0.7:1.0;",
#'   spcl_slt_nm = "bootstrap"
#' )
#' # retrieve bootstrap values by node
#' tree["bootstrap"]
readTree <- function(file = NULL, text = NULL, spcl_slt_nm = "Unknown", wndmtrx = FALSE,
                     parallel = FALSE, progress = "none") {
  if (!is.null(file)) {
    trstr <- scan(file, what = "raw", quiet = TRUE)
  } else {
    trstr <- text
  }
  if (length(trstr) > 1) {
    trstr <- as.list(trstr)
    trees <- plyr::mlply(trstr,
      .fun = .readTree, spcl_slt_nm = spcl_slt_nm,
      wndmtrx = wndmtrx, .progress = progress, .parallel = parallel
    )
    names(trees) <- NULL
    trees <- trees[1:length(trees)]
    tree <- as(trees, "TreeMen")
  } else {
    tree <- .readTree(trstr, spcl_slt_nm, wndmtrx)
  }
  tree
}

#' @useDynLib phylotaR
#' @useDynLib phylotaR cFindPrids
.readTree <- function(trstr, spcl_slt_nm, wndmtrx) {
  # Internals
  .idspn <- function(i) {
    mtdt <- substr(trstr, start = nds[i - 1] + 1, stop = nds[i])
    mtdt <- gsub("(\\(|\\)|,|;)", "", mtdt)
    mtdt <- strsplit(mtdt, ":")[[1]]
    id <- NA
    if (length(mtdt) == 0) {
      spn <- NA
    } else if (length(mtdt) == 1) {
      id <- mtdt
      spn <- NA
    } else if (length(mtdt) > 1 && mtdt[1] != "") {
      id <- mtdt[1]
      spn <- as.numeric(mtdt[2])
    } else {
      spn <- as.numeric(mtdt[2])
    }
    c(id, spn)
  }
  .add <- function(i) {
    nd <- vector("list", length = 4)
    names(nd) <- c("id", "ptid", "prid", "spn")
    nd[["id"]] <- ids[i]
    nd[["spn"]] <- spns[i]
    nd[["prid"]] <- ids[prinds[i]]
    nd[["ptid"]] <- ptids[ptnds_pool == i]
    nd
  }
  # get nodes from string
  nds <- c(1, as.integer(gregexpr("(,|\\))", trstr)[[1]]) - 1)
  nds <- c(nds, nchar(trstr))
  # get id and spn
  mtdt <- mapply(2:length(nds), FUN = .idspn)
  ids <- mtdt[1, ]
  spns <- as.numeric(mtdt[2, ])
  rm(mtdt)
  nds <- nds[-1]
  # gen prids
  opns <- gregexpr("\\(", trstr)[[1]]
  clss <- gregexpr("\\)", trstr)[[1]]
  prinds <- .Call("cFindPrids",
    PACKAGE = "phylotaR",
    as.integer(nds),
    as.integer(clss),
    as.integer(opns)
  )
  if (sum(prinds == -1) > 1) {
    stop("Invalid tree string")
  }
  root <- which(prinds == -1)
  prinds <- match(prinds, nds)
  tinds <- which(!1:length(ids) %in% prinds)
  prinds[is.na(prinds)] <- root
  spns[is.na(spns)] <- 0
  # move internal node labels to other
  other <- rep(NA, length(ids))
  intnds <- 1:length(ids) %in% prinds
  other[intnds] <- ids[intnds]
  # overwrite internal node ids only if at least one is malformed
  if (any(grepl("[^a-zA-Z_0-9]", ids[intnds]))) {
    ids[intnds] <- paste0("n", which(intnds))
  } else {
    # if using internal node ids as ids, no need for special slot
    other[intnds] <- NA
  }
  # rm NAs from IDs
  pull <- is.na(ids)
  ids[pull] <- paste0("n", which(pull))
  # ensure no dups in ids
  dups <- duplicated(ids)
  if (any(dups)) {
    dups <- unique(ids[dups])
    for (dup in dups) {
      pull <- ids == dup
      other[pull] <- ids[pull]
      ids[pull] <- paste0("n", which(pull))
    }
  }
  ptids <- ids[-root]
  ptnds_pool <- prinds[-root]
  ndlst <- lapply(1:length(ids), .add)
  names(ndlst) <- ids
  tree <- new("TreeMan",
    ndlst = ndlst, root = ids[root],
    ndmtrx = NULL, wtxnyms = FALSE,
    prinds = prinds, tinds = tinds
  )
  pull <- !is.na(other)
  if (any(pull)) {
    tree <- setNdsOther(tree,
      ids = ids[pull], vals = other[pull],
      slt_nm = spcl_slt_nm
    )
  }
  tree <- updateSlts(tree)
  if (wndmtrx) {
    tree <- addNdmtrx(tree)
  }
  tree
}

#' @name writeTrmn
#' @title Write a .trmn tree
#' @description Write to disk a \code{TreeMan} or \code{TreeMan} object using the .trmn treefile
#' @details Write a tree(s) to file using the .trmn format.
#' It is faster to read and write tree files using treeman with the .trmn file format.
#' In addition it is possible to encode more information than possible with the
#' Newick, e.g. any taxonomic information and additional slot names added to
#' the tree are recorded in the file.
#' @param tree TreeMan object or TreeMen object
#' @param file file path
#' @seealso
#' \code{\link{readTrmn}},
#' \code{\link{readTree}},\code{\link{writeTree}},
#' \code{\link{randTree}}, \code{\link{saveTreeMan}}, \code{\link{loadTreeMan}}
#' @export
#' @examples
#'
#' tree <- randTree(10)
#' writeTrmn(tree, file = "test.trmn")
#' tree <- readTrmn("test.trmn")
#' file.remove("test.trmn")
writeTrmn <- function(tree, file) {
  .unpack <- function(ntree) {
    .makeDataFrame(ntree, tree@treelst[[ntree]])
  }
  .makeDataFrame <- function(ntree, tree) {
    res <- data.frame(tree = ntree, prind = tree@prinds)
    res[["id"]] <- names(tree@ndlst)
    if (tree@wspn) {
      res[["spn"]] <- vapply(
        tree@ndlst, function(x) x[["spn"]],
        numeric(1)
      )
    }
    if (tree@wtxnyms) {
      res[["txnym"]] <- vapply(
        tree@ndlst,
        function(x) paste0(x[["txnym"]], collapse = "|"),
        character(1)
      )
    }
    # add any additional slots
    if (length(tree@othr_slt_nms) > 0) {
      for (slt_nm in tree@othr_slt_nms) {
        res[[slt_nm]] <- sapply(
          tree@ndlst,
          function(x) x[[slt_nm]]
        )
      }
    }
    res
  }
  if ("TreeMan" %in% is(tree)) {
    res <- .makeDataFrame(1, tree)
  } else if ("TreeMen" %in% is(tree)) {
    res <- plyr::mdply(
      .data = data.frame(ntree = 1:tree@ntrees),
      .fun = .unpack
    )
    res <- res[, -1]
  } else {
    stop("`tree` must be TreeMan or TreeMen object.")
  }
  write.csv(res, file = file, quote = FALSE, row.names = FALSE)
}

#' @name readTrmn
#' @title Read a .trmn tree
#' @description Return a \code{TreeMan} or \code{TreeMen} object from a .trmn treefile
#' @details Read a tree(s) from a file using the .trmn format.
#' It is faster to read and write tree files using treeman with the .trmn file format.
#' In addition it is possible to encode more information than possible with the
#' Newick, e.g. any taxonomic information and additional slot names added to
#' the tree are recorded in the file.
#' @param file file path
#' @param wndmtrx T/F add node matrix? Default FALSE.
#' @param parallel logical, make parallel?
#' @param progress name of the progress bar to use, see \code{\link{create_progress_bar}}
#' @seealso
#' \code{\link{writeTrmn}},
#' \code{\link{readTree}},\code{\link{writeTree}},
#' \code{\link{randTree}}, \code{\link{saveTreeMan}}, \code{\link{loadTreeMan}}
#' @export
#' @examples
#'
#' tree <- randTree(10)
#' writeTrmn(tree, file = "test.trmn")
#' tree <- readTrmn("test.trmn")
#' file.remove("test.trmn")
readTrmn <- function(file, wndmtrx = FALSE, parallel = FALSE,
                     progress = "none") {
  .pack <- function(i) {
    .readTrmn(
      inpt[inpt[["tree"]] == i, ],
      wndmtrx
    )
  }
  inpt <- read.csv(file, stringsAsFactors = FALSE)
  trids <- unique(inpt[["tree"]])
  trees <- plyr::mlply(
    .data = trids, .fun = .pack,
    .parallel = parallel, .progress = progress
  )
  if (length(trees) == 1) {
    res <- trees[[1]]
  } else {
    trees <- trees[1:length(trees)]
    names(trees) <- NULL
    res <- as(trees, "TreeMen")
  }
  res
}

.readTrmn <- function(inpt, wndmtrx) {
  .add <- function(i) {
    nd <- vector("list", length = 4)
    names(nd) <- c("id", "ptid", "prid", "spn")
    nd[["id"]] <- ids[i]
    nd[["spn"]] <- spns[i]
    nd[["prid"]] <- ids[prinds[i]]
    nd[["ptid"]] <- ptids[ptnds_pool == i]
    nd
  }
  prinds <- inpt[["prind"]]
  # all internal nodes should occur more than once (twice for bifurcating trees)
  prind_test <- sum(prinds == 1:length(prinds)) == 1
  prind_test <- all(table(prinds) > 1) & prind_test
  if (!prind_test) {
    stop("Tree is corrupted, check node structure is hierarchical.")
  }
  ids <- inpt[["id"]]
  if ("spn" %in% names(inpt) && !is.na(inpt[["spn"]][1])) {
    spns <- inpt[["spn"]]
  } else {
    spns <- rep(0, length(ids))
  }
  tinds <- which(!1:length(ids) %in% prinds)
  root <- which(1:length(prinds) == prinds)
  ptids <- ids[-root]
  ptnds_pool <- prinds[-root]
  ndlst <- lapply(1:length(ids), .add)
  names(ndlst) <- ids
  tree <- new("TreeMan",
    ndlst = ndlst, root = ids[root],
    ndmtrx = NULL, wtxnyms = FALSE,
    prinds = prinds, tinds = tinds
  )
  if ("txnym" %in% names(inpt) && !is.na(inpt[["txnym"]][1])) {
    txnyms <- strsplit(inpt[["txnym"]], "\\|")
    names(txnyms) <- ids
    tree <- setTxnyms(tree, txnyms)
  }
  othr_slt_nms <- names(inpt)[!names(inpt) %in%
    c("id", "prind", "spn", "txnym", "tree")]
  if (length(othr_slt_nms) > 0) {
    for (slt_nm in othr_slt_nms) {
      tree <- setNdsOther(tree,
        ids = inpt[["id"]],
        vals = inpt[[slt_nm]], slt_nm = slt_nm
      )
    }
  }
  tree <- updateSlts(tree)
  if (wndmtrx) {
    tree <- addNdmtrx(tree)
  }
  tree
}

#' @name saveTreeMan
#' @title Save a TreeMan object in serialization format
#' @description \code{TreeMan} equivalent to \code{save()} but able to handle
#' node matrices.
#' @details It is not possible to use \code{save()} on \code{TreeMan} objects
#' with node matrices. Node matrices are bigmemory matrices and are therefore outside
#' the R environment, see bigmemory documentation for more information. Saving and loading
#' a bigmemory matrix may cause memory issues in R and cause R to crash.
#'
#' This function can safely store a \code{TreeMan} object with and without
#' a node matrix. This function stores the tree using the serialization format and the node
#' matrix as a hidden .csv. Both parts of the tree can be reloaded to an R environment
#' with \code{loadTreeMan()}. The hidden node matrix filename is based on the file argument:
#' \code{file + _ndmtrx}
#'
#' Reading and writing trees with \code{saveTreeMan()} and
#' \code{loadTreeMan} is faster than any of the other read and write functions.
#' @param tree \code{TreeMan} object
#' @param file file path
#' @seealso
#' \code{\link{loadTreeMan}},
#' \code{\link{readTree}},\code{\link{writeTree}},
#' \code{\link{readTrmn}}, \code{\link{writeTrmn}}
#' @export
#' @examples
#'
#' tree <- randTree(100, wndmtrx = TRUE)
#' saveTreeMan(tree, file = "test.RData")
#' rm(tree)
#' tree <- loadTreeMan(file = "test.RData")
#' file.remove("test.RData", "testRData_ndmtrx")
saveTreeMan <- function(tree, file) {
  ndmtrx_file <- paste0(gsub("\\.", "", file), "_ndmtrx")
  if (!is.null(tree@ndmtrx)) {
    bigmemory::write.big.matrix(x = tree@ndmtrx, filename = ndmtrx_file)
    tree <- rmNdmtrx(tree)
  }
  save(list = c("tree", "ndmtrx_file"), file = file)
}

#' @name loadTreeMan
#' @title Load a TreeMan object in serialization format
#' @description \code{TreeMan} equivalent to \code{load()} but able to handle
#' node matrices.
#' @details It is not possible to use \code{save()} on \code{TreeMan} objects
#' with node matrices. Node matrices are bigmemory matrices and are therefore outside
#' the R environment, see bigmemory documentation for more information. Saving and loading
#' a bigmemory matrix may cause memory issues in R and cause R to crash.
#'
#' This function can safely read a \code{TreeMan} object with and without
#' a node matrix. \code{saveTreeMan()} function stores the tree using the serialization format
#' and the node matrix as a hidden .csv. Both parts of the tree can be reloaded to an R environment
#' with \code{loadTreeMan()}. The hidden node matrix filename is based on the file argument:
#' \code{file + _ndmtrx}
#'
#' Reading and writing trees with \code{saveTreeMan()} and
#' \code{loadTreeMan} is faster than any of the other read and write functions.
#' @param file file path
#' @seealso
#' \code{\link{saveTreeMan}},
#' \code{\link{readTree}},\code{\link{writeTree}},
#' \code{\link{readTrmn}}, \code{\link{writeTrmn}}
#' @export
#' @examples
#'
#' tree <- randTree(100, wndmtrx = TRUE)
#' saveTreeMan(tree, file = "test.RData")
#' rm(tree)
#' tree <- loadTreeMan(file = "test.RData")
#' file.remove("test.RData", "testRData_ndmtrx")
loadTreeMan <- function(file) {
  ndmtrx_file <- NULL
  load(file)
  if (file.exists(ndmtrx_file)) {
    tree@ndmtrx <- bigmemory::read.big.matrix(
      filename = ndmtrx_file,
      type = "integer", shared = FALSE
    )
  }
  tree
}

#' @name searchTxnyms
#' @title Get node labels based on online taxonomic database
#' @description Return names of each node in tree based on searching tip labels
#'  through Global Names Resolver \url{http://resolver.globalnames.org/} in NCBI.
#' @details For each node, all the descendants are searched, the taxonomic lineages returned and
#' then searched to find the lowest shared name.
#' All the tip labels are searched against a specified taxonomic database through the GNR and NCBI.
#' (So far only tested with NCBI database.)
#' Use the infer argument to ensure a taxonym is returned for all nodes. If infer is true,
#' all nodes without an identifed taxonym adopt the taxonym of their parent.
#' Will raise a warning if connection fails and will return NULL.
#' @param tree TreeMan object
#' @param cache T/F, create a local cache of downloaded names?
#' @param parent specify parent of all names to prevent false names
#' @param clean T/F, ensure returned names contain no special characters?
#' @param infer T/F, infer taxonyms for unfound nodes?
#' @seealso
#' \code{\link{taxaResolve}}, \code{\link{setTxnyms}}, \code{\link{getNdsFrmTxnyms}}
#' @export
#' @examples
#' tree <- randTree(8)
#' new_tids <- c(
#'   "Gallus_gallus", "Aileuropoda_melanoleucha", "Ailurus_fulgens",
#'   "Rattus_rattus", "Mus_musculus", "Gorilla_gorilla", "Pan_trogoldytes", "Homo_sapiens"
#' )
#' tree <- setNdsID(tree, tree["tips"], new_tids)
#' nd_labels <- searchTxnyms(tree)
#' print(nd_labels)
# TODO: add compatibility with other GNR datasources
# TODO: catalogue of life, unlike NCBI, does not keep lineages and rank lengths constant between names
searchTxnyms <- function(tree, cache = FALSE, parent = NULL, clean = TRUE,
                         infer = TRUE) {
  # Use GNR to label all nodes in a phylogeny
  # first replace all _ with spaces
  tip_labels <- gsub("_", " ", tree@tips)
  nids <- tree@nds
  nd_labels <- rep(NA, tree@nall)
  names(nd_labels) <- tree@all
  taxa_res <- taxaResolve(tip_labels, datasource = 4, cache = cache, parent = parent)
  if (is.null(taxa_res)) {
    return(NULL)
  }
  # for tips use the first word of the name
  nd_labels[tree@tips] <- vapply(
    strsplit(tip_labels, "\\s+"), function(x) x[1],
    character(1)
  )
  nds_kids <- getNdsKids(tree, ids = nids)
  for (nid in nids) {
    kids <- gsub("_", " ", nds_kids[[nid]])
    genus_names <- vapply(
      strsplit(kids, "\\s+"), function(x) x[1],
      character(1)
    )
    if (all(genus_names == genus_names[1])) {
      nd_labels[nid] <- genus_names[1]
    } else {
      lineages <- as.character(taxa_res[taxa_res$search.name %in% kids, "lineage"])
      lineages <- strsplit(lineages, "\\|")
      lineages <- lineages[!is.na(lineages)]
      if (length(lineages) > 1) {
        nd_labels[nid] <- .findClade(lineages)
      }
    }
  }
  if (clean) {
    nd_labels <- gsub("\\s", "_", nd_labels)
    nd_labels <- gsub("[^a-zA-Z_0-9]", "", nd_labels)
  }
  if (infer & any(is.na(nd_labels))) {
    if (sum(is.na(nd_labels)) / length(nd_labels) > 0.5) {
      message(
        "Fewer than 50% of nodes identified,",
        " not attempting inference of remaning nodes."
      )
    } else {
      for (i in which(is.na(nd_labels))) {
        prids <- getNdPrids(tree, names(nd_labels)[i])
        pssbls <- nd_labels[prids]
        pssbls <- pssbls[!is.na(pssbls)]
        if (length(pssbls) > 0) {
          nd_labels[[i]] <- pssbls[[1]]
        }
      }
    }
  }
  nd_labels
}

#' @name taxaResolve
#' @title Resolve taxonmic names online
#' @description Resolve taxonomic names via the Global Names Resolver.
#' @details Returns dataframe containing GNR metadata for each name wames
#' that cannot be resolved are returned as NA. Various datasources are
#' available, see \url{http://resolver.globalnames.org/data_sources} for a
#' list and IDs. Default is 4 for NCBI.
#' Will raise a warning if connection fails and will return NULL.
#' @param nms vector of names
#' @param batch size of the batches to be queried
#' @param datasource ID number of the datasource
#' @param genus boolean, if true will search against GNR with just the genus
#'  name for names that failed to resolve using the full species name
#' @param cache T/F, create a local cache of downloaded names?
#' @param parent specify parent of all names to prevent false names
#' @seealso
#' \code{\link{searchTxnyms}}, \code{\link{setTxnyms}}, \code{\link{getNdsFrmTxnyms}}
#' @export
#' @examples
#' my_lovely_names <- c(
#'   "Gallus gallus", "Pongo pingu", "Homo sapiens",
#'   "Arabidopsis thaliana", "Macaca thibetana", "Bacillus subtilis"
#' )
#' res <- taxaResolve(nms = my_lovely_names)
#' length(colnames(res)) # 10 different metadata for returned names including original search name
#' # let's look at the lineages
#' lineages <- strsplit(as.vector(res$lineage), "\\|")
#' print(lineages[[6]]) # the bacteria has far fewer taxonomic levels
# NOTE. Originally built for MTT
taxaResolve <- function(nms, batch = 100, datasource = 4, genus = TRUE,
                        cache = FALSE, parent = NULL) {
  .replace <- function(i, slot.name) {
    # controlled extraction
    element <- data[[i]]$result[[1]][[slot.name]]
    if (!is.null(element)) {
      res <- element
    } else {
      res <- NA
    }
    res
  }
  batchResolve <- function(batch.nms) {
    # create query from nms
    url <- "http://resolver.globalnames.org/name_resolvers.json?"
    data_source_ids <- paste0("&data_source_ids=", datasource)
    nms2 <- paste0("names=", paste0(stringr::str_replace_all(
      batch.nms, " ", "+"
    ), collapse = "|"))
    query <- paste(plyr::compact(list(
      url, nms2,
      data_source_ids
    )), collapse = "")
    # search via API
    data <- .safeFromJSON(query)$data
    return(data)
  }
  # avoid names -- names exist on database but mean nothing
  avoid <- c("unidentified")
  # make sure names don't have '_'
  trms <- gsub("_", " ", nms)
  # remove all non alphanumerics
  trms <- gsub("\\W+", " ", trms)
  # remove trailing whitespace
  trms <- gsub("^\\s+|\\s+$", "", trms)
  # any missing trms, replace with stubs
  trms[trms == ""] <- "invalid"
  deja_vues <- rep(FALSE, length(trms))
  data <- vector("list", length = length(trms))
  names(data) <- nms
  if (cache) {
    if (!file.exists("gnr_cache")) {
      dir.create("gnr_cache")
    }
    for (i in 1:length(nms)) {
      fp <- file.path("gnr_cache", paste0(nms[i], ".RData"))
      if (file.exists(fp)) {
        load(fp)
        data[[nms[i]]] <- nd
        deja_vues[i] <- TRUE
      }
    }
  }
  if (sum(!deja_vues) > 0) {
    # Split nms into batch sized chunks
    #  http://stackoverflow.com/questions/3318333/split-a-vector-into-chunks-in-r
    x <- seq_along(trms[!deja_vues])
    btrms <- split(trms[!deja_vues], ceiling(x / batch))
    bnms <- split(nms[!deja_vues], ceiling(x / batch))
    for (i in 1:length(btrms)) {
      temp.data <- batchResolve(btrms[[i]])
      if (is.null(temp.data)) {
        return(NULL)
      }
      data[bnms[[i]]] <- temp.data
    }
  }
  # transform results into output
  search.name <- name.string <- canonical.form <-
    lineage <- lineage.ids <- rank <- taxid <-
    match.type <- prescore <- score <- rep(NA, length(nms))
  for (i in 1:length(data)) {
    parent_test <- TRUE
    nd <- data[[i]]
    if (cache & !deja_vues[i]) {
      fp <- file.path("gnr_cache", paste0(names(data)[i], ".RData"))
      save(nd, file = fp)
    }
    if (!"results" %in% names(nd)) {
      search.name[i] <- nms[i]
    } else if (nd[[1]] %in% avoid) {
      search.name[i] <- nms[i]
    } else {
      search.name[i] <- nms[i]
      lng <- .replace(i, "classification_path")
      if (!is.null(parent)) {
        parent_test <- grepl(parent, lng)
      }
      if (parent_test) {
        name.string[i] <- .replace(i, "name_string")
        canonical.form[i] <- .replace(i, "canonical_form")
        lineage[i] <- lng
        lineage.ids[i] <- .replace(i, "classification_path_ids")
        rank[i] <- .replace(i, "classification_path_ranks")
        taxid[i] <- .replace(i, "taxon_id")
        match.type[i] <- .replace(i, "match_type")
        prescore[i] <- .replace(i, "prescore")
        score[i] <- nd$results[[1]]$score
      }
    }
  }
  res <- data.frame(
    search.name = search.name,
    name.string = name.string,
    canonical.form = canonical.form,
    lineage = lineage, lineage.ids = lineage.ids,
    rank = rank, taxid = taxid,
    match.type = match.type, prescore = prescore,
    score = score, stringsAsFactors = FALSE
  )
  failed <- which(is.na(res$name.string))
  if (genus & length(failed) > 0) {
    # if genus, search just genus names
    genus.nms <- sub("\\s+.*", "", res$search.name[failed])
    genus.res <- taxaResolve(genus.nms, batch, datasource,
      genus = FALSE,
      parent = parent, cache = cache
    )
    # replace in original results, all slots except search.name
    res[failed, -1] <- genus.res[, -1]
  }
  return(res)
}

.safeFromJSON <- function(url, max_trys = 5, power = 2) {
  # Safe wrapper for fromJSON
  trys <- 0
  waittime <- 2
  while (trys < max_trys) {
    json_obj <- try(RJSONIO::fromJSON(url), silent = TRUE)
    if (inherits(json_obj, "try-error")) {
      cat("---- Connection failed: trying again in [", waittime,
        "s]----\n",
        sep = ""
      )
      trys <- trys + 1
      Sys.sleep(waittime)
      waittime <- waittime * power
    } else {
      return(json_obj)
    }
  }
  warning("Failed to connect, server may be down.")
  list("data" = NULL)
}

.findClade <- function(lineages) {
  # for a list of lineages, find the clade shared by all
  subj <- lineages[[1]]
  for (i in 2:length(lineages)) {
    query <- lineages[[i]]
    subj <- subj[subj %in% query]
  }
  subj[length(subj)]
}

#' @name setTxnyms
#' @title Set the txnym slots in a tree
#' @description Return a tree with txnyms added to specified nodes
#' @details Returns a tree. Specify the taxonomic groups for nodes in a tree
#' by providing a vector or list named by node IDs. Takes output from \code{searchTxnyms}.
#' Only letters, numbers and underscores allowed. To remove special characters use regular
#' expressions, e.g. \code{gsub(['a-zA-Z0-9_'], '', txnym)}
#' @param tree \code{TreeMan} object
#' @param txnyms named vector or list
#' @seealso
#' \code{\link{taxaResolve}}, \code{\link{searchTxnyms}},
#' \code{\link{getNdsLng}}, \code{\link{getNdLng}},
#' \url{https://github.com/DomBennett/treeman/wiki/set-methods}
#' @export
#' @examples
#'
#' data(mammals)
#' # let's change the txnym for humans
#' # what's its summary before we change anything?
#' summary(mammals[["Homo_sapiens"]])
#' # now let's add Hominini
#' new_txnym <- list("Homo_sapiens" = c("Hominini", "Homo"))
#' mammals <- setTxnyms(mammals, new_txnym)
#' summary(mammals[["Homo_sapiens"]])
setTxnyms <- function(tree, txnyms) {
  .add <- function(nid) {
    for (txnym in txnyms[[nid]]) {
      if (grepl("[^a-zA-Z_0-9]", txnym)) {
        stop(paste0(
          "Unsuitable characters in [",
          txnym, "]"
        ))
      }
    }
    tree@ndlst[[nid]][["txnym"]] <<- txnyms[[nid]]
  }
  pull <- names(txnyms) %in% names(tree@ndlst)
  txnyms <- txnyms[pull]
  plyr::m_ply(names(txnyms), .fun = .add)
  tree@wtxnyms <- TRUE
  tree
}

#' @name setPD
#' @title Set the phylogenetic diversity
#' @description Return a tree with the phylogenetic diversity altered.
#' @details Use this function to convert the phylogenetic diversity of a tree. For example,
#' you might want to convert the tree so the sum of all branches is 1. This function will achieve
#' that by modiyfing every branch, while maintaining their relative lengths.
#' @param tree \code{TreeMan} object
#' @param val new phylogenetic diversity
#' @seealso
#' \code{\link{setAge}}
#' \url{https://github.com/DomBennett/treeman/wiki/set-methods}
#' @export
#' @examples
#'
#' tree <- randTree(10)
#' tree <- setPD(tree, val = 1)
#' summary(tree)
setPD <- function(tree, val) {
  spns <- getNdsSlt(tree, ids = tree@all, slt_nm = "spn")
  spns <- spns / (tree@pd / val)
  tree <- setNdsSpn(tree, ids = tree@all, vals = spns)
  tree@pd <- val
  tree
}

#' @name setAge
#' @title Set the age of a tree
#' @description Return a tree with the age altered.
#' @details Use this function to change the age of a tree. For example,
#' you might want to convert the tree so that its age equals 1. This function will achieve
#' that by modiyfing every branch, while maintaining their relative lengths.
#' @param tree \code{TreeMan} object
#' @param val new age
#' @seealso
#' \code{\link{setPD}}
#' \url{https://github.com/DomBennett/treeman/wiki/set-methods}
#' @export
#' @examples
#'
#' tree <- randTree(10)
#' tree <- setAge(tree, val = 1)
#' summary(tree)
setAge <- function(tree, val) {
  # cdmuir's correction
  tree_age <- getAge(tree)
  spns <- getNdsSlt(tree, ids = tree@all, slt_nm = "spn")
  # spns <- spns/(tree_age) # previous, incorrect
  spns <- spns * val / tree_age
  tree <- setNdsSpn(tree, ids = tree@all, vals = spns)
  tree
}

#' @name setNdSpn
#' @title Set the branch length of a specific node
#' @description Return a tree with the span of a node altered.
#' @details Takes a tree, a node ID and a new value for the node's preceding branch length (span).
#' @param tree \code{TreeMan} object
#' @param id id of node whose preceding edge is to be changed
#' @param val new span
#' @seealso
#' \code{\link{setNdsSpn}}
#' \url{https://github.com/DomBennett/treeman/wiki/set-methods}
#' @export
#' @examples
#'
#' tree <- randTree(10)
#' tree <- setNdSpn(tree, id = "t1", val = 100)
#' tree <- updateSlts(tree)
#' summary(tree)
setNdSpn <- function(tree, id, val) {
  tree@ndlst[[id]][["spn"]] <- val
  tree@updtd <- FALSE
  tree
}

#' @name setNdsSpn
#' @title Set the branch lengths of specific nodes
#' @description Return a tree with the spans of nodes altered.
#' @details Runs \code{setNdSpn} over multiple nodes. Parallelizable.
#' @param tree \code{TreeMan} object
#' @param ids ids of nodes whose preceding edges are to be changed
#' @param vals new spans
#' @param parallel logical, make parallel?
#' @param progress name of the progress bar to use, see \code{\link{create_progress_bar}}
#' @seealso
#' \code{\link{setNdSpn}}
#' \url{https://github.com/DomBennett/treeman/wiki/set-methods}
#' @export
#' @examples
#'
#' tree <- randTree(10)
#' # make tree taxonomic
#' tree <- setNdsSpn(tree, ids = tree["all"], vals = 1)
#' summary(tree)
#' # remove spns by setting all to 0
#' tree <- setNdsSpn(tree, ids = tree["all"], vals = 0)
#' summary(tree)
setNdsSpn <- function(tree, ids, vals, parallel = FALSE, progress = "none") {
  .reset <- function(id, spn) {
    ndlst[[id]][["spn"]] <- spn
    ndlst[[id]]
  }
  ndlst <- tree@ndlst[ids]
  l_data <- data.frame(id = ids, spn = vals, stringsAsFactors = FALSE)
  ndlst <- plyr::mlply(l_data,
    .fun = .reset, .parallel = parallel,
    .progress = progress
  )
  ndlst <- ndlst[1:length(ndlst)]
  tree@ndlst[ids] <- ndlst
  tree <- updateSlts(tree)
  tree
}

#' @name setNdID
#' @title Set the ID of a node
#' @description Return a tree with the ID of a node altered.
#' @details IDs cannot be changed directly for the \code{TreeMan} class. To change an
#' ID use this function. Warning: all IDs must be unique, avoid spaces in IDs and only
#' use letters, numbers and underscores.
#' Use \code{\link{updateSlts}} after running.
#' @param tree \code{TreeMan} object
#' @param id id to be changed
#' @param val new id
#' @seealso
#' \code{\link{setNdsID}}
#' \url{https://github.com/DomBennett/treeman/wiki/set-methods}
#' @export
#' @examples
#'
#' tree <- randTree(10)
#' tree <- setNdID(tree, "t1", "heffalump")
#' tree <- updateSlts(tree)
setNdID <- function(tree, id, val) {
  tree@updtd <- FALSE
  setNdsID(tree, id, val)
}

#' @name setNdsID
#' @title Set the IDs of multiple nodes
#' @description Return a tree with the IDs of nodes altered.
#' @details Runs \code{setNdID()} over multiple nodes. Warning: all IDs must be unique,
#' avoid spaces in IDs, only use numbers, letters and underscores. Parellizable.
#' @param tree \code{TreeMan} object
#' @param ids ids to be changed
#' @param vals new ids
#' @param parallel logical, make parallel?
#' @param progress name of the progress bar to use, see \code{\link{create_progress_bar}}
#' @seealso
#' \code{\link{setNdID}}
#' \url{https://github.com/DomBennett/treeman/wiki/set-methods}
#' @export
#' @examples
#'
#' tree <- randTree(10)
#' new_ids <- paste0("heffalump_", 1:tree["ntips"])
#' tree <- setNdsID(tree, tree["tips"], new_ids)
#' summary(tree)
setNdsID <- function(tree, ids, vals, parallel = FALSE, progress = "none") {
  # internals
  .testSpcls <- function(id) {
    if (grepl("[^a-zA-Z_0-9]", id)) {
      stop(paste0("Unsuitable characters in [", id, "]"))
    }
    NULL
  }
  .rplcS4 <- function(slt) {
    if (any(slot(tree, slt) %in% ids)) {
      mtchs <- match(slot(tree, slt), ids)
      return(vals[mtchs])
    } else {
      return(slot(tree, slt))
    }
  }
  .reset <- function(i) {
    .rplc <- function(slt) {
      res <- nd[[slt]]
      mtchs <- match(res, ids)
      res[which(!is.na(mtchs))] <-
        vals[mtchs[!is.na(mtchs)]]
      res
    }
    nd <- tree@ndlst[[i]]
    nd[["id"]] <- .rplc("id")
    nd[["ptid"]] <- .rplc("ptid")
    nd[["prid"]] <- .rplc("prid")
    nd
  }
  mapply(FUN = .testSpcls, vals)
  l_data <- data.frame(i = 1:length(tree@ndlst), stringsAsFactors = FALSE)
  ndlst <- plyr::mlply(l_data, .fun = .reset, .parallel = parallel, .progress = progress)
  ndlst <- ndlst[1:length(ndlst)]
  all <- names(tree@ndlst)
  all[match(ids, all)] <- vals
  names(ndlst) <- all
  tree@ndlst <- ndlst
  tree@tips <- .rplcS4("tips")
  tree@nds <- .rplcS4("nds")
  tree@root <- .rplcS4("root")
  tree <- updateSlts(tree)
  tree
}

#' @name setNdOther
#' @title Set a user defined slot
#' @description Return a tree with a user defined slot for node ID.
#' @details A user can specify new slots in a tree. Add a new slot with this function
#' by providing a node ID, a value for the new slot and a unique new slot name. Slot names
#' must not be default \code{TreeMan} names. The new value can be any data type.
#' @param tree \code{TreeMan} object
#' @param id id of the node
#' @param val data for slot
#' @param slt_nm slot name
#' @seealso
#' \code{\link{setNdsOther}}
#' \url{https://github.com/DomBennett/treeman/wiki/set-methods}
#' @export
#' @examples
#'
#' tree <- randTree(10)
#' tree <- setNdOther(tree, "t1", 1, "binary_val")
#' tree <- updateSlts(tree)
#' (getNdSlt(tree, id = "t1", slt_nm = "binary_val"))
setNdOther <- function(tree, id, val, slt_nm) {
  tree@ndlst[[id]][slt_nm] <- val
  tree@updtd <- FALSE
  if (!slt_nm %in% tree@othr_slt_nms) {
    tree@othr_slt_nms <- c(tree@othr_slt_nms, slt_nm)
  }
  tree
}

#' @name setNdsOther
#' @title Set a user defined slot for multiple nodes
#' @description Return a tree with a user defined slot for node IDs.
#' @details Runs \code{setNdOther()} over multiple nodes. Parellizable.
#' @param tree \code{TreeMan} object
#' @param ids id sof the nodes
#' @param vals data for slot
#' @param slt_nm slot name
#' @param parallel logical, make parallel?
#' @param progress name of the progress bar to use, see \code{\link{create_progress_bar}}
#' @seealso
#' \code{\link{setNdOther}}
#' \url{https://github.com/DomBennett/treeman/wiki/set-methods}
#' @export
#' @examples
#'
#' tree <- randTree(10)
#' # e.g. confidences for nodes
#' vals <- runif(min = 0, max = 1, n = tree["nall"])
#' tree <- setNdsOther(tree, tree["all"], vals, "confidence")
#' tree <- updateSlts(tree)
#' summary(tree)
#' (getNdsSlt(tree, ids = tree["all"], slt_nm = "confidence"))
setNdsOther <- function(tree, ids, vals, slt_nm, parallel = FALSE, progress = "none") {
  .set <- function(id, val) {
    tree@ndlst[[id]][[slt_nm]] <<- val
  }
  l_data <- data.frame(
    id = ids, val = vals,
    stringsAsFactors = FALSE
  )
  plyr::m_ply(.data = l_data, .fun = .set, .parallel = parallel, .progress = progress)
  tree@updtd <- FALSE
  if (!slt_nm %in% tree@othr_slt_nms) {
    tree@othr_slt_nms <- c(tree@othr_slt_nms, slt_nm)
  }
  tree
}

#' @name rmOtherSlt
#' @title Remove a user-defined slot
#' @description Returns a tree with a user-defined tree slot removed.
#' @details A user can specify a new slot using the \code{setNdSlt()} function
#' or upon reading a tree. This can be removed using this function by specifying
#' the name of the slot to be removed.
#' @param tree \code{TreeMan} object
#' @param slt_nm name of slot to be removed
#' @seealso
#' \code{\link{setNdOther}}, \code{\link{setNdsOther}},
#' \url{https://github.com/DomBennett/treeman/wiki/set-methods}
#' @export
#' @examples
#'
#' tree <- randTree(10)
#' vals <- runif(min = 0, max = 1, n = tree["nall"])
#' tree <- setNdsOther(tree, tree["all"], vals, "confidence")
#' tree <- updateSlts(tree)
#' summary(tree)
#' tree <- rmOtherSlt(tree, "confidence")
#' tree <- updateSlts(tree)
#' summary(tree)
rmOtherSlt <- function(tree, slt_nm) {
  .set <- function(id) {
    tree@ndlst[[id]][[slt_nm]] <<- NULL
  }
  l_data <- data.frame(id = tree@all, stringsAsFactors = FALSE)
  plyr::m_ply(.data = l_data, .fun = .set)
  tree@updtd <- FALSE
  tree@othr_slt_nms <- tree@othr_slt_nms[tree@othr_slt_nms != slt_nm]
  tree
}

# roxygen imports
#' @import methods
#' @importFrom graphics lines plot.default text
#' @importFrom utils combn write.table read.csv write.csv
#' @importFrom stats runif

#' @name TreeMan-class
#' @title TreeMan-class
#' @aliases TreeMan-method
#' @description S4 class for representing phylogenetic trees as a list of nodes.
#' @param x \code{TreeMan} object
#' @param i node ID or slot name
#' @param object \code{TreeMan} object
#' @param max.level \code{str()} maximum number of levels to show
#' @param ... additional tree objects
#' @param j missing
#' @param drop missing
#' @slot ndlst list of nodes
#' @slot nds vector of node ids that are internal nodes
#' @slot nnds numeric of number of internal nodes in tree
#' @slot tips vector of node ids that are tips
#' @slot ntips numeric of number of internal nodes in tree
#' @slot all vector of all node ids
#' @slot nall numeric of number of all nodes in tree
#' @slot pd numeric of total branch length of tree
#' @slot tinds indexes of all tip nodes in tree
#' @slot prinds indexes of all pre-nodes in tree
#' @slot wspn logical, do nodes have spans
#' @slot wtxnyms logical, do nodes have txnyms
#' @slot ply logical, is tree bifurcating
#' @slot root character of node id of root, if no root then empty character
#' @slot updtd logical, if tree slots have been updated since initiation or change
#' @slot othr_slt_nms vector, character list of additional data slots added to nodes
#' @slot ndmtrx matrix, T/Fs representing tree structure
#' @details
#' A \code{TreeMan} object holds a list of nodes. The idea of the \code{TreeMan}
#' class is to make adding and removing nodes as similar as possible to adding
#' and removing elements in a list. Note that internal nodes and tips are
#' both considered nodes. Trees can be polytomous but not unrooted.
#'
#'
#' Each node within the \code{TreeMan} \code{ndlst} contains the following data slots:
#' \itemize{
#'    \item \code{id}: character string for the node ID
#'    \item \code{txnym}: name of taxonomic clade (optional)
#'    \item \code{spn}: length of the preceding branch
#'    \item \code{prid}: ID of the immediately preceding node, NULL if root
#'    \item \code{ptid}: IDs of the immediately connecting nodes
#' }
#'
#' See below in 'Examples' for these methods in use.
#' @seealso
#' \code{\link{randTree}}, \code{\link{Node-class}},
#' \code{\link{phylo-to-TreeMan}}, \code{\link{TreeMan-to-phylo}}
#' @examples
#'
#' # Generate random tree
#' tree <- randTree(10)
#' # Print to get basic stats
#' summary(tree)
#' # Slots....
#' tree["tips"] # return all tips IDs
#' tree["nds"] # return all internal node IDs
#' tree["ntips"] # count all tips
#' tree["nnds"] # count all internal nodes
#' tree["root"] # identify root node
#' tree[["t1"]] # return t1 node object
#' tree["pd"] # return phylogenetic diversity
#' tree["ply"] # is polytomous?
#' # Additional special slots (calculated upon call)
#' tree["age"] # get tree's age
#' tree["ultr"] # determine if tree is ultrametric
#' tree["spns"] # get all the spans of the tree IDs
#' tree["prids"] # get all the IDs of preceding nodes
#' tree["ptids"] # get all the IDs of following nodes
#' tree["txnyms"] # get all the taxonyms of all nodes
#' # In addition [] can be used for any user-defined slot
#' # Because all nodes are lists with metadata we can readily
#' #  get specific information on nodes of interest
#' nd <- tree[["n2"]]
#' summary(nd)
#' # And then use the same syntax for the tree
#' nd["nkids"] # .... nkids, pd, etc.
#'
#' # Convert to phylo and plot
#' library(ape)
#' tree <- as(tree, "phylo")
#' plot(tree)
#' @exportClass TreeMan
setClass("TreeMan",
  representation = representation(
    ndlst = "list", # list of node lists
    nds = "vector", # vector of node ids that are internal nodes
    nnds = "numeric", # numeric of number of internal nodes in tree
    tips = "vector", # vector of node ids that are tips
    ntips = "numeric", # numeric of number of internal nodes in tree
    all = "vector", # vector of all Node ids
    nall = "numeric", # numeric of number of all nodes in tree
    pd = "numeric", # numeric of total branch length of tree
    wspn = "logical", # logical, do all nodes have spans
    wtxnyms = "logical", # logical, do nodes txnyms
    ply = "logical", # logical, is tree bifurcating
    updtd = "logical", # logical, if tree slots has been updated since a change
    ndmtrx = "ANY", # bigmemory matrix of logicals
    tinds = "vector", # indexes of tip nodes
    prinds = "vector", # indexes of pre-nodes
    root = "character", # character of node id of root, if no root then empty character
    othr_slt_nms = "vector"
  ), # if new slots added to node, list them here
  validity = fastCheckTreeMan
)

# Accessor methods
#' @rdname TreeMan-class
#' @exportMethod [[
setMethod(
  "[[", c("TreeMan", "character"),
  function(x, i) {
    if (!i %in% names(x@ndlst)) {
      srch_trm <- gsub(" ", "_", i) # usual mistake
      pssbls <- which(agrepl(srch_trm, names(x@ndlst),
        ignore.case = TRUE,
        max.distance = 0.25
      ))
      pssbls <- names(x@ndlst)[pssbls]
      if (length(pssbls) > 0 & length(pssbls) < 50) {
        msg <- paste0("Can't find [", i, "]. Did you mean ....\n")
        for (p in pssbls) {
          msg <- paste0(msg, '"', p, '"\n')
        }
        msg <- paste0(msg, "?\n")
      } else {
        msg <- paste0("Can't find [", i, "] in tree.")
      }
      stop(msg)
    }
    .newNd(x, i)
  }
)
#' @rdname TreeMan-class
setMethod(
  "[", c("TreeMan", "character", "missing", "missing"),
  function(x, i, j, ..., drop = TRUE) {
    slt_nms <- slotNames(x)
    slt_nms <- slt_nms[slt_nms != "ndlst"]
    slt_nms <- slt_nms[slt_nms != "ndmtrx"]
    slt_nms <- slt_nms[slt_nms != "tinds"]
    slt_nms <- slt_nms[slt_nms != "prinds"]
    # ultr is special, shouldn't be updated when updateSlts()
    # too slow to calculate. Instead only calc if called.
    if (i == "ultr") {
      return(isUltrmtrc(x))
    }
    if (i == "age") {
      return(getAge(x))
    }
    # getNdsSlt extractor
    xtr_slts <- c("spns", "prids", "ptids", "txnyms")
    if (i %in% xtr_slts) {
      slt_nm <- sub("s$", "", i) # rm s at end
      res <- getNdsSlt(x, slt_nm, x@all)
      names(res) <- x@all
      return(res)
    }
    if (i %in% x@othr_slt_nms) {
      res <- getNdsSlt(x, i, x@all)
      names(res) <- x@all
      return(res)
    }
    if (!i %in% slt_nms) {
      slt_nms <- paste0(c(
        slt_nms, "ultr", "age", xtr_slts,
        x@othr_slt_nms
      ), collapse = ", ")
      stop(paste0(
        "`", i, "` not a tree slot. Available slots: ",
        slt_nms
      ))
    }
    slot(x, i)
  }
)

# display methods
#' @rdname TreeMan-class
#' @exportMethod as.character
setMethod(
  "as.character", c("x" = "TreeMan"),
  function(x) {
    paste0("TreeMan Object of [", length(x@tips), "] tips")
  }
)
#' @rdname TreeMan-class
#' @exportMethod show
setMethod(
  "show", "TreeMan",
  function(object) {
    msg <- as.character(object)
    cat(msg)
  }
)
#' @rdname TreeMan-class
#' @exportMethod print
setMethod(
  "print", "TreeMan",
  function(x) {
    msg <- as.character(x)
    print(msg)
  }
)
#' @rdname TreeMan-class
#' @exportMethod str
setMethod(
  "str", c("object" = "TreeMan"),
  function(object, max.level = 2L, ...) {
    if (is.na(max.level)) {
      stop("max.level must be numeric")
    }
    str@default(object, max.level = max.level, ...)
  }
)
#' @rdname TreeMan-class
#' @exportMethod summary
setMethod(
  "summary", c("object" = "TreeMan"),
  function(object) {
    if (!fastCheckTreeMan(object)) {
      stop("Tree is corrupted. Run `checkNdlst()` to see how.")
    }
    if (!object@updtd) {
      stop("Tree is not updated since change or initiation. Use `updateSlts()`")
    }
    msg <- "Tree (TreeMan Object):\n"
    msg <- paste0(msg, "  + ", object@ntips, " tips\n")
    msg <- paste0(msg, "  + ", object@nnds, " internal nodes\n")
    if (!is.null(object@ndmtrx)) {
      msg <- paste0(msg, "  + With node matrix\n")
    }
    if (object@wtxnyms) {
      msg <- paste0(msg, "  + With taxonomic names\n")
    }
    if (object@ply) {
      msg <- paste0(msg, "  + Polytomous\n")
    } else {
      msg <- paste0(msg, "  + Binary\n")
    }
    if (length(object@root) == 0) {
      if (!object@wspn) {
        msg <- paste0(msg, "  + Unrooted and without node spans\n")
      } else {
        msg <- paste0(msg, "  + Unrooted, with node spans\n")
        msg <- paste0(msg, "  + PD ", signif(object@pd, 3), "\n")
      }
    } else {
      if (object@wspn) {
        msg <- paste0(msg, "  + PD ", signif(object@pd, 3), "\n")
      } else {
        msg <- paste0(msg, "  + Without node spans\n")
      }
      msg <- paste0(msg, '  + Root node is \"', object@root, '\"\n')
    }
    if (length(object@othr_slt_nms) > 0) {
      msg <- paste0(msg, "  + With additional node slots:\n")
      for (slt_nm in object@othr_slt_nms) {
        msg <- paste0(msg, "    [", slt_nm, "]\n")
      }
    }
    cat(msg)
  }
)

#' @name TreeMen-class
#' @title TreeMen-class
#' @aliases TreeMen-method
#' @param x \code{TreeMen} object
#' @param i tree index (integer or character)
#' @param object \code{TreeMen} object
#' @param max.level \code{str()} maximum level
#' @param ... additional tree objects
#' @param j missing
#' @param drop missing
#' @description S4 class for multiple phylogenetic trees
#' @slot treelst list of \code{TreeMan} objects
#' @slot ntips sum of tips per tree
#' @slot ntrees total number of trees
#' @exportClass TreeMen
#' @seealso
#' \code{\link{cTrees}}
setClass("TreeMen",
  representation = representation(
    treelst = "list", # list of TreeMan objects
    ntips = "numeric", # sum of tips per tree
    ntrees = "numeric"
  ), # number of trees in object
  validity = checkTreeMen
)

# concatenate methods
.cMenToMen <- function(treemen_1, treemen_2) {
  treelst <- c(treemen_1@treelst, treemen_2@treelst)
  treemen_1@treelst <- treelst
  treemen_1@ntips <- treemen_1@ntips + treemen_2@ntips
  treemen_1@ntrees <- treemen_1@ntrees + treemen_2@ntrees
  treemen_1
}

.cMenToMan <- function(treemen, treeman) {
  treelst <- c(treemen@treelst, treeman)
  treemen@treelst <- treelst
  treemen@ntips <- treeman@ntips + treemen@ntips
  treemen@ntrees <- treemen@ntrees + 1
  treemen
}

.cMenToAny <- function(treemen, treeobj) {
  if (class(treeobj)[1] == "TreeMan") {
    treemen <- .cMenToMan(treemen, treeobj)
  } else if (class(treeobj)[1] == "TreeMen") {
    treemen <- .cMenToMen(treemen, treeobj)
  }
  treemen
}

.cTreeObjs <- function(treemen, treeobj, ...) {
  if (nargs() > 2) {
    treemen <- .cMenToAny(treemen, treeobj)
    treemen <- .cTreeObjs(treemen, ...)
  } else {
    treemen <- .cMenToAny(treemen, treeobj)
  }
  treemen
}

#' @title cTrees
#' @description Return \code{TreeMen} of concatenated trees.
#' @details Concatenate trees into single \code{TreeMen} object.
#' @param x \code{TreeMan} or \code{TreeMen} objects
#' @param ... more \code{TreeMan} or \code{TreeMen} objects
#' @seealso
#' \code{\link{TreeMen-class}}, \code{\link{TreeMan-class}}, \code{\link{list-to-TreeMen}}
#' @examples
#'
#' trees <- cTrees(randTree(10), randTree(10))
#' @export
setGeneric("cTrees",
  signature = c("x"),
  function(x, ...) {
    standardGeneric("cTrees")
  }
)
#' @rdname TreeMan-class
#' @exportMethod cTrees
setMethod(
  "cTrees", c("TreeMan"),
  function(x, ...) {
    x <- list(x)
    x <- as(x, "TreeMen")
    x <- .cTreeObjs(x, ...)
    x
  }
)
#' @rdname TreeMen-class
#' @exportMethod cTrees
setMethod(
  "cTrees", c("TreeMen"),
  function(x, ...) {
    x <- .cTreeObjs(x, ...)
    x
  }
)

# Accessor methods
#' @rdname TreeMen-class
#' @exportMethod [[
setMethod(
  "[[", c("TreeMen", "ANY"),
  function(x, i) {
    if (!i %in% 1:length(x@treelst)) {
      stop(paste0(i, " not in tree"))
    }
    x@treelst[[i]]
  }
)
#' @rdname TreeMen-class
#' @aliases TreeMen-method
#' Extract slots from a list of trees
#' @exportMethod [
setMethod(
  "[", c("TreeMen", "character", "missing", "missing"),
  function(x, i, j, ..., drop = TRUE) {
    if (!i %in% slotNames(x)) {
      stop(paste0(i, "  not in tree"))
    }
    slot(x, i)
  }
)


# display methods
#' @rdname TreeMen-class
#' @exportMethod as.character
setMethod(
  "as.character", c("x" = "TreeMen"),
  function(x) {
    paste0("TreeMen Object of [", x@ntrees, "] trees")
  }
)
#' @rdname TreeMen-class
#' @exportMethod show
setMethod(
  "show", "TreeMen",
  function(object) {
    msg <- as.character(object)
    cat(msg)
  }
)
#' @rdname TreeMen-class
#' @exportMethod str
setMethod(
  "str", c("object" = "TreeMen"),
  function(object, max.level = 2L, ...) {
    if (is.na(max.level)) {
      stop("max.level must be numeric")
    }
    str@default(object, max.level = max.level, ...)
  }
)
#' @rdname TreeMen-class
#' @exportMethod print
setMethod(
  "print", c("x" = "TreeMen"),
  function(x) {
    msg <- as.character(x)
    print(msg)
  }
)
#' @rdname TreeMen-class
#' @exportMethod summary
setMethod(
  "summary", c("object" = "TreeMen"),
  function(object) {
    msg <- "Trees (TreeMen Object):\n"
    msg <- paste0(msg, "  + ", object@ntrees, " trees\n")
    msg <- paste0(msg, "  + ", object@ntips, " tips\n")
    cat(msg)
  }
)


#' @name pstMnp
#' @title Update prinds and tinds
#' @description Return tree with updated slots.
#' @details This function is automatically run. Only run, if you
#' are creating yor own functions to add and remove elements of the
#' \code{ndlst}.
#' @param tree \code{TreeMan} object
#' @seealso
#' \code{\link{updateSlts}}, \code{\link{addNdmtrx}},
#' \code{\link{getAge}}
#' @export
pstMnp <- function(tree) {
  # after any adding or removing of tips and nodes,
  # these slots MUST be updated to ensure full functionality
  tree@tinds <- .getTinds(tree@ndlst)
  tree@prinds <- .getPrinds(tree@ndlst)
  tree
}

#' @name updateSlts
#' @title Update tree slots after manipulation
#' @description Return tree with updated slots.
#' @details Tree slots in the \code{TreeMan} object are usually automatically updated.
#' For certain single node manipulations they are not. Run this
#' function to update the slots.
#' @param tree \code{TreeMan} object
#' @seealso
#' \code{\link{addNdmtrx}}, \code{\link{getAge}}
#' @export
updateSlts <- function(tree) {
  # Update the slots for a tree
  wo_pstndes <- vapply(
    tree@ndlst,
    function(n) length(n[["ptid"]]) == 0,
    logical(1)
  )
  tree@tips <- sort(names(wo_pstndes)[wo_pstndes])
  tree@ntips <- length(tree@tips)
  tree@nds <- sort(names(wo_pstndes)[!wo_pstndes])
  tree@nnds <- length(tree@nds)
  tree@all <- names(tree@ndlst)
  tree@nall <- length(tree@all)
  tree@wtxnyms <- any(vapply(
    tree@ndlst, function(n) !is.null(n[["txnym"]]),
    logical(1)
  ))
  spns <- vapply(tree@ndlst, function(n) n[["spn"]], numeric(1))
  tree@wspn <- any(spns > 0)
  if (tree@wspn) {
    tree@pd <- sum(vapply(tree@ndlst, function(n) n[["spn"]], numeric(1)))
  } else {
    tree@pd <- numeric()
  }
  tree@ply <- any(vapply(
    tree@ndlst, function(n) length(n[["ptid"]]) > 2,
    logical(1)
  ))
  tree@updtd <- TRUE
  initialize(tree)
}

#' @name addNdmtrx
#' @title Add node matrix to a tree
#' @description Return tree with node matrix added.
#' @details The node matrix makes 'enquiry'-type computations faster:
#' determining node ages, number of descendants etc. But it takes up
#' large amounts of memory and has no impact on adding or removing tips.
#' Note, trees with the node matrix can not be written to disk using the
#' 'serialization format' i.e. with \code{save} or \code{saveRDS}.
#' The matrix is generated with bigmemory's `as.big.matrix()`.
#' @param tree \code{TreeMan} object
#' @param shared T/F, should the bigmatrix be shared? See bigmemory documentation.
#' @param ... \code{as.big.matrix()} additional arguments
#' @seealso
#' \code{\link{updateSlts}}, \code{\link{rmNdmtrx}},
#' \url{https://cran.r-project.org/package=bigmemory}
#' @export
#' @examples
#' #
#' tree <- randTree(10, wndmtrx = FALSE)
#' summary(tree)
#' tree <- addNdmtrx(tree)
#' summary(tree)
addNdmtrx <- function(tree, shared = FALSE, ...) {
  if (tree@ntips < 3) {
    stop("Too small for node matrix.")
  }
  if (!checkNdlst(tree@ndlst, tree@root)) {
    stop("Invalid tree")
  }
  if (is.null(tree@ndmtrx)) {
    # generate ndmtrx
    tree@ndmtrx <- .getNdmtrxFrmLst(tree@ndlst, shared = shared, ...)
  }
  tree
}

#' @name rmNdmtrx
#' @title Remove node matrix
#' @description Return tree with memory heavy node matrix removed.
#' @details Potential uses: reduce memory load of a tree,
#' save tree using serialization methods.
#' @param tree \code{TreeMan} object
#' @seealso
#' \code{\link{addNdmtrx}}
#' @export
#' @examples
#' #
#' tree <- randTree(10)
#' summary(tree)
#' tree <- rmNdmtrx(tree)
#' summary(tree)
rmNdmtrx <- function(tree) {
  tree@ndmtrx <- NULL
  tree
}

#' @name TreeMan-to-phylo
#' @title Convert TreeMan to phylo
#' @description Return ape's \code{phylo} from a \code{TreeMan}. This will
#' preserve node labels if they are different from the default labels (n#).
#' @seealso
#' \code{\link{phylo-to-TreeMan}},
#' \code{\link{TreeMen-to-multiPhylo}}
#' \code{\link{multiPhylo-to-TreeMen}}
#' \code{\link{TreeMan-class}}
#' @examples
#'
#' library(ape)
#' tree <- randTree(10)
#' tree <- as(tree, "phylo")
setAs(from = "phylo", to = "TreeMan", def = function(from, to) {
  temp_file <- tempfile(pattern = "temp_tree", fileext = ".tre")
  ape::write.tree(from, file = temp_file)
  tree <- readTree(file = temp_file)
  file.remove(temp_file)
  return(tree)
})

#' @name phylo-to-TreeMan
#' @title Convert phylo to TreeMan
#' @description Return a \code{TreeMan} from ape's \code{phylo}. This will
#' preserve node labels, if they are a alphanumeric.
#' @seealso
#' \code{\link{TreeMan-to-phylo}},
#' \code{\link{TreeMen-to-multiPhylo}}
#' \code{\link{multiPhylo-to-TreeMen}}
#' \code{\link{TreeMan-class}}
#' @examples
#'
#' library(ape)
#' tree <- compute.brlen(rtree(10))
#' tree <- as(tree, "TreeMan")
setAs(from = "TreeMan", to = "phylo", def = function(from, to) {
  temp_file <- tempfile(pattern = "temp_tree", fileext = ".tre")

  # use a node label function function that writes the node labels during the
  # conversion, if they are non default.
  if (all(from["nds"] == paste0("n", seq_along(from["nds"])))) {
    nodeLabeller <- function(nd) {
      return(NULL)
    } # no node labels
  } else {
    nodeLabeller <- function(nd) {
      nd[["id"]]
    } # writes node labels
  }

  writeTree(from, file = temp_file, ndLabels = nodeLabeller)
  tree <- ape::read.tree(file = temp_file)
  file.remove(temp_file)
  return(tree)
})

#' @name multiPhylo-to-TreeMen
#' @title Convert multiPhylo to TreeMen
#' @description Return a \code{TreeMen} from ape's \code{mutlPhylo}
#' @seealso
#' \code{\link{TreeMan-to-phylo}},
#' \code{\link{phylo-to-TreeMan}},
#' \code{\link{TreeMen-to-multiPhylo}}
#' \code{\link{TreeMan-class}}
#' @examples
#'
#' library(ape)
#' trees <- c(rtree(10), rtree(10), rtree(10))
#' trees <- as(trees, "TreeMen")
setAs(from = "multiPhylo", to = "TreeMen", def = function(from, to) {
  temp_file <- tempfile(pattern = "temp_tree", fileext = ".tre")
  ape::write.tree(from, file = temp_file)
  tree <- readTree(file = temp_file)
  file.remove(temp_file)
  return(tree)
})

#' @name TreeMen-to-multiPhylo
#' @title Convert TreeMen to multiPhylo
#' @description Return ape's \code{multiPhylo} from a \code{TreeMen}
#' @seealso
#' \code{\link{TreeMan-to-phylo}},
#' \code{\link{phylo-to-TreeMan}},
#' \code{\link{multiPhylo-to-TreeMen}}
#' \code{\link{TreeMan-class}}
#' @examples
#'
#' library(ape)
#' trees <- cTrees(randTree(10), randTree(10), randTree(10))
#' trees <- as(trees, "multiPhylo")
setAs(from = "TreeMen", to = "multiPhylo", def = function(from, to) {
  temp_file <- tempfile(pattern = "temp_tree", fileext = ".tre")
  writeTree(from, file = temp_file)
  tree <- ape::read.tree(file = temp_file)
  file.remove(temp_file)
  return(tree)
})

#' @name list-to-TreeMen
#' @title Convert list to a TreeMen
#' @description Return a \code{TreeMen} object from a list of \code{TreeMans}
#' @seealso
#' \code{\link{TreeMen-class}}
#' @examples
#'
#' trees <- list("tree_1" = randTree(10), "tree_2" = randTree(10))
#' trees <- as(trees, "TreeMen")
# Conversion method
setAs(from = "list", to = "TreeMen", def = function(from, to) {
  ntips <- sum(unlist(lapply(from, function(tree) tree@ntips)))
  ntrees <- length(from)
  new(to, treelst = from, ntips = ntips, ntrees = ntrees)
})
ropensci/phylotaR documentation built on July 9, 2023, 3:17 p.m.