Nothing
#' Parse TNT Tree
#'
#' Read a tree from [TNT](https://www.lillo.org.ar/phylogeny/tnt/)'s
#' parenthetical output.
#'
#' `ReadTntTree()` imports trees generated by the parsimony analysis program
#' [TNT](https://www.lillo.org.ar/phylogeny/tnt/) into R, including node labels
#' written with the `ttags` command.
#' Tree files must have been saved by TNT in parenthetical notation, using the
#' TNT command `tsave *`.
#' Trees are easiest to load into R if taxa have been saved using their names
#' (TNT command `taxname =`). In this case, the TNT `.tre` file
#' contains tip labels and can be parsed directly. The downside is that the
#' uncompressed `.tre` files will have a larger file size.
#'
#' `ReadTntTree()` can also read `.tre` files in which taxa have been saved
#' using their numbers (`taxname -`). Such files contain a hard-coded link to
#' the matrix file that was used to generate the trees, in the first line of the
#' `.tre` file. This poses problems for portability: if the matrix file is
#' moved, or the `.tre` file is accessed on another computer, the taxon names
#' may be lost. As such, it is important to check that the matrix file
#' exists in the expected location -- if it does not,
#' either use the `relativePath` argument to point to its new location, or
#' specify `tipLabels` to manually specify the tip labels.
#'
#' `TntText2Tree()` converts text representation of a tree in TNT to an
#' object of class `phylo`.
#'
#' @param filepath character string specifying path to TNT `.tre` file,
#' relative to the R working directory (visible with `getwd()`).
#' @param relativePath (discouraged) character string specifying location of the
#' matrix file used to generate the TNT results, relative to the current working
#' directory. Taxon names will be read from this file if they are not specified
#' by `tipLabels`.
#' @param keepEnd (optional, default 1) integer specifying how many elements of
#' the file path to conserve when creating relative path (see examples).
#' @param tipLabels (optional) character vector specifying the names of the
#' taxa, in the sequence that they appear in the TNT file. If not specified,
#' taxon names will be loaded from the data file linked in the first line of the
#' `.tre` file specified in `filepath`.
#'
#' @return `ReadTntTree()` returns a tree of class `phylo` in
#' \link[=TNTOrder]{TNT order},
#' corresponding to the tree in `filepath`, or NULL if no trees are found.
#'
#' @examples
#' # In the examples below, TNT has read a matrix from
#' # "c:/TreeTools/input/dataset.nex"
#' # The results of an analysis were written to
#' # "c:/TreeTools/output/results1.tnt"
#' #
#' # results1.tnt will contain a hard-coded reference to
#' # "c:/TreeTools/input/dataset.nex".
#'
#' # On the original machine (but not elsewhere), it would be possible to read
#' # this hard-coded reference from results.tnt:
#' # ReadTntTree("output/results1.tnt")
#'
#' # These datasets are provided with the "TreeTools" package, which will
#' # probably not be located at c:/TreeTools on your machine:
#'
#' oldWD <- getwd() # Remember the current working directory
#' setwd(system.file(package = "TreeTools"))
#'
#' # If taxon names were saved within the file (using `taxname=` in TNT),
#' # then our job is easy:
#' ReadTntTree("extdata/output/named.tre")
#'
#' # But if taxa were compressed to numbers (using `taxname-`), we need to
#' # look up the original matrix in order to dereference the tip names.
#' #
#' # We need to extract the relevant file path from the end of the
#' # hard-coded path in the original file.
#' #
#' # We are interested in the last two elements of
#' # c:/TreeTools/input/dataset.nex
#' # 2 1
#' #
#' # "." means "relative to the current directory"
#' ReadTntTree("extdata/output/numbered.tre", "./extdata", 2)
#'
#' # If working in a lower subdirectory
#' setwd("./extdata/otherfolder")
#'
#' # then it will be necessary to navigate up the directory path with "..":
#' ReadTntTree("../output/numbered.tre", "..", 2)
#'
#'
#' setwd(oldWD) # Restore original working directory
#'
#' TNTText2Tree("(A (B (C (D E ))));")
#'
#' @template MRS
#' @family tree import functions
#' @importFrom ape read.tree
#' @export
ReadTntTree <- function(filepath, relativePath = NULL, keepEnd = 1L,
tipLabels = NULL) {
commands <- .TntCommands(filepath)
tread <- grep("^trea?d?\\b", commands, perl = TRUE)
if (length(tread) < 1) return(NULL)
trees <- c(TntText2Tree(
paste(
gsub("tread\\s+('[^']*')*\\s*", "", commands[tread]),
collapse = "*")))
nRead <- unname(vapply(
commands[tread],
function(cmd) length(
c(TntText2Tree(gsub("tread\\s+('[^']*')*\\s*", "", cmd)))
), double(1)
))
if (!any(grepl("[A-z]", trees[[1]][["tip.label"]]))) {
if (is.null(tipLabels)) {
tipLabels <- rownames(ReadTntCharacters(filepath))
if (is.null(tipLabels)) {
if (length(tread) > 1) {
warning("Multiple tree blocks not fully supported; ",
"please check tip labels and contact \"TreeTools\" ",
"maintainer if necessary")
}
treadComment <- strsplit(commands[tread[1]], "'", fixed = TRUE)[[1]]
filePrefix <- "tree(s) from TNT, for data in "
taxonFile <- grep(filePrefix, treadComment, fixed = TRUE)
taxonFile <- gsub(filePrefix, "",
gsub("\\", "/", treadComment[taxonFile], fixed = TRUE),
fixed = TRUE)
if (!is.null(relativePath)) {
taxonFileParts <- strsplit(taxonFile, "/")[[1]]
nParts <- length(taxonFileParts)
if (nParts < keepEnd) {
stop("Taxon file path (", taxonFile, # nocov
") contains fewer than keepEnd (", keepEnd, ") components.") # nocov
}
taxonFile <- paste0(c(relativePath,
taxonFileParts[(nParts + 1L - keepEnd):nParts]),
collapse = "/")
}
if (length(taxonFile)) {
if (!file.exists(taxonFile)) {
warning("Cannot find linked data file:\n ", taxonFile) # nocov
} else {
tipLabels <- rownames(ReadTntCharacters(taxonFile, character_num = 1L))
if (is.null(tipLabels)) {
# TNT character read failed. Perhaps taxonFile is in NEXUS format?
tipLabels <- rownames(ReadCharacters(taxonFile, character_num = 1L))
}
if (is.null(tipLabels)) {
warning("Could not read taxon names from linked TNT file:\n ", # nocov
taxonFile, "\nIs the file in TNT or Nexus format?", # nocov
" If failing inexplicably, please report:", # nocov
"\n https://github.com/ms609/TreeTools/issues/new") # nocov
}
}
} else {
warning("`filepath` does not link to taxon names; try specifying `tipLabels`")
tipLabels <- as.character(seq_len(trees[[1]][["edge"]][1] - 1) - 1)
}
}
}
trees <- lapply(trees, function(tree) {
tree$tip.label <- tipLabels[as.integer(tree$tip.label) + 1L]
tree
})
}
if (length(tipLabels)) {
trees <- RenumberTips(trees, tipLabels)
}
trees <- TntOrder(trees)
# Assign Tree Tags
ttags <- grep("^tta?g?s?\\b", commands, perl = TRUE)
tagLines <- commands[ttags]
nTags <- length(ttags)
tagTarget <- "^tta?g?s?\\s*\\*\\s*([\\d!]+)$"
targetTags <- grep(tagTarget, tagLines, perl = TRUE)
tagIsTarget <- as.logical(tabulate(targetTags, nTags))
targetTree <- NA
tagWrite <- "^tta?g?s?\\s*\\+\\s*(\\d+)\\s+(.+)$"
writeTags <- grep(tagWrite, tagLines, perl = TRUE)
tagIsWrite <- as.logical(tabulate(writeTags, nTags))
tagClear <- "^tta?g?s?\\s*\\-$"
clearTags <- grep(tagClear, tagLines, perl = TRUE)
tagIsClear <- as.logical(tabulate(clearTags, nTags))
for (i in seq_along(ttags)) {
# A loop is inefficient, but matches TNT's command-driven storage structure
# and thus may be more robust to input format -- and
# will make code maintenance and improvement easier in future.
tagLine <- tagLines[i]
if (tagIsTarget[i]) {
target <- gsub(tagTarget, "\\1", tagLine, perl = TRUE)
targetTree <- if (target == "!") {
cumsum(nRead)[which.max(c(tread, Inf) > ttags[i]) - 1L]
} else {
as.numeric(target)
}
} else if (tagIsWrite[i]) {
if (is.na(targetTree)) {
warning("Expected `ttags *N`; applying tags to first tree");
targetTree <- 1L
}
if (!length(trees[[targetTree]][["node.label"]])) {
trees[[targetTree]][["node.label"]] <-
character(trees[[targetTree]][["Nnode"]])
}
targetTips <- NTip(trees[[targetTree]])
node <- sum(
as.numeric(gsub(tagWrite, "\\1", tagLine, perl = TRUE)),
- targetTips, # Tip labels stored separately
+ 1 # TNT counts from zero
)
tagText <- gsub(tagWrite, "\\2", tagLine, perl = TRUE)
if (node < 1) {
message("Ignoring tag on leaf ", node + targetTips,
": \"", tagText, "\"")
} else {
trees[[targetTree]][["node.label"]][node] <-
if (trees[[targetTree]][["node.label"]][node] == "") {
tagText
} else {
paste(trees[[targetTree]][["node.label"]][node], tagText)
}
}
} else if (tagIsClear[i]) {
trees[[targetTree]][["node.label"]] <- NULL
} else {
warning("ttags command not yet supported: ", tagLine,
". Contact TreeTools maintainer.")
}
}
# Return:
if (length(trees) == 1) {
trees[[1]]
} else if (length(trees) == 0) {
NULL # nocov
} else {
class(trees) <- "multiPhylo"
trees
}
}
#' @rdname ReadTntTree
#' @param treeText Character string describing one or more trees,
#' in the parenthetical format output by TNT.
#' @export
TntText2Tree <- function(treeText) {
treeText <- paste(strsplit(treeText, "\\s*\\*\\s*", perl = TRUE)[[1]], ";")
treeText <- gsub(";(\\s*;)+", ";", treeText)
treeText <- gsub("\\s*([\\w'\\.\\-]+)\\s*", "\\1,", treeText, perl = TRUE)
treeText <- gsub("\\)\\s*\\(", "),(", treeText, perl = TRUE)
treeText <- gsub(",)", ")", treeText, fixed = TRUE)
tr <- read.tree(text = treeText)
if (inherits(tr, "multiPhylo")) {
tr[] <- lapply(tr, function (tree) {
tree$tip.label[] <- Unquote(tree[["tip.label"]])
tree
})
} else if (inherits(tr, "phylo")) {
tr[["tip.label"]][] <- Unquote(tr[["tip.label"]])
}
# Return:
tr
}
#' @rdname ReadTntTree
#' @export
TNTText2Tree <- TntText2Tree
.TntCommands <- function(filepath) {
fileText <- readLines(filepath)
commands <- strsplit(
paste(fileText, collapse = "\UA"), #\UA = \n
"\\s*;\\s*")[[1]]
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.