Nothing
##
## @author Marc A. Suchard
##
## A class for reading Newick formatted trees with BEAST-style annotations
##
.strip.annotations <- function(text) {
annotations <- list()
end <- 1
# Merge node and branch annotations
text <- gsub("\\[&(.*?)\\]:\\[&(.*?)\\]", ":\\[&\\1,\\2\\]", text)
text <- gsub("\\[&(.*?)\\]:", ":\\[&\\1]", text)
pattern = "\\[&.*?\\]"
repeat {
match = regexpr(pattern=pattern,text=text)
if (!(match[1] > 0)) {
break
}
annotations[[end]] = regmatches(text, match)
text = sub(pattern,paste("[",end,"]",sep=""), text)
end = end + 1
}
return(list(annotations=annotations,tree=text))
}
.split.tree.names <- function(text) {
text = gsub(pattern="\\[.*?\\]=",x=text,replacement="")
text = gsub(pattern="^tree",x=text,replacement="")
return(text)
}
.split.tree.traits <- function(text) {
## Pull out annotation
text = regmatches(text,regexpr(pattern="\\[.*?\\]",text))
## Remove leading and trailing delimitors
text = substring(text,3,nchar(text)-1)
return(text)
}
.parse.value <- function(text) {
value = text
if (length(grep("^\\{",value))) { ## starts with {
save = value
value = substring(value, 2, nchar(value)-1)
depth = 0
r = regexpr(pattern="\\{+",value,perl=TRUE)
match.length = attr(r, "match.length")
if (match.length > 0) {
depth = match.length
}
if (depth == 0) {
split = ","
} else {
split = paste(
"(?<=",rep("\\}",depth),")",
",",
"(?=" ,rep("\\{",depth),")",
sep="")
}
if (depth >= 1) {
return(save) # TODO Still error in recursion
}
part = strsplit(value, split, perl=TRUE)[[1]]
value = list()
for (i in 1:length(part)) {
value[[i]] = .parse.value(part[i])
}
## TODO Unlist when simple array?
} else {
if (!is.na(suppressWarnings(as.numeric(value)))) { # is a number
value = as.numeric(value)
} else { # is a string
value <- gsub("\\\"","", value)
}
}
return(value)
}
.parse.traits <- function(text, header=FALSE) {
if (header == TRUE) {
text = substring(text,3,nchar(text)-1)
}
pattern <- "(\"[^\"]*\"+|[^,=\\s]+)\\s*(=\\s*(\\{[^=]*\\}|\"[^\"]*\"+|[^,]+))?"
rgx <- gregexpr(pattern,text,perl=TRUE)
n <- length(attr(rgx[[1]],"match.length"))
traits <- list()
start <- attr(rgx[[1]],"capture.start")
names <- attr(rgx[[1]],"capture.names")
length <- attr(rgx[[1]],"capture.length")
names <- attr(rgx[[1]],"capture.names")
for (i in 1:n) {
s <- start[i,3]
e <- s + length[i,3] - 1
value <- substring(text,s,e)
s <- start[i,1]
e <- s + length[i,1] - 1
key <- substring(text,s,e)
traits[[key]] <- .parse.value(value)
}
return(traits)
}
.annotated.clado.build <- function(tp) {
stop(paste("Annotated clado.build is not yet implemented.\n"))
}
## THE CODE BELOW COMES FROM 'ape'. MY GOAL IS TO DERIVE FROM THIS TO READ IN BEAST-STYLE ANNOTATIONS
.annotated.tree.build <- function(tp){
add.internal <- function() {
edge[j, 1] <<- current.node
edge[j, 2] <<- current.node <<- node <<- node + 1L
index[node] <<- j
j <<- j + 1L
}
add.terminal <- function() {
edge[j, 1] <<- current.node
edge[j, 2] <<- tip
index[tip] <<- j
X <- unlist(strsplit(new.tpc[k], ":"))
tip.label[tip] <<- X[1]
index <- length(X)
edge.length[j] <<- as.numeric(X[index])
if (length(annotations) > 0) {
permute[[j]] <<- annotations[[as.numeric(X[2])]] ## permute traits
}
k <<- k + 1L
tip <<- tip + 1L
j <<- j + 1L
}
go.down <- function() {
l <- index[current.node]
X <- unlist(strsplit(new.tpc[k], ":"))
node.label[current.node - nb.tip] <<- X[1]
index <- length(X)
edge.length[l] <<- as.numeric(X[index])
if (length(annotations) > 0) {
permute[[l]] <<- annotations[[as.numeric(X[2])]] ## permute traits
}
k <<- k + 1L
current.node <<- edge[l, 1]
}
if (!length(grep(",", tp))) {
obj <- list(edge = matrix(c(2L, 1L), 1, 2))
tp <- unlist(strsplit(tp, "[\\(\\):;]"))
obj$edge.length <- as.numeric(tp[3])
obj$Nnode <- 1L
obj$tip.label <- tp[2]
if (tp[4] != "")
obj$node.label <- tp[4]
class(obj) <- "phylo"
return(obj)
}
result = .strip.annotations(tp)
annotations = result$annotations
new.tp.stripped = result$tree
# patched for 0.0 root branch length from BEAST2 (not confirmed)
new.tp.stripped <- gsub("\\]0.0;", "\\];", new.tp.stripped)
root.annotation.number <- NULL
m <- regexpr("\\[\\d+\\];", new.tp.stripped)
if (m != -1) {
root.annotation.number <- as.numeric(
gsub("\\[(\\d+)\\];", "\\1", regmatches(new.tp.stripped, m)))
}
annotations = lapply(annotations, .parse.traits, header=TRUE)
tp.stripped = gsub("\\[.*?\\]","",tp)
tpc <- unlist(strsplit(tp.stripped, "[\\(\\),;]"))
tpc <- tpc[nzchar(tpc)]
new.tp.stripped <- gsub("\\[\\d+\\];", ";", new.tp.stripped)
new.tp.stripped <- gsub("\\[(\\d+)\\]","\\1:", new.tp.stripped)
new.tpc <- unlist(strsplit(new.tp.stripped, "[\\(\\),;]"))
new.tpc <- new.tpc[nzchar(new.tpc)]
tsp <- unlist(strsplit(tp.stripped, NULL))
skeleton <- tsp[tsp %in% c("(", ")", ",", ";")]
nsk <- length(skeleton)
nb.node <- sum(skeleton == ")")
nb.tip <- sum(skeleton == ",") + 1
nb.edge <- nb.node + nb.tip
node.label <- character(nb.node)
tip.label <- character(nb.tip)
edge.length <- numeric(nb.edge)
edge <- matrix(0L, nb.edge, 2)
current.node <- node <- as.integer(nb.tip + 1)
edge[nb.edge, 2] <- node
index <- numeric(nb.edge + 1)
index[node] <- nb.edge
j <- k <- tip <- 1L
permute = list()
for (i in 2:nsk) {
if (skeleton[i] == "(") {
add.internal()
}
if (skeleton[i] == ",") {
if (skeleton[i - 1] != ")") {
add.terminal()
}
}
if (skeleton[i] == ")") {
if (skeleton[i - 1] == ",") {
add.terminal()
go.down()
}
if (skeleton[i - 1] == ")") {
go.down()
}
}
}
edge <- edge[-nb.edge, ]
obj <- list(edge = edge, Nnode = nb.node, tip.label = tip.label)
root.edge <- edge.length[nb.edge]
edge.length <- edge.length[-nb.edge]
if (!all(is.na(edge.length)))
obj$edge.length <- edge.length
if (is.na(node.label[1]))
node.label[1] <- ""
if (any(nzchar(node.label)))
obj$node.label <- node.label
if (!is.na(root.edge))
obj$root.edge <- root.edge
class(obj) <- "phylo"
attr(obj, "order") <- "cladewise"
if (!is.null(root.annotation.number)) {
obj$root.annotation <- annotations[[root.annotation.number]]
}
obj$annotations = permute
obj
}
read.annotated.tree <- function (file = "", text = NULL, tree.names = NULL, skip = 0,
comment.char = "#", keep.multi = FALSE, ...)
{
unname <- function(treetext) {
nc <- nchar(treetext)
tstart <- 1
while (substr(treetext, tstart, tstart) != "(" && tstart <=
nc) tstart <- tstart + 1
if (tstart > 1)
return(c(substr(treetext, 1, tstart - 1), substr(treetext,
tstart, nc)))
return(c("", treetext))
}
if (!is.null(text)) {
if (!is.character(text))
stop("argument `text' must be of mode character")
tree <- text
}
else {
tree <- scan(file = file, what = "", sep = "\n", quiet = TRUE,
skip = skip, comment.char = comment.char, ...)
}
if (identical(tree, character(0))) {
warning("empty character string.")
return(NULL)
}
tree <- gsub("[ \n\t]", "", tree)
tree <- gsub("\\[&R\\]", "", tree)
tree <- unlist(strsplit(tree, NULL))
y <- which(tree == ";")
Ntree <- length(y)
x <- c(1, y[-Ntree] + 1)
if (is.na(y[1]))
return(NULL)
STRING <- character(Ntree)
for (i in 1:Ntree) STRING[i] <- paste(tree[x[i]:y[i]], sep = "",
collapse = "")
tmp <- unlist(lapply(STRING, unname))
tmpnames <- tmp[c(TRUE, FALSE)]
STRING <- tmp[c(FALSE, TRUE)]
if (is.null(tree.names) && any(nzchar(tmpnames)))
tree.names <- tmpnames
colon <- grep(":", STRING)
if (!is.null(tree.names)) {
traits.text = lapply(tree.names, .split.tree.traits)
tree.names = lapply(tree.names, .split.tree.names)
tree.traits = lapply(traits.text, .parse.traits)
}
if (!length(colon)) {
stop(paste("Annotated clado.build is not yet implemented.\n"))
obj <- lapply(STRING, .annotated.clado.build)
}
else if (length(colon) == Ntree) {
obj <- lapply(STRING, .annotated.tree.build)
}
else {
obj <- vector("list", Ntree)
obj[colon] <- lapply(STRING[colon], .annotated.tree.build)
nocolon <- (1:Ntree)[!1:Ntree %in% colon]
## obj[nocolon] <- lapply(STRING[nocolon], clado.build)
## change for compatibility with ape 5.0
obj[nocolon] <- lapply(STRING[nocolon],
function(e) ape::read.tree(text = e))
}
for (i in 1:Ntree) {
ROOT <- length(obj[[i]]$tip.label) + 1
if (sum(obj[[i]]$edge[, 1] == ROOT) == 1 && dim(obj[[i]]$edge)[1] >
1)
stop(paste("The tree has apparently singleton node(s): cannot read tree file.\n Reading Newick file aborted at tree no.",
i))
}
if (Ntree == 1 && !keep.multi)
obj <- obj[[1]]
else {
if (!is.null(tree.names)) {
names(obj) <- tree.names
}
class(obj) <- "multiPhylo"
}
obj
}
read.annotated.nexus <- function (file, tree.names = NULL) {
X <- scan(file = file, what = "", sep = "\n", quiet = TRUE)
LEFT <- grep("\\[", X)
RIGHT <- grep("\\]", X)
## browser()
##
## if (length(LEFT)) {
## w <- LEFT == RIGHT
## if (any(w)) {
## s <- LEFT[w]
## X[s] <- gsub("\\[[^]]*\\]", "", X[s])
## }
## w <- !w
## if (any(w)) {
## s <- LEFT[w]
## X[s] <- gsub("\\[.*", "", X[s])
## sb <- RIGHT[w]
## X[sb] <- gsub(".*\\]", "", X[sb])
## if (any(s < sb - 1))
## X <- X[-unlist(mapply(":", (s + 1), (sb - 1)))]
## }
## }
endblock <- grep("END;|ENDBLOCK;", X, ignore.case = TRUE)
semico <- grep(";", X)
i1 <- grep("BEGIN TREES;", X, ignore.case = TRUE)
i2 <- grep("TRANSLATE", X, ignore.case = TRUE)
translation <- if (length(i2) == 1 && i2 > i1)
TRUE
else FALSE
if (translation) {
end <- semico[semico > i2][1]
x <- X[(i2 + 1):end]
x <- unlist(strsplit(x, "[,; \t]"))
x <- x[nzchar(x)]
TRANS <- matrix(x, ncol = 2, byrow = TRUE)
TRANS[, 2] <- gsub("['\"]", "", TRANS[, 2])
n <- dim(TRANS)[1]
}
start <- if (translation)
semico[semico > i2][1] + 1
else semico[semico > i1][1]
end <- endblock[endblock > i1][1] - 1
tree <- X[start:end]
## browser()
rm(X)
tree <- tree[tree != ""]
semico <- grep(";", tree)
Ntree <- length(semico)
if (Ntree == 1 && length(tree) > 1)
STRING <- paste(tree, collapse = "")
else {
if (any(diff(semico) != 1)) {
STRING <- character(Ntree)
s <- c(1, semico[-Ntree] + 1)
j <- mapply(":", s, semico)
if (is.list(j)) {
for (i in 1:Ntree) STRING[i] <- paste(tree[j[[i]]],collapse = "")
}
else {
for (i in 1:Ntree) STRING[i] <- paste(tree[j[,i]], collapse = "")
}
}
else STRING <- tree
}
rm(tree)
## browser()
STRING <- STRING[grep("^[[:blank:]]*tree.*= *", STRING, ignore.case = TRUE)]
Ntree <- length(STRING)
STRING <- gsub("\\[&R\\]", "", STRING)
## TODO Parse out tree-level traits
nms.annontations.trees <- sub(" * = *.*", "", STRING)
nms.annontations.trees <- sub("^ *tree *", "", nms.annontations.trees, ignore.case = TRUE)
nms.trees <- sub("\\s+\\[&.*?\\]", "", nms.annontations.trees)
if (any(nms.trees != nms.annontations.trees)) { # There are tree-level annontations
annotations.trees <- sub(".*\\[&", "\\[&", nms.annontations.trees)
annotations.trees = lapply(annotations.trees, .parse.traits, header=TRUE)
} else {
annotations.trees <- NULL
}
STRING <- sub("^.*? = *", "", STRING)
STRING <- gsub("\\s", "", STRING)
## browser()
colon <- grep(":", STRING)
if (!length(colon)) {
stop(".annotated.clado.build is not yet implemented.\n")
trees <- lapply(STRING, .annotated.clado.build)
} else if (length(colon) == Ntree) {
## trees <- if (translation) {
## browser()
## stop("treeBuildWithTokens is not yet implemented.\n")
## lapply(STRING, .treeBuildWithTokens)
## }
## else lapply(STRING, .annotated.tree.build)
trees <- lapply(STRING, .annotated.tree.build)
## browser()
} else {
## trees <- vector("list", Ntree)
## trees[colon] <- lapply(STRING[colon], .annotated.tree.build)
## nocolon <- (1:Ntree)[!1:Ntree %in% colon]
## trees[nocolon] <- lapply(STRING[nocolon], .annotated.clado.build)
## if (translation) {
## for (i in 1:Ntree) {
## tr <- trees[[i]]
## for (j in 1:n) {
## ind <- which(tr$tip.label[j] == TRANS[, 1])
## tr$tip.label[j] <- TRANS[ind, 2]
## }
## if (!is.null(tr$node.label)) {
## for (j in 1:length(tr$node.label)) {
## ind <- which(tr$node.label[j] == TRANS[,
## 1])
## tr$node.label[j] <- TRANS[ind, 2]
## }
## }
## trees[[i]] <- tr
## }
## translation <- FALSE
## }
stop("Unknown error in read.annotated.nexus.\n")
}
for (i in 1:Ntree) {
tr <- trees[[i]]
if (!translation)
n <- length(tr$tip.label)
ROOT <- n + 1
if (sum(tr$edge[, 1] == ROOT) == 1 && dim(tr$edge)[1] >
1) {
stop(paste("The tree has apparently singleton node(s): cannot read tree file.\n Reading NEXUS file aborted at tree no.",
i, sep = ""))
}
}
if (Ntree == 1) {
trees <- trees[[1]]
if (translation) {
trees$tip.label <- # if (length(colon)) {
# TRANS[, 2]
#} else {
TRANS[, 2][as.numeric(trees$tip.label)]
#}
}
}
else {
if (!is.null(tree.names))
names(trees) <- tree.names
if (translation) {
# if (length(colon) == Ntree)
# attr(trees, "TipLabel") <- TRANS[, 2]
#else {
for (i in 1:Ntree) trees[[i]]$tip.label <- TRANS[, 2][as.numeric(trees[[i]]$tip.label)]
trees <- .compressTipLabel(trees)
#}
}
class(trees) <- "multiPhylo"
if (!all(nms.trees == ""))
names(trees) <- nms.trees
}
# Add tree-level annotations back on
if (!is.null(annotations.trees)) {
if (Ntree == 1) {
trees$tree.annotations <- annotations.trees[[1]]
} else {
for (i in 1:Ntree) {
trees[[i]]$tree.annotations <- annotations.trees[[i]]
}
}
}
trees
} # end read.annotated.nexus
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.