Nothing
#-------------------------------------------------------
# Methods
# -Are concerned with subnode stuff only.
# Higher-level tree modifications should be done
# with phylobase functions.
#-------------------------------------------------------
# Table of Contents:
## Generic Methods: (and overloads)
#
## SIMMAP Methods:
# get.nodenames -- internal; get names of nodes
# is.simmap -- check whether file or text string contains simmap formatted trees
# read.simmap -- read simmap text (v1.0-1.1) to phylo4d object (with singletons)
# read.simmap.new -- read simmap text (v1.5) to phylo4d_ext
# read.nexus.simmap -- reads nexus file which contains simmap (all versions) trees
# expand.singles -- experimental; expand singleton nodes into a bifurcation with one 0-length branch terminating at a dummy node
# collapse.to.singles -- experimental; collapse bifurcations which contain a 0-length branch connected to a leaf node into singletons
# collapse.singletons -- remove singleton nodes from a tree
# collapse.subnodes -- experimental; collapse subnodes to singletons
# write.simmap (w/ two subroutines) -- write a tree (and parts of it's data) to simmap format (vers=c(1.0,1.1,1.5))
# write.simmap.old -- wrapper; calls write.simmap with v1.0 or v1.1 parameters
# write.simmap.new -- wrapper; calls write.simmap with v1.5 parameters
# write.nexus.simmap -- uses write.simmap and other internal functions to write a nexus file using write.simmap to output each tree.
## Subnode Methods:
# .edge.index -- internal;
# nSubNodes -- get number of subnodes
# hasSubNodes -- does the tree have subnodes?
# hasData -- does the tree contain any data? (is the data slot empty?)
# getSubNodeData -- returns subnode data.frame
# getSubNodePosish -- returns subnode positions
# getSubNodeEdgeInds -- returns subnode edge indices
# getEmptyDataFrame -- returns empty data frame in the same format as the data frame from the data slot
# addSubNode -- add a subnode to a tree
# showSubNodes -- print out a representation of the subnodes to stdout
## Generics
setGeneric("sndata", function(x, ...) { standardGeneric("sndata") })
setGeneric("sndata<-", function(x,datnames=NULL, value) { standardGeneric("sndata<-") })
setGeneric("snid", function(x, ...) { standardGeneric("snid") })
setGeneric("snposition", function(x, ...) { standardGeneric("snposition") })
setGeneric("snbranch", function(x, ...) { standardGeneric("snbranch") })
setGeneric("rmdata", function(x,index,subindex) { standardGeneric("rmdata")} )
setGeneric("weight", function(x) { standardGeneric("weight")} )
setGeneric("weight<-", function(x,value) { standardGeneric("weight<-")} )
setGeneric("hasWeight",function(x,strict=TRUE) { standardGeneric("hasWeight")} )
# subnode
setGeneric("hasSubNodes", function(x) { standardGeneric("hasSubNodes") })
setGeneric("hasData", function(x) { standardGeneric("hasData") })
setGeneric("hasDataColumn",function(x,index) { standardGeneric("hasDataColumn") })
# overloading phylobase methods:
#--------------------------
# SIMMAP Processing Methods
#--------------------------
# internal:
# get node names from newick string
get.nodenames<-function(newick.txt)
{
nnames = character(0)
ttmp = newick.txt
if(grepl("^.*;$",ttmp)) {
# remove trailing semi-colon, otherwise it will mess up the regular expression matching
ttmp = substring(ttmp,first=1,last=(nchar(ttmp)-1))
}
nname.pat="(\\(|,|\\))([a-zA-Z0-9'_\\.]{1,})(:|;)"
junk = regexpr(nname.pat,ttmp)
count = 0
while(junk[1] != -1)
{
tmpname = substr(ttmp,(junk[1]+1),(junk[1]+attr(junk,"match.length")-2))
nnames = append(nnames,tmpname)
ttmp = sub(nname.pat,"",ttmp)
junk = regexpr(nname.pat,ttmp)
count = count + 1
stopifnot(count < 100000)
}
return(nnames)
}
# Check for evidence that this file contains simmap-formatted trees
# @param 'vers' is the simmap version that should be checked for
# @param 'quick' if TRUE, then the text string containing a tree is
# only checked to see if the formatting looks right. If FALSE,
# the method attempts to create a tree from the string.
# @param 'vers' can take the values (1.0,1.1,1.5); a meaningless result
# will be returned otherewise
#
is.simmap <- function(finput="",text=NULL,vers=c(1.1),quick=TRUE)
{
if(!is.null(text)){
# TODO: check text for newlines and split on them if they exist.
rawtext=text
} else {
if(!file.exists(finput))
stop("Assumed 'finput' was a filename and could not be fond")
rawtext = scan(finput,what=character(0),strip.white=T,sep="\n")
}
## TODO: split individual strings on ';' character
# if there is only one string, then just check that,
# otherwise assume the file is text from a nexus file
if(length(rawtext)==1)
{
treesblock = rawtext
treelines = 1
} else {
treesblock = read.nexus.block(txt=rawtext,block="trees",rm.comments=T)
treelines = which(tolower(substr(treesblock,1,4))=="tree")
}
if(length(treesblock)==0)
{
warning("No trees in this file...")
return(FALSE)
}
potentialsimmap = logical(length(treelines))
count = 1
for(linenumb in treelines)
{
# check for simmap styles
if(vers == 1.1 || vers == 1.0)
{
# remove all comments from the string
junk = gsub("\\[(.*?)\\]","",treesblock[linenumb])
potentialsimmap[count] = grepl(":\\{.*?\\}",junk)
if(potentialsimmap[count] && !quick)
{
tr = try(read.simmap(text=junk,vers=vers),silent=T)
if(is(tr,"try-error"))
potentialsimmap[count] = FALSE
}
} else {
potentialsimmap[count] = grepl("\\[(.*?)\\]",treesblock[linenumb])
if(potentialsimmap[count] && !quick)
{
junk = gsub("\\[(.*?)\\]","",treesblock[linenumb])
# TODO: change this to read.simmap.new when that function is finished:
tr = try(read.tree(text=junk),silent=T)
if(is(tr,"try-error"))
potentialsimmap[count] = FALSE
}
}
count = count + 1
}
return(any(potentialsimmap))
}
# read modified newick file
# citation:
# Bollback J. P. (2006) SIMMAP: Stochastic character mapping of
# discrete traits on phylogenies. BMC Bioinformatics. 7:88
#
# Assume that branch lengths are present (otherwise, why use SIMMAP?)
# Assume that the root node can only have one simmap state (or, only use the first)
#
# @param as.num - convert the data to numeric (by default, a factor data type is used)
# @param text - use text string instead of a file name
# @param vers - which version of simmap to use
# @param as.num - no idea
# @param add.root - should the tree be treated as unrooted?
#
read.simmap <- function(file="",text=NULL, vers=1.1, as.num=FALSE, add.root=TRUE, ...)
{
if(is.null(text))
{
#stop("Need to have text for now")
if(!file.exists(file))
stop("Assumed 'file' was a filename and could not be fond.")
text = scan(file,what=character(0),strip.white=T,sep="\n")
}
# clear whitespace
text = gsub("\\s","",text)
# add semicolon to end
if(substring(text,nchar(text))!=";")
text = paste(text,";",sep="")
if(TRUE)
{
# add root node and internal node names
count = 1
while(regexpr("):",text)[1] != -1)
{
text = sub( "):", paste(")Internal",sprintf("%0.7d",count),":",sep=""), text)
count = count + 1
}
# Root
if(add.root && regexpr(");",text)[1] != -1)
text = sub( ");", ")Root:0;" , text)
}
## Poor replacement for regular expressions
#
edge.ind = integer(0)
edge.state = character(0)
edge.len = numeric(0)
junk = strsplit(text,"")[[1]]
sub.branches = cbind( which(junk=="{"), which(junk=="}") )
br.lens = numeric(nrow(sub.branches))
for(ii in seq(nrow(sub.branches)))
{
br.total = 0
# get the internal part
within = paste(junk[seq( sub.branches[ii,1]+1 , sub.branches[ii,2]-1 )],collapse="")
splitchar = ifelse(vers==1.0,";",":")
within.sub = strsplit(within,splitchar)[[1]]
for(jj in within.sub)
{
slptmp = strsplit(jj,",")[[1]]
edge.ind = append(edge.ind,ii)
edge.state = append(edge.state,slptmp[1])
edge.len = append(edge.len,slptmp[2])
br.total = br.total + as.numeric(slptmp[2])
}
br.lens[ii] = br.total
}
# horrible way to put it back together:
# switch to regular expressions
#
newick.str = paste(junk,collapse="")
for(ii in seq(nrow(sub.branches)))
{
replen = diff(sub.branches[ii,])+1
substr(newick.str, sub.branches[ii,1], sub.branches[ii,2]) <- paste(rep(" ",replen),collapse="")
substr(newick.str, sub.branches[ii,1], sub.branches[ii,2]) <- as.character(br.lens[ii])
}
newick.str = gsub("\\s","",newick.str) # strip whitespace
# convert to ape format:
tr = read.tree(text=newick.str)
# make singleton nodes
#
edge.len = as.numeric(edge.len)
all.labels = c(tr$tip.label,tr$node.label)
# all names except the root node (which ought to be named, otherwise this will break)
#
#nnames = head(get.nodenames(newick.str),-1) # This should match up with edge.ind
nnames = get.nodenames(newick.str)
scount = 1
nTips = length(tr$tip.label)
nInts = length(tr$node.label)
# node information
dataVal = character(0)
dataNode = integer(0)
# loop through split branches
for(kk in unique(edge.ind))
{
# this information shouldn't change:
is.root = F
edgeind = which(edge.ind == kk)
trind = which(nnames[kk] == all.labels)
esplice = which(tr$edge[,2] == trind) # which edge to chop
anc = tr$edge[esplice,1]
dec = tr$edge[esplice,2]
# assume that this is the root (no way to check for it otherwise..)
if( length(esplice) == 0 )
{
dec = trind
is.root = T
}
# store data information:
dataNode = append(dataNode,dec)
dataVal = append(dataVal, edge.state[edgeind[1]])
newsingles = length(edgeind)-1
if(newsingles==0 || is.root)
next
# add new singleton nodes
for(mm in seq(newsingles))
{
#cat("- adding a new single\n")
### add new internal nodes to 'phylo' object:
#
tr$Nnode = tr$Nnode + 1; nInts = nInts + 1
tr$node.label = append(tr$node.label,sprintf("Singleton%0.7d",scount))
nodeid = nTips + nInts # should be the last one available
tr$edge[esplice,1] = nodeid
tr$edge = rbind(tr$edge,c(anc,nodeid))
tr$edge.length[esplice] = edge.len[edgeind[mm]]
tr$edge.length = append(tr$edge.length,edge.len[edgeind[(mm+1)]])
# store data information:
dataNode = append(dataNode,nodeid)
dataVal = append(dataVal, edge.state[edgeind[(mm+1)]])
# update info:
esplice = length(tr$edge.length) # should be the last one
dec = nodeid
scount = scount + 1
}
}
# create phylo4d object
new.labels = c(tr$tip.label,tr$node.label)
if(as.num)
dataVal = as.numeric(dataVal)
tmpdf = data.frame("simmap_state"=dataVal, row.names=new.labels[dataNode])
rettree = phylo4d(tr,all.data=tmpdf,missing.data="OK",rownamesAsLabels=TRUE)
#write.nexus(tr,file="written.tree")
return(rettree)
}
# helper functions for read.simmap.new (below)
#
match.order <- function(basevec,cmpvec)
{
neworder = order(basevec)
neworder[order(basevec)] <- order(cmpvec)
stopifnot(all(cmpvec[neworder] == basevec))
return(neworder)
}
strip<-function(str,left=TRUE,right=TRUE)
{
if(left)
str = sub("^[ \t\n]+","",str)
if(right)
str = sub("[ \t\n]+$","",str)
return(str)
}
# Read simmap v1.5 files
# ----------------------
# Notes:
# 1. all mutational (mapping) information is contained with a comment block (e.g., [&map={0,0.144863,1,0.453803,0}] ),
# 2. within this block the mutational information is contained within curly braces, { }, preceded by a map command, &map= { },
# 3. the mutational information is from the descendant node to the ancestor (i.e., tips to root assignment),
# 4. the information is STATE, LENGTH, STATE,... excluding the last length, e.g., two changes (0=>1=>0) might look like this, [&map={0,0.144863,1,0.453803,0}], with state 0 having a length of 0.144863, followed by state 1 with a length of 0.453803, with the final state 0 having a length of BRLEN - (0.144863 + 0.453803); this approach helps reduce the size of the file by not including redundant information,
# 5. if a branch has no changes then it will look like this, [&map={STATE}]LENGTH
#
# @param specialpatt indicates which variables should be treated in the SIMMAP way
# any variables found which are not in specialpatt are treated as containing
# vector data. By default, all variable are considered to be in the special
# format.
#
read.simmap.new <- function(file="",text=NULL, specialpatt=character(0), add.root=TRUE)
{
str.has <- function(patt,token,lowercase=T){
len = length(grep(sprintf("%s",patt),tolower(token)))
if(len == 1)
return(TRUE)
return(FALSE)
}
if(is.null(text))
{
if(file.exists(file)){
text = paste(readLines(file),collapse="")
}else{
stop("Could not find file ",file)
}
}
# clear whitespace
txt = gsub("\\s","",text)
# add semicolon to end
if(substring(txt,nchar(txt))!=";")
txt = paste(txt,";",sep="")
# check to see if it is actually a SIMMAP v1.5 formatted
if(!is.simmap(text=txt,vers=1.5))
stop("'text' does not seem to contain any simmap-formatted trees")
# Extract comments from the tree string (which should contain the mapped data)
comments.pos = gregexpr("\\[&.*?\\]",txt)[[1]]
comments.len = attr(comments.pos,"match.length")
comments = character(length(comments.pos))
for(ii in seq(length(comments)))
{
comments[ii] = substr(txt,(comments.pos[ii]+2), ((comments.pos[ii] + comments.len[ii] - 2)))
}
length(comments)
####
# Process Trees
# 1.) strip SIMMAP comments from the tree string
#
cleaned = gsub("\\[&.*?\\]","",txt)
textstr = cleaned
# 2.) add root node and internal node names
count = 1
while(regexpr("):",textstr)[1] != -1)
{
textstr = sub( "):", paste(")Internal",sprintf("%0.7d",count),":",sep=""), textstr)
count = count + 1
}
# check if root has a value, if it does not then
# the program needs to know so that it can be set
# when processing the comments (check if anc == root).
#
norootval=FALSE
if(add.root && regexpr(");",textstr)[1] != -1){
textstr = sub( ");", ")Root:0;" , textstr)
norootval=TRUE
}
# 3.) create phylo4d object from tree string where comments have been removed (data is added next)
nodenames = get.nodenames(textstr)
phy = as(read.tree(text=textstr),'phylo4d')
#
# End Process Trees
####
# Sanity check: (leave out root, if it did not come with an explicit state)
#if(length(comments)!= (length(nodenames) - as.integer(norootval)) )
# stop("Node names does not match up with comments (and they should).")
####
# Process Comments:
#
# NOTE: Only one branch mapping is allowed at this point (TODO: fix this)
#
subnode.names = c("map","simmap_state",specialpatt)
subnode.patt = sprintf("^(%s)$",paste(subnode.names,collapse="|"))
subnode.pos = matrix(NA,nrow=0,ncol=2)
subnode.branch = matrix(NA,nrow=0,ncol=2)
subnode.data = character(0)
subnode.commentid = integer(0)
subnode.subind = integer(0)
phy.data = NULL
for (kk in seq(length(comments)))
{
## Tokenize command comment:
comment.tokens = strsplit(comments[kk],",")[[1]]
# check if there is an assignment (shouldn't there always be?)
ex.inds = (grepl("=",comment.tokens))
# reconstitute tokens if they were broken up in comment.tokens
curr = 1
using = rep(TRUE,length(ex.inds))
count = 1
while(count <= length(ex.inds)){
if(ex.inds[count]){
curr=count
} else {
using[count] = FALSE
comment.tokens[curr] = paste(comment.tokens[curr],comment.tokens[count],sep=",")
}
count = count + 1
}
comment.tokens = comment.tokens[using]
## end Tokenize command comment
# strip beginning / ending whitespace from each token
# comment.tokens = strip(comment.tokens)
## Process name/value pairs from comment tokens
## NOTE: branch mapping is treated differently that other attributes
nnames = character(0)
vals = character(0)
for(attr.str in comment.tokens)
{
# strip whitespace from tokens
#
tmp=strsplit(attr.str,"=")[[1]]
metric.basename = strip(tmp[1])
metric.value = strip(tmp[2])
if(str.has("\\{",metric.value) && str.has("\\}",metric.value))
{
metric.value = sub("^\\{(.*?)\\}$","\\1",metric.value)
metric.value = strip(strsplit(metric.value,",")[[1]])
}
if(!grepl(subnode.patt,metric.basename))
{
# process regular values
# Expand basenames:
if(length(metric.value) != 1)
metric.basename = sprintf("%s_%04d",metric.basename,seq(length(metric.value)))
nnames = append(nnames,metric.basename)
vals = append(vals,metric.value)
} else {
# process map nodes:
# values format: STATE,LENGTH,STATE,....
# FROM desc to ancs (so, 1st STATE and LENGTH pair are the values for the current node)
#
if(length(metric.value) == 1){
nnames = append(nnames,metric.basename)
vals = append(vals,metric.value[1])
} else {
desc = unname(which(labels(phy)==nodenames[kk]))
# check if desc is the root node (if it is then the ancestor is node 0 per phylobase's standard, I think).
if(isRooted(phy)){
anc = ifelse(rootNode(phy) == desc ,0 ,unname(ancestor(phy,desc)))
} else {
anc = unname(ancestor(phy,desc))
}
eind = .edge.index(phy,anc,desc)
elen = unname(edgeLength(phy)[eind])
# if one more length is needed:
if( length(metric.value) %% 2 != 0 )
{
tonow = sum(as.numeric(metric.value[seq(2,length(metric.value),by=2)]))
metric.value = c(metric.value,(elen-tonow))
}
# Separate states from lengths and make lengths fractions of BRLEN
# NOTE: fractions start at ANC(!) not DESC in the phyext format. This may cause some confusion
#
states = rev(metric.value[seq(1,length(metric.value),by=2)])
lens = rev(as.numeric(metric.value[seq(2,length(metric.value),by=2)]))
lens = cumsum(lens) / elen
# Assign data from DESC node
nnames = append(nnames,metric.basename)
vals = append(vals,tail(states,1)) # last one the DESC node state
# Cache data from subnodes on this branch:
for (subind in seq(length(states)-1))
{
subnode.branch = rbind(subnode.branch,c(anc,desc)) # which branch
subnode.pos = rbind(subnode.pos,c(lens[subind],lens[subind])) # which position on the branch
tmpstate=states[subind]
names(tmpstate) <- metric.basename
subnode.data = append(subnode.data,tmpstate) # TODO: make sure this works well
subnode.commentid = append(subnode.commentid,kk)
subnode.subind = append(subnode.subind, subind)
}
}
}
}
stopifnot(length(nnames) == length(vals))
if(is.null(phy.data))
{
phy.data = matrix(NA,nrow=length(nodenames),ncol=length(nnames))
rownames(phy.data) <- nodenames
colnames(phy.data) <- nnames
} else {
newcols = setdiff(nnames,colnames(phy.data))
if(length(newcols) != 0){
for(cname in newcols)
phy.data = cbind(phy.data,NA)
colnames(phy.data) <- c(head(colnames(phy.data),-length(newcols)),newcols)
}
unusedcols = setdiff(colnames(phy.data),nnames)
if(length(unusedcols) != 0)
{
nnames <- append(nnames,unusedcols)
vals <- append(vals,rep(NA,length(unusedcols)))
}
}
phy.data[kk,] = vals[match.order(colnames(phy.data),nnames)]
} # end kk comments loop
# Guess as phy.data types:
options(warn=-1) # suppress warnings for this sections
phy.data = data.frame(apply(phy.data,2,I),stringsAsFactors=FALSE)
for(jj in seq(ncol(phy.data)))
{
exclude = is.na(phy.data[,jj])
if( all(!is.na(as.numeric(phy.data[(!exclude),jj]))) )
phy.data[,jj] = as.numeric(phy.data[,jj])
}
options(warn=0)
#
# End Process Comments
#####
###
# Combine processed tree and processed comments:
if(isRooted(phy)){
phyd = addData(phy,all.data=phy.data,match.data=TRUE,rownamesAsLabels=TRUE)
} else {
phyd = addData(phy,all.data=phy.data,match.data=TRUE,rownamesAsLabels=TRUE,missing.data="OK")
}
if(length(subnode.data)!=0){
# add subnode stuff:
phyd = phyext(phyd)
usubbranches = unique(subnode.commentid)
# for(ii in seq(length(subnode.data)))
# {
for(ii in seq(length(usubbranches)))
{
cominds = which(subnode.commentid == usubbranches[ii])
usubnodes = unique(subnode.subind[cominds])
for(jj in seq(length(usubnodes)))
{
thisinds = cominds[which(subnode.subind[cominds] == usubnodes[jj])]
tmpdat = subnode.data[thisinds]
# sanity check:
check = apply(subnode.pos[thisinds,,drop=FALSE],1,mean)
stopifnot(all(check == check[1]))
phyd = addSubNode(phyd,subnode.branch[thisinds[1],1],subnode.branch[thisinds[1],2],subnode.pos[thisinds[1]],tmpdat,pos.is.fraction=TRUE)
}
}
}
return(phyd)
}
# Read trees from a nexus file. This function is only really necessary for
# nexus files where trees have simmap formatting. If they don't, then
# readNexus should be used instead.
#
# note: if type=="all", data is read in from the "characters" block
# and it's rownames are assumed to match the taxa names.
#
read.nexus.simmap <- function(finput="", text=NULL, vers=NULL, type=c("all", "tree"),...)
{
type <- match.arg(type)
if (type == "all") {
returnData <- TRUE
} else {
returnData <- FALSE
}
outtrees = NULL
if(!is.null(text)){
# TODO: check text for newlines and split on them if they exist.
rawtext=text
} else {
if(!file.exists(finput))
stop("Assuming finput is a file and could not find it")
rawtext = scan(finput,what=character(0),strip.white=T,sep="\n")
}
treesblock = read.nexus.block(txt=rawtext,block="trees",rm.comments=T)
if(length(treesblock)==0)
{
warning("No trees in this file...")
return(FALSE)
}
## TODO: split individual strings on ';' character into newlines
treelines = which(tolower(substr(treesblock,1,4))=="tree")
if(length(treelines)==0)
return(NULL)
# make sure the the translated table is used if found
transstart = which(sapply(treesblock,function(i) tolower(i)=="translate",USE.NAMES=F))
translated = length(transstart)!=0
transend=integer(0)
taxatrans=""
index=integer(0)
taxname=""
# assume that trees are not translated otherwise:
# these should be in order numerically.
if(translated)
{
transend = seq(transstart,min(treelines)-1)[min(grep(";",treesblock[seq(transstart,min(treelines)-1)]))]
taxatrans = treesblock[(transstart+1):transend]
# should ALWAYS be the last token:
if(any(taxatrans==";"))
taxatrans = taxatrans[-which(taxatrans==";")]
index = as.integer(sapply(strsplit(taxatrans,"\\s"),function(i) i[1])) # I'm assuming that they are integers
taxname = sapply(strsplit(taxatrans,"\\s"),function(i) sub("(,|;)","",i[2]))
}
count = 0
for(linenumb in treelines)
{
cat("tree number",linenumb,"\n")
# clearly, this will not work for non-simmap, non-newick trees (or simmap v1.5 strings)
#tmpstr = tail(strsplit(treesblock[linenumb],"=")[[1]],1)
# so, trying this instead:
#tmpstr = sub("^tree.+?=(.+)$","\\1",treesblock[linenumb])
# This might not be necessary:
if(has.weights(text=treesblock[linenumb])){
tmpstr = sub("\\[(.*?)\\]","",treesblock[linenumb])
} else {
tmpstr = treesblock[linenumb]
}
#tmpstr = sub("^tree.+?=(.+)$","\\1",tmpstr)
#tmpstr = sub("^tree(.+?)=\\s{0,}\\[(.*?)\\](.+)$","\\3",tmpstr)
#tmpstr = sub("^tree(.+?)=(.*?)(\\(.+)$","\\3",tmpstr)
roottype = sub("^(TREE|tree)(.+?)=(.*?)(\\(.+)$","\\3",tmpstr)
roottype = gsub("\\s","",roottype)
tmpstr = sub("^(TREE|tree)(.+?)=(.*?)(\\(.+)$","\\4",tmpstr)
addroot = !grepl("&U",roottype)
# check for simmap style
# remove first comment (should others be removed?
if(is.simmap(text=tmpstr)){
trtmp = phyext(unname(read.simmap(text=tmpstr,add.root=addroot)))
} else if (is.simmap(text=tmpstr,vers=1.5)) {
trtmp = phyext(unname(read.simmap.new(text=tmpstr,add.root=addroot,...)))
} else if (is.simmap(text=tmpstr,vers=1.0)){
trtmp = phyext(unname(read.simmap(text=tmpstr,vers=1.0,add.root=addroot)))
} else {
trtmp = phyext(read.tree(text=tmpstr))
}
if(!is(trtmp,'phylo4'))
trtmp = as(trtmp,'phylo4')
if(!inherits(trtmp,"phylo4")){
cat("Trouble parsing line for trees:\n",treesblock[linenumb],"\n")
stop("Wha?")
}
outtrees = append(outtrees,trtmp)
count = count + 1
}
# if the trees have been translated, then
# add the names back in.
#
if(translated)
{
for(treeind in seq(length(outtrees)))
{
tnames = as.integer(tipLabels(outtrees[[treeind]]))
if(!any(is.na(tnames)))
{
tipLabels(outtrees[[treeind]]) <- taxname[tnames]
} else {
warning("Tree translations found, but could not be mapped")
}
}
}
# Find and add data:
if(returnData && has.block(txt=rawtext,block="characters"))
{
data.part = read.characters2(txt=rawtext,blockname="characters")
if(length(data.part) > 0)
{
for(treeind in seq(length(outtrees)))
{
outtrees[[treeind]] <- addData(outtrees[[treeind]],tip.data=data.part,rownamesAsLabels=TRUE)
}
}
}
return(outtrees)
}
# expand singleton nodes into bifurcating nodes with one junk node
# this function is mainly for plotting and saving files
# TODO: convert all explicit calls to @'slot' to their accessor counterpart (e.g. tree@edge.length goes to edgeLength(tree)
#
expand.singles <- function(tree,keep.data=FALSE)
{
# note: tips should be indexed 1...N, where N is the number of tips
if(!is(tree,"phylo4"))
stop("tree needs to be of class phylo4")
if(hasSingle(tree))
{
has.data = hasData(tree) && keep.data
# Need to add labels to trees if any are NA:
# TODO: make sure labels are unique
if(any( is.na(labels(tree)) ))
{
count = round(runif(1) * 1e6)
for(ii in which(is.na(labels(tree))))
{
labels(tree)[ii] <- sprintf("Internal%07d",count) # warning, these labels might not be unique
count = round(runif(1) * 1e6)
}
}
# rename 'Singleton' labels
#labels(tree) = sub("^Singleton(.*)$","Internal\\1",labels(tree))
tmptable=table(tree@edge[,1])
snodes = as.integer(names(tmptable)[which(tmptable==1)])
snodes = snodes[snodes!=0] # 0 is the root node (check on this...)
nold = nrow(tree@edge)
nnew = length(snodes)
count = 1
for(ii in snodes)
{
tree@label = append(sprintf("JUNK%0.7d",count),tree@label)
tree@edge = rbind(tree@edge, c(ii, count))
tree@edge.length = append(tree@edge.length,0)
count = count + 1
}
# rearrange
#tree@order = "unknown"
rootind = which(tree@edge[,1] == 0)
tree@edge[seq(nold),] = tree@edge[seq(nold),] + nnew
tree@edge[seq(from=nold+1,to=nold+nnew),1] = tree@edge[seq(from=nold+1,to=nold+nnew),1] + nnew
tree@edge[rootind,1] = 0
# rename
nnodes = nTips(tree) + nNodes(tree)
names(tree@label) <- as.character(seq(1,nnodes))
names(tree@edge.length) <- apply(tree@edge,1,paste,collapse="-")
if(has.data)
{
rownames(tree@data) <- as.integer(rownames(tree@data)) + nnew
}
}
if(!keep.data)
tree = as(tree,"phylo4")
return(tree)
}
#
# sister function of expand.singles -> converts internal nodes with
# zero-length branches into singletons (in phylo4 format). Will only work if
# the zero-len branch is connected to a tip
#
collapse.to.singles <- function(tree,by.name=NULL)
{
if(is(tree,"phylo"))
tree = as(tree,"phylo4")
if(!is(tree,"phylo4"))
stop("tree argument needs to be of class phylo4")
# don't use the root node
if(any(edgeLength(tree, seq(1,(nNodes(tree)+nTips(tree)))[nodeType(tree)!='root']) == 0))
{
# Remove zero-length branches and their tips
zinds = which(edgeLength(tree)==0)
torem = edges(tree)[zinds,]
torem = torem[edges(tree)[zinds,1]!=0,][,2] # don't include root
zinds = zinds[edges(tree)[zinds,1]!=0] # don't include root
torem = torem[nodeType(tree)[torem]=="tip"]
if(length(torem)!=0)
{
# remove excess tips
rm.count = length(torem)
tip.count = length(tipLabels(tree))
total.count = length(labels(tree))
tree@edge <- edges(tree)[-zinds,] # TODO: add 'edges<-' to phylobase
tree@edge.length = tree@edge.length[-zinds]
tree@label = tree@label[-torem]
## begin reindexing nodes
# tips
t.oldseq = seq(1, (tip.count))[-torem]
t.newseq = seq(1,(tip.count-rm.count))
t.replace.inds = which(tree@edge[,2] %in% t.oldseq)
stopifnot(length(t.replace.inds) == length(t.oldseq)) # it should be a 1-1 relationship for tips
tree@edge[,2] = replace(tree@edge[,2],t.replace.inds ,t.newseq)
# internal
n.oldseq = seq(tip.count+1,total.count)
n.newseq = seq(tip.count+1-rm.count, total.count-rm.count)
for(kk in seq(length(n.oldseq)))
{
replaceinds = which(tree@edge[,1] == n.oldseq[kk])
if(length(replaceinds)!=0)
tree@edge[replaceinds,1] = rep(n.newseq[kk],length(replaceinds))
replaceinds = which(tree@edge[,2] == n.oldseq[kk])
if(length(replaceinds)!=0)
tree@edge[replaceinds,2] = rep(n.newseq[kk],length(replaceinds))
}
## rename
names(tree@label) <- as.character(seq(1,length(labels(tree))))
names(tree@edge.length) <- apply(tree@edge,1,paste,collapse="-")
}
} else {
warning("No zero-length branches found to be removed")
}
return(tree)
}
# collapse singleton nodes using ape<->phylo4 conversion functions:
# TODO: make this less reliant on ape functions
#
collapse.singletons <- function(phy)
{
rettree = phy
# if singletons exist
if(hasSingle(rettree))
{
if(is(rettree,"phylo4d"))
{
tab=table(edges(rettree)[,1])
snodeid = as.integer(names(tab)[which(tab==1)])
snodeid = snodeid[snodeid!=0] # 0 is a dummy node
# not sure why this is used:
#ancs = sapply(snodeid,function(i) which(edges(rettree)[,1] == i)) # where the subnode is the ancestor
#decs = sapply(snodeid,function(i) which(edges(rettree)[,2] == i)) # where the subnode is the descendant
#newdata = data.frame(tdata(rettree,"all")[-decs,],row.names=labels(rettree)[-decs])
newdata = tdata(rettree)[-snodeid,,drop=F]
#colnames(newdata) <- colnames(tdata(rettree))
# hack it for now:
rettree = as(rettree,"phylo") # convert to ape
rettree <- collapse.singles(rettree) # collapse singles
if(is.rooted(rettree)) {
rettree = phylo4d(rettree,all.data=newdata,rownamesAsLabels=TRUE) # create new phylo4d object
} else {
rettree = phylo4d(rettree,all.data=newdata,rownamesAsLabels=TRUE,missing.data="OK") #
}
} else {
rettree = as(rettree,"phylo") # convert to ape
rettree <- collapse.singles(rettree) # collapse singles
rettree = phylo4(rettree)
}
}
return(rettree)
}
# turn subnodes into singletons
# x - phylobase tree
# rm.ex.data - "remove 'extra' data":
# should data frames unique to tdat be removed (TRUE)
# or should extra columns be added to sdat (FALSE)
# WARNING: rm.ex.data = TRUE could lead to a loss of data (obviously)
#
collapse.subnodes <- function(x,rm.ex.data = TRUE)
{
if(!inherits(x,"phylo4d_ext") || !hasSubNodes(x))
{
warning("No subnodes to collapse")
return(x)
}
# Step 1: cache data from x and setup variables to hold info for the new tree
## a.) Handle data:
tdat = tdata(x)
sdat = sndata(x)
col.diffs = setdiff(colnames(tdat),colnames(sdat))
if(length(col.diffs) > 0)
{
if(rm.ex.data){
tdat = tdat[,-which(colnames(tdat) %in% col.diffs)]
} else {
tmpnames = colnames(sdat)
sdat = cbind(sdat,matrix(NA,nrow=nrow(sdat),ncol=length(col.diffs)))
colnames(sdat) <- c(tmpnames,col.diffs)
}
}
newdat = tdat
# TODO: might need to reorder columns of tdat and sdat to match up....
## b.) Subnode positions
posmeans = snposition(x)
if(!is.matrix(posmeans))
posmeans = matrix(posmeans,nrow=1,ncol=2)
posmeans = apply(posmeans,1,mean)
## c.) Handle edges
el = edges(x)
newel = el
elens = edgeLength(x)
newelens = elens
snedges.inds = getSubNodeEdgeInds(x)
u.sninds = unique(snedges.inds)
## d.) Handle node ids and labels
nids = nodeId(x) # node ids
lastnodeid = max(nids) # base from which to create new node ids
nodelabs = nodeLabels(x)
# Step 2.) Systematically convert subnodes into new sets of edges (with singletons)
for(ii in seq(length(u.sninds)))
{
# get edge and subnode indices:
eid = u.sninds[ii]
sninds = which(snedges.inds == eid)
curr.anc = el[eid,1]
curr.desc = el[eid,2]
elen = elens[eid]
newnodeids = lastnodeid + seq(length(sninds))
lastnodeid = max(newnodeids)
tmp = sprintf("Singleton%07d",newnodeids)
names(tmp) <- newnodeids
# Create new nodes and edges
# positions FROM anc TO desc:
newlens = sort(posmeans[sninds] * elen) # absolute positions along the branch
newlens = unname(c(newlens[1],diff(newlens),elen - max(newlens))) # convert to spacing btw subnodes
newedges = matrix(NA,ncol=2,nrow=length(newlens))
newedges[,1] <- unname(c(curr.anc,newnodeids))
newedges[,2] <- unname(c(newnodeids,curr.desc))
tmpdat = sdat[sninds,,drop=F]
rownames(tmpdat) <- c(tmp)
newdat = rbind(newdat,tmpdat)
# Append data:
newel = rbind(newel,newedges)
newelens = c(newelens,newlens)
nodelabs = c(nodelabs,tmp)
}
# Step 3.) Create new tree with new edgelist,labels, and data
# remove old indices:
newel = newel[-u.sninds,]
newelens = newelens[-u.sninds]
# create new tree:
newphy = phylo4(newel,newelens,tipLabels(x),nodelabs)
# add data:
newphy = addData(newphy,all.data=newdat)
return(newphy)
}
# subroutine of write.simmap:
newlabels.v1x <- function(x,usestate,splitchar,write.nas=TRUE)
{
# Note: this version of simmap only handle one state
if(is.null(usestate))
usestate = colnames(tdata(x))
usestate = usestate[1]
tdat = tdata(x)[,usestate,drop=F]
snodes.present = hasSubNodes(x)
sdat = NULL; snedges.inds = NULL
if(snodes.present){
sdat = sndata(x)[,usestate,drop=F]
snedges.inds = apply(snbranch(x),1,function(i) .edge.index(x,i[1],i[2]))
}
es = edges(x)[,2]
if(!isRooted(x))
es = union(es,edges(x)[,1]) # added 3/8 to accomodate unrooted trees:
elens = edgeLength(x); elens[is.na(elens)] <- 0.0
newlenlab=character(length(es))
for(ii in seq(length(es)))
{
nodeid=es[ii]
if(!(ii %in% snedges.inds))
{
# added 3/8 to accomodate unrooted trees:
if(is.na(elens[ii])){
newlenlab[ii] = ""
} else if(!is.na(tdat[nodeid,1]) || write.nas){
newlenlab[ii] = paste("{",tdat[nodeid,1] ,",", elens[ii],"}",sep="")
} else {
newlenlab[ii] = as.character(elens[ii])
}
} else {
snind = which(snedges.inds == ii)
snlens = snposition(x)[snind,] * elens[ii]
snstates = sdat[snind,1]
if(!is.matrix(snlens))
snlens = matrix(snlens,nrow=1)
# reorder
snpos = apply(snlens,1,mean)
neword = order(snpos,decreasing=T)
snpos = snpos[neword]
snstates = snstates[neword]
newlenlab[ii] = paste("{",tdat[nodeid,1],",",(elens[ii]-max(snpos)),sep="")
if(length(snpos)>1)
for(jj in seq(length(snpos)-1))
newlenlab[ii] = paste(newlenlab[ii],splitchar,snstates[jj],",",(snpos[jj]-snpos[(jj+1)]),sep="")
newlenlab[ii] = paste(newlenlab[ii],splitchar,tail(snstates,1),",",tail(snpos,1),"}",sep="")
}
}
oldlabs = labels(x)[es]
names(newlenlab) <- oldlabs
oldlabs[which(is.na(oldlabs))] <- ""
newlab = paste(oldlabs,":", newlenlab ,sep="")
newlab[which(newlenlab=="")] <- "" # remove any <NA> data
return(newlab)
}
# subroutine of write.simmap:
newlabels.v15 <- function(x,usestate,splitchar)
{
if(is.null(usestate))
usestate = colnames(tdata(x))
else {
if(is.numeric(usestate))
{
usestate = colnames(tdata(x))[usestate]
}
}
# get tip and subnode data:
tdat = tdata(x)[,usestate,drop=F]
snodes.present = hasSubNodes(x)
sdat = NULL; snedges.inds = NULL
if(snodes.present){
sdat = sndata(x)[,usestate,drop=F]
snedges.inds = apply(snbranch(x),1,function(i) .edge.index(x,i[1],i[2]))
}
es = edges(x)[,2]
if(!isRooted(x))
es = union(es,edges(x)[,1]) # added 3/8 to accomodate unrooted trees:
elens = edgeLength(x); elens[is.na(elens)] <- 0.0
newlenlab=character(length(es)) # new "length" labels for each node
# Setup SIMMAP comments to hold data values
for(ii in seq(length(es)))
{
nodeid=es[ii]
if(!(ii %in% snedges.inds))
{ # if there is no subnode on this branch:
if(is.na(elens[ii])) {
newlenlab[ii] = "" # added to deal with unrooted trees
} else {
topper = "[&"
backer = paste("]",elens[ii],sep="")
middle = ""
for(colind in seq(length(usestate)))
if(!is.na(tdat[nodeid,usestate[colind]]))
middle = paste(middle,sprintf("%s={%s}%s",usestate[colind],tdat[nodeid,usestate[colind]],ifelse(colind==length(usestate),"",",")),sep="")
if(middle==""){
newlenlab[ii] = as.character(elens[ii])
} else {
newlenlab[ii] = paste(topper,middle,backer,sep="")
}
}
} else {
topper = "[&"
backer = paste("]",elens[ii],sep="")
middle = ""
# retrieve subnode data:
snind = which(snedges.inds == ii)
snlens = snposition(x)[snind,] * elens[ii]
for(colind in seq(length(usestate)))
{
tmp = ""
snstates = sdat[snind,usestate[colind]]
if(!is.matrix(snlens))
snlens = matrix(snlens,nrow=1)
snpos = apply(snlens,1,mean)
neword = order(snpos,decreasing=T)
snpos = snpos[neword]
snstates = snstates[neword]
# first, write the DESC state:
tmp = paste(sprintf("%s={%s", usestate[colind],tdat[nodeid,usestate[colind]]),(elens[ii]-max(snpos)),sep=",")
#newlenlab[ii] = paste(sprintf("[&%s={%s",usestate[colind],tdat[nodeid,usestate[colind]]),(elens[ii]-max(snpos)),sep=",")
# intermediates, from DESC -> ANC
if(length(snpos)>1)
for(jj in seq(length(snpos)-1))
tmp = paste(tmp,splitchar,snstates[jj],",",(snpos[jj]-snpos[(jj+1)]),sep="")
#newlenlab[ii] = paste(newlenlab[ii],splitchar,snstates[jj],",",(snpos[jj]-snpos[(jj+1)]),sep="")
# last subnode
#newlenlab[ii] = paste(newlenlab[ii],splitchar,tail(snstates,1),ifelse(colind==length(usestate),"",","),sep="")
tmp = paste(tmp,splitchar,tail(snstates,1),"}",sep="")
middle = paste(middle,tmp,ifelse(colind==length(usestate),"",","),sep="")
}
if(middle==""){
newlenlab[ii] = as.character(elens[ii])
} else {
newlenlab[ii] = paste(topper,middle,backer,sep="")
}
}
}
# End setup SIMMAP comments
# Substitute SIMMAP comments into tree
oldlabs = labels(x)[es]
names(newlenlab) <- oldlabs
oldlabs[which(is.na(oldlabs))] <- ""
#newlab = paste(oldlabs,":{", newlenlab ,"}",sep="")
newlab = paste(oldlabs,newlenlab,sep=":")
newlab[which(newlenlab=="")] <- "" # remove any <NA> data
# DEBUG: at this point, oldlabs and newlabs look like they have been lined up correctly
return(newlab)
}
# write modified newick file:
# -This is kind of a hack: it basically writes subnodes and lengths to a new character label and then
# creates a new 'phylo' class using only those labels (without edge lengths). Then it uses
# APEs algorithm to write those labels to a newick string.
#
# -If any of the SIMMAP datatypes are is.na, then they are left out! TODO: Look into changing this in the future
#
write.simmap <- function(x,usestate=NULL,file="",vers=1.1,...)
{
splitchar = ifelse(vers==1.0,";",":")
if(is.null(usestate))
usestate = colnames(tdata(x))
#if(hasSubNodes(x))
if(hasData(x) && hasDataColumn(x,usestate))
{
newlab=NULL
if(vers == 1.5){
newlab <- newlabels.v15(x,usestate,",")
} else {
newlab <- newlabels.v1x(x,usestate,splitchar)
}
# reorder:
es = edges(x)[,2]
if(!isRooted(x))
es = union(es,edges(x)[,1]) # added 3/8 to accomodate unrooted trees:
altorder = order(es)
newlab = newlab[altorder]
ntype = nodeType(x)[es[altorder]]
phy = as(x,'phylo')
newedges = edges(x)
newphy = list(edge=newedges,
tip.label=newlab[which(ntype=="tip")],
node.label=newlab[which(ntype!="tip")],
Nnode=length(which(ntype!="tip"))
)
class(newphy) <- "phylo"
################################################################
# borrowed code from APE (write.tree.R):
#
output.tree.names=FALSE
append = FALSE
digits = 10
brl <- !is.null(newphy$edge.length)
nodelab <- !is.null(newphy$node.label)
f.d <- paste("%.", digits, "g", sep = "")
## Helper functions
cp <- function(s) STRING <<- paste(STRING, s, sep = "")
add.internal <- function(i) {
cp("(")
br <- which(newphy$edge[, 1] == i)
for (j in br) {
desc <- newphy$edge[j, 2]
if (desc > n) add.internal(desc)
else add.terminal(j)
if (j != br[length(br)]) cp(",")
}
cp(")")
if (nodelab) cp(newphy$node.label[i - n])
if (brl) {
cp(":")
cp(sprintf(f.d, newphy$edge.length[which(newphy$edge[, 2] == i)]))
}
}
add.terminal <- function(i) {
cp(newphy$tip.label[newphy$edge[i, 2]])
if (brl) {
cp(":")
cp(sprintf(f.d, newphy$edge.length[i]))
}
}
## End helper functions
n <- length(newphy$tip.label)
STRING <- if (output.tree.names) paste("ERROR!", "(", sep = "") else "("
br <- which(newphy$edge[, 1] == n + 1)
for (j in br) {
desc <- newphy$edge[j, 2]
if (desc > n) add.internal(desc)
else add.terminal(j)
if (j != br[length(br)]) cp(",")
}
if (is.null(newphy$root.edge)) {
cp(")")
if (nodelab) cp(newphy$node.label[1])
cp(";")
} else {
cp(")")
if (nodelab) cp(newphy$node.label[1])
cp(":")
cp(sprintf(f.d, newphy$root.edge))
cp(";")
}
if (file == "") return(STRING)
cat(STRING, file = file, append = append, sep = "\n")
################################################################
} else {
phy = as(x,'phylo')
write.tree(phy,file,...)
}
}
# write simmap version 1.1 strings
#
write.simmap.old <- function(x,usestate=NULL,file="",...)
{
write.simmap(x,usestate,file=file,vers=1.1,...)
}
# write simmap version 1.5 strings
#
write.simmap.new <- function(x,usestate=NULL,file="",...)
{
write.simmap(x,usestate,file=file,vers=1.5,...)
}
# This is the main write function for phylo4d_ext
# Mainly ripped from APE write.nexus function
#
# @param obj a list or single phylo4d_ext objects
# @param file the file to write to
# @param translate should the tree names be moved to a special section
# @param vers which SIMMAP version should be used (can be vers=c(1.1, 1.0, 1.5))
# @param usestates if NULL, then all data columns are used
# @param dtype for compatibility with older simmap versions
# anything in 'usestate' will be written into simmap comments
# anything left over that is compatible with dtype will be written as
# a character.
#
#
write.nexus.simmap <- function(obj, file = "", translate = TRUE, dtype=c(noneData(),contData(),discData()), ...)
{
if(!is.list(obj))
{
if(!is(obj,"phylo4d_ext"))
stop("This function is only made to work with phylo4d_ext objects. Use write.nexus() instead.")
obj <- list(obj)
} else {
if(!all(sapply(obj,is,'phylo4d_ext')))
stop("This function is only made to work with phylo4d_ext objects or lists of such. Use write.nexus() instead.")
}
ntree <- length(obj)
dtype = match.arg(dtype)
dat = NULL
if(hasData(obj[[1]]))
dat = tdata(obj[[1]],"tip")
cat("#NEXUS\n", file = file)
cat(paste("[R-package APE, ", date(), "]\n\n", sep = ""),file = file, append = TRUE)
#N <- length(obj[[1]]$tip.label)
N <- length(tipLabels(obj[[1]]))
cat("BEGIN TAXA;\n", file = file, append = TRUE)
cat(paste("\tDIMENSIONS NTAX = ", N, ";\n", sep = ""),file = file, append = TRUE)
cat("\tTAXLABELS\n", file = file, append = TRUE)
cat(paste("\t\t", tipLabels(obj[[1]]), sep = ""),sep = "\n", file = file, append = TRUE)
cat("\t;\n", file = file, append = TRUE)
cat("END;\n", file = file, append = TRUE)
cat("BEGIN TREES;\n", file = file, append = TRUE)
if (translate) {
## We take arbitrarily the labels of the first tree, and
## translate them as "1", "2", "3", ...
cat("\tTRANSLATE\n", file = file, append = TRUE)
tmp <- checkLabel(tipLabels(obj[[1]]))
X <- paste("\t\t", 1:N, "\t", tmp, ",", sep = "")
## We remove the last comma:
X[length(X)] <- gsub(",", "", X[length(X)])
cat(X, file = file, append = TRUE, sep = "\n")
cat("\t;\n", file = file, append = TRUE)
token <- as.character(1:N)
names(token) <- tipLabels(obj[[1]])
tipLabels(obj[[1]]) <- token
if (ntree > 1) {
for (i in 2:ntree)
tipLabels(obj[[i]]) <- token[tipLabels(obj[[i]])]
class(obj) <- NULL
}
} else {
for (i in 1:ntree)
tipLabels(obj[[i]]) <- checkLabel(tipLabels(obj[[i]]))
}
for (i in 1:ntree) {
tprefix = "\tTREE * UNTITLED"
#weights
if(hasWeight(obj[[i]])){
tprefix = sprintf("%s [&W %f]",tprefix,weight(obj[[i]]))
}
#is rooted
if (isRooted(obj[[i]])){
#cat("\tTREE * UNTITLED = [&R] ", file = file, append = TRUE)
tprefix = paste(tprefix," = [&R] ",sep="")
}else{
#cat("\tTREE * UNTITLED = [&U] ", file = file, append = TRUE)
tprefix = paste(tprefix," = [&U] ",sep="")
}
cat(tprefix, file = file, append = TRUE)
# NOTE (2/13) - write.simmap does this step now, and better:
#if(is.null(usestates))
# usestates = colnames(tdata(obj[[i]]))
cat(write.simmap(obj[[i]], file="",...),"\n", sep = "", file = file, append = TRUE)
}
cat("END;\n", file = file, append = TRUE)
# begin characters block write:
if( dtype != noneData() && !is.null(dat) )
{
datnames = colnames(dat)
treeargs <- list(...)
if( "usestate" %in% names(treeargs) ){
exclude = integer(0)
if(!is.character(treeargs$usestate)) {
exclude = as.integer(treeargs$usestate)
} else {
exclude = which(datnames == treeargs$usestate)
}
if(length(exclude) > 0)
dat = dat[,-exclude,drop=FALSE]
}
dat = dat[,which(guess.datatype(dat) == dtype),drop=FALSE]
if(ncol(dat) != 0)
{
cat( write.characters2(dat,dtype=dtype), sep = "\n", file= file, append=TRUE)
} else {
warning("Could not find any suitable data to write.")
}
}
# end characters block write
}
#------------------------------------------
# Modifying phylo4d-extension objects
#
#------------------------------------------
# Internal:
# NOTE: labels are case-sensitive
# NOTE: this function needs to deal with NAs
.edge.index <- function(tree,anc,dec)
{
if(!is(tree,'phylo4'))
stop("Tree needs to inherit from class phylo4")
# convert from characters to indices
if(is.character(c(anc,dec))){
if(is.na(anc)){
anc = which(is.na(labels(tree)))
} else {
anc = which(labels(tree) == anc)
}
if(is.na(dec)){
dec = which(is.na(labels(tree)))
} else {
dec = which(labels(tree) == dec)
}
}
if(is.na(anc) || is.na(dec) || is.null(anc) || is.null(dec))
stop("Must specify valid ancestor and decendent: ",anc,"-",dec)
#eind = which(edges(tree)[,1] == anc & edges(tree)[,2] == dec)
eind = which(edges(tree)[,1] %in% anc & edges(tree)[,2] %in% dec)
if(length(eind) != 1){
warning("No connection between ",anc," and ",dec,"\n")
return(NA)
}
return(eind)
}
nSubNodes <- function(x)
{
return (length(x@subnode.id))
}
setMethod("hasSubNodes", signature(x="list"),
function(x) {
return(sapply(x,hasSubNodes))
})
setMethod("hasSubNodes", signature(x="phylo4d_ext"),
function(x) {
return(nSubNodes(x)!=0)
})
setMethod("hasSubNodes", signature(x="phylo4"),
function(x) {
return(FALSE)
})
setMethod("hasSubNodes", signature(x="phylo"),
function(x) {
return(FALSE)
})
setMethod("hasData", signature(x="phylo"),
function(x) {
return(FALSE)
})
setMethod("hasData", signature(x="phylo4"),
function(x) {
return(FALSE)
})
setMethod("hasData", signature(x="phylo4d"),
function(x) {
return(ncol(tdata(x))!=0)
})
setMethod("hasData", signature(x="list"),
function(x) {
return(sapply(x,hasData))
})
setMethod("hasDataColumn", signature("phylo",index="ANY"),
function(x,index){
return(FALSE)
})
setMethod("hasDataColumn", signature("phylo4",index="ANY"),
function(x,index){
return(FALSE)
})
setMethod("hasDataColumn", signature("phylo4d",index="numeric"),
function(x,index){
retval = FALSE
if( hasData(x) && length(index) != 0 && all(index <= ncol(tdata(x))) )
{
retval=TRUE
}
return(retval)
})
setMethod("hasDataColumn", signature("phylo4d",index="character"),
function(x,index){
uinds = unique(index)
ind = which(colnames(tdata(x)) %in% uinds)
if(length(ind) != length(uinds))
{
return(FALSE)
} else {
return(hasDataColumn(x,ind))
}
})
getSubNodeData <- function(x,colname)
{
if(missing(colname))
return(x@subnode.data)
return(x@subnode.data[colname])
}
getSubNodePosish <- function(x)
{
return(x@subnode.pos)
}
getSubNodeEdgeInds <- function(x)
{
edgeinds = integer(nSubNodes(x))
if(length(edgeinds)!=0)
{
for(ii in seq(nrow(x@subnode.branch)))
edgeinds[ii] = .edge.index(x,x@subnode.branch[ii,1],x@subnode.branch[ii,2])
}
return (edgeinds)
}
# return empty data.frame styled like
# @data slot
getEmptyDataFrame <- function(x,...)
{
tmpdf = data.frame(x@data[0,],...)
colnames(tmpdf) <- colnames(tdata(x))
return(tmpdf)
}
# TODO: overload tdata here
#
addSubNode <- function(x,anc,dec,position,dataf,pos.is.fraction=FALSE)
{
if(!is(x,'phylo4d_ext')){
warning("Converting x to an extended phylo4d object")
x = phyext(x)
}
if(missing(dataf))
stop("dataf needs to contain something")
eind = .edge.index(x,anc,dec)
if(is.na(eind))
stop("Failure to find edge from ",anc, " to ",dec,"\n")
elen = edgeLength(x)[eind]
if(elen == 0)
stop("Cannot place subnode on a zero length branch")
if(position > ifelse(pos.is.fraction,1,elen))
stop("Position: ",position,", is greater than allowed value of ",ifelse(pos.is.fraction,1,elen),"\n")
# TODO: check for overlapping subnodes
# TODO: check to see if every col in data.frame has
# an associated col in tdata() AND that col is
# defined for internal and tip nodes
# construct data frame:
newdf = getEmptyDataFrame(x)
if(is.data.frame(dataf))
{
if( all(names(dataf) == names(newdf)) ){
newdf = rbind(newdf, dataf)
} else {
# only use cols which exist in tdata(x)
colinds = which(names(dataf) %in% names(newdf))
if( length(colinds) == 0 )
stop("Could not find any common columns between dataf and tdata(x)")
newdf = merge(newdf,dataf[,colinds,drop=F],all=T)
}
} else {
newdf = getEmptyDataFrame(x,stringsAsFactors=FALSE) # ??
# special case (if lengths are the same, then assume that they are in order):
if( length(dataf) == ncol(newdf) && is.null(names(dataf)) )
names(dataf) <- names(newdf)
newdf[1,] <- rep(NA,ncol(newdf))
# try to match up names
ndf = names(dataf)
count = 1
for(nm in ndf){
nmind = which(names(newdf)==nm)
if(!is.null(nmind) && length(nmind) != 0)
{
if(is.factor(newdf[,nmind]) && !(dataf[count] %in% levels(newdf[,nmind])) )
{
# need to add more levels if this column is a factor
newlevs = c(levels(newdf[,nmind]),unname(dataf[count]))
levels(newdf[,nmind]) <- newlevs
levels(tdata(x)[,nmind]) <- newlevs # TODO: TEST THIS!
}
newdf[1,nmind] = dataf[count]
}
count = count + 1
}
}
if(!pos.is.fraction)
position = position / elen
x@subnode.id = append(x@subnode.id, as.integer(nTips(x) + nNodes(x) + nSubNodes(x) + 1))
x@subnode.data = rbind(x@subnode.data, newdf)
x@subnode.branch = rbind(x@subnode.branch, edges(x)[eind,])
x@subnode.pos = rbind(x@subnode.pos, rep(position,2))
return (x)
}
#
showSubNodes <- function(x)
{
if(hasSubNodes(x))
{
charlen = 80
brchar = "-"
terminalchar = "-"
regchar="0"
overlapchar = "*"
# use them all by default:
einds = apply(x@subnode.branch,1,function(i) .edge.index(x,i[1],i[2]))
for(eind in unique(einds))
{
snid = which(einds == eind)
anc = x@subnode.branch[snid,1]
dec = x@subnode.branch[snid,2]
# setup output string
tmpstr = rep(brchar,charlen)
tmpstr[1] = terminalchar
tmpstr[charlen] = terminalchar
# get relative positions of nodes:
elen = edgeLength(x)[eind]
breaksize = elen / charlen
snpos = x@subnode.pos[snid,] * elen
if(!is.matrix(snpos))
snpos = matrix(snpos,nrow=1)
snmeans = apply(snpos,1,mean)
for(xx in seq(length(snid)))
{
nbreaks = max(1, floor(diff(snpos[xx,]) / breaksize) )
from = floor(snpos[xx,] / breaksize)
tmpstr[seq(from,length.out=nbreaks)][tmpstr[seq(from,length.out=nbreaks)]==regchar] = overlapchar
tmpstr[seq(from,length.out=nbreaks)][tmpstr[seq(from,length.out=nbreaks)]==brchar] = regchar
#tmpstr[seq(from,length.out=nbreaks)] = rep(regchar,nbreaks)
}
# collapse and print:
tmpstr = paste(tmpstr,collapse="")
if(length(snid)==1){
names(tmpstr) <- sprintf("Subnode at ~%0.2f on branch: '%s' to '%s' (brlen=%0.2f)",snpos[1]+diff(snpos[1,])/2,labels(x)[anc],labels(x)[dec],elen)
} else {
names(tmpstr) <- sprintf("%d Subnodes on branch: %d to %d (bren=%0.2f)",length(snid),anc[1],dec[1],elen)
}
print(tmpstr)
cat("\n")
}
print("0 indicates a subnode; * indicates 2+ subnodes overlapping. Positions are relative.")
}
}
#-----------------------------
# Set Generics
#-----------------------------
# show
setMethod("show","phylo4d_ext", function(object){ printphylo4(object); showSubNodes(object)})
setMethod("snid", signature(x="phylo4d_ext"),
function(x) {
return(x@subnode.id)
})
setMethod("snid", signature(x="list"),
function(x) {
return(x[[1]]@subnode.id)
})
setMethod("sndata", signature(x="phylo4d_ext"),
function(x) {
return(x@subnode.data)
})
setMethod("sndata", signature(x="list"),
function(x) {
return(x[[1]]@subnode.data)
})
# this adds data
# TODO: Change 'adds' behavior to 'replaces'
setReplaceMethod("sndata", signature(x="phylo4d_ext"),
function(x,datnames=NULL,value) {
if(!is.data.frame(value) && (length(value) != nSubNodes(x)))
{
warning("Can only add vectors of data if they are the same length as the number of subnodes")
return(x)
}
if(!is.data.frame(value)){
value = data.frame(value)
}
if(!is.null(datnames))
{
if(is.character(datnames) && length(datnames)==ncol(value))
{
names(value) <- datnames
} else {
warning("Not using specified names")
}
}
x@subnode.data = cbind(x@subnode.data,value)
return(x)
})
setReplaceMethod("sndata",signature(x="list"),
function(x,datnames=NULL,value) {
for(ii in seq(length(x)))
sndata(x[[ii]],datnames=datnames) <- value
return(x)
})
# begin rmdata:
# 'rmdata' method removes data columns from the 'data' and 'subnode.data' slots
#
setMethod("rmdata", signature(x="phylo4d_ext",index="numeric",subindex="numeric"),
function(x,index,subindex) {
if(length(index)>0 && abs(index) <= ncol(tdata(x)) && index != 0)
{
x@data = x@data[,-index,drop=F]
} else {
warning("The data column to be removed could not be found in tdata(x)")
}
if( hasSubNodes(x) && length(subindex) >0 && abs(subindex) <= ncol(sndata(x)) && subindex != 0)
{
x@subnode.data = x@subnode.data[,-index,drop=F]
}
return(x)
})
setMethod("rmdata", signature(x="phylo4d_ext",index="numeric",subindex="missing"),
function(x,index) {
return( rmdata(x,index=index,subindex=index) )
})
setMethod("rmdata", signature(x="phylo4d_ext",index="character",subindex="missing"),
function(x,index) {
ind = which(colnames(tdata(x)) %in% index)
if(hasSubNodes(x)){
subind = which(colnames(sndata(x)) %in% index)
} else {
subind = numeric(0)
}
return( rmdata(x,index=ind,subindex=subind) )
})
setMethod("rmdata", signature(x="phylo4d",index="numeric",subindex="missing"),
function(x,index) {
if(length(index)>0 && abs(index) <= ncol(tdata(x)) && index != 0)
x@data = x@data[,-index,drop=F]
return(x)
})
setMethod("rmdata", signature(x="phylo4d",index="character",subindex="missing"),
function(x,index) {
inds = which(names(tdata(x)) %in% index)
return(rmdata(x,inds))
})
setMethod("rmdata", signature(x="list",index="ANY",subindex="missing"),
function(x,index) {
x = sapply(x,rmdata,index)
return(x)
})
# end rmdata
setMethod("snposition", signature(x="phylo4d_ext"),
function(x) {
return(x@subnode.pos)
})
setMethod("snposition", signature(x="list"),
function(x) {
return(x[[1]]@subnode.pos)
})
setMethod("snbranch", signature(x="phylo4d_ext"),
function(x) {
return(x@subnode.branch)
})
setMethod("snbranch", signature(x="list"),
function(x) {
return(x[[1]]@subnode.branch)
})
#---------------
## WEIGHT:
#
#---------------
setMethod("weight", signature(x="phylo4d_ext"),
function(x) {
return(x@weight)
})
setMethod("weight", signature(x="list"),
function(x) {
if(hasWeight(x))
return( sapply(x,weight) )
return(numeric(0))
})
setReplaceMethod("weight", signature(x="phylo4d_ext"),
function(x,value) {
x@weight = value
return(x)
})
setReplaceMethod("weight",signature(x="list"),
function(x,value) {
if(length(x) != length(value))
stop("Replacement values need to be same length as the list")
for(ii in seq(length(x)))
weight(x[[ii]]) <- value[ii]
return(x)
})
setMethod("hasWeight",signature(x="phylo4d_ext"),
function(x,strict=TRUE){
return( length(x@weight)!=0 )
})
setMethod("hasWeight",signature(x="list"),
function(x,strict=TRUE){
retbool=ifelse(strict,TRUE,FALSE)
for(ii in seq(length(x))){
tmpbool = hasWeight(x[[ii]])
if(strict && !tmpbool)
return(FALSE)
if(!strict && tmpbool)
return(TRUE)
}
return(retbool)
})
#
# setMethod("addData", signature(x="phylo4d_ext"),
# function(x,...,snode.data=NULL) {
#
# oldcols = names(tdata(x))
# # Add data the normal way:
# x = getMethod("addData","phylo4d")(x,...)
#
# newcols = setdiff(names(tdata(x)),names(sndata(x)))
# supercols = setdiff(oldcols,names(tdata(x)))
#
# cat("newcols: ", newcols,"\n")
#
# # Add data to subedges:
# if(is.null(snode.data))
# {
# if(length(newcols)!=0)
# {
# newdat = data.frame(matrix(NA,nrow=nSubNodes(x),ncol=length(newcols)))
# names(newdat) <- newcols
# x@subnode.data = cbind(x@subnode.data,newdat)
# }
#
# # Remove superfluous columns
# if(length(supercols)!=0)
# {
# for(scol in supercols)
# x = rmdata(x,scol)
# }
# }else{
# # add this data:
# if(length(newcols)!=0)
# {
# print("adding this way")
# sndata(x,datnames=newcols) <- snode.data
# }
# }
#
# return(x)
# })
#
#
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.