R/phyext2.R
In phyext2: An Extension (for Package 'SigTree') of Some of the Classes in Package 'phylobase'

############
############  All these functions are from the no-longer-maintainted phyext package,
############  and reproduced here to support the SigTree package,
############  with permission of phyext package author Conrad Stack.
############

# data information:
noneData <- function() { return("none") }
discData <- function() { return("discrete") }
contData <- function() { return("cont") }


guess.datatype <- function(datvals)
{
	if(is.null(dim(datvals)))
		return(character(0))
	
	ndatcols = ncol(datvals)
	datatypes = rep("",ndatcols)
	
	if(ndatcols > 0)
	{
		datatypes[sapply(seq(ndatcols),function(i) is.factor(datvals[,i]))] = discData()
		datatypes[sapply(seq(ndatcols),function(i) is.numeric(datvals[,i]))] = contData()
	}
	
	return(datatypes)
}


write.characters2 <- function(xdf,blockname="CHARACTERS",dtype=c(contData(),discData()),missing.char="?")
{	
	
	dtype = match.arg(dtype)
	# set up state labels:
	use.state.labels=F
	state.labels=character(0)
	
	# For disc data, figure out what the symbols should be 
	discover.symbols<-function(dat,zero.based=T)
	{			
		syms = integer(0)
		if(ncol(dat)!=0){
			for(ii in seq(ncol(dat)))
			{
				syms = c(syms,unique((as.integer(dat[,ii]))))
			}
		}
		syms = sort(unique(syms))
		
		# assume that syms are in order
		if(zero.based)
		{
			if(syms[1] != 0){
				offs = -syms[1]
				syms = syms + offs
			}
		}
		
		return(syms)
	}
	
	# convert factors to zero-based ints
	# CAUTION: this function might not work in next NCL version
	convert.to.int <- function(dat,zero.based=T)
	{
		offset = 0
		if(zero.based)
			offset = -1
		
		for(ii in seq(ncol(dat)))
		{
			dat[,ii] = as.integer(dat[,ii])+offset
		}
		return(dat)
	}
	
	# generate state labels:
	make.state.labels <- function(dat)
	{
		cnames = colnames(dat)
		outlabs = character(ncol(dat))
		for(ii in seq(ncol(dat)))
		{
			# order by
			snames = unique(as.character(dat[,ii])[order(as.integer(dat[,ii]))])
			outlabs[ii] = sprintf("%d %s / %s",ii,cnames[ii],paste(snames,collapse=" "))
		}
		
		return(outlabs)
	}
	
	
	if(!is.data.frame(xdf))
		stop("Internal function .write.characters.block needs a data.frame as the first argument")
	
	header = paste("BEGIN ",blockname,";",sep="")
	header.title = paste("TITLE ",blockname,"_matrix;",sep="")
	header.dims = sprintf("DIMENSIONS NTAX=%d NCHAR=%d;",nrow(xdf),ncol(xdf))
	header.format = sprintf("FORMAT DATATYPE=%s MISSING=%s",ifelse(dtype==contData(),"CONTINUOUS","STANDARD"),missing.char)   # TODO: add GAP, SYMBOLS 
	if(dtype == discData()){
		# This check is any of the levels are NOT integers
		# if they are integers, then it is assumed that they do not 
		# need to be writen as state labels
		#
		use.state.labels = any(is.na(as.integer(levels(xdf[,1]))))
		if(use.state.labels){
			state.labels = make.state.labels(xdf)
			xdf = convert.to.int(xdf) # this does the zero-based conversion
		}
		header.format = sprintf("%s SYMBOLS=\"%s\";",header.format,paste(discover.symbols(xdf),collapse=" "))
	} else {
		header.format = paste(header.format,";",sep="")
	}
	
	if(use.state.labels){
		header.labels = sprintf("CHARSTATELABELS\n\t%s;", paste(state.labels,collapse=","))
	}else{
		header.labels = sprintf("CHARSTATELABELS\n\t%s;", paste(paste(seq(ncol(xdf)),colnames(xdf)),collapse=","))
	}

	mmatrix = "MATRIX"
	#mmatrix.data = unname(cbind(rownames(xdf),apply(xdf,2,as.character)))
	if(dtype==contData()){
		mmatrix.data = apply(xdf,1:2,function(i) sprintf("%0.15f",i))
	} else {
		mmatrix.data = apply(xdf,2,as.character)
	}
	if(any(is.na(mmatrix.data)))
		mmatrix.data[which(is.na(mmatrix.data),T)] <- missing.char
	
	mmatrix.data = apply(mmatrix.data,1,paste,collapse=" ")
	mmatrix.data = unname(cbind(rownames(xdf),mmatrix.data))
	mmatrix.data = unname(apply(mmatrix.data,1,paste,collapse="\t"))
	mmatrix.end = ";\n\nEND;"
	

	return(c(header,
			header.title,
			header.dims,
			header.format,
			header.labels,
			mmatrix,
			mmatrix.data,
			mmatrix.end))

}




# Check for evidence that this file contains simmap-formatted trees
# @param 'vers' is the simmap version that should be checked for
# @param 'quick' if TRUE, then the text string containing a tree is 
#		 only checked to see if the formatting looks right.  If FALSE,
#		 the method attempts to create a tree from the string.
# @param 'vers' can take the values (1.0,1.1,1.5); a meaningless result
#		 will be returned otherewise
#
is.simmap <- function(finput="",text=NULL,vers=c(1.1),quick=TRUE)
{
	if(!is.null(text)){
		# TODO: check text for newlines and split on them if they exist.
		rawtext=text
	} else {
		if(!file.exists(finput))
			stop("Assumed 'finput' was a filename and could not be fond")
		
		rawtext = scan(finput,what=character(0),strip.white=T,sep="\n")		
	}

	## TODO: split individual strings on ';' character
	
	# if there is only one string, then just check that,
	# otherwise assume the file is text from a nexus file
	if(length(rawtext)==1)
	{
		treesblock = rawtext
		treelines = 1
	} else {
		treesblock = read.nexus.block(txt=rawtext,block="trees",rm.comments=T)
		treelines = which(tolower(substr(treesblock,1,4))=="tree")
	}
	
	if(length(treesblock)==0)
	{
		warning("No trees in this file...")
		return(FALSE)
	}
	
	potentialsimmap = logical(length(treelines))
	count = 1
	for(linenumb in treelines)
	{
		# check for simmap styles
		if(vers == 1.1 || vers == 1.0)
		{
			# remove all comments from the string
			junk = gsub("\\[(.*?)\\]","",treesblock[linenumb])
			potentialsimmap[count] = grepl(":\\{.*?\\}",junk)
			if(potentialsimmap[count] && !quick)
			{
				tr = try(read.simmap(text=junk,vers=vers),silent=T)
				if(is(tr,"try-error"))
					potentialsimmap[count] = FALSE
			}
			
		} else {
			
			potentialsimmap[count] = grepl("\\[(.*?)\\]",treesblock[linenumb])
			if(potentialsimmap[count] && !quick)
			{
				junk = gsub("\\[(.*?)\\]","",treesblock[linenumb])
				
				# TODO: change this to read.simmap.new when that function is finished:
				tr = try(read.tree(text=junk),silent=T)
				if(is(tr,"try-error"))
					potentialsimmap[count] = FALSE
			}
		}
		
		count = count + 1
	}
	
	return(any(potentialsimmap))
}






#-----------------------
# Phylo4d class extended
# @ author Conrad Stack
#
# This extension will support "sub-nodes" so that branches can be fragmented to 
# accomadate the results of ancestral state reconstructions (state changes along 
# branches).
#-----------------------

# @param subnode.id  	ID for this node 
# @param subnode.data  	data.frame the should look like superclass data.frame
# @param subnode.branch The branch that this subnode is located on. (Duplicate information - the reason this uses node id's instead of edge indices is due to the fact that edge indices seem unstable)
# @param subnode.pos  	The distance to the subnode from the ancestor (root) of the branch. Can be a non-zero range
setClass("phylo4d_ext", 
			representation(	subnode.id="integer",
							subnode.data="data.frame",
							subnode.branch="matrix",  
							subnode.pos="matrix",
							weight="numeric" ),
			,contains="phylo4d",
			prototype=prototype(
				subnode.id=integer(0),
				subnode.data=data.frame(NULL),
				subnode.branch=matrix(0,nrow=0,ncol=2),
				subnode.pos=matrix(0,nrow=0,ncol=2),
				weight=numeric(0)
				)
			)


#------------------
# check validity
#------------------
validPhylo4d_ext <-function(object)
{
	
	retval = TRUE

	# number of ids and branches are the same
	retval = retval && (length(object@subnode.id) == nrow(object@subnode.branch))

	# number of branches and number of entries in the data matrix are the same
	if(length(object@subnode.data)!=0)
		retval = retval && (nrow(object@subnode.branch) == nrow(object@subnode.data))
	
	# number of ids and number of rows in position matrix are similar
	retval = retval && (length(object@subnode.id) == nrow(object@subnode.pos))
	
	# Check that branch ids exist in superclass 
	if(nrow(object@subnode.branch)!=0)
		for(br.ind in seq(nrow(object@subnode.branch)))
			retval = retval && any(apply(edges(object),1,function(i) all(object@subnode.branch[br.ind,] == i)))
		
	# check that data.frames in 'data' slot and 'subnode data' slot are structurally similar
	# retval = retval && all( names(object@data) == names(object@subnode.data) )
	
	return (retval)
}
setValidity("phylo4d_ext",validPhylo4d_ext)


#----------------------------------------------
#  Constructors: extended phylo4d  
#  (overloaded)
#----------------------------------------------

# make it a generic function with return class of phylo4d_ext (or list of these)
setGeneric("phyext", function(x, ...) { standardGeneric("phyext")}, valueClass=c("phylo4d_ext","list") )

# return itself (otherwise it will use the function below):
setMethod("phyext", "phylo4d_ext",
    function(x, ...) {
	    return(x)
})
	
# if first arg is a phylo4d object:
setMethod("phyext", "phylo4d",
    function(x, snode.data=NULL, snode.branch=NULL, snode.pos=NULL, ...) {
	
	# default values:
	if(missing(snode.data) || is.null(snode.data))
		snode.data = data.frame(x@data[0,])
	
	if(missing(snode.branch) || is.null(snode.branch))
		snode.branch = matrix(0,nrow=0,ncol=2)
	
	if(missing(snode.pos) || is.null(snode.pos))
		snode.pos = matrix(0,nrow=0,ncol=2)
	
	# convert to matrix is it isn't already
	if(is.numeric(snode.pos))
		snode.pos = matrix(rep(snode.pos,2),ncol=2)
	
	if(is.numeric(snode.branch))
		snode.branch = matrix(snode.branch,ncol=2)
		
	# Convert any singletons into subnodes:
	#
	if(hasSingle(x))
	{
		
		###########
		# Changed (7/11) by conrad to process more than one subnode:
		#
		# process subnodes:		
		
		
		# Get singleton node ids:
		tab=table(edges(x)[,1])
		snodeid = as.integer(names(tab)[which(tab==1)])
		snodeid = snodeid[snodeid!=0]  # 0 is a dummy node
		
		
		# Get singleton indices: (edges where singletons are the descendant)
		#sings = sapply(snodeid,function(i) which(edges(x)[,2] == i))
		sings = snodeid
		
		# Get singleton data: (why are edge indices used here(sings) instead of node ids?)
		#decs.data = data.frame(tdata(x,"all")[sings,])
		#dnames <- colnames(tdata(x))
		#names(decs.data) <- dnames
		decs.data = tdata(x,"all")[sings,,drop=F]
		
		
		########################################################################################
		# Setup other temporary data containers for transferring singleton data to subnodes
		# 
		# Idea:
		# 1.)	einds gets a first singletons:
		# 		new.ancestor <<----------->> first.singleton
		# 2.)	For loop the processes all subsequent singletons, looping until non-singleton is found
		# 3.) 	Put data back together
		#
		
		# 1.)
		einds = which(apply(edges(x),1,function(i) !(i[1]%in%snodeid) && i[2]%in%snodeid  )) # get all first singletons-on-a-branch
		alle = edges(x)[einds,]
		if(!is.matrix(alle))  alle = matrix(alle,nrow=1)
		elens = edgeLength(x)[einds]
		#edata = data.frame()
		#colnames(edata) <- dnames
		edata = tdata(x,"all")[alle[,2],,drop=F]
		ee = cbind(alle,elens)
		newnodes = matrix(0,ncol=3,nrow=0)
		
		# 2.)
		for(ii in seq(nrow(ee)))
		{
			anc = ee[ii,1]
			dec = ee[ii,2]
			
			# if decendent is still a singleton:
			if(dec %in% snodeid)
			{
				newlens = ee[ii,3]
				newdata = data.frame(NULL)
	
				# travel back until a non-singleton is found:
				while(dec %in% snodeid)
				{
					#cat("dec=",dec," - ")
					
					# length to new subnode:
					tmplen = unname(edgeLength(x)[which(edges(x)[,1]==dec)])
					newlens = append(newlens, (tail(newlens,1)+tmplen) )
					#cat(levels(tmpdata[1,])[as.integer(tmpdata[1,])],"\n")
					
					# new subnode data:
					#tmpdata = data.frame(decs.data[which(sings==dec),])
					#colnames(tmpdata) <- dnames
					tmpdata = decs.data[which(sings==dec),,drop=FALSE]
					newdata = rbind( newdata, tmpdata )						
					
					dec = edges(x)[which(edges(x)[,1]==dec),2]
				}
				
				# use any
				newlens = head(tail(newlens,-1),-1)
				newdata = tail(newdata,-1)
				if(length(newlens)!=0)
				{
					for(kk in seq(length(newlens)))
					{
						newnodes = rbind(newnodes,c(anc,dec,newlens[kk]))
					}
					edata = rbind(edata,newdata)
				}
				ee[ii,2] <- dec
			}
		}
		
		# 3.)
		eetmp = rbind(ee,newnodes)
		ancs = unname(eetmp[,1])
		decs = unname(eetmp[,2])
		snode.length = unname(eetmp[,3])
		snode.data = edata
		#names(snode.data) <- dnames
		
		
		###############################################################################################################
		
		# new labels:
		ancs.label = labels(x)[ancs]
		decs.label = labels(x)[decs]
		
		# Remove singletons from original tree
		x = collapse.singletons(x)
		
		# condition the branches:
		for(ii in seq(length(ancs)))
		{
			# NOTE: This function assumes that labels are unique (might not always be true...)
			snode.branch = rbind(snode.branch, edges(x)[.edge.index(x, ancs.label[ii], decs.label[ii]),])
			# Conrad: make subnode position a fraction of the parent branch length:
			#snode.pos = rbind(snode.pos, rep(snode.length[ii],2))
			snode.pos = rbind(snode.pos, rep(snode.length[ii],2)/edgeLength(x)[.edge.index(x,snode.branch[ii,1],snode.branch[ii,2])]) 
		}
	}
	
	# create dummy ids for subnodes 
	# NOTE: subnode.ids not currently being used
	#
	idvals = integer(0)
	if(nrow(snode.branch)!=0)
	{
		idstart = (nTips(x) + nNodes(x) + 1)
		idvals = as.integer(seq(from = idstart, length.out=nrow(snode.branch)))
	} 
	
	# Annotate collapsed tree with subnode data:
	retobj = new("phylo4d_ext",
		x,
		subnode.id=idvals, 
		subnode.data = snode.data,
		subnode.branch = snode.branch, 
		subnode.pos = snode.pos)
	
	return(retobj)
})



setMethod("phyext", "phylo4",
    function(x, snode.data=NULL, snode.branch=NULL, snode.pos=NULL, ...) {

	# create new phylo4 object (allow arguments to be passed via ellipsis)
	phyd = phylo4d(x,...)
	
	phyext(phyd,snode.data,snode.branch,snode.pos)
})



setMethod("phyext", "phylo",
    function(x, snode.data=NULL, snode.branch=NULL, snode.pos=NULL, ...) {
	phyd = phylo4d(x,...)
	phyext(phyd,snode.data,snode.branch,snode.pos)
	
})


setMethod("phyext","list",
	function(x,...){
	
	x = sapply(x,phyext,...)
	return(x)
})


# assume character points to a file or is a piece of text
# TODO: check if trees are simmap formatted!
setMethod("phyext", "character",
    function(x, snode.data=NULL, snode.branch=NULL, snode.pos=NULL, ...) {
	 
	# Figure out what x's format is:
	tr = NULL
	if(file.exists(x))
	{
		# x points to a file  
		if(is.simmap(finput=x)){
			tr = try(read.nexus.simmap(x)[[1]],silent=T)
		} else {
			tr = try(read.tree(x),silent=T)
		}
		
		if(is(tr,"try-error"))
		{
			tr = try(read.nexus(x),silent=T)
			if(is(tr,"try-error"))
				stop("If first argument is a file then it must contains trees in newick or nexus format")
		}
	} else {
		# x is a character string:
		if(is.simmap(text=x)){
			tr = try(read.simmap(text=x),silent=T)
		} else {
			tr = try(read.tree(text=x),silent=T)
		}
		if(is(tr,"try-error"))
			stop("If first argument is a string, then it must be newick-formated.")
	}
	
	phyd = phylo4d(tr,...)
	phyext(phyd,snode.data,snode.branch,snode.pos)
})








# get coalescent intervals from tip dated trees


tipdate.ci<-function(tr,show.plot=F)
{
	
	if ( !(class(tr) %in% c("multiPhylo","phylo")) )
	  	stop("object \"tr\" is not of class \"phylo\"")
	if(!is.binary.tree(tr))
		stop("Need a binary tree")
	if(!is.rooted(tr))
		stop("Need a rooted tree")

	nleaves = length(tr$tip.label)
	ileaves = seq(1,nleaves)
	#rnode = nleaves + 1 # root node
	rnode =  which(tabulate(tr$edge[,2])==0)[1]
	cipos <- numeric(0)  #!# cipos<<-numeric(0)
	citype <- numeric(0) #!# citype<<-numeric(0)
	cinode <- numeric(0) #!# cinode<<-numeric(0)
	
	isleaf<-function(n)
	{
		return( (length(which(tr$edge[,1]==n)) == 0) )
	}
	
#!#	addlen<-function(len)
#!#	{
#!#		cipos<<-c(cipos,len)
#!#	}
	
	
#!#	addtype<-function(type)
#!#	{
#!#		citype<<-c(citype,type)
#!#	}	
	
#!#	addnode<-function(nnode)
#!#	{
#!#		cinode<<-c(cinode,nnode)
#!#	}
	
	# USE SAPPLY for recursion from now on!
	# Start at root node and trace back from there....
	# node = node number,
	# tr = tree (should stay constant)
	# len = interval lengths
	#
	chknode<-function(node,len)
	{
		#Sys.sleep(0.25)
		#cat("node: ",node," length: ",len,"\n")
		# check for leaf node
		if(isleaf(node))
		{
			cipos <- c(cipos,len) #!# addlen(len)
			citype <- c(citype,0) #!# addtype(0)		
			cinode <- c(cinode,node) #!# addnode(node)
		} else {
			cipos <- c(cipos,len) #!# addlen(len)
			citype <- c(citype,1) #!# addtype(1)	
			cinode <- c(cinode, node) #!# addnode(node)
			# tr$edge 	 col1 = FROM,
			#		 col2 = TO
			inds = which(tr$edge[,1]==node)
			if(show.plot) edgelabels(as.character(tr$edge.length[inds]),inds,adj=-0.5)			
			Recall(tr$edge[inds[1],2],(len + tr$edge.length[inds[1]]))
			Recall(tr$edge[inds[2],2],(len + tr$edge.length[inds[2]]))
		}
	}
	
	if(show.plot) plot.phylo(tr,show.tip.label=F)

	chknode(rnode,0)
	sorder = order(cipos, decreasing=TRUE)
	tmppos = cipos[sorder]
	tmptype = citype[sorder]
	#rm(list=c("cipos","citype","cinode"))
	
	lin = numeric(0) #lineages
	il = abs(diff(tmppos)) #interval.length
	ic = length(cipos)-1 # interval.count
	td = diff(range(cipos)) #total.depth
	
	count=0;
	for (i in tmptype)
	{
		if(i==0) count = count + 1 	# sampling event
		else count= count - 1		# coalescent event
		lin = append(lin,count)
	}
	
	return(list(lineages=lin[1:(length(lin)-1)],interval.length=il,interval.count=ic,total.depth=td,I=tmptype[-1]))
}


setGeneric("treeHeight", function(x) { standardGeneric("treeHeight") })
setMethod("treeHeight", signature(x="phylo4d_ext"),
  function(x) {	
	options(warn=-1)  # suppress warnings for this sections
	retval = tipdate.ci(as(x,"phylo"))$total.depth
	options(warn=0)  
	return(retval)
})

setMethod("treeHeight", signature(x="phylo4d"),
  function(x) {
	options(warn=-1)  # suppress warnings for this sections
	retval = tipdate.ci(as(x,"phylo"))$total.depth
	options(warn=0)  
	return(retval)
})

setMethod("treeHeight", signature(x="phylo4"),
  function(x) {
	options(warn=-1)  # suppress warnings for this sections
	retval = tipdate.ci(as(x,"phylo"))$total.depth
	options(warn=0)  
	return(retval)
})










#-------------------------------------------------------
#  Methods 
#  -Are concerned with subnode stuff only.  
#   Higher-level tree modifications should be done
#  	with phylobase functions.
#-------------------------------------------------------

# Table of Contents:

## Generic Methods: (and overloads)
#

## SIMMAP Methods:
# get.nodenames -- internal; get names of nodes
# is.simmap	 --  check whether file or text string contains simmap formatted trees
# read.simmap  --  read simmap text (v1.0-1.1) to phylo4d object (with singletons)
# read.simmap.new  --  read simmap text (v1.5) to phylo4d_ext
# read.nexus.simmap  -- reads nexus file which contains simmap (all versions) trees
# expand.singles  --  experimental; expand singleton nodes into a bifurcation with one 0-length branch terminating at a dummy node
# collapse.to.singles  --  experimental;  collapse bifurcations which contain a 0-length branch connected to a leaf node into singletons
# collapse.singletons  --  remove singleton nodes from a tree
# collapse.subnodes  --  experimental;  collapse subnodes to singletons
# write.simmap (w/ two subroutines)  -- write a tree (and parts of it's data) to simmap format (vers=c(1.0,1.1,1.5))
# write.simmap.old  --  wrapper; calls write.simmap with v1.0 or v1.1 parameters
# write.simmap.new  --  wrapper; calls write.simmap with v1.5 parameters
# write.nexus.simmap  --  uses write.simmap and other internal functions to write a nexus file using write.simmap to output each tree.

## Subnode Methods:
# .edge.index  --  internal;
# nSubNodes  --  get number of subnodes
# hasSubNodes  --  does the tree have subnodes?
# hasData  --  does the tree contain any data? (is the data slot empty?)
# getSubNodeData  --  returns subnode data.frame
# getSubNodePosish  -- returns subnode positions
# getSubNodeEdgeInds  --  returns subnode edge indices
# getEmptyDataFrame  --  returns empty data frame in the same format as the data frame from the data slot
# addSubNode  --  add a subnode to a tree
# showSubNodes  --  print out a representation of the subnodes to stdout


## Generics
setGeneric("sndata", function(x, ...) { standardGeneric("sndata") })
setGeneric("sndata<-", function(x,datnames=NULL, value) { standardGeneric("sndata<-") })
setGeneric("snid", function(x, ...) { standardGeneric("snid") })
setGeneric("snposition", function(x, ...) { standardGeneric("snposition") })
setGeneric("snbranch", function(x, ...) { standardGeneric("snbranch") })
setGeneric("rmdata", function(x,index,subindex) { standardGeneric("rmdata")} )
setGeneric("weight", function(x) { standardGeneric("weight")} )
setGeneric("weight<-", function(x,value) { standardGeneric("weight<-")} )
setGeneric("hasWeight",function(x,strict=TRUE) { standardGeneric("hasWeight")} )

# subnode
setGeneric("hasSubNodes", function(x) { standardGeneric("hasSubNodes") })
setGeneric("hasData", function(x) { standardGeneric("hasData") })
setGeneric("hasDataColumn",function(x,index) { standardGeneric("hasDataColumn") })

# overloading phylobase methods:

#--------------------------
# SIMMAP Processing Methods		
#--------------------------

# internal:
# get node names from newick string
get.nodenames<-function(newick.txt)
{
	nnames = character(0)
	ttmp = newick.txt
	
	if(grepl("^.*;$",ttmp)) {
		# remove trailing semi-colon, otherwise it will mess up the regular expression matching
		ttmp = substring(ttmp,first=1,last=(nchar(ttmp)-1))
	}
	nname.pat="(\\(|,|\\))([a-zA-Z0-9'_\\.]{1,})(:|;)"
	junk = regexpr(nname.pat,ttmp)
	count = 0
	while(junk[1] != -1)
	{
		tmpname = substr(ttmp,(junk[1]+1),(junk[1]+attr(junk,"match.length")-2))
		nnames = append(nnames,tmpname)
		ttmp = sub(nname.pat,"",ttmp)	
		junk = regexpr(nname.pat,ttmp)
		count = count + 1
		stopifnot(count < 100000)
	}
	return(nnames)
}




# read modified newick file
# citation: 
# Bollback J. P. (2006) SIMMAP: Stochastic character mapping of 
# discrete traits on phylogenies. BMC Bioinformatics. 7:88
# 
# Assume that branch lengths are present (otherwise, why use SIMMAP?)
# Assume that the root node can only have one simmap state (or, only use the first)
#
# @param as.num - convert the data to numeric (by default, a factor data type is used)
# @param text - use text string instead of a file name
# @param vers - which version of simmap to use
# @param as.num - no idea
# @param add.root - should the tree be treated as unrooted?
#
read.simmap <- function(file="",text=NULL, vers=1.1, as.num=FALSE, add.root=TRUE, ...)
{
	
	if(is.null(text))
	{
		#stop("Need to have text for now")
		if(!file.exists(file))
			stop("Assumed 'file' was a filename and could not be fond.")
		
		text = scan(file,what=character(0),strip.white=T,sep="\n")
	}

	# clear whitespace	
	text = gsub("\\s","",text)
	
	# add semicolon to end
	if(substring(text,nchar(text))!=";")
		text = paste(text,";",sep="")
	
	if(TRUE)
	{
		# add root node and internal node names
		count = 1
		while(regexpr("):",text)[1] != -1)
		{
			text = sub( "):", paste(")Internal",sprintf("%0.7d",count),":",sep=""), text)
			count = count + 1
		}
			
		# Root
		if(add.root && regexpr(");",text)[1] != -1)
			text = sub( ");", ")Root:0;" , text)
		
	}
	
	## Poor replacement for regular expressions
	#
	edge.ind = integer(0)
	edge.state = character(0)
	edge.len = numeric(0)
	junk = strsplit(text,"")[[1]]
	sub.branches = cbind( which(junk=="{"), which(junk=="}") )
	br.lens = numeric(nrow(sub.branches))
	
	for(ii in seq(nrow(sub.branches)))
	{
		br.total = 0
		# get the internal part
		within = paste(junk[seq( sub.branches[ii,1]+1 , sub.branches[ii,2]-1 )],collapse="")
		splitchar = ifelse(vers==1.0,";",":")
		within.sub = strsplit(within,splitchar)[[1]]
		for(jj in within.sub)
		{
			slptmp = strsplit(jj,",")[[1]]
			edge.ind = append(edge.ind,ii)
			edge.state = append(edge.state,slptmp[1])
			edge.len = append(edge.len,slptmp[2])
			br.total = br.total + as.numeric(slptmp[2])
		}
		br.lens[ii] = br.total
	}

	
	# horrible way to put it back together:
	# switch to regular expressions
	#
	newick.str = paste(junk,collapse="")
	for(ii in seq(nrow(sub.branches)))
	{
		replen = diff(sub.branches[ii,])+1
		substr(newick.str, sub.branches[ii,1], sub.branches[ii,2]) <- paste(rep(" ",replen),collapse="")
		substr(newick.str, sub.branches[ii,1], sub.branches[ii,2]) <- as.character(br.lens[ii])
	}
	newick.str = gsub("\\s","",newick.str) # strip whitespace
	
	# convert to ape format:
	tr = read.tree(text=newick.str)
	
	# make singleton nodes
	#
	edge.len = as.numeric(edge.len)
	all.labels = c(tr$tip.label,tr$node.label)
	
	# all names except the root node (which ought to be named, otherwise this will break)
	#
	#nnames = head(get.nodenames(newick.str),-1)  # This should match up with edge.ind
	nnames = get.nodenames(newick.str)
	scount = 1
	nTips = length(tr$tip.label)
	nInts = length(tr$node.label)
	
	# node information
	dataVal = character(0)
	dataNode = integer(0)
		
	# loop through split branches
	for(kk in unique(edge.ind))
	{
		# this information shouldn't change:
		is.root = F
		edgeind = which(edge.ind == kk)
		trind = which(nnames[kk] == all.labels)
		esplice = which(tr$edge[,2] == trind) # which edge to chop
		anc = tr$edge[esplice,1]
		dec = tr$edge[esplice,2]
	
		# assume that this is the root (no way to check for it otherwise..)
		if( length(esplice) == 0 )
		{
			dec = trind
			is.root = T
		}
		
		# store data information:
		dataNode = append(dataNode,dec)
		dataVal = append(dataVal, edge.state[edgeind[1]])

		newsingles = length(edgeind)-1
		
		if(newsingles==0 || is.root)
			next
		
		# add new singleton nodes
		for(mm in seq(newsingles))
		{
			#cat("- adding a new single\n")
			### add new internal nodes to 'phylo' object:
			#
			tr$Nnode = tr$Nnode + 1; nInts = nInts + 1
			tr$node.label = append(tr$node.label,sprintf("Singleton%0.7d",scount))
			nodeid = nTips + nInts # should be the last one available
			tr$edge[esplice,1] = nodeid
			tr$edge = rbind(tr$edge,c(anc,nodeid))
			tr$edge.length[esplice] = edge.len[edgeind[mm]]
			tr$edge.length = append(tr$edge.length,edge.len[edgeind[(mm+1)]])
			
			# store data information:
			dataNode = append(dataNode,nodeid)
			dataVal = append(dataVal, edge.state[edgeind[(mm+1)]])
			
			
			# update info:
			esplice = length(tr$edge.length) # should be the last one
			dec = nodeid
			scount = scount + 1
		}		
	}
	

	# create phylo4d object
	new.labels = c(tr$tip.label,tr$node.label)
	if(as.num)
		dataVal = as.numeric(dataVal)
	tmpdf = data.frame("simmap_state"=dataVal, row.names=new.labels[dataNode])
	rettree = phylo4d(tr,all.data=tmpdf,missing.data="OK",rownamesAsLabels=TRUE)
	
	#write.nexus(tr,file="written.tree")
	return(rettree)
}


# helper functions for read.simmap.new (below)
#
match.order <- function(basevec,cmpvec)
{
	neworder = order(basevec)
	neworder[order(basevec)] <- order(cmpvec)
	stopifnot(all(cmpvec[neworder] == basevec))
	return(neworder)		
}


strip<-function(str,left=TRUE,right=TRUE)
{
	if(left)
		str = sub("^[ \t\n]+","",str)
	if(right)
		str = sub("[ \t\n]+$","",str)
	return(str)
}


# Read simmap v1.5 files
# ----------------------
# Notes:
# 1. all mutational (mapping) information is contained with a comment block (e.g., [&map={0,0.144863,1,0.453803,0}] ),
# 2. within this block the mutational information is contained within curly braces, { }, preceded by a map command, &map= { },
# 3. the mutational information is from the descendant node to the ancestor (i.e., tips to root assignment),
# 4. the information is STATE, LENGTH, STATE,... excluding the last length, e.g., two changes (0=>1=>0) might look like this, [&map={0,0.144863,1,0.453803,0}], with state 0 having a length of 0.144863, followed by state 1 with a length of 0.453803, with the final state 0 having a length of BRLEN - (0.144863 + 0.453803); this approach helps reduce the size of the file by not including redundant information,
# 5. if a branch has no changes then it will look like this, [&map={STATE}]LENGTH
#
# @param specialpatt indicates which variables should be treated in the SIMMAP way
#		 any variables found which are not in specialpatt are treated as containing 
#		 vector data.  By default, all variable are considered to be in the special
#		 format.
#
read.simmap.new <- function(file="",text=NULL, specialpatt=character(0), add.root=TRUE)
{
	
	str.has <- function(patt,token,lowercase=T){
		len = length(grep(sprintf("%s",patt),tolower(token)))
		if(len == 1)
			return(TRUE)
		return(FALSE)
	}
	
	if(is.null(text))
	{
		if(file.exists(file)){
			text = paste(readLines(file),collapse="")
		}else{
			stop("Could not find file ",file)
		}
	}

	# clear whitespace
	txt = gsub("\\s","",text)
	
	# add semicolon to end
	if(substring(txt,nchar(txt))!=";")
		txt = paste(txt,";",sep="")
	
	# check to see if it is actually a SIMMAP v1.5 formatted
	if(!is.simmap(text=txt,vers=1.5))
		stop("'text' does not seem to contain any simmap-formatted trees")
	
	
	# Extract comments from the tree string (which should contain the mapped data)
	comments.pos = gregexpr("\\[&.*?\\]",txt)[[1]]
	comments.len = attr(comments.pos,"match.length")
	comments = character(length(comments.pos))
	for(ii in seq(length(comments)))
	{
		comments[ii] = substr(txt,(comments.pos[ii]+2), ((comments.pos[ii] + comments.len[ii] - 2)))
	}
	
	length(comments)
	
	
	####
	# Process Trees
	# 1.) strip SIMMAP comments from the tree string
	#
	cleaned = gsub("\\[&.*?\\]","",txt)
	textstr = cleaned
	
	# 2.) add root node and internal node names
	count = 1
	while(regexpr("):",textstr)[1] != -1)
	{
		textstr = sub( "):", paste(")Internal",sprintf("%0.7d",count),":",sep=""), textstr)
		count = count + 1
	}
	
	# check if root has a value, if it does not then 
	# the program needs to know so that it can be set
	# when processing the comments (check if anc == root).
	#
	norootval=FALSE
	if(add.root && regexpr(");",textstr)[1] != -1){
		textstr = sub( ");", ")Root:0;" , textstr)
		norootval=TRUE
	}
	
	# 3.) create phylo4d object from tree string where comments have been removed (data is added next)		
	nodenames = get.nodenames(textstr)
	phy = as(read.tree(text=textstr),'phylo4d')
	#
	# End Process Trees
	####
	
	# Sanity check: (leave out root, if it did not come with an explicit state)
	#if(length(comments)!= (length(nodenames) - as.integer(norootval)) )
	#	stop("Node names does not match up with comments (and they should).")
	
	
	####
	# Process Comments:
	#
	# NOTE: Only one branch mapping is allowed at this point (TODO: fix this)
	#
	subnode.names = c("map","simmap_state",specialpatt)
	subnode.patt = sprintf("^(%s)$",paste(subnode.names,collapse="|"))
	subnode.pos = matrix(NA,nrow=0,ncol=2)
	subnode.branch = matrix(NA,nrow=0,ncol=2)
	subnode.data = character(0)
	subnode.commentid = integer(0)
	subnode.subind = integer(0)	

	phy.data = NULL
	for (kk in seq(length(comments)))
	{
		## Tokenize command comment:
		comment.tokens = strsplit(comments[kk],",")[[1]]
		
		# check if there is an assignment (shouldn't there always be?)
		ex.inds = (grepl("=",comment.tokens))
	
		# reconstitute tokens if they were broken up in comment.tokens
		curr = 1
		using = rep(TRUE,length(ex.inds))
		count = 1
		while(count <= length(ex.inds)){
			if(ex.inds[count]){
				curr=count
			} else {
				using[count] = FALSE
				comment.tokens[curr] = paste(comment.tokens[curr],comment.tokens[count],sep=",")
			}
			count = count + 1
		}
		comment.tokens = comment.tokens[using]
		## end Tokenize command comment
		
		# strip beginning / ending whitespace from each token
		# comment.tokens = strip(comment.tokens)
	
		## Process name/value pairs from comment tokens
		## NOTE: branch mapping is treated differently that other attributes
		nnames = character(0)
		vals = character(0)
		
		for(attr.str in comment.tokens)
		{
			# strip whitespace from tokens
			#
			tmp=strsplit(attr.str,"=")[[1]]
			metric.basename = strip(tmp[1])
			metric.value = strip(tmp[2])
			if(str.has("\\{",metric.value) && str.has("\\}",metric.value))
			{
				metric.value = sub("^\\{(.*?)\\}$","\\1",metric.value)
				metric.value = strip(strsplit(metric.value,",")[[1]])
			}
			
			if(!grepl(subnode.patt,metric.basename))
			{
				# process regular values
				# Expand basenames:
				if(length(metric.value) != 1)
					metric.basename = sprintf("%s_%04d",metric.basename,seq(length(metric.value)))
				
				nnames = append(nnames,metric.basename)
				vals = append(vals,metric.value)
				
			} else {
				
				# process map nodes:
				# values format: STATE,LENGTH,STATE,....
				# FROM desc to ancs (so, 1st STATE and LENGTH pair are the values for the current node)
				#
				if(length(metric.value) == 1){
					nnames = append(nnames,metric.basename)
					vals = append(vals,metric.value[1])
				} else {
					
					desc = unname(which(labels(phy)==nodenames[kk]))
					# check if desc is the root node (if it is then the ancestor is node 0 per phylobase's standard, I think).
					if(isRooted(phy)){
						anc = ifelse(rootNode(phy) == desc ,0 ,unname(ancestor(phy,desc)))
					} else {
						anc = unname(ancestor(phy,desc))
					}
					eind = .edge.index(phy,anc,desc)
					elen = unname(edgeLength(phy)[eind])
	
					# if one more length is needed:
					if( length(metric.value) %% 2 != 0 )
					{
						tonow = sum(as.numeric(metric.value[seq(2,length(metric.value),by=2)]))
						metric.value = c(metric.value,(elen-tonow))
					}
	
					
					# Separate states from lengths and make lengths fractions of BRLEN 
					# NOTE: fractions start at ANC(!) not DESC in the phyext format.  This may cause some confusion
					#
					states = rev(metric.value[seq(1,length(metric.value),by=2)])
					lens = rev(as.numeric(metric.value[seq(2,length(metric.value),by=2)]))
					lens = cumsum(lens) / elen
					
					# Assign data from DESC node
					nnames = append(nnames,metric.basename)
					vals = append(vals,tail(states,1)) # last one the DESC node state
					
					# Cache data from subnodes on this branch:
					for (subind in seq(length(states)-1))
					{
						subnode.branch = rbind(subnode.branch,c(anc,desc)) # which branch
						subnode.pos = rbind(subnode.pos,c(lens[subind],lens[subind]))  # which position on the branch
						tmpstate=states[subind]
						names(tmpstate) <- metric.basename
						subnode.data = append(subnode.data,tmpstate)  # TODO: make sure this works well
						subnode.commentid = append(subnode.commentid,kk)
						subnode.subind = append(subnode.subind, subind)
					}
				}
			}
		}
		
		stopifnot(length(nnames) == length(vals))
			
		if(is.null(phy.data))
		{
			phy.data = matrix(NA,nrow=length(nodenames),ncol=length(nnames))
			rownames(phy.data) <- nodenames
			colnames(phy.data) <- nnames
		} else {
	
			newcols = setdiff(nnames,colnames(phy.data))
			if(length(newcols) != 0){
				
				for(cname in newcols)
					phy.data = cbind(phy.data,NA)
				
				colnames(phy.data) <- c(head(colnames(phy.data),-length(newcols)),newcols)
			}
			
			unusedcols = setdiff(colnames(phy.data),nnames)
			if(length(unusedcols) != 0)
			{
				nnames <- append(nnames,unusedcols)
				vals <- append(vals,rep(NA,length(unusedcols)))
			}
		}
		
		phy.data[kk,] = vals[match.order(colnames(phy.data),nnames)]
		
	} # end kk comments loop
	
	# Guess as phy.data types:
	options(warn=-1)  # suppress warnings for this sections
	phy.data = data.frame(apply(phy.data,2,I),stringsAsFactors=FALSE)
	for(jj in seq(ncol(phy.data)))
	{
		exclude = is.na(phy.data[,jj])
		if( all(!is.na(as.numeric(phy.data[(!exclude),jj]))) )
			phy.data[,jj] = as.numeric(phy.data[,jj])
	}
	options(warn=0)
	#
	# End Process Comments
	#####
	

	###
	# Combine processed tree and processed comments:
	if(isRooted(phy)){
		phyd = addData(phy,all.data=phy.data,match.data=TRUE,rownamesAsLabels=TRUE)
	} else {
		phyd = addData(phy,all.data=phy.data,match.data=TRUE,rownamesAsLabels=TRUE,missing.data="OK")
	}
	
	if(length(subnode.data)!=0){
		# add subnode stuff:
		phyd = phyext(phyd)
		usubbranches = unique(subnode.commentid)
		
#		for(ii in seq(length(subnode.data)))
#		{
		for(ii in seq(length(usubbranches)))
		{
			cominds = which(subnode.commentid == usubbranches[ii])
			usubnodes = unique(subnode.subind[cominds])
			for(jj in seq(length(usubnodes)))
			{
				thisinds = cominds[which(subnode.subind[cominds] == usubnodes[jj])]
				tmpdat = subnode.data[thisinds]
				# sanity check:
				check = apply(subnode.pos[thisinds,,drop=FALSE],1,mean)
				stopifnot(all(check == check[1]))
				phyd = addSubNode(phyd,subnode.branch[thisinds[1],1],subnode.branch[thisinds[1],2],subnode.pos[thisinds[1]],tmpdat,pos.is.fraction=TRUE)
			}
		}
	}
	
	return(phyd)
}




# Read trees from a nexus file.  This function is only really necessary for 
# nexus files where trees have simmap formatting.  If they don't, then
# readNexus should be used instead.
#
# note: if type=="all", data is read in from the "characters" block
#		and it's rownames are assumed to match the taxa names.
#
read.nexus.simmap <- function(finput="", text=NULL, vers=NULL, type=c("all", "tree"),...)
{
	
	type <- match.arg(type)

	if (type == "all") {
		returnData <- TRUE
	} else {
		returnData <- FALSE
	}


	outtrees = NULL
	if(!is.null(text)){
		# TODO: check text for newlines and split on them if they exist.
		rawtext=text
	} else {
		if(!file.exists(finput))
			stop("Assuming finput is a file and could not find it")
		
		rawtext = scan(finput,what=character(0),strip.white=T,sep="\n")		
	}

	treesblock = read.nexus.block(txt=rawtext,block="trees",rm.comments=T)
	
	if(length(treesblock)==0)
	{
		warning("No trees in this file...")
		return(FALSE)
	}

	## TODO: split individual strings on ';' character into newlines
	
	treelines = which(tolower(substr(treesblock,1,4))=="tree")
	if(length(treelines)==0)
		return(NULL)

	
	# make sure the the translated table is used if found
	transstart = which(sapply(treesblock,function(i) tolower(i)=="translate",USE.NAMES=F))
	translated = length(transstart)!=0
	transend=integer(0)
	taxatrans=""
	index=integer(0)
	taxname=""
	
	# assume that trees are not translated otherwise:
	# these should be in order numerically.
	if(translated)
	{
		transend = seq(transstart,min(treelines)-1)[min(grep(";",treesblock[seq(transstart,min(treelines)-1)]))]
		taxatrans = treesblock[(transstart+1):transend]
		
		# should ALWAYS be the last token:
		if(any(taxatrans==";"))
			taxatrans = taxatrans[-which(taxatrans==";")]
		
		index = as.integer(sapply(strsplit(taxatrans,"\\s"),function(i) i[1]))  # I'm assuming that they are integers
		taxname = sapply(strsplit(taxatrans,"\\s"),function(i) sub("(,|;)","",i[2]))
	}

	count = 0
	for(linenumb in treelines)
	{
		
		cat("tree number",linenumb,"\n")
		# clearly, this will not work for non-simmap, non-newick trees (or simmap v1.5 strings)
		#tmpstr = tail(strsplit(treesblock[linenumb],"=")[[1]],1)
		# so, trying this instead:
		#tmpstr = sub("^tree.+?=(.+)$","\\1",treesblock[linenumb])
		
		# This might not be necessary:
		if(has.weights(text=treesblock[linenumb])){
			tmpstr = sub("\\[(.*?)\\]","",treesblock[linenumb])
		} else {
			tmpstr = treesblock[linenumb]
		}
		#tmpstr = sub("^tree.+?=(.+)$","\\1",tmpstr)
		#tmpstr = sub("^tree(.+?)=\\s{0,}\\[(.*?)\\](.+)$","\\3",tmpstr)
		#tmpstr = sub("^tree(.+?)=(.*?)(\\(.+)$","\\3",tmpstr)
		roottype = sub("^(TREE|tree)(.+?)=(.*?)(\\(.+)$","\\3",tmpstr)
		roottype = gsub("\\s","",roottype)
		tmpstr = sub("^(TREE|tree)(.+?)=(.*?)(\\(.+)$","\\4",tmpstr)
		
		addroot = !grepl("&U",roottype)
		# check for simmap style
		# remove first comment (should others be removed?
		if(is.simmap(text=tmpstr)){
			trtmp = phyext(unname(read.simmap(text=tmpstr,add.root=addroot)))
		} else if (is.simmap(text=tmpstr,vers=1.5)) {
			trtmp = phyext(unname(read.simmap.new(text=tmpstr,add.root=addroot,...)))
		} else if (is.simmap(text=tmpstr,vers=1.0)){
			trtmp = phyext(unname(read.simmap(text=tmpstr,vers=1.0,add.root=addroot)))
		} else {
			trtmp = phyext(read.tree(text=tmpstr))
		}
		
		
		if(!is(trtmp,'phylo4'))
			trtmp = as(trtmp,'phylo4')
		
		if(!inherits(trtmp,"phylo4")){
			cat("Trouble parsing line for trees:\n",treesblock[linenumb],"\n")
			stop("Wha?")
		}
		
		outtrees = append(outtrees,trtmp)
		count = count + 1
	}
	
	# if the trees have been translated, then
	# add the names back in.
	#
	if(translated)
	{
		for(treeind in seq(length(outtrees)))
		{
			tnames = as.integer(tipLabels(outtrees[[treeind]]))
			if(!any(is.na(tnames)))
			{
				tipLabels(outtrees[[treeind]]) <- taxname[tnames]
			} else {
				warning("Tree translations found, but could not be mapped")
			}
		}
	}
	
	# Find and add data:
	if(returnData && has.block(txt=rawtext,blockname="characters"))  #!# block changed to blockname 7/1/15
	{
		data.part = read.characters2(txt=rawtext,blockname="characters")
		if(length(data.part) > 0)
		{
			for(treeind in seq(length(outtrees)))
			{
				outtrees[[treeind]] <- addData(outtrees[[treeind]],tip.data=data.part,rownamesAsLabels=TRUE)
			}
		}
	}
	
	
	return(outtrees)
}




# expand singleton nodes into bifurcating nodes with one junk node
# this function is mainly for plotting and saving files
# TODO: convert all explicit calls to @'slot' to their accessor counterpart (e.g. tree@edge.length goes to edgeLength(tree)
#
expand.singles <- function(tree,keep.data=FALSE)
{
	# note: tips should be indexed 1...N, where N is the number of tips
	if(!is(tree,"phylo4"))
		stop("tree needs to be of class phylo4")
	
	if(hasSingle(tree))
	{
		has.data = hasData(tree) && keep.data		

		# Need to add labels to trees if any are NA:
		# TODO: make sure labels are unique
		if(any( is.na(labels(tree)) ))
		{
			count = round(runif(1) * 1e6)
			for(ii in which(is.na(labels(tree))))
			{
				labels(tree)[ii] <- sprintf("Internal%07d",count)  # warning, these labels might not be unique
				count = round(runif(1) * 1e6)
			}
		}
		
		# rename 'Singleton' labels
		#labels(tree) = sub("^Singleton(.*)$","Internal\\1",labels(tree))

		tmptable=table(tree@edge[,1])
		snodes = as.integer(names(tmptable)[which(tmptable==1)])
		snodes = snodes[snodes!=0] # 0 is the root node (check on this...)
		
		nold = nrow(tree@edge)
		nnew = length(snodes)
		count = 1
		for(ii in snodes)
		{
			tree@label = append(sprintf("JUNK%0.7d",count),tree@label)
			tree@edge = rbind(tree@edge, c(ii, count))
			tree@edge.length = append(tree@edge.length,0)
			count = count + 1
		}
		
		# rearrange
		#tree@order = "unknown"
		rootind = which(tree@edge[,1] == 0)
		tree@edge[seq(nold),] = tree@edge[seq(nold),] + nnew
		tree@edge[seq(from=nold+1,to=nold+nnew),1] = tree@edge[seq(from=nold+1,to=nold+nnew),1] + nnew
		tree@edge[rootind,1] = 0
		
		# rename
		nnodes = nTips(tree) + nNodes(tree)
		names(tree@label) <- as.character(seq(1,nnodes))
		names(tree@edge.length) <- apply(tree@edge,1,paste,collapse="-")
		if(has.data)
		{
			rownames(tree@data) <- as.integer(rownames(tree@data)) + nnew
		}
	}
	
	if(!keep.data)
		tree = as(tree,"phylo4")
	
	return(tree)
}


# 
# sister function of expand.singles -> converts internal nodes with
# zero-length branches into singletons (in phylo4 format).  Will only work if
# the zero-len branch is connected to a tip
#
collapse.to.singles <- function(tree,by.name=NULL)
{
	if(is(tree,"phylo"))
		tree = as(tree,"phylo4")
	
	if(!is(tree,"phylo4"))
		stop("tree argument needs to be of class phylo4")

	# don't use the root node
	if(any(edgeLength(tree, seq(1,(nNodes(tree)+nTips(tree)))[nodeType(tree)!='root']) == 0))
	{
		# Remove zero-length branches and their tips
		zinds = which(edgeLength(tree)==0)
		torem = edges(tree)[zinds,]
		torem = torem[edges(tree)[zinds,1]!=0,][,2] # don't include root
		zinds = zinds[edges(tree)[zinds,1]!=0] 		# don't include root
		torem = torem[nodeType(tree)[torem]=="tip"]
		
		if(length(torem)!=0)
		{
			# remove excess tips
			rm.count = length(torem)
			tip.count = length(tipLabels(tree))
			total.count = length(labels(tree))
			
			tree@edge <- edges(tree)[-zinds,]  # TODO: add 'edges<-' to phylobase
			tree@edge.length = tree@edge.length[-zinds]
			tree@label = tree@label[-torem]
			
			## begin reindexing nodes
			# tips
			t.oldseq = seq(1, (tip.count))[-torem]
			t.newseq = seq(1,(tip.count-rm.count))
			t.replace.inds = which(tree@edge[,2] %in% t.oldseq)
			stopifnot(length(t.replace.inds) == length(t.oldseq))  # it should be a 1-1 relationship for tips
			tree@edge[,2] = replace(tree@edge[,2],t.replace.inds ,t.newseq)
			
			# internal
			n.oldseq = seq(tip.count+1,total.count)
			n.newseq = seq(tip.count+1-rm.count, total.count-rm.count)
			for(kk in seq(length(n.oldseq)))
			{
				replaceinds = which(tree@edge[,1] == n.oldseq[kk])
				if(length(replaceinds)!=0)
					tree@edge[replaceinds,1] = rep(n.newseq[kk],length(replaceinds))
					
				replaceinds = which(tree@edge[,2] == n.oldseq[kk])
				if(length(replaceinds)!=0)
					tree@edge[replaceinds,2] = rep(n.newseq[kk],length(replaceinds))
			}
			
			
			## rename
			names(tree@label) <- as.character(seq(1,length(labels(tree))))
			names(tree@edge.length) <- apply(tree@edge,1,paste,collapse="-")
		}
	} else {
		warning("No zero-length branches found to be removed")
	}
	return(tree)
}


# collapse singleton nodes using ape<->phylo4 conversion functions:
# TODO: make this less reliant on ape functions
#
collapse.singletons <- function(phy)
{
	rettree = phy
	
	# if singletons exist
	if(hasSingle(rettree))
	{
		if(is(rettree,"phylo4d"))
		{
			tab=table(edges(rettree)[,1])
			snodeid = as.integer(names(tab)[which(tab==1)])
			snodeid = snodeid[snodeid!=0]  # 0 is a dummy node
			
			# not sure why this is used:
			#ancs =  sapply(snodeid,function(i) which(edges(rettree)[,1] == i)) # where the subnode is the ancestor
			#decs =  sapply(snodeid,function(i) which(edges(rettree)[,2] == i)) # where the subnode is the descendant
			#newdata = data.frame(tdata(rettree,"all")[-decs,],row.names=labels(rettree)[-decs])
			newdata = tdata(rettree)[-snodeid,,drop=F]
			#colnames(newdata) <- colnames(tdata(rettree))
			
			# hack it for now:
			rettree = as(rettree,"phylo") # convert to ape
			rettree <- collapse.singles(rettree) # collapse singles
			if(is.rooted(rettree)) {
				rettree = phylo4d(rettree,all.data=newdata,rownamesAsLabels=TRUE) # create new phylo4d object
			} else {
				rettree = phylo4d(rettree,all.data=newdata,rownamesAsLabels=TRUE,missing.data="OK") # 
			}
		} else {
			rettree = as(rettree,"phylo") # convert to ape
			rettree <- collapse.singles(rettree) # collapse singles
			rettree = phylo4(rettree)
		}
	}
	
	return(rettree)
}


# turn subnodes into singletons
# x - phylobase tree
# rm.ex.data - "remove 'extra' data":
#  should data frames unique to tdat be removed (TRUE)
#  or should extra columns be added to sdat (FALSE)
#  WARNING: rm.ex.data = TRUE could lead to a loss of data (obviously)
#
collapse.subnodes <- function(x,rm.ex.data = TRUE)
{
	
	if(!inherits(x,"phylo4d_ext") || !hasSubNodes(x))
	{
		warning("No subnodes to collapse")
		return(x)
	}
	
	# Step 1: cache data from x and setup variables to hold info for the new tree
	
	## a.) Handle data:
	tdat = tdata(x)
	sdat = sndata(x)
	col.diffs = setdiff(colnames(tdat),colnames(sdat))
	if(length(col.diffs) > 0)
	{
		if(rm.ex.data){
			tdat = tdat[,-which(colnames(tdat) %in% col.diffs)]
		} else {
			tmpnames = colnames(sdat)
			sdat = cbind(sdat,matrix(NA,nrow=nrow(sdat),ncol=length(col.diffs)))
			colnames(sdat) <- c(tmpnames,col.diffs)
		}
	}
	newdat = tdat
	
	# TODO: might need to reorder columns of tdat and sdat to match up....
	
	## b.) Subnode positions
	posmeans = snposition(x)
	if(!is.matrix(posmeans))
		posmeans = matrix(posmeans,nrow=1,ncol=2)
	posmeans = apply(posmeans,1,mean)
	
	## c.) Handle edges
	el = edges(x)
	newel = el
	elens = edgeLength(x)
	newelens = elens
	snedges.inds = getSubNodeEdgeInds(x)
	u.sninds = unique(snedges.inds)
	
	## d.) Handle node ids and labels
	nids = nodeId(x) # node ids
	lastnodeid = max(nids) # base from which to create new node ids
	nodelabs = nodeLabels(x)
	
	# Step 2.) Systematically convert subnodes into new sets of edges (with singletons)
	for(ii in seq(length(u.sninds)))
	{
		# get edge and subnode indices:
		eid = u.sninds[ii]
		sninds = which(snedges.inds == eid)
		curr.anc = el[eid,1]
		curr.desc = el[eid,2]	
		elen = elens[eid]
		newnodeids = lastnodeid + seq(length(sninds))
		lastnodeid = max(newnodeids)
		tmp = sprintf("Singleton%07d",newnodeids)
		names(tmp) <- newnodeids
	
		# Create new nodes and edges	
		# positions FROM anc TO desc:
		newlens = sort(posmeans[sninds] * elen) # absolute positions along the branch
		newlens = unname(c(newlens[1],diff(newlens),elen - max(newlens))) # convert to spacing btw subnodes
		newedges = matrix(NA,ncol=2,nrow=length(newlens))
		newedges[,1] <- unname(c(curr.anc,newnodeids))
		newedges[,2] <- unname(c(newnodeids,curr.desc))
	
		tmpdat = sdat[sninds,,drop=F]
		rownames(tmpdat) <- c(tmp)
		newdat = rbind(newdat,tmpdat)
		
		# Append data:	
		newel = rbind(newel,newedges)
		newelens = c(newelens,newlens)
	
		nodelabs = c(nodelabs,tmp)
	}
	
	# Step 3.) Create new tree with new edgelist,labels, and data
	
	# remove old indices:
	newel = newel[-u.sninds,]
	newelens = newelens[-u.sninds]
	
	# create new tree:
	newphy = phylo4(newel,newelens,tipLabels(x),nodelabs)
	# add data:
	newphy = addData(newphy,all.data=newdat)
	
	return(newphy)
}



# subroutine of write.simmap:
newlabels.v1x <- function(x,usestate,splitchar,write.nas=TRUE) 
{
	
	# Note: this version of simmap only handle one state
	if(is.null(usestate))
		usestate = colnames(tdata(x))
			
	usestate = usestate[1]
	tdat = tdata(x)[,usestate,drop=F]
	snodes.present = hasSubNodes(x)
	sdat = NULL; snedges.inds = NULL
	
	if(snodes.present){
		sdat = sndata(x)[,usestate,drop=F]
		snedges.inds = apply(snbranch(x),1,function(i) .edge.index(x,i[1],i[2]))
	}
		
	es = edges(x)[,2]
	if(!isRooted(x))
		es = union(es,edges(x)[,1])  # added 3/8 to accomodate unrooted trees:
	
	elens = edgeLength(x); elens[is.na(elens)] <- 0.0
	newlenlab=character(length(es))
	
	for(ii in seq(length(es)))
	{
		nodeid=es[ii]
		if(!(ii %in% snedges.inds))
		{
			# added 3/8 to accomodate unrooted trees:
			if(is.na(elens[ii])){
				newlenlab[ii] = ""
			} else if(!is.na(tdat[nodeid,1]) || write.nas){
				newlenlab[ii] = paste("{",tdat[nodeid,1] ,",", elens[ii],"}",sep="")
			} else {
				newlenlab[ii] = as.character(elens[ii])
			}
		} else {
			
			snind = which(snedges.inds == ii)
			snlens = snposition(x)[snind,] * elens[ii]
			snstates = sdat[snind,1]
			if(!is.matrix(snlens))
				snlens = matrix(snlens,nrow=1)
			
			# reorder
			snpos = apply(snlens,1,mean)
			neword = order(snpos,decreasing=T)
			snpos = snpos[neword]
			snstates = snstates[neword]
			
			newlenlab[ii] = paste("{",tdat[nodeid,1],",",(elens[ii]-max(snpos)),sep="")
			
			if(length(snpos)>1)
				for(jj in seq(length(snpos)-1))
					newlenlab[ii] = paste(newlenlab[ii],splitchar,snstates[jj],",",(snpos[jj]-snpos[(jj+1)]),sep="")
			
			newlenlab[ii] = paste(newlenlab[ii],splitchar,tail(snstates,1),",",tail(snpos,1),"}",sep="")
			
		}
	}
	
	oldlabs = labels(x)[es]
	names(newlenlab) <- oldlabs
	oldlabs[which(is.na(oldlabs))] <- ""
	newlab = paste(oldlabs,":", newlenlab ,sep="")
	newlab[which(newlenlab=="")] <- ""  # remove any <NA> data

	return(newlab)
}


# subroutine of write.simmap:
newlabels.v15 <- function(x,usestate,splitchar)
{
	
	if(is.null(usestate))
		usestate = colnames(tdata(x))
	else {
		if(is.numeric(usestate))
		{
			usestate = colnames(tdata(x))[usestate]
		}
	}
	
	# get tip and subnode data:
	tdat = tdata(x)[,usestate,drop=F]
	snodes.present = hasSubNodes(x)
	sdat = NULL; snedges.inds = NULL
	
	if(snodes.present){
		sdat = sndata(x)[,usestate,drop=F]
		snedges.inds = apply(snbranch(x),1,function(i) .edge.index(x,i[1],i[2]))
	}
	
	es = edges(x)[,2]
	if(!isRooted(x))
		es = union(es,edges(x)[,1])  # added 3/8 to accomodate unrooted trees:

	elens = edgeLength(x); elens[is.na(elens)] <- 0.0
	newlenlab=character(length(es)) # new "length" labels for each node
	
	# Setup SIMMAP comments to hold data values
	for(ii in seq(length(es)))
	{
		nodeid=es[ii]
		
		if(!(ii %in% snedges.inds))
		{ # if there is no subnode on this branch:
			if(is.na(elens[ii])) {
				newlenlab[ii] = ""  # added to deal with unrooted trees
			} else {		
				topper = "[&"
				backer = paste("]",elens[ii],sep="")
				middle = ""
				for(colind in seq(length(usestate)))
					if(!is.na(tdat[nodeid,usestate[colind]]))
						middle = paste(middle,sprintf("%s={%s}%s",usestate[colind],tdat[nodeid,usestate[colind]],ifelse(colind==length(usestate),"",",")),sep="")
			
				if(middle==""){
					newlenlab[ii] = as.character(elens[ii])
				} else {
					newlenlab[ii] = paste(topper,middle,backer,sep="")
				}
			}
		} else {
			
			topper = "[&"
			backer = paste("]",elens[ii],sep="")
			middle = ""

			# retrieve subnode data:
			snind = which(snedges.inds == ii)
			snlens = snposition(x)[snind,] * elens[ii]
			
			for(colind in seq(length(usestate)))
			{
				tmp = ""
				snstates = sdat[snind,usestate[colind]]
				if(!is.matrix(snlens))
					snlens = matrix(snlens,nrow=1)
				
				snpos = apply(snlens,1,mean)
				neword = order(snpos,decreasing=T)
				
				snpos = snpos[neword]
				snstates = snstates[neword]
				
				# first, write the DESC state:
				tmp = paste(sprintf("%s={%s", usestate[colind],tdat[nodeid,usestate[colind]]),(elens[ii]-max(snpos)),sep=",")
				#newlenlab[ii] = paste(sprintf("[&%s={%s",usestate[colind],tdat[nodeid,usestate[colind]]),(elens[ii]-max(snpos)),sep=",")
				
				# intermediates, from DESC -> ANC
				if(length(snpos)>1)
					for(jj in seq(length(snpos)-1))
						tmp = paste(tmp,splitchar,snstates[jj],",",(snpos[jj]-snpos[(jj+1)]),sep="")
						#newlenlab[ii] = paste(newlenlab[ii],splitchar,snstates[jj],",",(snpos[jj]-snpos[(jj+1)]),sep="")
				
				# last subnode
				#newlenlab[ii] = paste(newlenlab[ii],splitchar,tail(snstates,1),ifelse(colind==length(usestate),"",","),sep="")
				tmp = paste(tmp,splitchar,tail(snstates,1),"}",sep="")
				middle = paste(middle,tmp,ifelse(colind==length(usestate),"",","),sep="")
			}
			
			if(middle==""){
				newlenlab[ii] = as.character(elens[ii])
			} else {
				newlenlab[ii] = paste(topper,middle,backer,sep="")
			}
			
		}
	}
	# End setup SIMMAP comments
	
	
	# Substitute SIMMAP comments into tree
	oldlabs = labels(x)[es]
	names(newlenlab) <- oldlabs
	oldlabs[which(is.na(oldlabs))] <- ""
	#newlab = paste(oldlabs,":{", newlenlab ,"}",sep="")
	newlab = paste(oldlabs,newlenlab,sep=":")
	newlab[which(newlenlab=="")] <- ""  # remove any <NA> data
	
	# DEBUG: at this point, oldlabs and newlabs look like they have been lined up correctly
	
	return(newlab)		
}


# write modified newick file:
# -This is kind of a hack: it basically writes subnodes and lengths to a new character label and then
#  creates a new 'phylo' class using only those labels (without edge lengths).  Then it uses
#  APEs algorithm to write those labels to a newick string.  
#
# -If any of the SIMMAP datatypes are is.na, then they are left out!  TODO:  Look into changing this in the future
#
write.simmap <- function(x,usestate=NULL,file="",vers=1.1,...)
{
	splitchar = ifelse(vers==1.0,";",":")
	
	if(is.null(usestate))
		usestate = colnames(tdata(x))
	
	#if(hasSubNodes(x))
	if(hasData(x) && hasDataColumn(x,usestate))
	{
		newlab=NULL
		if(vers == 1.5){
			newlab <- newlabels.v15(x,usestate,",")
		} else {
			newlab <- newlabels.v1x(x,usestate,splitchar)
		}
		
		# reorder:
		es = edges(x)[,2]
		if(!isRooted(x))
			es = union(es,edges(x)[,1])  # added 3/8 to accomodate unrooted trees:
	
		altorder = order(es)
		newlab = newlab[altorder]
		ntype = nodeType(x)[es[altorder]]
		phy = as(x,'phylo')
		newedges = edges(x)
		newphy = list(edge=newedges,
						tip.label=newlab[which(ntype=="tip")],
						node.label=newlab[which(ntype!="tip")],
						Nnode=length(which(ntype!="tip"))
					)
		
		class(newphy) <- "phylo"
		
		################################################################
		# borrowed code from APE (write.tree.R):
		#
		output.tree.names=FALSE
		append = FALSE
		digits = 10
		brl <- !is.null(newphy$edge.length)
		nodelab <- !is.null(newphy$node.label)
		f.d <- paste("%.", digits, "g", sep = "")
		
		## Helper functions
		cp <- function(s) STRING <<- paste(STRING, s, sep = "")

		add.internal <- function(i) {
		    cp("(")
		    br <- which(newphy$edge[, 1] == i)
		    for (j in br) {
		        desc <- newphy$edge[j, 2]
		        if (desc > n) add.internal(desc)
		        else add.terminal(j)
		        if (j != br[length(br)])  cp(",")
		    }
		    cp(")")
		    if (nodelab) cp(newphy$node.label[i - n])
		    if (brl) {
		        cp(":")
		        cp(sprintf(f.d, newphy$edge.length[which(newphy$edge[, 2] == i)]))
		    }
		}
		
		add.terminal <- function(i) {
		    cp(newphy$tip.label[newphy$edge[i, 2]])
		    if (brl) {
		        cp(":")
		        cp(sprintf(f.d, newphy$edge.length[i]))
		    }
		}
		## End helper functions

		n <- length(newphy$tip.label)
		STRING <- if (output.tree.names) paste("ERROR!", "(", sep = "") else "("
		br <- which(newphy$edge[, 1] == n + 1)
		for (j in br) {
		    desc <- newphy$edge[j, 2]
		    if (desc > n) add.internal(desc)
		    else add.terminal(j)
		    if (j != br[length(br)]) cp(",")
		}
		if (is.null(newphy$root.edge)) {
		    cp(")")
		    if (nodelab) cp(newphy$node.label[1])
		    cp(";")
		} else {
		    cp(")")
		    if (nodelab) cp(newphy$node.label[1])
		    cp(":")
		    cp(sprintf(f.d, newphy$root.edge))
		    cp(";")
		}
		if (file == "") return(STRING)
	    cat(STRING, file = file, append = append, sep = "\n")
	    
		################################################################		
	} else {
		phy = as(x,'phylo')
		write.tree(phy,file,...)
	}
}

	
# write simmap version 1.1 strings
#
write.simmap.old <- function(x,usestate=NULL,file="",...)
{
	write.simmap(x,usestate,file=file,vers=1.1,...)
}

# write simmap version 1.5 strings
#
write.simmap.new <- function(x,usestate=NULL,file="",...)
{
	write.simmap(x,usestate,file=file,vers=1.5,...)
}



# This is the main write function for phylo4d_ext
# Mainly ripped from APE write.nexus function
#
# @param obj a list or single phylo4d_ext objects
# @param file the file to write to
# @param translate should the tree names be moved to a special section
# @param vers which SIMMAP version should be used (can be vers=c(1.1, 1.0, 1.5))
# @param usestates if NULL, then all data columns are used
# @param dtype for compatibility with older simmap versions
#  	   anything in 'usestate' will be written into simmap comments
#	   anything left over that is compatible with dtype will be written as 
#	   a character.
#				
#
write.nexus.simmap <- function(obj, file = "", translate = TRUE, dtype=c(noneData(),contData(),discData()), ...)
{
	
	if(!is.list(obj))
	{
		if(!is(obj,"phylo4d_ext"))
			stop("This function is only made to work with phylo4d_ext objects. Use write.nexus() instead.")
		
		obj <- list(obj)
	} else {
		if(!all(sapply(obj,is,'phylo4d_ext')))
			stop("This function is only made to work with phylo4d_ext objects or lists of such. Use write.nexus() instead.")
	}
	ntree <- length(obj)

	dtype = match.arg(dtype)
	dat = NULL
	if(hasData(obj[[1]]))
		dat = tdata(obj[[1]],"tip")
	
    cat("#NEXUS\n", file = file)
    cat(paste("[R-package APE, ", date(), "]\n\n", sep = ""),file = file, append = TRUE)
	
    #N <- length(obj[[1]]$tip.label)
	N <- length(tipLabels(obj[[1]]))

        cat("BEGIN TAXA;\n", file = file, append = TRUE)
        cat(paste("\tDIMENSIONS NTAX = ", N, ";\n", sep = ""),file = file, append = TRUE)
        cat("\tTAXLABELS\n", file = file, append = TRUE)
        cat(paste("\t\t", tipLabels(obj[[1]]), sep = ""),sep = "\n", file = file, append = TRUE)
        cat("\t;\n", file = file, append = TRUE)
        cat("END;\n", file = file, append = TRUE)
   	
    cat("BEGIN TREES;\n", file = file, append = TRUE)
    if (translate) {
        ## We take arbitrarily the labels of the first tree, and
        ## translate them as "1", "2", "3", ...
        cat("\tTRANSLATE\n", file = file, append = TRUE)
        tmp <- checkLabel(tipLabels(obj[[1]]))
        X <- paste("\t\t", 1:N, "\t", tmp, ",", sep = "")
        ## We remove the last comma:
        X[length(X)] <- gsub(",", "", X[length(X)])
        cat(X, file = file, append = TRUE, sep = "\n")
        cat("\t;\n", file = file, append = TRUE)
        token <- as.character(1:N)
        names(token) <- tipLabels(obj[[1]])
        tipLabels(obj[[1]]) <- token
        if (ntree > 1) {
            for (i in 2:ntree)
                tipLabels(obj[[i]]) <- token[tipLabels(obj[[i]])]
            class(obj) <- NULL
        }
    } else {
        for (i in 1:ntree)
          tipLabels(obj[[i]]) <- checkLabel(tipLabels(obj[[i]]))
    }
    for (i in 1:ntree) {
	    tprefix = "\tTREE * UNTITLED"
	    #weights
	    if(hasWeight(obj[[i]])){
		    tprefix = sprintf("%s [&W %f]",tprefix,weight(obj[[i]]))
	    }
	    #is rooted
        if (isRooted(obj[[i]])){
        	#cat("\tTREE * UNTITLED = [&R] ", file = file, append = TRUE)
        	tprefix = paste(tprefix," = [&R] ",sep="")
    	}else{
	    	#cat("\tTREE * UNTITLED = [&U] ", file = file, append = TRUE)
	    	tprefix = paste(tprefix," = [&U] ",sep="")
		}
		cat(tprefix, file = file, append = TRUE)
		
		# NOTE (2/13) - write.simmap does this step now, and better:
		#if(is.null(usestates))
		#	usestates = colnames(tdata(obj[[i]]))
		
        cat(write.simmap(obj[[i]], file="",...),"\n", sep = "", file = file, append = TRUE)
    }
    cat("END;\n", file = file, append = TRUE)

	
	# begin characters block write:
	if( dtype != noneData() && !is.null(dat) )
	{
		datnames = colnames(dat)

		treeargs <- list(...)
		if( "usestate" %in% names(treeargs) ){
			
			exclude = integer(0)
			if(!is.character(treeargs$usestate)) {
				exclude = as.integer(treeargs$usestate)
			} else {
				exclude = which(datnames == treeargs$usestate)
			}
			
			if(length(exclude) > 0)
				dat = dat[,-exclude,drop=FALSE]
		}
		
		dat = dat[,which(guess.datatype(dat) == dtype),drop=FALSE]
		
		if(ncol(dat) != 0)
		{
			cat( write.characters2(dat,dtype=dtype), sep = "\n", file= file, append=TRUE)
		} else {
			warning("Could not find any suitable data to write.")
		}
	}
 	# end characters block write

	
}








#------------------------------------------
# Modifying phylo4d-extension objects
# 
#------------------------------------------

# Internal:
# NOTE: labels are case-sensitive
# NOTE: this function needs to deal with NAs
.edge.index <- function(tree,anc,dec)
{
	if(!is(tree,'phylo4'))
		stop("Tree needs to inherit from class phylo4")
	
	# convert from characters to indices
	if(is.character(c(anc,dec))){
		
		if(is.na(anc)){
			anc = which(is.na(labels(tree)))
		} else {
			anc = which(labels(tree) == anc)
		}
		
		if(is.na(dec)){
			dec = which(is.na(labels(tree)))
		} else {
			dec = which(labels(tree) == dec)
		}
		
	}
			
	if(is.na(anc) || is.na(dec) || is.null(anc) || is.null(dec))
		stop("Must specify valid ancestor and decendent: ",anc,"-",dec)
	
	#eind = which(edges(tree)[,1] == anc & edges(tree)[,2] == dec)
	eind = which(edges(tree)[,1] %in% anc & edges(tree)[,2] %in% dec)

	if(length(eind) != 1){
		warning("No connection between ",anc," and ",dec,"\n")
		return(NA)
	}
	
	return(eind)
}


nSubNodes <- function(x)
{
	return (length(x@subnode.id))
}

setMethod("hasSubNodes", signature(x="list"),
  function(x) {
	return(sapply(x,hasSubNodes))
})


setMethod("hasSubNodes", signature(x="phylo4d_ext"),
  function(x) {
	return(nSubNodes(x)!=0)
})


setMethod("hasSubNodes", signature(x="phylo4"),
  function(x) {
	return(FALSE)
})

setMethod("hasSubNodes", signature(x="phylo"),
  function(x) {
	return(FALSE)
})

setMethod("hasData", signature(x="phylo"),
  function(x) {
	return(FALSE)
})

setMethod("hasData", signature(x="phylo4"),
  function(x) {
	return(FALSE)
})

setMethod("hasData", signature(x="phylo4d"),
  function(x) {
	return(ncol(tdata(x))!=0)
})

setMethod("hasData", signature(x="list"),
	function(x) {
		return(sapply(x,hasData))
})

setMethod("hasDataColumn", signature("phylo",index="ANY"),
	function(x,index){
	return(FALSE)
})

setMethod("hasDataColumn", signature("phylo4",index="ANY"),
	function(x,index){
	return(FALSE)
})

setMethod("hasDataColumn", signature("phylo4d",index="numeric"),
	function(x,index){
	retval = FALSE
	if( hasData(x) && length(index) != 0 && all(index <= ncol(tdata(x))) )
	{
		retval=TRUE
	}
	return(retval)
})

setMethod("hasDataColumn", signature("phylo4d",index="character"),
	function(x,index){

	uinds = unique(index)
	ind = which(colnames(tdata(x)) %in% uinds)
	
	if(length(ind) != length(uinds))
	{
		return(FALSE)
	} else {
		return(hasDataColumn(x,ind))
	}
})



getSubNodeData <- function(x,colname)
{
	if(missing(colname))
		return(x@subnode.data)
	
	return(x@subnode.data[colname])
}

getSubNodePosish <- function(x)
{
	return(x@subnode.pos)
}


getSubNodeEdgeInds <- function(x)
{
	edgeinds = integer(nSubNodes(x))
	if(length(edgeinds)!=0)
	{
		for(ii in seq(nrow(x@subnode.branch)))
			edgeinds[ii] = .edge.index(x,x@subnode.branch[ii,1],x@subnode.branch[ii,2])
	}
	return (edgeinds)
}

# return empty data.frame styled like
# @data slot
getEmptyDataFrame <- function(x,...)
{
	tmpdf = data.frame(x@data[0,],...)
	colnames(tmpdf) <- colnames(tdata(x))
	return(tmpdf)
}


# TODO: overload tdata here
# 
addSubNode <- function(x,anc,dec,position,dataf,pos.is.fraction=FALSE)
{
	
	if(!is(x,'phylo4d_ext')){
		warning("Converting x to an extended phylo4d object")
		x = phyext(x)
	}
	
	if(missing(dataf))
		stop("dataf needs to contain something")

	eind = .edge.index(x,anc,dec)
	if(is.na(eind))
		stop("Failure to find edge from ",anc, " to ",dec,"\n")
	
	elen = edgeLength(x)[eind]
	if(elen == 0)
		stop("Cannot place subnode on a zero length branch")
	
	if(position > ifelse(pos.is.fraction,1,elen))
		stop("Position: ",position,", is greater than allowed value of ",ifelse(pos.is.fraction,1,elen),"\n")
	
	# TODO: check for overlapping subnodes
	# TODO: check to see if every col in data.frame has 
	#		an associated col in tdata() AND that col is  
	#		defined for internal and tip nodes

	# construct data frame:
	newdf = getEmptyDataFrame(x)
	if(is.data.frame(dataf))
	{
		if( all(names(dataf) == names(newdf)) ){
			newdf = rbind(newdf, dataf)
		} else {
			# only use cols which exist in tdata(x)
			colinds = which(names(dataf) %in% names(newdf)) 
			if( length(colinds) == 0 )
				stop("Could not find any common columns between dataf and tdata(x)")
			
			newdf = merge(newdf,dataf[,colinds,drop=F],all=T)
		}
		
	} else {
		


		newdf = getEmptyDataFrame(x,stringsAsFactors=FALSE)  # ??

		# special case (if lengths are the same, then assume that they are in order):
		if( length(dataf) == ncol(newdf) && is.null(names(dataf)) )
			names(dataf) <- names(newdf)

		newdf[1,] <- rep(NA,ncol(newdf))
		# try to match up names
		ndf = names(dataf)
		count = 1
		
		for(nm in ndf){
			nmind = which(names(newdf)==nm)
			if(!is.null(nmind) && length(nmind) != 0)
			{
				if(is.factor(newdf[,nmind])  && !(dataf[count] %in% levels(newdf[,nmind])) )
				{
					# need to add more levels if this column is a factor
					newlevs = c(levels(newdf[,nmind]),unname(dataf[count]))
					levels(newdf[,nmind]) <- newlevs
					levels(tdata(x)[,nmind]) <- newlevs	# TODO: TEST THIS!
				}
				newdf[1,nmind] = dataf[count]
			}
			count = count + 1
		}

		
	}
	
	if(!pos.is.fraction) 
		position = position / elen
	
	x@subnode.id = append(x@subnode.id, as.integer(nTips(x) + nNodes(x) + nSubNodes(x) + 1))
	x@subnode.data = rbind(x@subnode.data, newdf)
	x@subnode.branch = rbind(x@subnode.branch, edges(x)[eind,])
	x@subnode.pos = rbind(x@subnode.pos, rep(position,2))
	
	return (x)
}


#
showSubNodes <- function(x)
{
	if(hasSubNodes(x))
	{
		charlen = 80
		brchar = "-"
		terminalchar = "-"
		regchar="0"
		overlapchar = "*"
		# use them all by default:
		
		einds = apply(x@subnode.branch,1,function(i) .edge.index(x,i[1],i[2]))
		
		for(eind in unique(einds))
		{
			snid = which(einds == eind)
			anc = x@subnode.branch[snid,1]
			dec = x@subnode.branch[snid,2]
			
			# setup output string
			tmpstr = rep(brchar,charlen)
			tmpstr[1] = terminalchar
			tmpstr[charlen] = terminalchar
			
			# get relative positions of nodes:
			elen = edgeLength(x)[eind]
			breaksize = elen / charlen
			snpos = x@subnode.pos[snid,] * elen 
			if(!is.matrix(snpos)) 
				snpos = matrix(snpos,nrow=1)
			
			snmeans = apply(snpos,1,mean)
			
			for(xx in seq(length(snid)))
			{
				nbreaks = max(1, floor(diff(snpos[xx,]) / breaksize) )
				from = floor(snpos[xx,] / breaksize)
				tmpstr[seq(from,length.out=nbreaks)][tmpstr[seq(from,length.out=nbreaks)]==regchar] = overlapchar
				tmpstr[seq(from,length.out=nbreaks)][tmpstr[seq(from,length.out=nbreaks)]==brchar] = regchar
				#tmpstr[seq(from,length.out=nbreaks)] = rep(regchar,nbreaks)
			}
			
			# collapse and print:
			tmpstr = paste(tmpstr,collapse="")
			if(length(snid)==1){
				names(tmpstr) <- sprintf("Subnode at ~%0.2f on branch: '%s' to '%s' (brlen=%0.2f)",snpos[1]+diff(snpos[1,])/2,labels(x)[anc],labels(x)[dec],elen)
			} else {
				names(tmpstr) <- sprintf("%d Subnodes on branch: %d to %d (bren=%0.2f)",length(snid),anc[1],dec[1],elen)
			}
			print(tmpstr)
			cat("\n")
		}
		print("0 indicates a subnode; * indicates 2+ subnodes overlapping. Positions are relative.")
	}
}



#-----------------------------
# Set Generics
#-----------------------------

# show
#!#setMethod("show","phylo4d_ext", function(object){ printphylo4(object); showSubNodes(object)}) # printphylo4 changed to print 07.27.15 to adapt to new phylobase package
setMethod("show","phylo4d_ext", function(object){ print(object); showSubNodes(object)})

setMethod("snid", signature(x="phylo4d_ext"),
  function(x) {
	return(x@subnode.id)
})

setMethod("snid", signature(x="list"),
	function(x) {
		return(x[[1]]@subnode.id)
})

setMethod("sndata", signature(x="phylo4d_ext"),
  function(x) {
	return(x@subnode.data)
})

setMethod("sndata", signature(x="list"),
  function(x) {
	return(x[[1]]@subnode.data)
})


# this adds data
# TODO: Change 'adds' behavior to 'replaces'
setReplaceMethod("sndata", signature(x="phylo4d_ext"),
  function(x,datnames=NULL,value) {
	
	if(!is.data.frame(value) && (length(value) != nSubNodes(x)))
	{
	  warning("Can only add vectors of data if they are the same length as the number of subnodes")
	  return(x)
	}
	if(!is.data.frame(value)){
		value = data.frame(value)
	}
	if(!is.null(datnames))
	{
		if(is.character(datnames) && length(datnames)==ncol(value))
		{
			names(value) <- datnames
		} else {
			warning("Not using specified names")
		}
	}
	x@subnode.data = cbind(x@subnode.data,value)
	return(x)
})


setReplaceMethod("sndata",signature(x="list"),
	function(x,datnames=NULL,value) {
		for(ii in seq(length(x)))
			sndata(x[[ii]],datnames=datnames) <- value
			
	return(x)
})


# begin rmdata:
# 'rmdata' method removes data columns from the 'data' and 'subnode.data' slots
#

setMethod("rmdata", signature(x="phylo4d_ext",index="numeric",subindex="numeric"),
  function(x,index,subindex) {
	
	if(length(index)>0 && abs(index) <= ncol(tdata(x)) && index != 0)
	{
		x@data = x@data[,-index,drop=F]
	} else {
		warning("The data column to be removed could not be found in tdata(x)")
	}

	if( hasSubNodes(x) && length(subindex) >0 && abs(subindex) <= ncol(sndata(x)) && subindex != 0)
	{
		x@subnode.data = x@subnode.data[,-index,drop=F]
	}
	
	return(x)
})
	

setMethod("rmdata", signature(x="phylo4d_ext",index="numeric",subindex="missing"),
  function(x,index) {
	return( rmdata(x,index=index,subindex=index) )
})

setMethod("rmdata", signature(x="phylo4d_ext",index="character",subindex="missing"),
  function(x,index) {
	ind = which(colnames(tdata(x)) %in% index)
	if(hasSubNodes(x)){
		subind = which(colnames(sndata(x)) %in% index)
	} else {
		subind = numeric(0)
	}
	return( rmdata(x,index=ind,subindex=subind) )
})


setMethod("rmdata", signature(x="phylo4d",index="numeric",subindex="missing"),
  function(x,index) {
	  if(length(index)>0 && abs(index) <= ncol(tdata(x)) && index != 0)
		x@data = x@data[,-index,drop=F]
	
	return(x)
})

setMethod("rmdata", signature(x="phylo4d",index="character",subindex="missing"),
  function(x,index) {
	inds = which(names(tdata(x)) %in% index)
	return(rmdata(x,inds))
})


setMethod("rmdata", signature(x="list",index="ANY",subindex="missing"),
	function(x,index) {
		x = sapply(x,rmdata,index)
	return(x)
})
# end rmdata


setMethod("snposition", signature(x="phylo4d_ext"),
  function(x) {
	return(x@subnode.pos)
})

setMethod("snposition", signature(x="list"),
  function(x) {
	return(x[[1]]@subnode.pos)
})

setMethod("snbranch", signature(x="phylo4d_ext"),
  function(x) {
	return(x@subnode.branch)
})

setMethod("snbranch", signature(x="list"),
  function(x) {
	return(x[[1]]@subnode.branch)
})


#---------------
## WEIGHT:
#
#---------------
setMethod("weight", signature(x="phylo4d_ext"),
  function(x) {
	return(x@weight)
})

setMethod("weight", signature(x="list"),
	function(x) {
		if(hasWeight(x))
			return( sapply(x,weight) )
		
		return(numeric(0))
})

setReplaceMethod("weight", signature(x="phylo4d_ext"),
  function(x,value) {
	x@weight = value
	return(x)
})

setReplaceMethod("weight",signature(x="list"),
	function(x,value) {
	
	if(length(x) != length(value))
		stop("Replacement values need to be same length as the list")
	
	for(ii in seq(length(x)))
		weight(x[[ii]]) <- value[ii]
	
	return(x)
})

setMethod("hasWeight",signature(x="phylo4d_ext"),
	function(x,strict=TRUE){
		return( length(x@weight)!=0 )
})


setMethod("hasWeight",signature(x="list"),
	function(x,strict=TRUE){
		retbool=ifelse(strict,TRUE,FALSE)
		for(ii in seq(length(x))){
			tmpbool = hasWeight(x[[ii]])
			if(strict && !tmpbool)
				return(FALSE)
			
			if(!strict && tmpbool)
				return(TRUE)
			
		}
		return(retbool)
})

# 
# setMethod("addData", signature(x="phylo4d_ext"),
# 	function(x,...,snode.data=NULL) {
# 		
# 		oldcols = names(tdata(x))
# 		# Add data the normal way:
# 		x = getMethod("addData","phylo4d")(x,...)
# 		
# 		newcols = setdiff(names(tdata(x)),names(sndata(x)))
# 		supercols = setdiff(oldcols,names(tdata(x)))
# 		
# 		cat("newcols: ", newcols,"\n")
# 		
# 		# Add data to subedges:
# 		if(is.null(snode.data))
# 		{
# 			if(length(newcols)!=0)
# 			{
# 				newdat = data.frame(matrix(NA,nrow=nSubNodes(x),ncol=length(newcols)))
# 				names(newdat) <- newcols
# 				x@subnode.data = cbind(x@subnode.data,newdat)
# 			}
# 			
# 			# Remove superfluous columns
# 			if(length(supercols)!=0)
# 			{
# 				for(scol in supercols)
# 					x = rmdata(x,scol)
# 			}
# 		}else{
# 			# add this data:
# 			if(length(newcols)!=0)
# 			{
# 				print("adding this way")
# 				sndata(x,datnames=newcols) <- snode.data
# 			}
# 		}
# 		
# 		return(x)
# })
# 
# 








#----------------------------------------
#  Phylo4d extension plots
#----------------------------------------

# 
# @param usestate specifies which column index for the @data slot should be accessed and plotted.  
#		 It can be either an integer (between 1...ncol) or a character string (which is a column 
#		 name.)
#
phyextPlot <- function(x,states,states.col,
						states.na="none", 
						usestate=1,
						plot.subnodes=T,
						plot.points=T,
						line.widths,line.types, ... )
{

	# TODO: sanity check to make sure usestate is a proper data column index
	
	# plot base phylogeny using phylobase functions:
	junk <- x
		
	gtree <- extractTree(junk)  # TODO: overload this so that it works on phyext class (keeps subnode info)
	posi = phyloXXYY(gtree)
	grid.newpage()
	pushViewport(viewport(layout=grid.layout(nrow=1, ncol=1), name="base"))
	pushViewport(viewport(layout.pos.col=1, name="plot1"))
		
	treePlot(gtree, newpage=FALSE,...)

	if(hasTipData(junk) || hasNodeData(junk))
	{
		if(missing(states))
		{
			tmp = as.character(unique(tdata(junk,'all')[,usestate,drop=F])[,1])
			tmp = tmp[!is.na(tmp)]
			if(hasSubNodes(junk) && plot.subnodes)
			{
				tmp = c(tmp,as.character(unique(sndata(junk)[,usestate,drop=F])[,1]))
				tmp = tmp[!is.na(tmp)]
			}
			tmp = unique(tmp)
			states = c(states.na,tmp)
			states.col = c(1,seq(from=2,length.out=length(states)-1))
		}
		
		if(missing(line.widths)){
			line.widths = rep(1,length(states))
		} else {
			if(length(states) != length(line.widths))
				stop("line.widths need to be the same length as states")
		}
		
		if(missing(line.types)){
			line.types = rep(1,length(states))
		} else {
			if(length(states) != length(line.types))
				stop("line.types need to be the same length as states")
		}
				
		seekViewport("tree")
		
		eord = edges(gtree)[posi$eorder,] # this is the order used
		treedata = tdata(junk,"all")[eord[,2],usestate,drop=T]  
		datamap = sapply(treedata,function(i) which(states == i),simplify=T)
		if(is.list(datamap))
			datamap = unlist(lapply(datamap, function(i) ifelse(length(i)==0,1,i[1])))
		
		# replot edges:
		grid.segments(posi$segs$h0x,posi$segs$h0y, posi$segs$h1x,posi$segs$h1y,gp=gpar(col=states.col[datamap],lwd=line.widths[datamap],lty=line.types[datamap]))
		grid.segments(posi$segs$v0x,posi$segs$v0y, posi$segs$v1x,posi$segs$v1y,gp=gpar(col=states.col[datamap],lwd=line.widths[datamap],lty=line.types[datamap]))
		if(plot.points) grid.points(posi$xx,posi$yy,pch=20,gp=gpar(col=states.col[datamap],cex=0.5))
		
		
		# plot sub nodes:
		if(hasSubNodes(junk) && plot.subnodes)
		{
			esub = matrix(edges(junk)[getSubNodeEdgeInds(junk),],ncol=2)
			posi.inds = apply(esub,1,function(i) which(i[1] == eord[,1] & i[2] == eord[,2]))
			subdata = getSubNodeData(junk,usestate)
			submapping = sapply(subdata[,1],function(i) which(states == i))
			subposi = getSubNodePosish(junk)
			
			get.x.offset <- function(xxyy,inds)
			{
				apply(cbind(xxyy$segs$h0x[inds],xxyy$segs$h1x[inds]),1,diff)
			}
			
			# reorder (so that lines don't completely cover each other):
			neword = order(rowMeans(subposi),decreasing=T)
			subposi = matrix(subposi[neword,],ncol=2)
			submapping = submapping[neword]
			posi.inds = posi.inds[neword]
			
			# subbranch positions:
			subposi.x0 = posi$segs$h0x[posi.inds]
			subposi.y0 = posi$segs$h0y[posi.inds]
			subposi.y1 = posi$segs$h1y[posi.inds]
			subposi.x1 = posi$segs$h0x[posi.inds] + (get.x.offset(posi,posi.inds) * rowMeans(subposi))
			subposi.vx0 = posi$segs$v0x[posi.inds]
			subposi.vy0 = posi$segs$v0y[posi.inds]
			subposi.vy1 = posi$segs$v1y[posi.inds]
			subposi.vx1 = posi$segs$v1x[posi.inds]
			
			# plot subnodes:
			grid.segments(subposi.x0,subposi.y0,subposi.x1,subposi.y1,gp=gpar(col=states.col[submapping],lwd=line.widths[submapping],lty=line.types[submapping]))
			grid.segments(subposi.vx0,subposi.vy0,subposi.vx1,subposi.vy1,gp=gpar(col=states.col[submapping],lwd=line.widths[submapping],lty=line.types[submapping]))
			if(plot.points) grid.points(subposi.x1,subposi.y1,pch=20,gp=gpar(col=states.col[submapping],cex=0.5))
			
		}
		
		upViewport(2)
	}
}

# The line below is unnecessary since phylobase sets 'plot' as a generic function
# (through graphics I think)
#setGeneric('plot')
setMethod('plot', signature(x='phylo4d_ext', y='missing'), function(x, y, ...) {
    phyextPlot(x, ...)
})







#---------------------------------------------
# Process Nexus files  
# -	Extra methods for extracting different information
# 	from nexus-formatted files.
#
#---------------------------------------------


# Strip comments from a tree string
#
.strip.tree.comments <- function(text=NULL)
{
	
}


# Method to read the first comment in a line in the format '[&...]'
# This is for reading tree weights chiefly
#
# Example:
# get.nexus.comments("example.txt")->lala
#
get.nexus.comments<-function(finput,text=NULL)
{
	
	if(!is.null(text)){
		# TODO: check text for newlines and split on them if they exist.
		rawtext=text
	} else {
		if(!file.exists(finput))
			stop("Assuming finput is a file and could not find it")
		
		rawtext = scan(finput,what=character(0),strip.white=T,sep="\n")		
	}
	
	# TODO: return named pair {treename, comment}
	comments = character(0)
	for(ii in seq(length(rawtext)))
	{
		junk =  gsub("^.*?\\[(.*?)\\].*$","\\1",rawtext[ii])
		if(length( grep("^&(.*)$",junk) ) != 0)
			comments = append(comments,junk)
	}
	
	return(comments)	
}


has.weights <- function(finput,text=NULL)
{
	if(!is.null(text)){
		# TODO: check text for newlines and split on them if they exist.
		rawtext=text
	} else {
		if(!file.exists(finput))
			stop("Assuming finput is a file and could not find it")
		
		rawtext = scan(finput,what=character(0),strip.white=T,sep="\n")		
		rawtext = read.nexus.block(txt=rawtext,block="trees",silent=T)
	}
	
	comments = grep("(&lnP|&W)( |=)",rawtext)
	return( length(comments)>0 )
}


# get tree weights from file or tree string
# Assuming this format: 
# [.... &W -122235 ........]
#
get.tree.weights <- function(finput,text=NULL,starters=c("&W","&lnP"),splitchar="( |=)")
{
	if(!is.null(text)){
		# TODO: check text for newlines and split on them if they exist.
		rawtext=text
	} else {
		if(!file.exists(finput))
			stop("Assuming finput is a file and could not find it")
		
		rawtext = scan(finput,what=character(0),strip.white=T,sep="\n")		
		rawtext = read.nexus.block(txt=rawtext,block="trees",silent=T)
	}
	
	comments = get.nexus.comments(text=rawtext)
	tokens = strsplit(comments,splitchar)
	find.weight <- function(ii,ww)
	{
		return(ii[(which(toupper(ii) %in% toupper(ww)))+1])
	}
	weighs = unlist(lapply(tokens,find.weight,starters))
	
	return(as.numeric(weighs))
}



# Internal function - split tokens by a certain character
.split.tokens <- function(txt,char)
{
	nlines = length(txt)
	newtokens = character(nlines)
	charlines = unname(sapply(txt,function(i) length(grep(char,i))))
	curline=1
	for(ll in seq(nlines))
	{
		if(charlines[ll] == 0){
			newtokens[curline] = txt[ll]
			curline = curline + 1
		}else{
			tmp = strsplit(txt[ll],char)[[1]]
			for(tmpline in tmp){
				#if(tmpline!=""){
					#newtokens[curline] = paste(tmpline,char,sep="")
					newtokens[curline] = tmpline
					curline = curline + 1
				#}
			}
		}
	}
	return (newtokens)
}


# Internal function - get all content within a nexus block
.get.nexus.block.inds <- function(filename,blockname,text=NULL)
{
	# choose character vector
	if(!is.null(text))
	{
		filetext = text
	} else {
		filetext = scan(filename,what="character",sep="\n",strip.white=T)
	}
	filetext = tolower(filetext)
	
	search.for=paste(paste("begin",tolower(blockname),sep="\\s"),"[\\s]{0,};",sep="")
	start.ind = grep( search.for ,filetext,ignore.case=T)
	if(length(start.ind) == 0)
		return (integer(0))
	
	end.ind = grep("end;",filetext,ignore.case=T)
	end.ind = end.ind[head(which(end.ind > start.ind),1)]

	return( c((start.ind),(end.ind)) )
}



# Internal:
# Get text of a read nexus block
read.nexus.block <- function(finput,txt=NULL,block,rm.comments=F,silent=F)
{
	if(!is.null(txt)){
		# Using the text argument is not recommended
		rawtext=txt
	} else {
		
		if(!file.exists(finput))
			stop("Assuming finput is a file and could not find it")
		
		rawtext = scan(finput,what=character(0),strip.white=T,sep="\n",quiet=silent)		
	}
	
	inds = .get.nexus.block.inds(blockname=block,text=rawtext)
	if(length(inds)==0)
	{
		if(!silent) warning(paste("This file has",block, "no block"))
		return (character(0))
	}
	
	rawtext = rawtext[(inds[1]+1):(inds[2]-1)]
	# TODO: split up newlines if they exist
	
	if(rm.comments)
	{
		# assume comments are start a line with [ 
		# and end a line with ]
		#
		comment.starts = grep("^\\[",rawtext)
		comment.ending = grep("\\]$",rawtext)
		if(length(comment.starts) == length(comment.ending)){
			comment.pairs = cbind(comment.starts,comment.ending)
		} else {
			comment.pairs = cbind(character(0),character(0))
		}
		
		if(nrow(comment.pairs)>0)
		{
			for(pair in seq(nrow(comment.pairs)))
			{
				print(pair)
				rawtext = rawtext[-(comment.pairs[pair,1]:comment.pairs[pair,2])]
			}
		}
	}
	
	return (rawtext)
}


has.block <- function(finput,txt=NULL,blockname="characters2")
{
	retbool = (length(read.nexus.block(finput,txt,block=blockname,silent=T))!=0)
	return(retbool)	
}

has.characters2 <- function(finput,txt=NULL)
{
	return(has.block(finput,txt,"characters2"))
}



# Alternative way to read in characters
# Also works as a work around to readNexus' annoying habit
# or crashing when using type="data" but where trees block
# contains simmap formatted trees
read.characters2 <- function(finput,txt=NULL,blockname="characters2")
{
	rawtext = NULL
	if(!is.null(txt))
	{
		rawtext = txt
	} else {
			if(!file.exists(finput)){
				stop("Assuming finput is a file and could not find it")
			} else {
				rawtext = scan(finput,what=character(0),strip.white=T,sep="\n")						
			}
	} 
	
	tmpfile = tempfile()
	tmphead = "#NEXUS\n"
	tmptaxa = c("BEGIN TAXA;",read.nexus.block(txt=rawtext,block="taxa"),"END;")
	tmptext = read.nexus.block(txt=rawtext,block=blockname)
	tmptext = c("BEGIN CHARACTERS;",tmptext,"END;")
	writeLines(c(tmphead,tmptaxa,tmptext),con=tmpfile)
	data2.part = readNexus(tmpfile,type="data",levels.uniform=F)
	rownames(data2.part) <- checkLabel(rownames(data2.part))
	return(data2.part)
}










#------------------------
# Coersing functions	|
#------------------------

# pulled from implicit(?) 'coerce<-' function
deep.phy.copy <- function (from, to, value,usedata=TRUE) 
{
	useslots = c("data", "metadata", "edge", "edge.length","label", "edge.label", "annote")	 # leaving 'order' out...
	if(!usedata) 
		useslots = useslots[-c(1,2)]
    for (what in useslots) 
    	slot(from, what) <- slot(value, what)
    from
}

# up convert:
# NOTE: not sure if it is necessary to define all of these...
# 		there might be a more 'S4' way to do this.
#
setAs('phylo','phylo4d_ext', function(from, to) {
	return(phyext(from))
})

setAs('phylo4','phylo4d_ext', function(from, to) {
	return(phyext(from))
})

setAs('phylo4d','phylo4d_ext', function(from, to) {
	return(phyext(from))
})

# 
# # down convert:
# # TODO: figure out if these functions are necessary:
# #
# setAs('phylo4d_ext','phylo4d',function(from,to) {
# 
# 	newphy = deep.phy.copy(new(to,order="unknown"),to,from,TRUE)
# 	newphy
# })
# 
# 
# setAs('phylo4d_ext','phylo4',function(from,to) {
# 	
# 	newphy = deep.phy.copy(new(to,order="unknown"),to,from,FALSE)
# 	newphy
# })
# 
# 
# 
# setAs('phylo4d_ext','phylo',function(from,to) {
# 	
# 	# TODO: Need to convert subnodes back into singlestons, 
# 	#		and then collapse the singletons.
# 	return(as(as(from,"phylo4d"),"phylo"))
# })
# 
# 






#!# This section commented out on inclusion in SigTree package, 7/1/15
#!## use this with a namespace
#!#.onLoad <- function(lib, pkg) {
#!#    require(phylobase)
#!#    #!#require(methods) # not sure if this is needed
#!#    phylobase.options(singleton="ok")
#!#}
Any scripts or data that you put into this service are public.
phyext2 documentation built on May 2, 2019, 2:09 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
phyext2
An Extension (for Package 'SigTree') of Some of the Classes in Package 'phylobase'

R/phyext2.R
In phyext2: An Extension (for Package 'SigTree') of Some of the Classes in Package 'phylobase'

Try the phyext2 package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

phyext2 An Extension (for Package 'SigTree') of Some of the Classes in Package 'phylobase'

R/phyext2.R In phyext2: An Extension (for Package 'SigTree') of Some of the Classes in Package 'phylobase'

Try the phyext2 package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

phyext2
An Extension (for Package 'SigTree') of Some of the Classes in Package 'phylobase'

R/phyext2.R
In phyext2: An Extension (for Package 'SigTree') of Some of the Classes in Package 'phylobase'