R/utilities-phylo.R

Defines functions uniqueMultiPhylo white.mkn .depth.phylo .kappa.phylo .kappa.cache .eb.phylo .eb.cache .ou.phylo .ou.cache .trend.phylo .trend.cache .delta.phylo .delta.cache .lambda.phylo .lambda.cache .lrate.phylo .nrate.phylo .null.phylo .null.cache .white.phylo .bm.phylo .white.cache rescale.phylo .paths.phylo .paths.cache .bm.smartstart .ou.smartstart .constrain.m .constrain.k .repars .meristic.matrix .ard.matrix .sym.matrix .er.matrix .bnd.hessian argn.mkn argn.default treedata span.phylo tips cherries name.check phylo.lookup .fill.taxonomy lookup.phylo phylo.clades is.phylo .mrca .get.descendants.of.node .get.desc.of.node .get.ancestors.of.node .get.ancestor.of.node nodelabel.phylo .polytomy.phylo .cache.descendants .nodefind.phylo unique.phylo .unique.phylo as.phylo.hphylo heights.multiPhylo heights.phylo heights

Documented in argn.default argn.mkn as.phylo.hphylo cherries heights heights.multiPhylo heights.phylo is.phylo lookup.phylo name.check nodelabel.phylo phylo.clades phylo.lookup rescale.phylo span.phylo tips treedata uniqueMultiPhylo unique.phylo white.mkn

## GENERIC FUNCTION
heights <- function(x)
UseMethod("heights")


heights.phylo=function(x){
    phy=x
	phy <- reorder(phy, "postorder")
	n <- length(phy$tip.label)
	n.node <- phy$Nnode
	xx <- numeric(n + n.node) # ending times
	for (i in nrow(phy$edge):1) xx[phy$edge[i, 2]] <- xx[phy$edge[i, 1]] + phy$edge.length[i]
	root = ifelse(is.null(phy$root.edge), 0, phy$root.edge)
	labs = c(phy$tip.label, phy$node.label)
	depth = max(xx)
	tt = depth - xx # time to 'present day' of branch starts
	idx = 1:length(tt)
	#dd = phy$edge.length[idx]
	mm = match(1:length(tt), c(phy$edge[, 2], Ntip(phy) + 1))
	dd = c(phy$edge.length, root)[mm] # reordered bls
	ss = tt + dd
	res = cbind(ss, tt)
	rownames(res) = idx
	colnames(res) = c("start", "end")
	res = data.frame(res)
	res
}


heights.multiPhylo=function(x){
    phy=x
	phy=hashes.phylo(phy)
	hh=unique(unlist(lapply(phy, function(x) x$hash)))
	times=lapply(phy, function(x) {tmp=heights.phylo(x); tmp$hash=x$hash; tmp})
	if(is.null(names(times))) names(times)=1:length(times)
	out=lapply(hh, function(hash){
        tmp=sapply(times, function(dat){
            if(hash%in%dat$hash) dat[which(dat$hash==hash),c("start","end")]  else return(c(0,0))
        })
        res=data.frame(t(tmp))
        rownames(res)=names(times)
        names(res)=c("start","end")
        gg=apply(res, 1, function(x) all(x==0))
        if(any(gg)) res=res[-which(gg),]
        return(res)
    })
	attr(out,"hash")=hh
	out
}

## as.phylo.hyplo
as.phylo.hphylo=function(x, ...){
     x$desc=NULL
     x$hash=NULL
     cl=class(x)
     class(x)=cl[!cl%in%"hphylo"]
     x
}

.unique.phylo=function(phy){
    # phy is assumed to have tips that are redundant (and whose exemplars are monophyletic)
    # prunes tree to leave one member of each unique label

	mon=table(phy$tip.label)
	if(all(mon==1)) return(phy)
	mon=mon[mon>1]
	for(i in 1:length(mon)){
		m=names(mon[i])
		drop=which(phy$tip.label==m)
		drop=drop[2:length(drop)]
		phy$tip.label[drop]="null"
	}
	phy=.drop.tip(phy, phy$tip.label[phy$tip.label=="null"])
	return(phy)
}


unique.phylo=function(x, incomparables=FALSE, ...){
    # phy: has tip.labels that are not unique
    # returns phylogeny with unique tip labels
    # if a taxon (indicated by multiple tip labels of same string) is non-monophyletic, all tips of that taxon are removed with warning
    # likely used where an exemplar phylogeny is needed and phy$tip.label is 'tricked' into a non-unique vector of names

	phy=x
	if(incomparables) warning("'incomparables' exerts no effect in this setting")
	.exemplar.monophyly=function(tip, phy) {
        # tip: occurs multiply in phy$tip.label
		nn=which(phy$tip.label==tip)
		if(length(nn)==1) return(TRUE)
		anc=getMRCA(phy, nn)
		dd=unique(phy$tip.label[.get.descendants.of.node(anc, phy, tips=TRUE)])
		if(length(dd)==1) return(TRUE) else return(FALSE)
	}


	tt=table(phy$tip.label)
	if(!any(tt>1)) {
		return(phy)
	} else {
		todo=names(tt[tt>1])
        #		cat("checking monophyly of groups...\n")
		mon=sapply(todo, function(t) {
            #				   cat(paste("\n\t",t, sep=""))
            .exemplar.monophyly(t, phy)
        })
        #		cat("\n")

		if(any(!mon)) {
			warning(paste("non-monophyletic lineages encountered:\n\t", paste(todo[!mon], collapse="\n\t"), "\n", sep=""))
			phy=.drop.tip(phy, names(!mon))
		}
		return(.unique.phylo(phy))
	}
}


# wrapper for plot.phylo: plot tree with internal nodes labelled with ape numbering
plotNN <- function (phy, time = TRUE, margin = TRUE, ...) {
    phy$node.label <- (length(phy$tip.label) + 1):max(phy$edge);
    plot.phylo(phy, show.node.label = TRUE, no.margin = !margin, cex = 0.5, ...);
    if (time && margin) {
        axisPhylo(cex.axis = 0.75);
    }
}

# Get b and d values from r (b-d) and epsilson (d/b)
# Used in previous version of program; now in terms of r and epsilon
# Possibly of use to users wishing to translate results
get.bd <- function (r, epsilon) {
      b <- r/(1 - epsilon);
      d <- b - r;   # Alternatively: d <- epsilon * r/(1 - epsilon)
      return(list(b = b, d = d));
}


#general phylogenetic utility for determining whether a node is the root of the phylogeny
#author: JM EASTMAN 2011

is.root <- function (node,phy) {
	if (node == (Ntip(phy) + 1)) return(TRUE) else return(FALSE);
}


.nodefind.phylo=function(phy, label){
	N=Ntip(phy)
	ww=match(label, phy$node.label)
	if(!all(is.na(ww))) {
		return(N+ww)
	} else {
		warning(paste("Encountered no node.label for ",label,sep=""))
		return(NULL)

	}
}

.cache.descendants=function(phy){
    # fetches all tips subtended by each internal node

	N=as.integer(Ntip(phy))
	n=as.integer(Nnode(phy))

	phy=reorder(phy, "postorder")

	zz=list( N=N,
    MAXNODE=N+n,
    ANC=as.integer(phy$edge[,1]),
    DES=as.integer(phy$edge[,2])
    )

	res=.Call("cache_descendants", phy=zz, PACKAGE="geiger")
	return(res)
}


.polytomy.phylo=function(tips, age=1){
	N=length(tips)+1
	edge=cbind(N,1:(N-1))
	length=rep(age, N-1)
	phy=list(edge=edge, edge.length=length, tip.label=tips, Nnode=1)
	class(phy)="phylo"
	return(phy)
}


nodelabel.phylo=function(phy, taxonomy, strict=TRUE, ncores=NULL){
    # all phy$tip.label must be in taxonomy
    # taxonomy: exclusivity highest on left, lowest on right (species, genus, family, etc., as columns)
    # columns in 'taxonomy' should ONLY be taxonomic ranks


    #	tt=as.matrix(taxonomy)
    #	tt[!is.na(tt)]=""
    #	drp=apply(tt, 1, function(x) all(x==""))
    #	if(any(drp)) taxonomy=taxonomy[-which(drp),]


	taxonomy=cbind(rownames=rownames(taxonomy),taxonomy)
	rank="rownames"

	taxonomy=as.data.frame(as.matrix(taxonomy),stringsAsFactors=FALSE)

	if(!all(xx<-phy$tip.label%in%taxonomy[,rank])) {
		warning(paste("taxa not found in 'taxonomy':\n\t", paste(phy$tip.label[!xx], collapse="\n\t"), sep=""))
	}
	taxonomy[taxonomy==""]=NA

    op<-orig<-options()
    op$expressions=max(op$expressions, 500000)
	options(op)

	unmatched=phy$tip.label[!xx]
	idx=match(phy$tip.label, taxonomy[,rank])
	tax=taxonomy[idx,]

	labels=unique(unlist(tax[,-which(names(tax)==rank)]))
	labels=labels[!is.na(labels)]

	dat=tax[,-which(names(tax)==rank)]
	hashes_labels=character(length(labels))
	zz=tax[,rank]
	tips=phy$tip.label[xx]
	for(i in 1:ncol(dat)){
		uu=unique(dat[,i])
		uu=uu[!is.na(uu)]
		for(j in uu){
			cur=zz[which(dat[,i]==j)]
			if(length(cur)>1){
				hashes_labels[which(labels==j)]=.hash.tip(cur, tips)
			} else {
				hashes_labels[which(labels==j)]=NA
			}

		}
	}
	names(hashes_labels)=labels
	hashes_labels=hashes_labels[!is.na(hashes_labels)]

	# redundancies for labels
	tmp=table(hashes_labels)
	if(any(tmp[tmp>1]->redund)){
		root=.hash.tip(tips, tips)
		for(r in names(redund)){
			if(r==root){
				rdx=which(hashes_labels==r)
				warning(paste("redundant labels encountered at root:\n\t", paste(names(hashes_labels[rdx]), collapse="\n\t"), sep=""))
				hashes_labels=hashes_labels[-rdx[!rdx==min(rdx)]]
			} else {
				rdx=which(hashes_labels==r)
				warning(paste("redundant labels encountered:\n\t", paste(names(hashes_labels[rdx]), collapse="\n\t"), sep=""))
				hashes_labels=hashes_labels[-rdx[!rdx==max(rdx)]]
			}
		}
	}

    #	cat("resolving descendants for splits in tree...\n")
	tmp=hashes.phylo(phy, tips, ncores=ncores)
	hashes_tree=tmp$hash
	phy$node.label=rep("",max(phy$edge))
	mm=match(hashes_tree, hashes_labels)
	nodelabels=ifelse(is.na(mm), "", names(hashes_labels[mm]))
	nodelabels[is.na(nodelabels)]=""
	nodelabels=nodelabels[(Ntip(phy)+1):max(phy$edge)]
	tmp=table(nodelabels[nodelabels!=""])
	if(any(tmp[tmp>1]->redund)){
		for(r in names(redund)){
			rdx=which(nodelabels==r)
			nodelabels[rdx[rdx!=min(rdx)]]=""
		}
	}

	phy$node.label=nodelabels

	edges=NULL

	desc=.cache.descendants(phy)$tips[-c(1:Ntip(phy))]
    tidx=match(tips, phy$tip.label)

	FUN=function(taxon){
		nm=rownames(dat)
		dat=as.matrix(dat, ncol=ncol(dat))
		rownames(dat)=nm
		if(!taxon%in%dat) {
			try=agrep(taxon, unique(c(dat)), value=TRUE)
			if(length(try)){
				warning(paste(sQuote(taxon), " not encountered in 'taxonomy'\n\nIntended search may have been:\n\t", paste(try, collapse="\n\t", sep=""), sep=""))
			} else {
				warning(paste(sQuote(taxon), " not encountered in 'taxonomy'", sep=""))

			}

			return(NULL)
		}
		expected=rownames(which(dat==taxon, arr.ind=TRUE))
		if(length(expected)==1){ # single tip
			x=which(phy$tip.label==expected)
			hs=.hash.tip(expected, tips)
			res=list(unexpected=c(), missing=c())
			attr(res, "node")=x
			attr(res, "hash")=hs
			attr(res, "expected")=sort(expected)
			return(res)

		}

		bin=as.integer(tips%in%expected)

        #		rownames(edges)=1:nrow(edges)
		N=Ntip(phy)

        ff=.get.parallel(ncores)

		dist=unlist(ff(desc, function(x) {
            y=tidx%in%x
            sum(abs(y-bin))
        }))
		nearest=N+which(dist==min(dist))
		hs=character(length(nearest))
		for(i in 1:length(nearest))hs[i]=.hash.tip(edges[nearest[i],], tips)
		res=lapply(nearest, function(x) {
            tt=tips[which(tidx%in%desc[[x-N]])]
            unexpected=sort(setdiff(tt, expected)) ## in tree but unexpected
            missing=sort(setdiff(expected, tt)) ## expected but not in tree
            tmp=list(unexpected=unexpected, missing=missing)
            hs=.hash.tip(edges[x,], tips)
            attr(tmp, "node")=x
            attr(tmp, "hash")=hs
            attr(tmp, "expected")=sort(expected)
            return(tmp)
        })
		res
	}

	# missed labels
	mm=match(hashes_labels, hashes_tree)
	if(any(is.na(mm))){
		mss=hashes_labels[is.na(mm)]
		if(!strict){
			N=Ntip(phy)

			ll=list()
			nm=rev(names(mss))
			for(x in 1:length(nm)){
				ll[[x]]=FUN(nm[x])
			}
			near_tmp=ll
			names(near_tmp)=nm
			near=lapply(1:length(near_tmp), function(idx){
                tmp=near_tmp[[idx]]
                x=nm[idx]
                if(is.null(tmp)) return(c())
                if(length(tmp)==1) {
                    nd=attributes(tmp[[1]])$node
                    names(nd)=paste("\"", x, "\"", sep="")
                    if(phy$node.label[nd-N]=="") return(nd) else return(c())
                } else {
                    return(c())
                }
			})
			near=unlist(near)
			dd=duplicated(near)
			true_missed=nm[!nm%in%gsub("\"","",names(near))]
			if(any(dd)) near=near[-which(dd)]
			phy$node.label[near-N]=names(near)
		}
	}

	nn=gsub("\"", "", unique(phy$node.label[phy$node.label!=""]))
	hl=names(hashes_labels)
	ms=sort(hl[!hl%in%nn])
	phy$missed=ms
	if(length(ms)){
		warning(paste("labels missing from 'phy':\n\t", paste(ms, collapse="\n\t"), sep=""))
	}

	phy$FUN=FUN

	phy
}




#general phylogenetic utility for returning first ancestor (as a numeric referent) of the supplied node
#author: JM EASTMAN 2010

.get.ancestor.of.node <-
function(node, phy) {
	return(phy$edge[which(phy$edge[,2]==node),1])
}




#general phylogenetic utility for returning all ancestors (listed as given in phy$edge[,2]) of a node
#author: JM EASTMAN 2010

.get.ancestors.of.node <-
function(node, phy) {
	a=c()
	if(node==(Ntip(phy)+1->root)) return(NULL)
	f=.get.ancestor.of.node(node, phy)
	a=c(a,f)
	if(f>root) a=c(a, .get.ancestors.of.node(f, phy))
	return(a)
}


#general phylogenetic utility for returning the first (usually, unless a polytomy exists) two descendants of the supplied node
#author: JM EASTMAN 2010

.get.desc.of.node <-
function(node, phy) {
	return(phy$edge[which(phy$edge[,1]==node),2])
}



#general phylogenetic utility for returning all descendants (listed as given in phy$edge[,2]) of a node (excluding the supplied node)
#author: JM EASTMAN 2011

.get.descendants.of.node <-
function(node, phy, tips=FALSE){
	n=Ntip(phy)
	all=ifelse(tips, FALSE, TRUE)
	out <- .Call("get_descendants", tree=list(
    NODE = as.integer(node),
    ROOT = as.integer(n+1),
    ALL = as.integer(all),
    ENDOFCLADE = as.integer(dim(phy$edge)[1]),
    ANC = as.integer(phy$edge[,1]),
    DES = as.integer(phy$edge[,2])),
    PACKAGE = "geiger")
	res=out$TIPS
	if(!length(res)) res=NULL
	return(res)
}

.mrca=function(labels, phy){
	mm=labels

	if(all(is.character(labels))){

		ll=c(phy$tip.label, phy$node.label)
		mm=match(labels, ll)
		if(any(is.na(mm))) stop("Some 'labels' not encountered in 'phy'")


	}

	if(!all(is.numeric(mm))) stop("Supply 'labels' as a character or integer vector")

    if(length(u<-unique(mm))==1) return(u)

	aa=unlist(lapply(mm, function(x) .get.ancestors.of.node(x, phy)))
	tt=table(aa)
	max(as.integer(names(tt[tt==length(labels)])))
}

is.phylo=function(x) "phylo"%in%class(x)

## FUNCTIONS
## grabs most exclusive tips from clade definitions (and whose tips can then be reconciled with a taxonomic database -- a lookup table)
# allows for recursion in clade definitions (clade defined in part using another clade definition)
# returns trees representing each clade definition
# 'nested': an important variable -- this is the subset of clades that are defined (recursively) within 'clades'
phylo.clades=function(clades, phy=NULL, unplaced=TRUE, ncores=NULL){

    ## give 'phy' as a multiPhylo, named list of trees (whose labels appear in the clade defs)
    ## clades:
    #	clades=list(
    #			 Sirenoidea=c("Siren", "Pseudobranchus"),
    #			 Ambystomatidae=c("Ambystomatidae", "Plethodontidae"),
    #			 Cryptobranchoidea=c("Hynobiidae", "Cryptobranchus_alleganiensis"),
    #			 CAUDATA=c("Sirenoidea","Salamandroidea","Cryptobranchoidea")
    #	)

    if (!is.null(phy)) {
        if (!"multiPhylo" %in% class(phy) | is.null(phynm <- names(phy)))
        stop("Supply 'phy' as a 'multiPhylo' object with names")
        tmp = character(length(phy))
        for (i in 1:length(phy)) {
            cur = phy[[i]]
            cur$edge.length = NULL
            tre = write.tree(cur)
            tmp[i] = gsub(";", "", tre)
        }
        phy = tmp
        names(phy) = phynm
        for (i in 1:length(clades)) {
            cur = clades[[i]]
            if (any(cur == names(clades)[i]))
            stop("Encountered self-referential clade")
            mm = match(cur, phynm)
            if (any(!is.na(mm))) {
                ww = which(!is.na(mm))
                for (j in 1:length(ww)) {
                    clades[[i]][ww[j]] = phy[[mm[ww[j]]]]
                }
            }
        }
    }

    ll = sapply(clades, length)
    if (any(ll == 1)) {
    	unis=clades[ll==1]
    	clades=clades[ll!=1]
        warning("Non-splitting lineages found in 'clades'")
    } else {
        unis=NULL
    }
    cc = unique(c(unlist(clades)))
    tt = cc %in% names(clades)
    nested = cc[tt]
    is.nestedclade = function(clade, nested) {
        if (clade %in% nested)
        return(TRUE)
        else return(FALSE)
    }

    fetch_nestedclades = function(clade, clades, nested) {
        if (is.nestedclade(clade, nested)) {
            desc = clades[[clade]]
            new = as.list(desc)
            names(new) = desc
            tt = sapply(new, is.nestedclade, nested)
            for (i in 1:length(new)) {
                if (tt[i])
                new[[i]] = fetch_nestedclades(new[[i]], clades, nested)
                else new[[i]] = NA
            }
        }
        else {
            new = NA
        }
        return(new)
    }

    paths_through_clades = function(clades, nested) {
        nn = names(clades)
        res = lapply(nn, function(x) {
            dd = clades[[x]]
            y = lapply(dd, fetch_nestedclades, clades, nested)
            names(y)[1:length(dd)] = dd
            y
        })
        names(res) = nn
        res
    }

    unplaced_phy = function(phy, cladepath) {
        if (any(names(cladepath) == "unplaced")) {
            tips = names(cladepath$unplaced)
            y = .polytomy.phylo(tips)
            y$edge.length = NULL
            new = bind.tree(phy, y)
            new = .drop.tip(new, "unplaced")
            return(new)
        }
        else {
            return(phy)
        }
    }

    tree_cladepath = function(cladepath, nested) {
        print_group = function(cladepath) {
            xx = sapply(names(cladepath), is.nestedclade, nested)
            middle = character(length(xx))
            for (i in 1:length(xx)) {
                if (xx[i]) {
                    new = cladepath[[names(xx)[i]]]
                    middle[i] = print_group(new)
                }
                else {
                    middle[i] = names(cladepath)[i]
                }
            }
            paste("(", paste(middle, collapse = ", "), ")", sep = "")
        }
        tmp = paste(print_group(cladepath), ";", sep = "")
        phy = read.tree(text = tmp)
        return(unplaced_phy(phy, cladepath))
    }

    cladepaths = paths_through_clades(clades, nested)

    if (unplaced) {
        for (i in 1:length(cladepaths)) {
            cur = cladepaths[[i]]
            if (!is.null(unplc <- attributes(clades[[i]])$unplaced)) {
                unplaced_taxa = lapply(unplc, function(x) return(NA))
                names(unplaced_taxa) = unplc
                cladepaths[[i]]$unplaced = unplaced_taxa
            }
        }
    }

    alt_tip_label=function(phy, unis){
    	phy$alt.tip.label=character(Ntip(phy))
    	pt=phy$tip.label
    	for(i in 1:length(unis)){
    		if(length(cur<-unis[[i]])!=1) stop("'unis' should have a single member in each element")
    		if(!is.na(mm<-match(cur, pt))) phy$alt.tip.label[mm]=names(unis)[i]
    	}
    	phy
    }

    phy = lapply(cladepaths, tree_cladepath, nested)
    if(length(unis)) phy = lapply(phy, alt_tip_label, unis)
    nn=sapply(phy, Ntip)
    midx<-which(nn==max(nn))
   	master=phy[midx]

    ## RESOLVE NODE LABELS for 'master' tree (most comprehensive)
   	if(length(master)==1){
   		master=master[[1]]
   		tt=master$tip.label
   		null=.hash.tip(c(), tt)

   		mm=hashes.phylo(master, tt, ncores=ncores)
   		ss=sapply(phy, function(x) .hash.tip(x$tip.label, tt))
   		if(any(ss==null)){
			warning(paste("The following not encountered:\n\t", paste(names(ss)[which(ss==null)], collapse="\n\t")))
		}
   		ts=table(ss)
   		if(any(ts>1)){
   			drp=numeric()
   			ts=ts[ts>1]
   			for(i in 1:length(ts)){
   				ww=which(ss==names(ts[i]))
   				drp=c(drp, ww[which(ww!=min(ww))])
   			}
   			warning("The following node labels appear redundant:\n\t", paste(names(ss)[drp], collapse="\n\t"))
   			ss=ss[-drp]
   		}
   		xx=match(ss, mm$hash)
   		if(any(is.na(xx))){
   			warning(paste("The following not encountered:\n\t", paste(names(ss)[is.na(xx)], collapse="\n\t")))
   			ss=ss[-which(is.na(xx))]
   		}
   		N=Ntip(master)
   		nd=character(Nnode(master))
   		nd[xx-N]=names(ss)
   		master$node.label=nd
   		phy[[midx]]=master
   	}
    return(phy)
}


lookup.phylo=function(phy, taxonomy=NULL, clades=NULL, ncores=NULL){
    ## taxonomy expected to have first column at same level as tip labels in phy
    ## first row in taxonomy is most exclusive
    ## clade_defs are phylogenetic trees of a clade representation

	if(!is.null(taxonomy)){
		if(!any(taxonomy[,1]%in%phy$tip.label) & !is.null(rownames(taxonomy))) {
			taxonomy=as.data.frame(as.matrix(cbind(species=rownames(taxonomy), taxonomy)),stringsAsFactors=FALSE)
		}
	}

	related_tips=function(tips, phy){
		tips=tips[tips%in%phy$tip.label]
		if(length(tips)<2) stop("related_tips(): 'tips' to be found in 'phy' are too few")
		nd=unique(sapply(tips, function(x) match(x, phy$tip.label)))
		if(length(nd)==1) return(nd)
		anc=getMRCA(phy, nd)
		dd=.get.descendants.of.node(anc, phy, tips=TRUE)
		return(phy$tip.label[dd])
	}

	tips_in_group=function(phy, taxonomy=NULL, clade_def){

		lengths=sapply(clade_def$tip.label, function(grp){
            if(!is.null(taxonomy)){
                ww=which(taxonomy==grp, arr.ind=TRUE)[,"row"]
                if(length(ww)>0){
                    return(TRUE)
                } else {
                    return(FALSE)
                }
            } else {
                return(length(phy$tip.label[which(phy$tip.label%in%grp)])>0)
            }
		})

		if(sum(lengths)<2) return(NA)

		## FIXME: use 'lengths' to determine if 'clade_def' is satisfied (at least two members needed)


		tmp=sapply(clade_def$tip.label, function(grp){
            if(!is.null(taxonomy)){
                ww=which(taxonomy==grp, arr.ind=TRUE)[,"row"]
                if(length(ww)>0){
                    return(unique(taxonomy[ww,1]))
                } else {
                    return(c())
                }
            } else {
                return(phy$tip.label[which(phy$tip.label%in%grp)])
            }
		})

		## most exclusive 'spp' (recognized from tree)
		spanning_taxa=unique(unlist(tmp))
		recovered_taxa=unique(related_tips(spanning_taxa, phy))

		return(recovered_taxa)
	}

	orig_phy=phy

	if(!is.null(taxonomy)){
		## check ordering of taxonomy
		oo=order(apply(taxonomy, 2, function(x) length(unique(x))),decreasing=TRUE)
		if(!all(oo==c(1:ncol(taxonomy)))){
			warning("Assuming 'taxonomy' is not from most to least exclusive")
			taxonomy=taxonomy[,ncol(taxonomy):1] #reverse ordering of columns
		}
		original_taxonomy=taxonomy

		## check rank of tree
		phy$node.label=NULL
		gtips=sapply(phy$tip.label, function(x) unlist(strsplit(gsub(" ", "_", x), "_"))[1])
		check=sapply(list(phy$tip.label, gtips), function(x) sum(x%in%taxonomy[,1]))
		if(max(check)==check[2]) {
			genuslevel_tree=TRUE
			phy$tip.label=unname(gtips)
		} else {
			genuslevel_tree=FALSE
		}
		tips=phy$tip.label

		## prune taxonomy down to members in the phylogeny
		taxonomy=taxonomy[taxonomy[,1]%in%tips,]
		matching_tips=taxonomy[,1]

		## BUILD taxonomic mapping to each row in 'phy'
		mm=match(tips, matching_tips)
		tt=taxonomy[mm,]
		colnames(tt)=names(taxonomy)
	} else {
		tt=c()
	}

	if(!is.null(clades)){
		tips=phy$tip.label
		clade_defs=phylo.clades(clades, ncores=ncores)
        #		cat("resolving clades...\n\t")
		res=lapply(1:length(clade_defs), function(idx) {
            def=clade_defs[[idx]]
            #				   cat(paste(names(clade_defs)[idx], "\n\t", sep=""))
            tt=try(tips_in_group(phy, taxonomy, def),silent=TRUE)
            if(inherits(tt, "try-error")) return(NA) else return(tt)
        })
        #		cat("\n")
		names(res)=names(clade_defs)

		zz=sapply(res, function(x) all(is.na(x)))
		if(any(zz)){
			warning(paste("taxa not represented in 'tips':\n\t", paste(names(res)[zz], collapse="\n\t"), sep=""))
		}
		res=res[!zz]
		clade_defs=clade_defs[!zz]

		## BUILD clade-level mapping to each row in 'phy'
		gg=matrix("", nrow=Ntip(phy), ncol=length(res))
		for(i in 1:length(res)){
			nm=names(clade_defs)[i]
			cur_tips=res[[i]]
			zz=tips%in%cur_tips
			gg[zz,i]=nm
		}
		colnames(gg)=names(clade_defs)
		tt=as.matrix(cbind(tt, gg))
	}

	if(is.null(tt)){
		if(!is.null(nn<-phy$node.label)){
			names(nn)=Ntip(phy)+1:length(nn)
			nn=nn[nn!=""]
			if(length(nn)){
				tt=matrix("", nrow=Ntip(phy), ncol=length(nn))
				dd=.cache.descendants(phy)$tips
				for(i in 1:length(nn)){
					tt[phy$tip.label%in%phy$tip.label[dd[[as.integer(names(nn[i]))]]],i]=nn[i]
				}
				colnames(tt)=nn

			} else {
				return(NULL)
			}
		} else {
			return(NULL)
		}
	}
	phy_mapping=tt
	rownames(phy_mapping)=orig_phy$tip.label
	return(as.data.frame(as.matrix(phy_mapping), stringsAsFactors=FALSE))
}



## FUNCTIONS ##

.fill.taxonomy=function(taxonomy){
	push.taxon=function(taxonomy, indices, column){
		for(n in which(indices)){
			taxonomy[n,column]=paste(taxonomy[n,min((column:ncol(taxonomy))[!is.na(taxonomy[n,column:ncol(taxonomy)])])],paste("R",column,sep=""),sep=".")
		}
		return(taxonomy)
	}

	for(c in 1:(ncol(taxonomy)-1)){
		ii=is.na(taxonomy[,c])
		if(any(ii)) taxonomy=push.taxon(taxonomy, ii, c)
	}
	return(taxonomy)
}

phylo.lookup=function(taxonomy, ncores=NULL) {
    # GENERAL FUNCTION: convert taxonomic 'lookup' table to phylogeny
    # lookup is data.frame with ranks as columns
    # rowlabels of 'taxonomy' are assumed to be tips of the phylogeny
    # rank corresponds to (numeric) position in rank names (R to L) to use in building taxonomic tree; if null, all ranks are used
    # NOTE: taxonomic groups in 'lookup' MUST be ordered by exclusivity from R to L -- e.g., genera must precede families must precede orders
	labels=rownames(taxonomy)
    mm=nrow(taxonomy)
    tax=as.matrix(taxonomy, ncol=ncol(taxonomy))
	rownames(tax)=labels
	occurrences=table(tax)
	occurrences=occurrences[names(occurrences)!="" & occurrences>1]
    if(any(labels%in%names(occurrences))) stop(paste("The following appear to be both ranks and row.names of 'taxonomy':\n\t", paste(labels[labels%in%names(occurrences)], collapse="\n\t"), sep=""))

    if(!any(occurrences==mm)){
        tax=cbind(as.matrix(taxonomy, ncol=ncol(taxonomy)),root="root")
        rownames(tax)=labels
        occurrences=c(occurrences, root=mm)
    }

    f=.get.parallel(ncores)

    #clds=f(names(occurrences), function(x) {
    #		tmp=which(tax==x, arr.ind=TRUE)
	#	xcol=max(tmp[,"col"])
	#	dat=as.matrix(tax[xrow<-tmp[,"row"],1:xcol], ncol=xcol)
	#	tab=occurrences[names(occurrences)!=x]
	#	unique(sapply(1:nrow(dat), function(idx){
	#		cur=dat[idx,]
	#		   if(any(cur%in%names(tab)->xx)){
	#				ht=tab[names(tab)%in%cur]
	#				return(names(ht)[max(which(ht==max(ht)))])
	#		   } else {
	#				return(rownames(dat)[idx])
	#		   }
	#	}))
	#})


    clds=f(names(occurrences), function(x) {
		tmp=which(tax==x, arr.ind=TRUE)[,"row"]
        taxinx=rownames(tax)[tmp]
        return(taxinx)
	})
    names(clds)=names(occurrences)

    nn=sapply(clds, length)
    clds=clds[order(nn)]

    ## exclude DUPLICATES
    eq=f(1:length(clds), function(idx){
        others=clds[-which(names(clds)==names(clds)[idx])]
        vv=sapply(others, function(x) setequal(clds[[idx]], x))
        if(any(vv)) return(names(which(vv))) else return()
    })
    drop=c()
    tmp=unique(c(unlist(eq)))
    if(length(tmp)){
        mm=match(tmp, names(clds))
        tmp=tmp[order(mm)]
        mm=mm[order(mm)]
        names(mm)=tmp

        for(i in 1:length(mm)){
            ww=which(sapply(eq, function(x) names(mm[i])%in%x))
            tt=names(clds)[ww]
            a=which(names(clds)==names(mm[i]))
            b=match(tt, names(clds))
            if(any(b<a)) drop=c(drop, names(mm[i]))
        }

        clds=clds[-which(names(clds)%in%drop)]
    }

    resolve_definition=function(taxon, clades){
        cldother=clades[-which(names(clades)==taxon)]
        tips=clades[[taxon]]

        resolver=function(tips, cld){

            members=c()
            if(!length(cld)) {
                members=c(members, tips)
            } else {
                tt=sapply(cld, function(y){
                    all(y%in%tips)
                })

                if(any(tt)){
                    nm=names(cld)[max(which(tt))]
                    members=c(members, nm)
                    ftips=tips[-which(tips%in%cld[[nm]])]
                    fcld=cld[which(tt)]
                    fcld=fcld[!names(fcld)%in%nm]
                    members=c(members, resolver(ftips, fcld))
                } else {
                    members=c(members, tips)
                }

            }
            return(members)
        }

        def=resolver(tips, cldother)
        def
    }

    defs=lapply(1:length(clds), function(idx){
        taxon=names(clds)[idx]
        resolve_definition(taxon, clds)
    })
    names(defs)=names(clds)

    op=options()
    op$expressions=max(op$expressions, 500000)
    options(op)

	tmp=phylo.clades(defs, ncores=ncores)
	phy=tmp[[length(defs)]]
	tt=table(phy$tip.label)
	if(any(tt>1)){
		warning(paste("The following tips occur multiply, suggesting non-monophyly of subtending groups:\n\t", paste(names(tt[tt>1]), collapse="\n\t"), sep=""))
		warning("Offending tips removed from labeled phylogeny")
		phy=.drop.tip(phy, names(tt[tt>1]))
	}
	phy$root.edge=0
	phy
}


name.check <- function(phy, data, data.names = NULL) {

	if (is.null(data.names)) {
		if (is.vector(data)) {
			data.names <- names(data);
		} else {
			data.names <- rownames(data);
		}
	}
	t <- phy$tip.label;
	r1 <- t[is.na(match(t, data.names))];
	r2 <- data.names[is.na(match(data.names, t))];

	r <- list(sort(r1), sort(r2));

	names(r) <- cbind("tree_not_data", "data_not_tree")

	class(r) <- "name.check"

	if (length(r1) == 0 && length(r2) == 0) {
		return("OK");
	} else {
		return(r);
	}
}

cherries <- function(phy){
    cache=.cache.descendants(phy)
	N=Ntip(phy)
	nds=which(tabulate(phy$edge[phy$edge[,2]<=N,1])==2)
    if(length(nds)){
        mat=matrix(NA, nrow=length(nds), ncol=2)
        for(i in 1:length(nds)) mat[i, ]=phy$tip.label[cache$tips[[nds[i]]]]
        rownames(mat)=nds
    } else {
        mat=NULL
    }
    return(mat)
}


tips <- function(phy, node)
{
	if(node<=Ntip(phy)) return(phy$tip.label[node])
	dd=.get.descendants.of.node(node, phy, tips=TRUE)
	phy$tip.label[dd]
}

span.phylo=function(phy){
    desc=.cache.descendants(phy)
    N=Ntip(phy)
    labs=c(phy$tip.label, phy$node.label)

    ff=function(node){
        if(length(node)>1) stop("Supply 'node' as a single value")
        if(!is.numeric(node)) node=which(labs==node)

        if(node<=N) return(NULL)
        dd=desc$fdesc[[node]]
        labs[sapply(dd, function(x) desc$tips[[x]][1])]
    }
    ff

}

# data.names is optional, and will replace the names or rownames
# of data when matching data to the tree
# ?!? data.names is not involved here - JWB

# if sort is T, data will have rows in the same order
# as the taxon names in phy$tip.label

treedata <- function(phy, data, sort=FALSE, warnings=TRUE) {

	dm=length(dim(data))

	if (is.vector(data)) {
		data<-as.matrix(data)
	}
	if (is.factor(data)) {
		data<-as.matrix(data)
	}
	if (is.array(data) & length(dim(data))==1) {
		data<-as.matrix(data)
	}

#	if (is.null(data.names)) {
	if (is.null(rownames(data))) {
		stop("names for 'data' must be supplied")
#JME				data.names<-phy$tip.label
#JME				if(warnings)
#JME					cat("Warning: no tip labels, order assumed to be the same as in the tree\n")
	} else {
		data.names<-rownames(data)
	}
#	}
	nc<-name.check(phy, data)
	if (is.na(nc[[1]][1]) | nc[[1]][1]!="OK") {
		if (length(nc[[1]]!=0)) {
			phy=.drop.tip(phy, as.character(nc[[1]]))
			if (warnings) {
				warning(paste("The following tips were not found in 'data' and were dropped from 'phy':\n\t",
							  paste(nc[[1]], collapse="\n\t"), sep=""))
#JME			print(nc[[1]])
#JME			cat("\n")
			}
		}

		if(length(nc[[2]]!=0)) {
			m<-match(data.names, nc[[2]])
			data=as.matrix(data[is.na(m),])
			data.names<-data.names[is.na(m)]
			if(warnings) {
				warning(paste("The following tips were not found in 'phy' and were dropped from 'data':\n\t",
							  paste(nc[[2]], collapse="\n\t"), sep=""))
#JME			print(nc[[2]])
#JME			cat("\n")
			}
		}
 	}
	order<-match(data.names, phy$tip.label)

	rownames(data)<-phy$tip.label[order]

	if (sort) {
    	index <- match(phy$tip.label, rownames(data))
   		data <- as.matrix(data[index,])
	}
	if (dm==2){
		data <- as.matrix(data)
	}

	phy$node.label=NULL

	return(list(phy=phy, data=data))
}

## GENERIC
## rescale=function(x, ...) UseMethod("rescale")

## GENERIC

## GENERIC
argn.default=function(x, ...){
	attr(x, "argn")
}

## GENERIC
argn.mkn=function(x, ...){
	attr(x, "argn")
}

## HESSIAN COMPUTATION of CONFIDENCE INTERVALS
.bnd.hessian=function(m, p, s, prob=0.05){
	prob=min(c(1-prob, prob))
	dm=unique(dim(m))
	if(length(dm)!=1) {warning("FAILURE in Hessian CI computation: 'm' must be a square matrix"); return(NA)}
	if(dm!=length(p) | dm!=length(s)) {warning("FAILURE in Hessian CI computation: 'p' and 's' must be of equal length to the dimensions of 'm'"); return(NA)}
	if(!all(s%in%c("exp","nat"))) {warning("FAILURE in Hessian CI computation: 's' must indicate the space of 'p': expecting either 'exp' or 'nat' as elements"); return(NA)}

	qq=qnorm(1-(prob/2))
	dd=try(sqrt(diag(solve(m))),silent=TRUE)
	if(inherits(dd, "try-error")){
		warning(paste("ERROR in inversion of Hessian matrix:", gsub(">", "", gsub("<", "", gsub("\n", "", toString(attributes(dd)$condition))))))
		return(NA)
	}
	bb=qq*dd
	res=sapply(1:length(s), function(idx) {
			   if(s[idx]=="exp"){

			   return(c(lb=exp(log(p[idx])-bb[idx]), ub=exp(log(p[idx])+bb[idx])))
			   } else {
			   return(c(lb=p[idx]-bb[idx], ub=p[idx]+bb[idx]))
			   }
			   })
	rownames(res)=c("lb", "ub")
	colnames(res)=names(p)
	res

}


## MKN CONSTRAINT FUNCTIONS
.er.matrix=function(m){
	m[]=1
	diag(m)=0
	m
}

.sym.matrix=function(m){
	m[]=1:length(m)
	ww=which(upper.tri(m),arr.ind=TRUE)
	m[ww]=m[ww[,2:1]]
	diag(m)=0
	m
}

.ard.matrix=function(m){
	m[]=1:length(m)
	diag(m)=0
	m
}

.meristic.matrix=function(m, symmetric=TRUE){
	m[]=0
	k=nrow(m)
	idx=rbind(cbind(1:(k-1), 2:k), cbind(2:k, 1:(k-1)))
	if(symmetric) base=.sym.matrix(m) else base=.ard.matrix(m)
	m[idx]=base[idx]
	m
}

.repars=function(pars, expected){
    if(!length(pars)==length(expected)) stop(paste("The following 'pars' are expected:\n\t", paste(expected, collapse="\n\t", sep=""), sep=""))
	if(all(!is.null(nm<-names(pars)))){
		if(!all(nm%in%expected)) stop(paste("The following 'pars' are unexpected:\n\t", paste(nm[!nm%in%expected], collapse="\n\t", sep=""), sep=""))
		if(length(unique(nm))!=length(expected)) stop(paste("The following 'pars' are expected:\n\t", paste(expected, collapse="\n\t", sep=""), sep=""))
		mm=match(expected, nm)
		return(pars[mm])
	} else {
		return(pars)
	}
}

## EXTENSION of diversitree:::constrain()
## create 'standard' constraint likelihood function given a patterned matrix
.constrain.k=function(f, model=c("ER", "SYM", "ARD", "meristic"), ...){
	e=environment(f)
	k=e$k

	m=matrix(1:k^2,nrow=k,byrow=TRUE)
	model=match.arg(model,c("ER", "SYM", "ARD", "meristic"))
	mm=switch(model,
			  ER=.er.matrix(m),
			  SYM=.sym.matrix(m),
			  ARD=.ard.matrix(m),
			  meristic=.meristic.matrix(m, ...)
			  )
	return(.constrain.m(f, mm))
}

## EXTENSION of diversitree:::constrain()
## create constrained likelihood function by supplying matrix
.constrain.m=function(f, m){
# if entry is 0, assumed to constrain rate to 0
	k=ncol(m)
	if(nrow(m)!=k) stop("supply 'm' as a square matrix")
	idx <- cbind(rep(1:k, each = k - 1), unlist(lapply(1:k, function(i) (1:k)[-i])))
	npar <- k * (k - 1)
	par=cbind(idx, m[idx], duplicated(m[idx]))
	colnames(par)=c("row","col","parm","dup")
	upar=unique(par[,"parm"])
	sdig=nchar(k)
	sfor=paste("%0",sdig,"d",sep="")
	spar=apply(par[,c("row","col")], 1, function(x) paste("q", paste(sapply(x, function(y) sprintf(sfor, y)), collapse=""),sep=""))
	res=unlist(sapply(1:nrow(par), function(idx){
					  curspar=spar[idx]
					  if(par[idx,"parm"]==0){
					  return(paste(curspar, 0, sep="~") )
					  } else if(par[idx,"dup"]==1){
					  mm=min(which(par[,"parm"]==par[idx,"parm"]))
					  return(paste(curspar, spar[mm], sep="~") )
					  } else {
					  return(NULL)
					  }
					  }))
	return(.constrain(f, formulae=res))
}

# (smart) starting pt for optimization
.ou.smartstart=function(dat, bounds){
	vv=var(dat)
    xb=max(bounds)
    nb=min(bounds)
    atry=seq(-8,4,by=2)
    s=sample(1:length(atry),1)
    if(s==1) {
        aa=nb
    } else if(s==length(atry)) {
        aa=xb
    } else {
        aa=vv*2*exp(atry[s])
        if(aa>xb) aa=xb
        if(aa<nb) aa=nb
    }
    if(is.na(aa)) aa=0.1
    aa
}

# (smart) starting pt for optimization
.bm.smartstart=function(phy, dat){
	ss=mean(pic(dat, phy)^2)
	ss
}

# compute path length from root to tip
.paths.cache=function(cache){

    if(is.null(cache$ordering) || cache$ordering!="postorder"){
        stop("'cache' should be postordered")
    }

	n <- cache$n.tip
	pp <- cache$adesc[-c(1:n)]
	e1 <- cache$edge[, 1]
	e2 <- cache$edge[, 2]
	EL <- cache$edge.length
	xx <- numeric(n + cache$n.node)
	for (i in length(e1):1) {
		var.cur.node <- xx[e1[i]]
		xx[e2[i]] <- var.cur.node + EL[i]
		j <- i - 1L
		while (e1[j] == e1[i] && j > 0) {
			left <- if (e2[j] > n)
			pp[[e2[j] - n]]
			else e2[j]
			right <- if (e2[i] > n)
			pp[[e2[i] - n]]
			else e2[i]
			j <- j - 1L
		}
	}
	xx[1:n]
}

# compute path length from root to tip
.paths.phylo=function(phy, ...){

    ## much from ape:::vcv.phylo()
	phy <- reorder(phy, "postorder")

    FUN=function(vcv=FALSE){
        n <- length(phy$tip.label)
        pp <- .cache.descendants(phy)$tips
        e1 <- phy$edge[, 1]
        e2 <- phy$edge[, 2]
        EL <- phy$edge.length
        xx <- numeric(n + phy$Nnode)
        if(vcv) vmat=matrix(0, n, n)
        for (i in length(e1):1) {
            var.cur.node <- xx[e1[i]]
            xx[e2[i]] <- var.cur.node + EL[i]
            if(vcv){
                j <- i - 1L
                while (e1[j] == e1[i] && j > 0) {
                    left=pp[[e2[j]]]
                    right=pp[[e2[i]]]
                    vmat[left, right] <- vmat[right, left] <- var.cur.node
                    j <- j - 1L
                }
            }
        }
        if(vcv) {
            diags <- 1 + 0:(n - 1) * (n + 1)
            vmat[diags] <- xx[1:n]
            colnames(vmat)<-rownames(vmat)<-phy$tip.label
            return(vmat)
        } else {
            return(xx[1:n])
        }
    }

    FUN(...)
}


.heights.cache=function (cache)
{
    if(is.null(cache$ordering) || cache$ordering!="postorder"){
        stop("'cache' should be postordered")
    }

	n <- cache$n.tip
	n.node <- cache$n.node
	xx <- numeric(n + n.node)
	for (i in nrow(cache$edge):1) xx[cache$edge[i, 2]] <- xx[cache$edge[i, 1]] + cache$edge.length[i]
	root = ifelse(is.null(cache$root.edge), 0, cache$root.edge)
	depth = max(xx)
	tt = depth - xx
	idx = 1:length(tt)
	dd = cache$edge.length[idx]
	mm = match(1:length(tt), c(cache$edge[, 2], n + 1))
	dd = c(cache$edge.length, root)[mm]
	ss = tt + dd
	res = cbind(ss, tt)
	rownames(res) = idx
	colnames(res) = c("start", "end")
	res = data.frame(res)
	res
}

## tree transformation
rescale.phylo=function(x, model=c("BM", "OU", "EB", "nrate", "lrate", "trend",
	"lambda", "kappa", "delta", "white", "depth"), ...){

	phy=x

	model=match.arg(model, c("BM", "OU", "EB", "nrate", "lrate", "trend", "lambda",
		"kappa", "delta", "white", "depth"))

	if(!"phylo"%in%class(phy)) stop("supply 'phy' as a 'phylo' object")

	FUN=switch(model,
               BM=.bm.phylo(phy),
			   OU=.ou.phylo(phy),
			   EB=.eb.phylo(phy),
               nrate=.nrate.phylo(phy),
               lrate=.lrate.phylo(phy),
			   trend=.trend.phylo(phy),
			   lambda=.lambda.phylo(phy),
			   kappa=.kappa.phylo(phy),
			   delta=.delta.phylo(phy),
			   white=.white.phylo(phy),
			   depth=.depth.phylo(phy)
			   )
	class(FUN)=c("transformer", "function")
	dots=list(...)
	if(!missing(...)) {
        if(!all(names(dots)%in%argn(FUN)))
			stop(paste("The following parameters are expected:\n\t", paste(argn(FUN),
				collapse="\n\t", sep=""), sep=""))
        return(FUN(...))
    } else {
    	return(FUN)
    }
}


# tree transformation
.white.cache=function(cache){
	N=cache$n.tip
	cache$len[]=0
	cache$len[1:N]=1

	z=function(){
		cache
	}
	return(z)
}

# tree transformation
.bm.phylo=function(phy){
    el=phy$edge.length
    z=function(sigsq){
        phy$edge.length=el*sigsq
        phy
    }
    attr(z,"argn")="sigsq"
    return(z)
}

# tree transformation
.white.phylo=function(phy){
	N=Ntip(phy)
	phy$edge.length[]=0
	phy$edge.length[phy$edge[,2]<=N]=1
    el=phy$edge.length
	z=function(sigsq=1){
		phy$edge.length=el*sigsq
        phy
	}
    attr(z,"argn")="sigsq"
	return(z)
}



# tree transformation
.null.cache=function(cache) {
	z=function(){
		cache
	}
	return(z)
}

# tree transformation
.null.phylo=function(phy) {
	z=function(){
		phy
	}
	return(z)
}

# tree transformation -- rates by time
.nrate.phylo=function(phy){
	ht=heights.phylo(phy)
	N=Ntip(phy)
	Tmax=ht$start[N+1]
	mm=match(1:nrow(ht), phy$edge[,2])
	ht$t=Tmax-ht$end
	ht$e=ht$start-ht$end
	ht$a=ht$t-ht$e
	ht$rS=ht$a/Tmax
	ht$rE=ht$t/Tmax

	dd=phy$edge[,2]

	relscale.brlen=function(start, end, len, dat){
		ss=start<=dat[,"time"]
		strt=min(which(ss))

		ee=dat[,"time"]<end
		etrt=max(which(ee))+1

		bl=numeric()
        fragment=numeric()
		marker=start
		for(i in strt:etrt){
			fragment=c(fragment, (nm<-(min(c(end, dat[i, "time"]))))-marker)
			bl=c(bl,dat[i, "rate"])
			marker=nm
		}
        fragment=fragment/(sum(fragment))
        sclbrlen=numeric()
        for(i in 1:length(bl)) sclbrlen=c(sclbrlen, len*fragment[i]*bl[i])
		sc=structure(as.numeric(sclbrlen), names=strt:etrt)
		return(sc)
	}


	z=function(time, rate, sigsq=1){
		if(any(time>1) | any(time<0)) stop("supply 'time' as a vector of relative time:\n\tvalues should be in the range 0 (root) to 1 (present)")
		if(any(rate<0)) stop("'rate' must consist of positive values")
		if(length(time)!=length(rate)) stop("'time' and 'rate' must be of equal length")
        phy$edge.length=phy$edge.length*sigsq
		ordx=order(time)
		time=time[ordx]
		rate=rate[ordx]
		dat=cbind(time=c(0,time, 1), rate=(c(1, 1, rate)))
		rs=sapply(dd, function(x) as.numeric(sum(relscale.brlen(ht$rS[x], ht$rE[x], ht$e[x], dat))))
		phy$edge.length=rs
		phy
	}
	attr(z, "argn")=c("time", "rate", "sigsq")
	return(z)
}

# tree transformation -- rates by clade

.lrate.phylo=function(phy){
    N=Ntip(phy)
    n=Nnode(phy)
    cache=.cache.tree(phy)
    cache$phy=phy
    vv=rep(1, N+n-1)

	z=function(node, rate, sigsq=1){


        shifts=c(sort(node[node>N]), node[node<=N])
        mm=match(shifts, node)
        rates=rate[mm]

        phy$edge.length=phy$edge.length*sigsq
        for(i in 1:length(shifts)) vv=.assigndescendants(vv, shifts[i], rates[i], exclude=shifts, cache=cache)
        phy$edge.length=vv*phy$edge.length
        phy
    }

	attr(z, "argn")=c("node", "rate", "sigsq")
	return(z)
}


# tree transformation
.lambda.cache=function(cache){
	N=cache$n.tip
	paths=.paths.cache(cache)

	z=function(lambda){
		if(lambda<0) stop("'lambda' must be positive valued")

		bl=cache$len*lambda
		bl[1:N]=bl[1:N]+(paths-(paths*lambda))
		cache$len=bl
		if(any(cache$len<0,na.rm=TRUE)){
			warning("negative branch lengths encountered:\n\tlambda may be too large")
		}
		cache
	}
	attr(z,"argn")="lambda"
	return(z)
}

# tree transformation
.lambda.phylo=function(phy){
	ht=heights.phylo(phy)
	N=Ntip(phy)
	Tmax=ht$start[N+1]
	mm=match(1:N, phy$edge[,2])
	ht$e=ht$start-ht$end
	paths=.paths.phylo(phy)

	z=function(lambda, sigsq=1){
		if(lambda<0) stop("'lambda' must be positive valued")

		bl=phy$edge.length*lambda
		bl[mm]=bl[mm]+(paths-(paths*lambda))
		phy$edge.length=bl
		if(any(phy$edge.length<0)){
			warning("negative branch lengths encountered:\n\tlambda may be too large")
		}
        phy$edge.length=phy$edge.length*sigsq
		phy
	}
	attr(z,"argn")=c("lambda", "sigsq")
	return(z)
}


# tree transformation
.delta.cache=function(cache){
	ht=.heights.cache(cache)
	N=cache$n.tip
	Tmax=ht$start[N+1]
	mm=match(1:nrow(ht), cache$edge[,2])
	ht$t=Tmax-ht$end
	ht$e=ht$start-ht$end
	ht$a=ht$t-ht$e

	z=function(delta, rescale=TRUE){
		if(delta<0) stop("'delta' must be positive valued")
		bl=(ht$a+ht$e)^delta-ht$a^delta
		cache$len=bl

		if(rescale){
			scl=Tmax^delta
			cache$len=(cache$len/scl)*Tmax
		}
		cache
	}
	attr(z,"argn")="delta"
	return(z)
}

# tree transformation
.delta.phylo=function(phy){
	ht=heights.phylo(phy)
	N=Ntip(phy)
	Tmax=ht$start[N+1]
	mm=match(1:nrow(ht), phy$edge[,2])
	ht$t=Tmax-ht$end
	ht$e=ht$start-ht$end
	ht$a=ht$t-ht$e
	if(sum(ht$a < -0.1)>0) stop("Calculation error; contact developers.")
	ht$a[ht$a<0] <- 0

	z=function(delta, sigsq=1, rescale=TRUE){
		if(delta<0) stop("'delta' must be positive valued")
		bl=(ht$a+ht$e)^delta-ht$a^delta
		phy$edge.length=bl[phy$edge[,2]]

		if(rescale){
			scl=Tmax^delta
			phy$edge.length=(phy$edge.length/scl)*Tmax
		}
        phy$edge.length=phy$edge.length*sigsq
		phy
	}
	attr(z,"argn")=c("delta", "sigsq")
	return(z)
}



# tree transformation
.trend.cache=function(cache){

	ht=.heights.cache(cache)
	N=cache$n.tip
	Tmax=ht$start[N+1]
	mm=match(1:nrow(ht), cache$edge[,2])
	ht$head=Tmax-ht$end[cache$edge[mm,1]] # age
	ht$tail=ht$head+(ht$start-ht$end)


	z=function(slope){
		ht$br=1+ht$head*slope
		ht$er=1+ht$tail*slope
        scl=sapply(1:nrow(ht), function(idx){
            if(idx==N+1) return(NA)
            if(ht$br[idx]>0 & ht$er[idx]>0) {
                return((ht$br[idx]+ht$er[idx])/2)
            } else if(ht$br[idx]<0 & ht$er[idx]<0) {
                return(0)
            } else {
                si=-1/slope
                return(ht$br[idx]*(si-ht$head[idx])/(2*(ht$tail[idx]-ht$head[idx])))
            }
        })
		cache$len=cache$len*scl
		cache
	}
	attr(z,"argn")="slope"
	return(z)
}

# tree transformation
.trend.phylo=function(phy){

	ht=heights.phylo(phy)
	N=Ntip(phy)
	Tmax=ht$start[N+1]
	mm=match(1:nrow(ht), phy$edge[,2])
	ht$head=Tmax-ht$end[phy$edge[mm,1]] # age
	ht$tail=ht$head+(ht$start-ht$end)


	z=function(slope, sigsq=1){
        # begin (age): head
        # end: tail
		ht$br=1+ht$head*slope
		ht$er=1+ht$tail*slope
        scl=sapply(1:nrow(ht), function(idx){
            if(idx==N+1) return(NA)
            if(ht$br[idx]>0 & ht$er[idx]>0) {
                return((ht$br[idx]+ht$er[idx])/2)
            } else if(ht$br[idx]<0 & ht$er[idx]<0) {
                return(0)
            } else {
                si=-1/slope
                return(ht$br[idx]*(si-ht$head[idx])/(2*(ht$tail[idx]-ht$head[idx])))
            }
        })

		phy$edge.length=phy$edge.length*scl[phy$edge[,2]]
        phy$edge.length=phy$edge.length*sigsq
		phy
	}
	attr(z,"argn")=c("slope", "sigsq")
	return(z)
}



# tree transformation
.ou.cache=function(cache){
	ht=.heights.cache(cache)
	N=cache$n.tip
	Tmax=ht$start[N+1]
	mm=match(1:nrow(ht), cache$edge[,2])
	ht$t1=Tmax-ht$end[cache$edge[mm,1]]
	ht$t2=ht$start-ht$end+ht$t1
	z=function(alpha){
		if(alpha<0) stop("'alpha' must be positive valued")
		if (alpha == 0){
      			bl = ht$t2-ht$t1
    		} else {
      			bl = (1/(2 * alpha)) * exp(-2 * alpha * (Tmax - ht$t2)) *
        		-(expm1(-2 * alpha * ht$t2)) - (1/(2 * alpha)) *
        		exp(-2 * alpha * (Tmax - ht$t1)) * -(expm1(-2 *
                                                     		alpha * ht$t1))
    			}
		cache$len=bl
		cache
	}
	attr(z,"argn")="alpha"
	return(z)
}

# tree transformation
.ou.phylo=function(phy){
	ht=heights.phylo(phy)
	N=Ntip(phy)
	Tmax=ht$start[N+1]
	mm=match(1:nrow(ht), phy$edge[,2])
	ht$t1=Tmax-ht$end[phy$edge[mm,1]]
	ht$t2=ht$start-ht$end+ht$t1
	z=function(alpha, sigsq=1){
		if(alpha<0) stop("'alpha' must be positive valued")
		bl=(1/(2*alpha))*exp(-2*alpha * (Tmax-ht$t2)) * (1 - exp(-2 * alpha * ht$t2)) - (1/(2*alpha))*exp(-2*alpha * (Tmax-ht$t1)) * (1 - exp(-2 * alpha * ht$t1))
		phy$edge.length=bl[phy$edge[,2]]
        phy$edge.length=phy$edge.length*sigsq
		phy
	}
	attr(z,"argn")=c("alpha", "sigsq")
	return(z)
}



# tree transformation
.eb.cache=function(cache){

	ht=.heights.cache(cache)
	N=cache$n.tip
	Tmax=ht$start[N+1]
	mm=match(1:nrow(ht), cache$edge[,2])
	ht$t1=Tmax-ht$end[cache$edge[mm,1]]
	ht$t2=ht$start-ht$end+ht$t1

	z=function(a){
		if(a==0) return(cache)
		bl = (exp(a*ht$t2)-exp(a*ht$t1))/(a)
		cache$len=bl
		cache
	}
	attr(z,"argn")="a"
	return(z)
}

# tree transformation
.eb.phylo=function(phy){


	ht=heights.phylo(phy)
	N=Ntip(phy)
	Tmax=ht$start[N+1]
	mm=match(1:nrow(ht), phy$edge[,2])
	ht$t1=Tmax-ht$end[phy$edge[mm,1]]
	ht$t2=ht$start-ht$end+ht$t1

	z=function(a, sigsq=1){
		if(a==0) return(phy)
		bl = (exp(a*ht$t2)-exp(a*ht$t1))/(a)
		phy$edge.length=bl[phy$edge[,2]]
        phy$edge.length=phy$edge.length*sigsq
		phy
	}
	attr(z,"argn")=c("a", "sigsq")
	return(z)
}



# tree transformation
.kappa.cache=function(cache){

	z=function(kappa){
		if(kappa<0) stop("'kappa' must be positive valued")

		cache$len=cache$len^kappa
		cache
	}
	attr(z,"argn")="kappa"
	return(z)
}

# tree transformation
.kappa.phylo=function(phy){

	z=function(kappa, sigsq=1){
		if(kappa<0) stop("'kappa' must be positive valued")

		phy$edge.length=phy$edge.length^kappa
        phy$edge.length=phy$edge.length*sigsq
		phy
	}
	attr(z,"argn")=c("kappa", "sigsq")
	return(z)
}

# tree transformation
.depth.phylo=function(phy){
	orig=max(heights.phylo(phy))
	z=function(depth){
		phy$edge.length <- (phy$edge.length/orig) * depth
		if(!is.null(phy$root.edge)) phy$root.edge=(phy$root.edge/orig) * depth
		phy
	}
	attr(z,"argn")="depth"
	z
}

white.mkn <- function(dat) {
	tt <- table(dat);
	n <- sum(tt);
	p <- tt/n;
	ll <- sum(log(p^tt));

	k <- length(tt) - 1;
	opt <- list(lnL = ll, method = "MLE", k = k);
	opt <- .aic(opt, n);
	#opt <- .aic(lnL = opt$lnL, n = n, k = k); # changed .aic args
	return(opt);
}

drop.extinct <- function (phy, tol=NULL) {
    if (!"phylo" %in% class(phy)) {
        stop("\"phy\" is not of class \"phylo\".");
    }
    if (is.null(phy$edge.length)) {
        stop("\"phy\" does not have branch lengths.");
    }
    if (is.null(tol)) {
        tol <- min(phy$edge.length)/100;
    }
    aa <- is.extinct(phy=phy, tol=tol);
    if (length(aa) > 0) {
        #cat("Dropping", length(aa), "taxa:", aa, "\n", sep=" ");
        phy <- .drop.tip(phy, aa);
    }
    return(phy);
}

# return tip.labels, so that tree ordering is not an issue
is.extinct <- function (phy, tol=NULL) {
    if (!"phylo" %in% class(phy)) {
        stop("\"phy\" is not of class \"phylo\".");
    }
    if (is.null(phy$edge.length)) {
        stop("\"phy\" does not have branch lengths.");
    }
    if (is.null(tol)) {
        tol <- min(phy$edge.length)/100;
    }
    phy <- reorder(phy);
    xx <- numeric(Ntip(phy) + phy$Nnode);
    for (i in 1:length(phy$edge[,1])) {
        xx[phy$edge[i,2]] <- xx[phy$edge[i,1]] + phy$edge.length[i];
    }
    aa <- max(xx[1:Ntip(phy)]) - xx[1:Ntip(phy)] > tol;
    if (any(aa)) {
        return(phy$tip.label[which(aa)]);
    } else {
        return(NULL);
    }
}


drop.random<-function (phy, n)
{
    if (!inherits(phy, "phylo"))
	stop("object \"phy\" is not of class \"phylo\"")
    nb.tip <- length(phy$tip.label)
    if (n > nb.tip)
	return(NULL)
    cut <- sample(1:nb.tip, n)
    r <- .drop.tip(phy, cut)
    return(r)
}

uniqueMultiPhylo=function(x, incomparables=FALSE, ...){
	phy=x
	if(incomparables) warning("'incomparables' exerts no effect in this setting")
	ss=sapply(phy, digest)
	if(any(dd<-duplicated(ss))){
		sub=phy[-which(dd)]
		class(sub)="multiPhylo"
		return(sub)
	} else {
		return(phy)
	}
}
mwpennell/geiger-v2 documentation built on Feb. 26, 2023, 1:19 a.m.