# R/likelihood.R In geiger: Analysis of Evolutionary Diversification

#### Documented in make.bm.relaxed

```## replacing prepare.data.bm
make.bm.relaxed <- function(phy, dat, SE=NA, method=c("direct","vcv","reml")){
#   method=match.arg(method, c("direct","vcv","reml"))
method=match.arg(method, c("direct","vcv"))

lik=switch(method,
direct=.make.bm.relaxed.direct(phy, dat, SE),
vcv=.make.bm.relaxed.vcv(phy, dat, SE),
reml=.make.bm.relaxed.pic(phy, dat, SE))
lik
}

#primary function for computing the likelihood of data, given a root state, VCV matrix, and Brownian motion model
#author: LJ HARMON 2009 and JM EASTMAN 2010
.bm.lik.fn.vcv <-
function(root, dat, vcv, SE) {
# mod 12.02.2010 JM Eastman: using determinant()\$modulus rather than det() to stay in log-space
y=dat
b <- vcv
if(any(SE!=0)) diag(b)=diag(b)+SE^2
w <- rep(root, nrow(b))
num <- -t(y-w)%*%solve(b)%*%(y-w)/2
den <- 0.5*(length(y)*log(2*pi) + as.numeric(determinant(b)\$modulus))
return((num-den)[1,1])
}

#primary function for computing the likelihood of data, using REML, under Brownian motion model
#author: LJ HARMON, LJ REVELL, and JM EASTMAN 2011
#'rphy' is rate-scaled tree in pruning-wise order (as 'ic')
.bm.lik.fn.reml <-
function(rphy, ic) {
new.var=.pic_variance.phylo(rphy)
reml=dnorm(ic, mean=0, sd=sqrt(new.var), log=TRUE)
return(sum(reml))
}

#compute expected PIC variance given tree: used for .bm.lik.fn.reml()
#author: JM EASTMAN 2011
.pic_variance.phylo <- function(phy)
{
phy\$node.label=NULL
nb.tip <- Ntip(phy)
nb.node <- phy\$Nnode
if(!is.binary.phylo(phy)) stop("'phy' is not fully dichotomous.")

phy <- reorder(phy, "postorder")

ans <- .C("pic_variance", as.integer(nb.tip), as.integer(nb.node),
as.integer(phy\$edge[, 1]), as.integer(phy\$edge[, 2]),
as.double(phy\$edge.length), double(nb.node),
PACKAGE = "geiger")

var <- ans[[6]]
var
}

.make.bm.relaxed.vcv <- function(phy, dat, SE=NULL){
## NOT currently handling given node states

cache=.prepare.bm.univariate(phy, dat, nodes=NULL, SE=SE, control=list(binary=FALSE))

check.argn=function(rates, root){
if(length(rates)!=(cache\$n.node+cache\$n.tip-1) && length(root)==1) stop("Supply 'rates' as a vector of rate scalars for each branch, and supply 'root' as a single value.")
}

likSE <- function(rates, root, SE) {
check.argn(rates, root)
tt=.scale.brlen(cache\$phy, rates)
vcv=.paths.phylo(tt, vcv=TRUE)
xSE=cache\$SE[mm<-match(colnames(vcv), names(cache\$SE))]
dat=cache\$dat[match(colnames(vcv), names(cache\$dat))]
.bm.lik.fn.vcv(root, dat, vcv, xSE)
}
attr(likSE, "cache") <- cache
attr(likSE,"argn")=list(rates=cache\$phy\$edge[,2], root="root", SE="SE")
class(likSE)=c("rbm","bm","function")
return(likSE)
} else { # unadjustable SE
lik <- function(rates, root){
check.argn(rates, root)
tt=.scale.brlen(cache\$phy, rates)
vcv=.paths.phylo(tt, vcv=TRUE)
SE=cache\$SE[match(colnames(vcv),names(cache\$SE))]
dat=cache\$dat[match(colnames(vcv),names(cache\$dat))]
.bm.lik.fn.vcv(root, dat, vcv, SE)
}
attr(lik, "cache") <- cache
attr(lik,"argn")=list(rates=cache\$phy\$edge[,2], root="root")
class(lik)=c("rbm","bm","function")
return(lik)
}
}

.make.bm.relaxed.pic <- function(phy, dat, SE=NULL){

## NOT currently handling given node states
cache=.prepare.bm.univariate(phy, dat, nodes=NULL, SE=SE)

ic=pic(cache\$dat, cache\$phy, scaled=FALSE)
cache\$ic=ic

check.argn=function(rates){
if(length(rates)!=(cache\$n.node+cache\$n.tip-1)) stop("Supply 'rates' as a vector of rate scalars for each branch.")
}

xSE=cache\$SE
N=Ntip(cache\$phy)
subSE=match(1:N, cache\$phy\$edge[,2])

likSE <- function(rates, SE) {
check.argn(rates)
rphy=.scale.brlen(cache\$phy, rates)
rphy\$edge.length[subSE]=rphy\$edge.length[subSE]+xSE^2
.bm.lik.fn.reml(rphy, cache\$ic)
}
attr(likSE, "cache") <- cache
attr(likSE,"argn")=list(rates=cache\$phy\$edge[,2], SE="SE")
class(likSE)=c("rbm","bm","function")
return(likSE)
} else {
lik <- function(rates){
check.argn(rates)
rphy=.scale.brlen(cache\$phy, rates)
rphy\$edge.length[subSE]=rphy\$edge.length[subSE]+xSE^2
.bm.lik.fn.reml(rphy, cache\$ic)
}
attr(lik,"cache")=cache
attr(lik,"argn")=list(rates=cache\$phy\$edge[,2])
class(lik)=c("rbm","bm","function")
}

lik
}

.make.bm.relaxed.direct <- function (phy, dat, SE=NULL)
{

## NOT currently handling given node states

cache=.prepare.bm.univariate(phy, dat, nodes=NULL, SE=SE)
N = cache\$n.tip
n = cache\$n.node
z = length(cache\$len)
rootidx = as.integer(cache\$root)
given = as.integer(attributes(cache\$y)\$given)
given[rootidx]=1

datc_init = list(
len = as.numeric(cache\$len),
intorder = as.integer(cache\$order[-length(cache\$order)]),
tiporder = as.integer(1:N),
root = rootidx,
y = as.numeric(cache\$y[1, ]),
n = as.integer(z),
given = as.integer(given),
descRight = as.integer(cache\$children[, 1]),
descLeft = as.integer(cache\$children[, 2]),
drift=0
)

ll.bm.direct <- function(pars, datc) {
out = .Call("bm_direct", dat = datc, pars = pars, PACKAGE = "geiger")
#       vals = c(out\$initM[rootidx], out\$initV[rootidx], out\$lq[rootidx])
loglik <- sum(out\$lq)
#       intermediates=FALSE
#       if (intermediates) {
#          attr(loglik, "intermediates") <- intermediates
#          attr(loglik, "vals") <- vals
#       }
return(loglik)
}
class(ll.bm.direct) <- c("bm.direct", "bm", "function")

vv = numeric(N+n)
mm = match(cache\$phy\$edge[, 2], 1:(N+n))
check.argn=function(rates, root){
if(length(rates)!=(N+n-1) && length(root)==1) stop("Supply 'rates' as a vector of rate scalars for each branch, and supply 'root' as a single value.")
}

var = as.numeric(cache\$y[2, ]^2)

## LIKELIHOOD FUNCTION
likSE <- function(rates, root, SE) {
check.argn(rates, root)
vv[mm] = rates
datc_se=datc_init
datc_se\$var=var

datc_se\$y[rootidx]=root

ll = ll.bm.direct(pars = vv,  datc_se)
return(ll)
}
attr(likSE, "cache") <- cache
attr(likSE,"argn")=list(rates=cache\$phy\$edge[,2], root="root", SE="SE")
class(likSE)=c("rbm","bm","function")
return(likSE)
} else { # unadjustable SE
lik <- function(rates, root) {
check.argn(rates, root)
vv[mm] = rates
datc_init\$var=var
datc_init\$y[rootidx]=root

ll = ll.bm.direct(pars = vv,  datc_init)
return(ll)
}
attr(lik, "cache") <- cache
attr(lik,"argn")=list(rates=cache\$phy\$edge[,2], root="root")
class(lik)=c("rbm","bm","function")
return(lik)
}
}

.cache.tree <- function (phy)
{
ordxx=function (children, is.tip, root)
# from diversitree:::get.ordering
{
todo <- list(root)
i <- root
repeat {
kids <- children[i, ]
i <- kids[!is.tip[kids]]
if (length(i) > 0)
todo <- c(todo, list(i))
else break
}
as.vector(unlist(rev(todo)))
}

edge <- phy\$edge
edge.length <- phy\$edge.length
idx <- seq_len(max(edge))
n.tip <- Ntip(phy)
tips <- seq_len(n.tip)
root <- n.tip + 1
is.tip <- idx <= n.tip

desc=.cache.descendants(phy)
children <- desc\$fdesc
if(!max(sapply(children, length) == 2)){
children=NULL
order=NULL
binary=FALSE
} else {
children <- rbind(matrix(NA, n.tip, 2), t(matrix(unlist(children), nrow=2)))
order <- ordxx(children, is.tip, root)
binary=TRUE
}

len <- edge.length[mm<-match(idx, edge[, 2])]

ans <- list(tip.label = phy\$tip.label, node.label = phy\$node.label,
len = len, children = children, order = order,
root = root, n.tip = n.tip, n.node = phy\$Nnode, tips = tips,
edge = edge, edge.length = edge.length, nodes = phy\$edge[,2], binary = binary, desc = desc)
ans
}

.prepare.bm.univariate=function(phy, dat, nodes=NULL, SE=NA, control=list(binary=TRUE, ultrametric=FALSE)){

## CONTROL OBJECT
ct=list(binary=TRUE, ultrametric=FALSE)
ct[names(control)]=control

## MATCHING and major problems
td=treedata(phy, dat, sort=TRUE, warnings=FALSE)
phy=reorder(td\$phy, "postorder")
if(ct\$binary) if(!is.binary.phylo(phy)) stop("'phy' should be a binary tree")
if(ct\$ultrametric) if(!is.ultrametric(phy)) stop("'phy' should be an ultrametric tree")
if(is.null(phy\$edge.length)) stop("'phy' must include branch lengths in units of time")

if(ncol(td\$data)>1) stop("'dat' should be univariate")
dat=td\$data[,1]

## RESOLVE SE
seTMP=structure(rep(NA, length(dat)), names=names(dat))

if(is.null(SE)) SE=NA

if(length(SE)>1){
if(is.null(names(SE))) stop("'SE' should be a named vector")
if(!all(names(dat)%in%names(SE))) stop("names in 'SE' must all occur in names of 'dat'")
seTMP[names(SE[names(dat)])]=SE[names(dat)]
SE=seTMP
} else {
if(is.numeric(SE)){
seTMP[]=SE
SE=seTMP
} else {
SE=seTMP
}
}

if(!all(is.na(SE) | SE >= 0)) stop("'SE' values should be positive (including 0) or NA")

## CACHE tree
cache=.cache.tree(phy)
N=cache\$n.tip
n=cache\$n.node
m<-s<-g<-numeric(N+n)

## RESOLVE data: given trait values (m and g) and SE (s) for every node (tips and internals)
g[1:N]=1
m[]=NA; m[1:N]=dat
s[1:N]=SE

## RESOLVE nodes
if(!is.null(nodes)){
nn=(N+1):(N+n)
vec=.cache.y.nodes(m, s, g, nn, phy, nodes=nodes)
} else {
vec=rbind(m=m, s=s)
attr(vec, "given")=g
}

cache\$SE=SE
cache\$dat=dat[match(phy\$tip.label, names(dat))]
cache\$phy=phy

cache\$y=vec

return(cache)
}

.cache.y.nodes=function(m, s, g, nn, phy, nodes){

if(is.numeric(nodes) & is.vector(nodes)){
if(!all(names(nodes)%in%nn)) stop("'nodes' must have (integer) names corresponding to the internal nodes of 'phy'")
nodes=data.frame(cbind(node=as.integer(names(nodes)), mean=nodes, SE=0), stringsAsFactors=FALSE)
} else {
if(!all(c("taxon1", "taxon2", "mean", "SE")%in%colnames(nodes))){
flag=FALSE
if(!all(c("mean", "SE")%in%colnames(nodes)) | is.null(rownames(nodes))){
flag=TRUE
} else if(!all(rr<-as.integer(rownames(nodes))%in%nn)){
flag=TRUE
}
if(flag) stop("'nodes' must minimally have column names: 'taxon1', 'taxon2', 'mean', and 'SE'")
nodes=as.data.frame(nodes)
nodes\$node=as.integer(rownames(nodes))
} else {
nodes=as.data.frame(nodes)
if(!is.numeric(nodes\$mean) | !is.numeric(nodes\$SE)){
stop("'nodes' must have numeric vectors for 'mean' and 'SE'")
}

if(!all(zz<-unique(c(as.character(nodes\$taxon1), as.character(nodes\$taxon2)))%in%phy\$tip.label)){
stop(paste("Some taxa appear missing from 'phy':\n\t", paste(zz[!zz%in%phy\$tip.label], collapse="\n\t", sep=""), sep=""))
}

nodes\$node=apply(nodes[,c("taxon1", "taxon2")], 1, .mrca, phy=phy)
}

if(!length(unique(nodes\$node))==nrow(nodes)) {
stop("Some nodes multiply constrained:\n\t", paste(nodes\$node[duplicated(nodes\$node)], collapse="\n\t", sep=""), sep="")
}
}

nidx=nodes\$node
if(any(g[nidx]==1->zz)) stop("Some nodes already constrained:\n\t", paste(nidx[which(zz)], collapse="\n\t", sep=""), sep="")

m[nidx]=as.numeric(nodes\$mean)
s[nidx]=as.numeric(nodes\$SE)
g[nidx]=1

vec=rbind(m=m, s=s)
attr(vec, "given")=g

vec
}

.proc.lnR=function(gen, subprop, cur.lnL, new.lnL, lnp, lnh, heat=1, control){
if(is.infinite(cur.lnL)) stop("Starting point has exceptionally poor likelihood.")
if(is.finite(new.lnL)) {
lnLikelihoodRatio = new.lnL - cur.lnL
} else {
new.lnL=-Inf
lnLikelihoodRatio = -Inf
}

if(control\$sample.priors) lnLikelihoodRatio=0

lnR=(heat * lnLikelihoodRatio) + (heat * lnp) + lnh

r=.assess.lnR(lnR)

if(r\$error) .error.rjmcmc(gen, subprop, cur.lnL, new.lnL, lnLikelihoodRatio, lnp, lnh, control\$errorlog)
return(r)
}

#general phylogenetic utility for determining whether to accept ('r'=1) or reject ('r'=0) a proposed model in Markov chain Monte Carlo sampling
#author: JM EASTMAN 2010
.assess.lnR <- function(lnR) {
if(is.na(lnR) | is.infinite(abs(lnR))) {
error=TRUE
r=0
} else {
if(lnR < -20) {
r=0
} else if(lnR >= 0) {
r=1
} else {
r=exp(lnR)
}
error=FALSE
}
return(list(r=r, error=error))
}
```

## Try the geiger package in your browser

Any scripts or data that you put into this service are public.

geiger documentation built on July 8, 2020, 7:12 p.m.