Nothing
#DISCRETE TRAIT TREE PAINTING METHOD##
#written by Jeremy M. Beaulieu
corPAINT<-function(phy, data, ntraits=2, rate.mat=NULL, model=c("ER","SYM","ARD"), node.states=c("joint", "marginal", "scaled"), p=NULL, root.p=NULL, ip=NULL, lb=0, ub=100, diagn=FALSE){
if(is.null(phy$node.label)==TRUE){
stop("There are no node label designations")
}
#Creates the data structure and orders the rows to match the tree
phy$edge.length[phy$edge.length==0]=1e-5
if(ntraits==1){
data.sort<-data.frame(data[,2], data[,3], row.names=data[,1])
}
if(ntraits==2){
data.sort<-data.frame(data[,2], data[,3], data[,4], row.names=data[,1])
}
if(ntraits==3){
data.sort<-data.frame(data[,2], data[,3], data[,4], data[,5], row.names=data[,1])
}
data.sort<-data.sort[phy$tip.label,]
#Some initial values for use later - will clean up
k=ntraits
nl=2
# Check to make sure values are reasonable (i.e. non-negative)
if(ub < 0){
ub <- 100
}
if(lb < 0){
lb <- 0
}
if(ub < lb){ # This user really needs help
ub <- 100
lb <- 0
}
obj <- NULL
nb.tip<-length(phy$tip.label)
nb.node <- phy$Nnode
ntraits=ntraits
model=model
root.p=root.p
ip=ip
if(ntraits==1){
tot.states <- factor(c(phy$node.label,as.character(data[,3])))
nregimes <- length(levels(tot.states))
regimes <- numeric(nb.tip+nb.node)
TIPS <- 1:nb.tip
regimes[TIPS]<-data[,3]
regimes[-TIPS]<-as.numeric(phy$node.label)
}
if(ntraits==2){
tot.states <- factor(c(phy$node.label,as.character(data[,4])))
nregimes <- length(levels(tot.states))
regimes <- numeric(nb.tip+nb.node)
TIPS <- 1:nb.tip
regimes[TIPS]<-data[,4]
regimes[-TIPS]<-as.numeric(phy$node.label)
}
if(ntraits==3){
tot.states <- factor(c(phy$node.label,as.character(data[,5])))
nregimes <- length(levels(tot.states))
regimes <- numeric(nb.tip+nb.node)
TIPS <- 1:nb.tip
regimes[TIPS] <- data[,5]
regimes[-TIPS] <- as.numeric(phy$node.label)
}
model.set.final<-rate.mat.set.paint(phy,data.sort,nregimes,ntraits,model)
if(!is.null(rate.mat)){
rate <- rate.mat
rate[is.na(rate)]=max(rate, na.rm=TRUE)+1
model.set.final$rate <- rate
model.set.final$index.matrix <- rate.mat
}
lower = rep(lb, model.set.final$np)
upper = rep(ub, model.set.final$np)
opts <- list("algorithm"="NLOPT_LN_SBPLX", "maxeval"="1000000", "ftol_rel"=.Machine$double.eps^0.5)
if(!is.null(p)){
cat("Calculating likelihood from a set of fixed parameters", "\n")
out<-NULL
out$solution<-p
out$objective<-dev.corpaint(out$solution,phy=phy,liks=model.set.final$liks,Q=model.set.final$Q,rate=model.set.final$rate,root.p=root.p)
loglik <- -out$objective
est.pars<-out$solution
}
else{
if(is.null(ip)){
cat("Initializing...", "\n")
#If the analysis is to be run a single processor:
#Sets parameter settings for random restarts by taking the parsimony score and dividing
#by the total length of the tree
model.set.init<-rate.mat.set.paint(phy,data.sort,nregimes,ntraits,model="ER")
opts <- list("algorithm"="NLOPT_LN_SBPLX", "maxeval"="1000000", "ftol_rel"=.Machine$double.eps^0.5)
dat<-as.matrix(data.sort)
dat<-phyDat(dat,type="USER", levels=c("0","1"))
par.score<-parsimony(phy, dat, method="fitch")
tl <- sum(phy$edge.length)
mean = par.score/tl
ip<-rexp(1, 1/mean)
lower.init = rep(lb, model.set.init$np)
upper.init = rep(ub, model.set.init$np)
init = nloptr(x0=rep(ip, length.out = model.set.init$np), eval_f=dev.corpaint, lb=lower.init, ub=upper.init, opts=opts, phy=phy,liks=model.set.init$liks,Q=model.set.init$Q,rate=model.set.init$rate,regimes=regimes,root.p=root.p)
cat("Finished. Beginning thorough search...", "\n")
lower = rep(lb, model.set.final$np)
upper = rep(ub, model.set.final$np)
out = nloptr(x0=rep(init$solution, length.out = model.set.final$np), eval_f=dev.corpaint, lb=lower, ub=upper, opts=opts, phy=phy,liks=model.set.final$liks,Q=model.set.final$Q,rate=model.set.final$rate,regimes=regimes,root.p=root.p)
loglik <- -out$objective
est.pars<-out$solution
}
#If a user-specified starting value(s) is supplied:
else{
cat("Beginning subplex optimization routine -- Starting value(s):", ip, "\n")
opts <- list("algorithm"="NLOPT_LN_SBPLX", "maxeval"="1000000", "ftol_rel"=.Machine$double.eps^0.5)
out = nloptr(x0=rep(ip, length.out = model.set.final$np), eval_f=dev.corpaint, lb=lower, ub=upper, opts=opts, phy=phy,liks=model.set.final$liks,Q=model.set.final$Q,rate=model.set.final$rate,regimes=regimes,root.p=root.p)
loglik <- -out$objective
est.pars<-out$solution
}
}
#Starts the summarization process:
cat("Finished. Inferring ancestral states using", node.states, "reconstruction.","\n")
lik.anc <- ancRECON.paint(phy, data, est.pars, hrm=FALSE, rate.cat=NULL, ntraits=ntraits, method=node.states, model=model, root.p=root.p)
if(node.states == "marginal" || node.states == "scaled"){
pr<-apply(lik.anc$lik.anc.states,1,which.max)
phy$node.label <- pr
tip.states <- NULL
}
if(ntraits==1){
if (is.character(node.states)) {
if (node.states == "marginal"){
colnames(lik.anc$lik.anc.states) <- c("P(0)","P(1)")
}
}
}
if(ntraits==2){
if (is.character(node.states)) {
if (node.states == "marginal"){
colnames(lik.anc$lik.anc.states) <- c("P(0,0)","P(0,1)","P(1,0)","P(1,1)")
}
}
}
if(ntraits==3){
if (is.character(node.states)) {
if (node.states == "marginal"){
colnames(lik.anc$lik.anc.states) <- c("P(0,0,0)","P(1,0,0)","P(0,1,0)","P(0,0,1)","P(1,1,0)","P(1,0,1)","P(0,1,1)","P(1,1,1)")
}
}
}
if(node.states == "joint"){
phy$node.label <- lik.anc$lik.anc.states
tip.states <- lik.anc$lik.tip.states
}
cat("Finished. Performing diagnostic tests.", "\n")
#Approximates the Hessian using the numDeriv function
if(diagn==TRUE){
h <- hessian(func=dev.corpaint, x=est.pars, phy=phy,liks=model.set.final$liks,Q=model.set.final$Q,rate=model.set.final$rate,regimes=regimes,root.p=root.p)
hess.eig <- eigen(h,symmetric=TRUE)
eigval<-signif(hess.eig$values,2)
eigvect<-round(hess.eig$vectors, 2)
}
else{
h=matrix(0,dim(model.set.final$index.matrix[[1]])[1],dim(model.set.final$index.matrix[[1]])[1])
eigval=NULL
eigvect=NULL
}
solution<-solution.se<-model.set.final$index.matrix
for(i in 1:nregimes){
solution[[i]] <- matrix(est.pars[model.set.final$index.matrix[[i]]], dim(model.set.final$index.matrix[[i]]))
solution.se[[i]] <- matrix(sqrt(diag(pseudoinverse(h)))[model.set.final$index.matrix[[i]]], dim(model.set.final$index.matrix[[i]]))
if(ntraits==1){
rownames(solution[[i]]) <- rownames(solution.se[[i]]) <- c("(0)","(1)")
colnames(solution[[i]]) <- colnames(solution.se[[i]]) <- c("(0)","(1)")
}
if(ntraits==2){
rownames(solution[[i]]) <- rownames(solution.se[[i]]) <- c("(0,0)","(0,1)","(1,0)","(1,1)")
colnames(solution[[i]]) <- colnames(solution.se[[i]]) <- c("(0,0)","(0,1)","(1,0)","(1,1)")
}
if(ntraits==3){
rownames(solution[[i]]) <- rownames(solution.se[[i]]) <- c("(0,0,0)","(1,0,0)","(0,1,0)","(0,0,1)","(1,1,0)","(1,0,1)","(0,1,1)","(1,1,1)")
colnames(solution[[i]]) <- colnames(solution.se[[i]]) <- c("(0,0,0)","(1,0,0)","(0,1,0)","(0,0,1)","(1,1,0)","(1,0,1)","(0,1,1)","(1,1,1)")
}
}
obj = list(loglik = loglik, AIC = -2*loglik+2*model.set.final$np,AICc = -2*loglik+(2*model.set.final$np*(nb.tip/(nb.tip-model.set.final$np-1))),ntraits=ntraits, solution=solution, solution.se=solution.se, index.mat=model.set.final$index.matrix, opts=opts, data=data.sort, phy=phy, states=lik.anc$lik.anc.states, tip.states=tip.states, iterations=out$iterations, eigval=eigval, eigvect=eigvect)
class(obj)<-"corpaint"
return(obj)
}
#Print function
print.corpaint<-function(x,...){
ntips=Ntip(x$phy)
output<-data.frame(x$loglik,x$AIC,x$AICc,x$ntraits,ntips, row.names="")
names(output)<-c("-lnL","AIC","AICc","N.traits","ntax")
cat("\nFit\n")
print(output)
cat("\n")
param.est<- x$solution
cat("Rates\n")
print(param.est)
cat("\n")
if(any(x$eigval<0)){
index.matrix <- x$index.mat
#If any eigenvalue is less than 0 then the solution is not the maximum likelihood solution
if (any(x$eigval<0)) {
cat("The objective function may be at a saddle point", "\n")
}
}
else{
cat("Arrived at a reliable solution","\n")
}
}
dev.corpaint<-function(p,phy,liks,Q,rate,regimes,root.p){
nregimes<-length(levels(factor(regimes)))
root.reg<-as.numeric(phy$node.label[1])
nb.tip <- length(phy$tip.label)
nb.node <- phy$Nnode
TIPS <- 1:nb.tip
comp <- numeric(nb.tip + nb.node)
phy <- reorder(phy, "pruningwise")
#Obtain an object of all the unique ancestors
anc <- unique(phy$edge[,1])
if (any(is.nan(p)) || any(is.infinite(p))) return(1000000)
for(i in 1:nregimes){
Q[[i]]<- matrix(c(p, 0)[rate[[i]]],dim(rate[[1]])[1],dim(rate[[1]])[2])
diag(Q[[i]]) <- -rowSums(Q[[i]])
}
for (i in seq(from = 1, length.out = nb.node)){
#the ancestral node at row i is called focal
focal <- anc[i]
#Get descendant information of focal
desRows<-which(phy$edge[,1]==focal)
desNodes<-phy$edge[desRows,2]
v <- 1
for (desIndex in sequence(length(desRows))){
v<-v*expm(Q[[regimes[focal]]] * phy$edge.length[desRows[desIndex]], method=c("Ward77")) %*% liks[desNodes[desIndex],]
}
comp[focal] <- sum(v)
liks[focal, ] <- v/comp[focal]
}
#Specifies the root:
root <- nb.tip + 1L
#If any of the logs have NAs restart search:
if (is.na(sum(log(comp[-TIPS])))){return(1000000)}
else{
equil.root <- NULL
for(i in 1:ncol(Q[[root.reg]])){
posrows <- which(Q[[root.reg]][,i] >= 0)
rowsum <- sum(Q[[root.reg]][posrows,i])
poscols <- which(Q[[root.reg]][i,] >= 0)
colsum <- sum(Q[[root.reg]][i,poscols])
equil.root <- c(equil.root,rowsum/(rowsum+colsum))
}
if (is.null(root.p)){
flat.root = equil.root
k.rates <- 1/length(which(!is.na(equil.root)))
flat.root[!is.na(flat.root)] = k.rates
flat.root[is.na(flat.root)] = 0
loglik<- -(sum(log(comp[-TIPS])) + log(sum(flat.root * liks[root,])))
}
else{
#root.p==maddfitz will fix root probabilities according to FitzJohn et al 2009 Eq. 10:
if(is.character(root.p)){
equil.root[is.na(equil.root)] = 0
loglik <- -(sum(log(comp[-TIPS])) + log(sum(equil.root * liks[root,])))
if(is.infinite(loglik)){return(1000000)}
}
#root.p!==NULL will fix root probabilities based on user supplied vector:
else{
loglik<- -(sum(log(comp[-TIPS])) + log(sum(root.p * liks[root,])))
if(is.infinite(loglik)){return(1000000)}
}
}
}
loglik
}
rate.mat.set.paint<-function(phy,data.sort,nregimes,ntraits,model){
k=ntraits
nl=2
obj <- NULL
nb.tip<-length(phy$tip.label)
nb.node <- phy$Nnode
if(ntraits==1){
factored <- factorData(data.sort) # was acting on data, not data.sort
nl <- ncol(factored)
obj <- NULL
nb.tip<-length(phy$tip.label)
nb.node <- phy$Nnode
if (is.character(model)) {
if(model=="ER"){
rate <- lapply(1:nregimes,rate.mat.maker, hrm=FALSE, ntraits=1, nstates=2, model="ER")
np <- nregimes
for(i in 2:nregimes){
rate[[i]]<-rate[[i]]+i-1
}
index.matrix<-rate
for(i in 1:nregimes){
rate[[i]][is.na(rate[[i]])]<-max(rate[[nregimes]],na.rm=TRUE)+1
}
}
if(model=="ARD"){
rate <- lapply(1:nregimes,rate.mat.maker, hrm=FALSE, ntraits=1, model="ARD")
np <- length(rate[[1]][is.na(rate[[1]])==TRUE])*nregimes
for(i in 2:nregimes){
rate[[i]]<-rate[[i]]+(length(rate[[1]][is.na(rate[[1]])==TRUE])*(i-1))
}
index.matrix<-rate
for(i in 1:nregimes){
rate[[i]][is.na(rate[[i]])]<-max(rate[[nregimes]],na.rm=TRUE)+1
}
}
}
stateTable <- NULL # will hold 0s and 1s for likelihoods of each state at tip
for(column in 1:nl){
stateTable <- cbind(stateTable,factored[,column])
}
colnames(stateTable) <- colnames(factored)
ancestral <- matrix(0,nb.node,nl) # all likelihoods at ancestral nodes will be 0
liks <- rbind(stateTable,ancestral) # combine tip likelihoods & ancestral likelihoods
rownames(liks) <- NULL
}
if(ntraits==2){
if (is.character(model)) {
if(model=="ER"){
rate <- lapply(1:nregimes,rate.mat.maker, hrm=FALSE, ntraits=2, model="ER")
np <- nregimes
for(i in 2:nregimes){
rate[[i]]<-rate[[i]]+i-1
}
index.matrix<-rate
for(i in 1:nregimes){
rate[[i]][is.na(rate[[i]])]<-max(rate[[nregimes]],na.rm=TRUE)+1
}
}
if(model=="ARD"){
rate <- lapply(1:nregimes,rate.mat.maker, hrm=FALSE, ntraits=2, model="ARD")
np <- length(rate[[1]][is.na(rate[[1]])==TRUE])*nregimes
for(i in 2:nregimes){
rate[[i]]<-rate[[i]]+(length(rate[[1]][is.na(rate[[1]])==TRUE])*(i-1))
}
index.matrix<-rate
for(i in 1:nregimes){
rate[[i]][is.na(rate[[i]])]<-max(rate[[nregimes]],na.rm=TRUE)+1
}
}
}
x<-data.sort[,1]
y<-data.sort[,2]
liks <- matrix(0, nb.tip + nb.node, nl^k)
TIPS <- 1:nb.tip
for(i in 1:nb.tip){
if(is.na(x[i])){
x[i]=2
y[i]=2
}
}
for(i in 1:nb.tip){
if(x[i]==0 & y[i]==0){liks[i,1]=1}
if(x[i]==0 & y[i]==1){liks[i,2]=1}
if(x[i]==1 & y[i]==0){liks[i,3]=1}
if(x[i]==1 & y[i]==1){liks[i,4]=1}
if(x[i]==2 & y[i]==2){liks[i,1:4]=1}
}
}
if(ntraits==3){
rate<-rate.mat.maker(hrm=FALSE,ntraits=ntraits,model=model)
index.matrix<-rate
rate[is.na(rate)]<-max(rate,na.rm=TRUE)+1
x<-data.sort[,1]
y<-data.sort[,2]
z<-data.sort[,3]
liks <- matrix(0, nb.tip + nb.node, nl^k)
TIPS <- 1:nb.tip
for(i in 1:nb.tip){
if(is.na(x[i])){
x[i]=2
y[i]=2
z[i]=2
}
}
for(i in 1:nb.tip){
if(x[i]==0 & y[i]==0 & z[i]==0){liks[i,1]=1}
if(x[i]==1 & y[i]==0 & z[i]==0){liks[i,2]=1}
if(x[i]==0 & y[i]==1 & z[i]==0){liks[i,3]=1}
if(x[i]==0 & y[i]==0 & z[i]==1){liks[i,4]=1}
if(x[i]==1 & y[i]==1 & z[i]==0){liks[i,5]=1}
if(x[i]==1 & y[i]==0 & z[i]==1){liks[i,6]=1}
if(x[i]==0 & y[i]==1 & z[i]==1){liks[i,7]=1}
if(x[i]==1 & y[i]==1 & z[i]==1){liks[i,8]=1}
if(x[i]==2 & y[i]==2 & z[i]==2){liks[i,1:8]=1}
}
}
Q <- rate
for(i in 1:nregimes){
Q[[i]]<-Q[[i]]*0
}
obj$np<-np
obj$rate<-rate
obj$index.matrix<-index.matrix
obj$liks<-liks
obj$Q<-Q
obj
}
ancRECON.paint <- function(phy, data, p, method=c("joint", "marginal", "scaled"), hrm=TRUE, rate.cat, ntraits=NULL, charnum=NULL, rate.mat=NULL, model=c("ER", "SYM", "ARD"), root.p=NULL){
#Note: Does not like zero branches at the tips. Here I extend these branches by just a bit:
phy$edge.length[phy$edge.length<=1e-5]=1e-5
if(hrm==FALSE){
if(ntraits==1){
data.sort<-data.frame(data[,2],data[,3],row.names=data[,1])
}
if(ntraits==2){
data.sort<-data.frame(data[,2], data[,3], data[,4], row.names=data[,1])
}
if(ntraits==3){
data.sort<-data.frame(data[,2], data[,3], data[,4], data[,5], row.names=data[,1])
}
}
else{
data.sort <- data.frame(data[,2], data[,2],row.names=data[,1])
}
data.sort<-data.sort[phy$tip.label,]
root.reg<-as.numeric(phy$node.label[1])
#Some initial values for use later
k=2
obj <- NULL
nb.tip <- length(phy$tip.label)
nb.node <- phy$Nnode
root.p=root.p
if(ntraits==1){
tot.states <- factor(c(phy$node.label,as.character(data[,3])))
nregimes <- length(levels(tot.states))
regimes <- numeric(nb.tip+nb.node)
TIPS <- 1:nb.tip
regimes[TIPS]<-data[,3]
regimes[-TIPS]<-as.numeric(phy$node.label)
}
if(ntraits==2){
tot.states <- factor(c(phy$node.label,as.character(data[,4])))
nregimes <- length(levels(tot.states))
regimes <- numeric(nb.tip+nb.node)
TIPS <- 1:nb.tip
regimes[TIPS]<-data[,4]
regimes[-TIPS]<-as.numeric(phy$node.label)
}
if(ntraits==3){
tot.states <- factor(c(phy$node.label,as.character(data[,5])))
nregimes <- length(levels(tot.states))
regimes <- numeric(nb.tip+nb.node)
TIPS <- 1:nb.tip
regimes[TIPS] <- data[,5]
regimes[-TIPS] <- as.numeric(phy$node.label)
}
if(hrm==FALSE){
#Imported from Jeffs rayDISC -- will clean up later, but for now, it works fine:
if(ntraits==1){
k <- 1
factored <- factorData(data.sort) # was acting on data, not data.sort
nl <- ncol(factored)
nb.tip<-length(phy$tip.label)
nb.node <- phy$Nnode
if (is.character(model)) {
if(model=="ER"){
rate <- lapply(1:nregimes,rate.mat.maker, hrm=FALSE, ntraits=1, nstates=nl, model="ER")
np <- nregimes
for(i in 2:nregimes){
rate[[i]]<-rate[[i]]+i-1
}
index.matrix<-rate
for(i in 1:nregimes){
rate[[i]][is.na(rate[[i]])]<-max(rate[[nregimes]],na.rm=TRUE)+1
}
}
if(model=="ARD"){
rate <- lapply(1:nregimes,rate.mat.maker, hrm=FALSE, ntraits=1, nstates=nl, model="ARD")
np <- length(rate[[1]][is.na(rate[[1]])==TRUE])*nregimes
for(i in 2:nregimes){
rate[[i]]<-rate[[i]]+(length(rate[[1]][is.na(rate[[1]])==TRUE])*(i-1))
}
index.matrix<-rate
for(i in 1:nregimes){
rate[[i]][is.na(rate[[i]])]<-max(rate[[nregimes]],na.rm=TRUE)+1
}
}
}
stateTable <- NULL # will hold 0s and 1s for likelihoods of each state at tip
for(column in 1:nl){
stateTable <- cbind(stateTable,factored[,column])
}
colnames(stateTable) <- colnames(factored)
ancestral <- matrix(0,nb.node,nl) # all likelihoods at ancestral nodes will be 0
liks <- rbind(stateTable,ancestral) # combine tip likelihoods & ancestral likelihoods
rownames(liks) <- NULL
}
if(ntraits==2){
k=2
nl=2
if(is.null(rate.mat)){
if(model=="ER"){
rate <- lapply(1:nregimes,rate.mat.maker, hrm=FALSE, ntraits=2, model="ER")
np <- nregimes
for(i in 2:nregimes){
rate[[i]]<-rate[[i]]+i-1
}
index.matrix<-rate
for(i in 1:nregimes){
rate[[i]][is.na(rate[[i]])]<-max(rate[[nregimes]],na.rm=TRUE)+1
}
}
if(model=="ARD"){
rate <- lapply(1:nregimes,rate.mat.maker, hrm=FALSE, ntraits=2, model="ARD")
np <- length(rate[[1]][is.na(rate[[1]])==TRUE])*nregimes
for(i in 2:nregimes){
rate[[i]]<-rate[[i]]+(length(rate[[1]][is.na(rate[[1]])==TRUE])*(i-1))
}
index.matrix<-rate
for(i in 1:nregimes){
rate[[i]][is.na(rate[[i]])]<-max(rate[[nregimes]],na.rm=TRUE)+1
}
}
}
else{
rate<-rate.mat
rate[is.na(rate)]<-max(rate,na.rm=TRUE)+1
}
x<-data.sort[,1]
y<-data.sort[,2]
liks <- matrix(0, nb.tip + nb.node, nl^k)
TIPS <- 1:nb.tip
for(i in 1:nb.tip){
if(is.na(x[i])){
x[i]=2
}
if(is.na(y[i])){
y[i]=2
}
}
for(i in 1:nb.tip){
if(x[i]==0 & y[i]==0){liks[i,1]=1}
if(x[i]==0 & y[i]==1){liks[i,2]=1}
if(x[i]==1 & y[i]==0){liks[i,3]=1}
if(x[i]==1 & y[i]==1){liks[i,4]=1}
if(x[i]==2 & y[i]==0){liks[i,c(1,3)]=1}
if(x[i]==2 & y[i]==1){liks[i,c(2,4)]=1}
if(x[i]==0 & y[i]==2){liks[i,c(1,2)]=1}
if(x[i]==1 & y[i]==2){liks[i,c(3,4)]=1}
if(x[i]==2 & y[i]==2){liks[i,1:4]=1}
}
}
if(ntraits==3){
k=3
nl=2
if(is.null(rate.mat)){
rate<-rate.mat.maker(hrm=FALSE,ntraits=ntraits,model=model)
rate[is.na(rate)]<-max(rate,na.rm=TRUE)+1
}
else{
rate<-rate.mat
rate[is.na(rate)]<-max(rate,na.rm=TRUE)+1
}
x<-data.sort[,1]
y<-data.sort[,2]
z<-data.sort[,3]
liks <- matrix(0, nb.tip + nb.node, nl^k)
TIPS <- 1:nb.tip
for(i in 1:nb.tip){
if(is.na(x[i])){
x[i]=2
}
if(is.na(y[i])){
y[i]=2
}
if(is.na(z[i])){
z[i]=2
}
}
for(i in 1:nb.tip){
if(x[i]==0 & y[i]==0 & z[i]==0){liks[i,1]=1}
if(x[i]==1 & y[i]==0 & z[i]==0){liks[i,2]=1}
if(x[i]==0 & y[i]==1 & z[i]==0){liks[i,3]=1}
if(x[i]==0 & y[i]==0 & z[i]==1){liks[i,4]=1}
if(x[i]==1 & y[i]==1 & z[i]==0){liks[i,5]=1}
if(x[i]==1 & y[i]==0 & z[i]==1){liks[i,6]=1}
if(x[i]==0 & y[i]==1 & z[i]==1){liks[i,7]=1}
if(x[i]==1 & y[i]==1 & z[i]==1){liks[i,8]=1}
#If x is ambiguous but the rest are not:
if(x[i]==2 & y[i]==0 & z[i]==0){liks[i,c(1,2)]=1}
if(x[i]==2 & y[i]==1 & z[i]==0){liks[i,c(3,5)]=1}
if(x[i]==2 & y[i]==0 & z[i]==1){liks[i,c(4,6)]=1}
if(x[i]==2 & y[i]==1 & z[i]==1){liks[i,c(7,8)]=1}
#If y is ambiguous but the rest are not:
if(x[i]==0 & y[i]==2 & z[i]==0){liks[i,c(1,3)]=1}
if(x[i]==1 & y[i]==2 & z[i]==0){liks[i,c(2,5)]=1}
if(x[i]==0 & y[i]==2 & z[i]==1){liks[i,c(4,7)]=1}
if(x[i]==1 & y[i]==2 & z[i]==1){liks[i,c(6,8)]=1}
#If z is ambiguous but the rest are not:
if(x[i]==0 & y[i]==0 & z[i]==2){liks[i,c(1,4)]=1}
if(x[i]==0 & y[i]==1 & z[i]==2){liks[i,c(3,7)]=1}
if(x[i]==1 & y[i]==0 & z[i]==2){liks[i,c(2,6)]=1}
if(x[i]==1 & y[i]==1 & z[i]==2){liks[i,c(5,8)]=1}
#If x and y is ambiguous but z is not:
if(x[i]==2 & y[i]==2 & z[i]==0){liks[i,c(1,2,3,5)]=1}
if(x[i]==2 & y[i]==2 & z[i]==1){liks[i,c(4,6,7,8)]=1}
#If x and z is ambiguous but y is not:
if(x[i]==2 & y[i]==0 & z[i]==2){liks[i,c(1,2,4,6)]=1}
if(x[i]==2 & y[i]==1 & z[i]==2){liks[i,c(3,5,7,8)]=1}
#If y and z is ambiguous but x is not:
if(x[i]==0 & y[i]==2 & z[i]==2){liks[i,c(1,3,4,7)]=1}
if(x[i]==1 & y[i]==2 & z[i]==2){liks[i,c(2,5,6,8)]=1}
#All states are ambiguous:
if(x[i]==2 & y[i]==2 & z[i]==2){liks[i,1:8]=1}
}
}
Q <- rate
for(i in 1:nregimes){
Q[[i]]<-Q[[i]]*0
}
}
for(i in 1:nregimes){
Q[[i]]<- matrix(c(p, 0)[rate[[i]]],dim(rate[[1]])[1],dim(rate[[1]])[2])
diag(Q[[i]]) <- -rowSums(Q[[i]])
}
phy <- reorder(phy, "pruningwise")
TIPS <- 1:nb.tip
anc <- unique(phy$edge[,1])
if(method=="joint"){
lik.states<-numeric(nb.tip + nb.node)
comp<-matrix(0,nb.tip + nb.node,ncol(liks))
for (i in seq(from = 1, length.out = nb.node)) {
#The ancestral node at row i is called focal:
focal <- anc[i]
#Get descendant information of focal:
desRows<-which(phy$edge[,1]==focal)
#Get node information for each descendant:
desNodes<-phy$edge[desRows,2]
#Initiates a loop to check if any nodes are tips:
for (desIndex in sequence(length(desRows))){
#If a tip calculate C_y(i) for the tips and stores in liks matrix:
if(any(desNodes[desIndex]==phy$edge[,1])==FALSE){
liks[desNodes[desIndex],] <- expm(Q[[regimes[focal]]] * phy$edge.length[desRows[desIndex]], method=c("Ward77")) %*% liks[desNodes[desIndex],]
#Divide by the sum of the liks to deal with underflow issues:
liks[desNodes[desIndex],] <- liks[desNodes[desIndex],]/sum(liks[desNodes[desIndex],])
#Collects the likeliest state at the tips:
comp[desNodes[desIndex],] <- which.max(liks[desNodes[desIndex],])
}
}
#Collects t_z, or the branch subtending focal:
tz<-phy$edge.length[which(phy$edge[,2] == focal)]
if(length(tz)==0){
#The focal node is the root, calculate P_k:
if(is.null(root.p)){
root.state=1
for (desIndex in sequence(length(desRows))){
#This is the basic marginal calculation:
root.state <- root.state * expm(Q[[regimes[focal]]] * phy$edge.length[desRows[desIndex]], method=c("Ward77")) %*% liks[desNodes[desIndex],]
}
if(is.null(root.p)){
liks[focal, ] <- root.state
}
else{
if(is.character(root.p)){
equil.root <- NULL
for(i in 1:ncol(Q[[root.reg]])){
posrows <- which(Q[[root.reg]][,i] >= 0)
rowsum <- sum(Q[[root.reg]][posrows,i])
poscols <- which(Q[[root.reg]][i,] >= 0)
colsum <- sum(Q[[root.reg]][i,poscols])
equil.root <- c(equil.root,rowsum/(rowsum+colsum))
}
liks[focal, ] <- root.state * equil.root
}
else{
liks[focal, ] <- root.p
}
}
liks[focal, ] <- liks[focal,] / sum(liks[focal,])
}
}
else{
#Calculates P_ij(t_z):
Pij <- expm(Q[[regimes[focal]]] * tz, method=c("Ward77"))
#Calculates L_z(i):
if(hrm==TRUE){
v<-c(rep(1, k*rate.cat))
}
if(hrm==FALSE){
v<-c(rep(1, nl^k))
}
for (desIndex in sequence(length(desRows))){
v = v * liks[desNodes[desIndex],]
}
#Finishes L_z(i):
L <- t(Pij) * v
#Collects which is the highest likelihood and which state it corresponds to:
liks[focal,] <- apply(L, 2, max)
comp[focal,] <- apply(L, 2, which.max)
#Divide by the sum of the liks to deal with underflow issues:
liks[focal,] <- liks[focal,]/sum(liks[focal,])
}
}
root <- nb.tip + 1L
lik.states[root] <- which.max(liks[root,])
N <- dim(phy$edge)[1]
for(i in N:1){
des <- phy$edge[i,2]
tmp <- which.max(liks[des,])
lik.states[des] <- comp[des,tmp]
}
#Outputs likeliest tip states
obj$lik.tip.states <- lik.states[TIPS]
#Outputs likeliest node states
obj$lik.anc.states <- lik.states[-TIPS]
}
if(method=="marginal"){
#A temporary likelihood matrix so that the original does not get written over:
liks.down<-liks
#root equilibrium frequencies
if(is.null(root.p)){
equil.root<-rep(1/dim(Q[[1]])[2], dim(Q[[1]])[2])
}
else{
if(is.character(root.p)){
equil.root <- NULL
for(i in 1:ncol(Q[[root.reg]])){
posrows <- which(Q[[root.reg]][,i] >= 0)
rowsum <- sum(Q[[root.reg]][posrows,i])
poscols <- which(Q[[root.reg]][i,] >= 0)
colsum <- sum(Q[[root.reg]][i,poscols])
equil.root <- c(equil.root,rowsum/(rowsum+colsum))
}
}
else{
equil.root=root.p
}
}
#A transpose of Q for assessing probability of j to i, rather than i to j:
tranQ<-Q
for(i in 1:nregimes){
tranQ[[i]]<-t(Q[[i]])
}
comp<-matrix(0,nb.tip + nb.node,ncol(liks))
#The first down-pass: The same algorithm as in the main function to calculate the conditional likelihood at each node:
for (i in seq(from = 1, length.out = nb.node)) {
#the ancestral node at row i is called focal
focal <- anc[i]
#Get descendant information of focal
desRows<-which(phy$edge[,1]==focal)
desNodes<-phy$edge[desRows,2]
v <- 1
for (desIndex in sequence(length(desRows))){
v <- v*expm(Q[[regimes[focal]]] * phy$edge.length[desRows[desIndex]], method=c("Ward77")) %*% liks.down[desNodes[desIndex],]
}
comp[focal] <- sum(v)
liks.down[focal, ] <- v/comp[focal]
}
root <- nb.tip + 1L
#Enter the root defined root probabilities if they are supplied by the user:
if(!is.null(root.p)){
root <- nb.tip + 1L
liks.down[root, ]<-root.p
}
#The up-pass
liks.up<-liks
states<-apply(liks,1,which.max)
N <- dim(phy$edge)[1]
comp <- numeric(nb.tip + nb.node)
for(i in length(anc):1){
focal <- anc[i]
if(!focal==root){
#Gets mother and sister information of focal:
focalRow<-which(phy$edge[,2]==focal)
motherRow<-which(phy$edge[,1]==phy$edge[focalRow,1])
motherNode<-phy$edge[focalRow,1]
desNodes<-phy$edge[motherRow,2]
sisterNodes<-desNodes[(which(!desNodes==focal))]
sisterRows<-which(phy$edge[,2]%in%sisterNodes==TRUE)
#If the mother is not the root then you are calculating the probability of the being in either state.
#But note we are assessing the reverse transition, j to i, rather than i to j, so we transpose Q to carry out this calculation:
if(motherNode!=root){
v <- expm(tranQ[[regimes[motherNode]]] * phy$edge.length[which(phy$edge[,2]==motherNode)], method=c("Ward77")) %*% liks.up[motherNode,]
}
#If the mother is the root then just use the marginal. This can also be the prior, which I think is the equilibrium frequency.
#But for now we are just going to use the marginal at the root -- it is unclear what Mesquite does.
else{
v <- equil.root
}
#Now calculate the probability that each sister is in either state. Sister can be more than 1 when the node is a polytomy.
#This is essentially calculating the product of the mothers probability and the sisters probability:
for (sisterIndex in sequence(length(sisterRows))){
v <- v*expm(Q[[regimes[sisterNodes[sisterIndex]]]] * phy$edge.length[sisterRows[sisterIndex]], method=c("Ward77")) %*% liks.down[sisterNodes[sisterIndex],]
}
comp[focal] <- sum(v)
liks.up[focal,] <- v/comp[focal]
}
}
#The final pass
liks.final<-liks
comp <- numeric(nb.tip + nb.node)
for (i in seq(from = 1, length.out = nb.node-1)) { # In this final pass, root is never encountered. But its OK, because root likelihoods are set after loop.
#the ancestral node at row i is called focal
focal <- anc[i]
focalRow<-which(phy$edge[,2]==focal)
motherRow<-which(phy$edge[,1]==phy$edge[focalRow,1])
motherNode<-phy$edge[focalRow,1]
#Now you are assessing the change along the branch subtending the focal by multiplying the probability of
#everything at and above focal by the probability of the mother and all the sisters given time t:
v <- liks.down[focal,]*expm(tranQ[[regimes[motherNode]]] * phy$edge.length[focalRow], method=c("Ward77")) %*% liks.up[focal,]
comp[focal] <- sum(v)
liks.final[focal, ] <- v/comp[focal]
}
#Just add in the marginal at the root calculated on the original downpass or if supplied by the user:
liks.final[root,] <- liks.down[root,] * equil.root
root.final <- liks.down[root,] * equil.root
comproot <- sum(root.final)
liks.final[root,] <- root.final/comproot
#Reports just the probabilities at internal nodes:
obj$lik.anc.states <- liks.final[-TIPS, ]
}
if(method=="scaled"){
comp<-matrix(0,nb.tip + nb.node,ncol(liks))
#The same algorithm as in the main function. See comments in either corHMM.R or corDISC.R for details:
for (i in seq(from = 1, length.out = nb.node)) {
#the ancestral node at row i is called focal
focal <- anc[i]
#Get descendant information of focal
desRows<-which(phy$edge[,1]==focal)
desNodes<-phy$edge[desRows,2]
v <- 1
for (desIndex in sequence(length(desRows))){
v <- v*expm(Q[[regimes[focal]]] * phy$edge.length[desRows[desIndex]], method=c("Ward77")) %*% liks[desNodes[desIndex],]
}
comp[focal] <- sum(v)
liks[focal, ] <- v/comp[focal]
}
if(!is.null(root.p)){
root <- nb.tip + 1L
if(is.character(root.p)){
equil.root <- NULL
for(i in 1:ncol(Q[[root.reg]])){
posrows <- which(Q[[root.reg]][,i] >= 0)
rowsum <- sum(Q[[root.reg]][posrows,i])
poscols <- which(Q[[root.reg]][i,] >= 0)
colsum <- sum(Q[[root.reg]][i,poscols])
equil.root <- c(equil.root,rowsum/(rowsum+colsum))
}
liks[root, ] <- (liks[root,] * equil.root) / sum((liks[root,] * equil.root))
}
else{
liks[root, ] <- root.p
}
}
obj$lik.anc.states <- liks[-TIPS, ]
}
obj
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.