# pheno_correlated <- TRUE
# pheno_error <- TRUE
# REML <- TRUE
# EM_Fels_limit <- 1e3
# model <- "lambda"
# repeat_optim_limit <- 1
# EM_missing_limit <- 50
# repeat_optim_tol <- 1e-2
# model_par_evals <- 10
anc.recon <- function(trait_data, tree, vars = FALSE, CI = FALSE)
{
trait_data <- as.data.frame(trait_data)
tree <- tree[c("edge","tip.label","edge.length","Nnode")]
class(tree) <- "phylo"
tree <- reorder(tree,"postorder")
if(is.null(tree$node.label))
{
tree$node.label <- (length(tree$tip.label)+1):(length(tree$tip.label)+tree$Nnode)
}
if(inherits(trait_data,"numeric"))
{
if(is.null(names(trait_data))) names(trait_data) <- as.character(tree$tip.label)
trait_data <- as.matrix(trait_data)
} else if(inherits(trait_data,"data.frame"))
{
if(any(grepl("species",colnames(trait_data),ignore.case = TRUE)))
{
rownames(trait_data) <- as.character(trait_data[,grep("species",colnames(trait_data),ignore.case = TRUE)[1]])
trait_data <- trait_data[,-grep("species",colnames(trait_data),ignore.case = TRUE)[1]]
}
trait_data <- as.matrix(trait_data)
}
if(is.null(rownames(trait_data))) rownames(trait_data) <- tree$tip.label
trait_data <- trait_data[tree$tip.label,,drop=FALSE]
ret <- C_anc_recon(Y=trait_data,anc=tree$edge[,1],des=tree$edge[,2],edge_vec=tree$edge.length,nedge=nrow(tree$edge),nvar=ncol(trait_data),nspecies=length(tree$tip.label))
Yhat <- ret$Yhat[-(1:length(tree$tip.label)),,drop=FALSE]
rownames(Yhat) <- tree$node.label
if(!is.null(colnames(trait_data))) colnames(Yhat) <- colnames(trait_data) else colnames(Yhat) <- paste("V",1:ncol(trait_data),sep="")
if(vars || CI)
{
p <- ret$p[-(1:length(tree$tip.label))]
m2d <- multi2di(phy = tree,random = FALSE)
sigma <- colMeans(apply(trait_data,2,pic,phy=m2d)^2)
var <- matrix(1/p) %*% (sigma)
rownames(var) <- tree$node.label
colnames(var) <- colnames(Yhat)
if(CI)
{
lower <- Yhat + qnorm(.975)*sqrt(var)
upper <- Yhat - qnorm(.975)*sqrt(var)
rownames(lower) <- rownames(upper) <- tree$node.label
colnames(lower) <- colnames(upper) <- colnames(Yhat)
}
}
if((!vars & !CI)) return(Yhat) else if(!CI) return(list(Yhat=Yhat,vars=var)) else
if(!vars) return(list(Yhat=Yhat,lowerCI=lower,upperCI=upper)) else
return(list(Yhat=Yhat,vars=var,lowerCI=lower,upperCI=upper))
}
fast.SSC <- function(trait_data,tree,niter=1000)
{
trait_data <- as.data.frame(trait_data)
tree <- tree[c("edge","tip.label","edge.length","Nnode")]
class(tree) <- "phylo"
tree <- reorder(tree,"postorder")
nspecies <- length(tree$tip.label)
if(is.null(tree$node.label))
{
tree$node.label <- (nspecies+1):(nspecies+tree$Nnode)
}
if(inherits(trait_data,"numeric"))
{
if(is.null(names(trait_data))) names(trait_data) <- as.character(tree$tip.label)
trait_data <- as.matrix(trait_data)
} else if(inherits(trait_data,"data.frame"))
{
if(any(grepl("species",colnames(trait_data),ignore.case = TRUE)))
{
rownames(trait_data) <- as.character(trait_data[,grep("species",colnames(trait_data),ignore.case = TRUE)[1]])
trait_data <- trait_data[,-grep("species",colnames(trait_data),ignore.case = TRUE)[1]]
}
trait_data <- as.matrix(trait_data)
}
if(is.null(rownames(trait_data))) rownames(trait_data) <- tree$tip.label
trait_data <- trait_data[tree$tip.label,,drop=FALSE]
ret <- anc.recon(trait_data = trait_data,tree = tree,vars=TRUE)
Yhat <- ret$Yhat
vars <- rbind(trait_data*0,ret$vars)*(nspecies-1)/nspecies
SSC <- sum((rbind(trait_data,Yhat)[tree$edge[,2],] - rbind(trait_data,Yhat)[tree$edge[,1],])^2)
pval <- 0
for(i in 1:niter)
{
Y.r <- trait_data[sample(nspecies),,drop=FALSE]
ret.r <- C_anc_recon(Y=Y.r,anc=tree$edge[,1],des=tree$edge[,2],edge_vec=tree$edge.length,nedge=nrow(tree$edge),nvar=ncol(trait_data),nspecies=nspecies)
Yhat.r <- ret.r$Yhat[-(1:nspecies),,drop=FALSE]
SSC.r <- sum((rbind(Y.r,Yhat.r)[tree$edge[,2],] - rbind(Y.r,Yhat.r)[tree$edge[,1],])^2)
if(SSC.r<=SSC) pval <- pval + 1
}
pval <- pval/niter
exp_var <- sum(vars[tree$edge[,1],] + vars[tree$edge[,2],])
scaled <- SSC/exp_var
results <- list(pvalue=pval,scaled.SSC=scaled,SSC=SSC)
class(results) <- "SSC"
results
}
print.SSC <- function(x, ...)
{
cat("Total sum of squared changes (SSC)\n")
cat(x$SSC)
cat("\n\n")
cat("Scaled sum of squared changes\n")
cat(x$scaled.SSC)
cat("\n\n")
cat("P value\n")
cat(round(x$pvalue,4))
cat("\n\n")
}
phylopars <- function(trait_data,tree,model="BM",pheno_error,phylo_correlated=TRUE,pheno_correlated=TRUE,REML=TRUE,full_alpha=TRUE,phylocov_start,phenocov_start,model_par_start,phylocov_fixed,phenocov_fixed,model_par_fixed,skip_optim=FALSE,skip_EM=FALSE,EM_Fels_limit=1e3,repeat_optim_limit=1,EM_missing_limit=50,repeat_optim_tol = 1e-2,model_par_evals=10,max_delta=1e4,EM_verbose=FALSE,optim_verbose=FALSE,npd=FALSE,nested_optim=FALSE,usezscores=TRUE,phenocov_list=list(),ret_args=FALSE,ret_level=1,get_cov_CIs = FALSE)
{
trait_data <- as.data.frame(trait_data)
tree <- tree[c("edge","tip.label","edge.length","Nnode")]
class(tree) <- "phylo"
tree <- reorder(tree,"postorder")
if(model=="white" | model=="star") tree <- reorder(transf.branch.lengths(phy = tree,model = "lambda",parameters=list(lambda=0))$tree,"postorder")
if(is.null(tree$node.label))
{
tree$node.label <- (length(tree$tip.label)+1):(length(tree$tip.label)+tree$Nnode)
}
if(colnames(trait_data)[1]!="species")
{
stop("First column name of trait_data MUST be 'species' (all lower case).")
}
trait_data[,1] <- as.character(trait_data[,1])
if(!all(trait_data[,1] %in% tree$tip.label))
{
mismatch <- unique(trait_data[,1])[which(!(unique(trait_data[,1]) %in% tree$tip.label))]
stop(paste(length(mismatch)," species name(s) in the first column of trait_data did not match any names in tree$tip.label:\n",paste(unique(trait_data[,1])[which(!(unique(trait_data[,1]) %in% tree$tip.label))],collapse="\n"),sep=""))
}
# prevent crashing from trailing blank columns
col_rm <- apply(trait_data,2,function(X) all(is.na(X)))
if(any(col_rm))
{
trait_data <- trait_data[,-which(col_rm)]
}
# prevent crashing for blank (NA) species
row_rm <- apply(trait_data,1,function(X) is.na(X[[1]]))
if(any(row_rm))
{
trait_data <- trait_data[-which(row_rm),]
}
# prevent crashing for species not in tree
row_rm <- apply(trait_data,1,function(X) !(X[[1]] %in% tree$tip.label))
if(any(row_rm))
{
trait_data <- trait_data[-which(row_rm),]
warning("Dropping species (",unique(trait_data[which(row_rm),1]),") from data: not found in tree.")
}
# prevent crashing for NA column names
col_rename <- is.na(colnames(trait_data))
if(any(col_rename))
{
colnames(trait_data)[which(col_rename)] <- paste("V",which(col_rename)-1,sep="")
}
# prevent issues from "" column names
col_rename <- colnames(trait_data)==""
if(any(col_rename))
{
colnames(trait_data)[which(col_rename)] <- paste("V",which(col_rename)-1,sep="")
}
# Copied from geiger package, which is set to be archived on CRAN on May 9, 2022
name.check <- function (phy, data, data.names = NULL)
{
if (is.null(data.names)) {
if (is.vector(data)) {
data.names <- names(data)
}
else {
data.names <- rownames(data)
}
}
t <- phy$tip.label
r1 <- t[is.na(match(t, data.names))]
r2 <- data.names[is.na(match(data.names, t))]
r <- list(sort(r1), sort(r2))
names(r) <- cbind("tree_not_data", "data_not_tree")
class(r) <- "name.check"
if (length(r1) == 0 && length(r2) == 0) {
return("OK")
}
else {
return(r)
}
}
f_args <- as.list(environment())
drop_taxa <-
name.check(phy = tree,data.names = unique(as.character(trait_data$species)))
if(length(drop_taxa)>1)
{
if(length(drop_taxa$tree_not_data)>0)
{
add_data <- data.frame(species=as.character(drop_taxa$tree_not_data),
matrix(as.double(NA),length(drop_taxa$tree_not_data),ncol(trait_data)-1))
colnames(add_data) <- colnames(trait_data)
trait_data <- rbind(trait_data,add_data)
}
}
drop_tree <- multi2di(tree)
nvar <- ncol(trait_data)-1
if(nvar==1 & model=="mvOU") model <- "OU"
height <- mean(pruningwise.distFromRoot((tree)))
if(model=="lambda")
{
par_bounds <- seq(0,1,length=model_par_evals)
MOD_PAR_START <- .5
} else if(model=="kappa")
{
par_bounds <- seq(0,3,length=model_par_evals)
MOD_PAR_START <- .5
} else if(model=="delta")
{
par_bounds <- seq(10^(-7)/height,3/height,length=model_par_evals)
MOD_PAR_START <- .5
} else if(model=="OU" | model=="mvOU")
{
par_bounds <- seq(10^(-7)/height,50/height,length=model_par_evals)
MOD_PAR_START <- .5/height
} else if(model=="EB")
{
par_bounds <- seq(-3/height,0,length=model_par_evals)
MOD_PAR_START <- -1/height
}
evec <- function(bounds)
{
if((model=="OU" & bounds==0) | (model=="EB" & bounds==0) | (model=="lambda" & bounds==1) | (model=="delta" & bounds==1) | (model=="kappa" & bounds==1)) return(edge_vec)
if(model=="OU") edge_vec <- c(reorder(transf.branch.lengths(phy = tree,model = "OUfixedRoot",parameters=list(alpha=bounds))$tree,"postorder")$edge.length,0)
if(model=="EB") edge_vec <- c(reorder(transf.branch.lengths(phy = tree,model = "EB",parameters=list(rate=bounds))$tree,"postorder")$edge.length,0)
if(model=="lambda") edge_vec <- c(reorder(transf.branch.lengths(phy = tree,model = "lambda",parameters=list(lambda=bounds))$tree,"postorder")$edge.length,0)
if(model=="kappa") edge_vec <- c(reorder(transf.branch.lengths(phy = tree,model = "kappa",parameters=list(kappa=bounds))$tree,"postorder")$edge.length,0)
if(model=="delta") edge_vec <- c(reorder(transf.branch.lengths(phy = tree,model = "delta",parameters=list(delta=bounds))$tree,"postorder")$edge.length,0)
edge_vec
}
if(!is.ultrametric(tree) & (model=="OU" | model=="mvOU")) stop("OU model not currently supported for non-ultrametric trees.")
if(model=="mvOU" & FALSE)
{
pheno_error <- FALSE
pheno_correlated <- FALSE
}
zphenocov_list <- list()
force_optim <- FALSE
if(length(phenocov_list)>0)
{
force_optim <- TRUE
if(length(phenocov_list)==1) phenocov_list <- rep(phenocov_list,length(tree$tip.label))
if(length(which(names(phenocov_list) %in% tree$tip.label))<length(tree$tip.label)) names(phenocov_list) <- tree$tip.label
pheno_error <- pheno_correlated <- FALSE
phenocov_fixed <- phenocov_start <- zphenocov_fixed <- zphenocov_start <- matrix(NA)
} else phenocov_list <- zphenocov_list <- list()
if(missing(phylocov_start)) phylocov_start <- matrix(NA) else if(is.na(phylocov_start)[[1]]) phylocov_start <- matrix(NA)
if(missing(phenocov_start)) phenocov_start <- matrix(NA) else if(is.na(phenocov_start)[[1]]) phenocov_start <- matrix(NA)
if(missing(model_par_start)) model_par_start <- matrix(NA) else if(is.na(model_par_start)[[1]]) model_par_start <- matrix(NA)
zstand <- list(phylocov=diag(nvar),mu=matrix(rep(0,nvar)))
if(usezscores)
{
for(i in 1:nvar)
{
dat <- setNames(trait_data[,i+1],as.character(trait_data$species))
dat <- tapply(dat,names(dat),function(X) mean(X,na.rm=TRUE))
#dat <- dat[!is.na(dat)]
zstand$mu[i] <- mean(dat,na.rm=TRUE)
zstand$phylocov[i,i] <- var(dat,na.rm=TRUE)*(length(tree$tip.label)-1)/(length(tree$tip.label)-REML)
#drp <- name.check(phy = tree,data.names = names(dat))
#if(length(drp)>1)
#{
# if(length(drp[[1]])>0) temp_tree <- drop.tip(tree,drp$tree_not_data) else temp_tree <- tree
# if(length(drp[[2]])>0) dat <- dat[-match(drp$data_not_tree,names(dat))]
#} else
#{
# temp_tree <- tree
#}
#zstand$phylocov[i,i] <- sum(pic(x = dat,phy = multi2di(temp_tree,random = FALSE))^2) / (length(temp_tree$tip.label)-REML)
#zstand$mu[i] <- ace(x = dat,phy = multi2di(temp_tree,random=FALSE),method="pic")$ace[[1]]
}
}
if(length(phenocov_list)>0)
{
zphenocov_list <- lapply(phenocov_list,function(X) as.matrix(X / (sqrt(diag(zstand$phylocov)) %*% t(sqrt(diag(zstand$phylocov))))))[tree$tip.label]
phenocov_list <- phenocov_list[tree$tip.label]
zphenocov_list <- zphenocov_list[tree$tip.label]
}
if(missing(phylocov_fixed)) phylocov_fixed <- zphylocov_fixed <- matrix(NA) else if(!is.na(phylocov_fixed)[[1]]) zphylocov_fixed <- as.matrix(phylocov_fixed / (sqrt(diag(zstand$phylocov)) %*% t(sqrt(diag(zstand$phylocov))))) else phylocov_fixed <- zphylocov_fixed <- matrix(NA)
if(missing(phenocov_fixed)) phenocov_fixed <- zphenocov_fixed <- matrix(NA) else if(!is.na(phenocov_fixed)[[1]]) zphenocov_fixed <- as.matrix(phenocov_fixed / (sqrt(diag(zstand$phylocov)) %*% t(sqrt(diag(zstand$phylocov))))) else phenocov_fixed <- zphenocov_fixed <- matrix(NA)
if(missing(model_par_fixed)) model_par_fixed <- matrix(NA) else if(model=="mvOU") alpha <- model_par_fixed
if(is.na(model_par_fixed)[[1]]) model_par_fixed <- matrix(NA)
if(!is.na(phylocov_start)[[1]]) phylocov <- as.matrix(phylocov_start)
if(!is.na(phylocov_fixed)[[1]]) phylocov <- as.matrix(phylocov_fixed) else phylocov <- diag(nvar)
if(!is.na(phenocov_start)[[1]]) phenocov <- as.matrix(phenocov_start)
if(!is.na(phenocov_fixed)[[1]]) phenocov <- as.matrix(phenocov_fixed) else phenocov <- diag(nvar)
if(!is.na(model_par_start)[[1]]) model_par <- as.matrix(model_par_start)
if(!is.na(model_par_fixed)[[1]]) model_par <- as.matrix(model_par_fixed) else if(model=="mvOU") model_par <- diag(nvar)
if(!is.na(model_par_fixed[[1]])) nested_optim <- TRUE
mu <- matrix(0)
nspecies <- length(tree$tip.label)
nedge <- length(tree$edge.length)
if(!missing(pheno_error)) if(pheno_error==FALSE) pheno_correlated <- FALSE
if(pheno_correlated) pheno_error <- TRUE
if(max(table(as.character(trait_data$species)))>1 & if(!missing(pheno_error)) !pheno_error else FALSE)
{
temp_dat <- convert_to_means(trait_data,sort_vec=tree$tip.label)
temp_dat <- data.frame(species=rownames(temp_dat),temp_dat)
trait_data <- temp_dat
rm(temp_dat)
} else if(max(table(as.character(trait_data$species)))==1) pheno_error <- pheno_correlated <- FALSE else pheno_error <- TRUE
if(!is.na(phenocov_fixed[1,1])) pheno_error <- pheno_correlated <- TRUE
if(pheno_correlated) pheno_error <- 2 else if(pheno_error) pheno_error <- 1 else pheno_error <- 0
if(pheno_error==0)
{
phenocov_fixed <- phenocov_start <- matrix(NA)
}
# index for model parameters
if(!is.na(phylocov_fixed)[[1]]) phylocov_pars <- numeric() else if(phylo_correlated) phylocov_pars <- 1:((nvar^2-nvar)/2+nvar) else phylocov_pars <- 1:nvar
if(!is.na(phenocov_fixed)[[1]]) phenocov_pars <- numeric() else if(pheno_error>0) phenocov_pars <- if(pheno_correlated) length(phylocov_pars) + 1:((nvar^2-nvar)/2+nvar) else (length(phylocov_pars)+1):(length(phylocov_pars)+nvar)
if(model=="mvOU")
{
if(!is.na(model_par_fixed)[[1]]) alpha_pars <- numeric() else alpha_pars <- if(full_alpha) length(phylocov_pars) + 1:((nvar^2-nvar)/2+nvar) else (length(phylocov_pars)+1):(length(phylocov_pars)+nvar)
if(is.na(model_par_fixed)[[1]]) if(pheno_error>0) alpha_pars <- alpha_pars + length(phenocov_pars)
} else alpha_pars <- numeric()
if(!is.na(model_par_fixed[[1]])) perm_model <- model else perm_model <- NA
# which variables are represented (by row) in trait_data
species_subset <- lapply(apply(trait_data[,1:nvar+1,drop=FALSE],1,function(X) list(which(!is.na(X))-1)),function(X) X[[1]])
# which variables are NOT represented (by row) in trait_data
un_species_subset <- as.list(apply(trait_data[,1:nvar+1,drop=FALSE],1,function(X) which(is.na(X))-1))
# what unique combinations of variables subsets exist
subset_list <- unique(species_subset)
# what combination of unique variable combinations do each row correspond to
tip_combn <- match(species_subset,subset_list)-1
# edge indices
anc <- tree$edge[,1]-1
des <- tree$edge[,2]-1
# number of individuals (rows) observed
nind <- nrow(trait_data)
# is the data complete
complete_data <- if(all(complete.cases(trait_data))) TRUE else FALSE
if(complete_data) EM_missing_limit <- 1
# which row of X and vectorized Y corresponds to a given observation (row)
inds <- t((!is.na(trait_data[,1:nvar+1,drop=FALSE])) * 1:nind)
inds[inds==0] <- NA
inds[(1:length(inds))[!is.na(inds)]] <- 1:length(inds[(1:length(inds))[!is.na(inds)]])
ind_list <- lapply(apply(inds,2,function(X) list(X[!is.na(X)]-1)),function(X) X[[1]])
# design matrix for a vectorized Y (zeros and ones)
X <- matrix(0,length(na.exclude(as.integer(inds))),nvar)
# design matrix if X had complete data (for EM algorithm)
X_complete <- matrix(0,nrow(trait_data)*nvar,nvar)
for(i in 1:nind)
{
X_complete[1:nvar+(i-1)*nvar,] <- diag(nvar)
if(length(species_subset[[i]])>1)
{
diag(X[inds[which(!is.na(inds[,i])),i],species_subset[[i]]+1]) <- 1
} else X[inds[which(!is.na(inds[,i])),i],species_subset[[i]]+1] <- 1
}
# vectorized Y
R <- matrix(na.exclude(as.double(t(trait_data[,1:nvar+1,drop=FALSE]))),ncol=1)
# matrix form of Y
Rmat <- trait_data[,1:nvar+1,drop=FALSE]
# total number of observations (e.g., for complete data, nind*nvar)
nob <- nrow(X)
# edge lengths
edge_vec <- tree$edge.length
species_ind <- match(trait_data$species,tree$tip.label)
# what edge number corrsponds to the descendant for a given individual (row)
edge_ind <- match(species_ind,tree$edge[,2])-1
# what individual (row number) corresponds to a given terminal edge
ind_edge <- match(tree$edge[,2],species_ind)-1
# map to link descendant nodes to parent nodes
parent_edges <- match(tree$edge[,1],tree$edge[,2],nomatch = nrow(tree$edge)+1)
edge_vec <- c(edge_vec,0)
parent_edges <- c(parent_edges,parent_edges[length(parent_edges)])-1
# does a given edge number correspond to a terminal edge
is_edge_ind <- rep(FALSE,nedge+1)
is_edge_ind[which(tree$edge[,2]<=nspecies)] <- TRUE
# same as above, but if data were complete
species_subset_complete <- rep(list(1:nvar-1),nind)
un_species_subset_complete <- rep(list(),nind)
subset_list_complete <- list(1:nvar-1)
ind_list_complete <- lapply(apply(matrix(1:(nvar*nind)-1,nrow=nvar),2,function(X) list(X)),function(X) X[[1]])
tip_combn_complete <- rep(0,nind)
em_tol <- 1e-3
if(ret_args)
{
return(list(L=X, R=R, Rmat = as.matrix(Rmat),mL=ncol(X), mR=1, pheno_error=pheno_error, edge_vec=edge_vec,
edge_ind=edge_ind,ind_edge=ind_edge, parent_edges = parent_edges,pars=double(), nvar=nvar,
phylocov_diag=as.integer(!phylo_correlated), nind=nind, nob=nob, nspecies=nspecies, nedge=nedge, anc=anc, des=des, REML=as.integer(REML),
species_subset=species_subset, un_species_subset = un_species_subset,subset_list=subset_list,
ind_list=ind_list, tip_combn=tip_combn,is_edge_ind=is_edge_ind,fixed_mu=matrix(0),ret_level=ret_level,
is_phylocov_fixed=as.integer(!is.na(phylocov_fixed)[[1]]),phylocov_fixed=phylocov_fixed,is_phenocov_list=length(phenocov_list),phenocov_list=phenocov_list,
is_phenocov_fixed=as.integer(!is.na(phenocov_fixed)[[1]]),phenocov_fixed=phenocov_fixed,OU_len=list()))
}
# function for estimating phylogenetic/phenotypic covariance using EM and numerical optimization
estim_pars <- function(edge_vec,do_optim=FALSE,phylocov=diag(nvar),phenocov=diag(nvar),mu=matrix(0))
{
tree$edge.length <- edge_vec[1:nedge]
# function for numerical optimization
# if the difference between the EM log-likelihood and the current log-likelihood is extremely high, reject (likely numerical precision issues)
f <- function(pars,em_ll,R,Rmat,phylocov_fixed,phenocov_fixed,phenocov_list)
{
if(!nested_optim & (model=="lambda" | model=="OU" | model=="EB" | model=="kappa" | model=="delta"))
{
edge_vec <- evec(((max(par_bounds) - min(par_bounds)) / (1+exp(-(pars[length(pars)])))) + min(par_bounds))
tree$edge.length[1:nedge] <- edge_vec[1:nedge]
}
if(is.na(phylocov_fixed)[[1]]) phylocov <- pars_to_mat(pars[phylocov_pars],nvar,as.integer(!phylo_correlated)) else phylocov <- phylocov_fixed
if(pheno_error>0) if(is.na(phenocov_fixed)[[1]]) phenocov <- pars_to_mat(pars[phenocov_pars],nvar,pheno_error) else phenocov <- phenocov_fixed
pars <- numeric()
if(is.na(phylocov_fixed)[[1]])
{
if(npd) phylocov <- try(as.matrix(nearPD(phylocov)$mat),silent=TRUE)
if(inherits(phylocov,"try-error")) return(em_ll - max_delta)
pars <- mat_to_pars(phylocov,nvar,as.integer(!phylo_correlated))
if(inherits(pars,"try-error")) return(em_ll - max_delta)
}
if(is.na(phenocov_fixed)[[1]])
{
if(npd) phenocov <- try(as.matrix(nearPD(phenocov)$mat),silent=TRUE)
if(inherits(phenocov,"try-error")) return(em_ll - max_delta)
if(pheno_error>0) pars <- c(pars,mat_to_pars(phenocov,nvar,pheno_error))
if(inherits(pars,"try-error")) return(em_ll - max_delta)
}
ll <- try(tp(L=X, R=R, Rmat = as.matrix(Rmat),mL=ncol(X), mR=1, pheno_error=pheno_error, edge_vec=edge_vec,
edge_ind=edge_ind,ind_edge=ind_edge, parent_edges = parent_edges,pars=pars, nvar=nvar,
phylocov_diag=as.integer(!phylo_correlated), nind=nind, nob=nob, nspecies=nspecies, nedge=nedge, anc=anc, des=des, REML=as.integer(REML),
species_subset=species_subset, un_species_subset = un_species_subset,subset_list=subset_list,
ind_list=ind_list, tip_combn=tip_combn,is_edge_ind=is_edge_ind,fixed_mu=matrix(0),ret_level=1,
is_phylocov_fixed=as.integer(!is.na(phylocov_fixed)[[1]]),phylocov_fixed=phylocov_fixed, is_phenocov_list=length(phenocov_list),phenocov_list=phenocov_list,
is_phenocov_fixed=as.integer(!is.na(phenocov_fixed)[[1]]),phenocov_fixed=phenocov_fixed,OU_len=list())$logl[[1]],silent=TRUE)
if(inherits(ll,"try-error")) ll <- em_ll-max_delta
if(is.na(ll)) ll <- em_ll - max_delta
if(abs(abs(em_ll)-abs(ll))>max_delta) ll <- em_ll - max_delta
if(ll==(em_ll-max_delta)) return(ll)
temp_pars <- numeric()
if(is.na(phylocov_fixed)[[1]])
{
temp_pars <- mat_to_pars(phylocov+diag(nvar)*1e-6,nvar,as.integer(!phylo_correlated))
if(inherits(pars,"try-error")) return(em_ll - max_delta)
}
if(is.na(phenocov_fixed)[[1]])
{
if(pheno_error>0) temp_pars <- c(temp_pars,mat_to_pars(phenocov+diag(nvar)*1e-6,nvar,pheno_error))
if(inherits(pars,"try-error")) return(em_ll - max_delta)
}
temp_ll2 <- try(tp(L=X, R=R, Rmat = as.matrix(Rmat),mL=ncol(X), mR=1, pheno_error=pheno_error, edge_vec=edge_vec,
edge_ind=edge_ind,ind_edge=ind_edge, parent_edges = parent_edges,pars=temp_pars, nvar=nvar,
phylocov_diag=as.integer(!phylo_correlated), nind=nind, nob=nob, nspecies=nspecies, nedge=nedge, anc=anc, des=des, REML=as.integer(REML),
species_subset=species_subset, un_species_subset = un_species_subset,subset_list=subset_list,
ind_list=ind_list, tip_combn=tip_combn,is_edge_ind=is_edge_ind,fixed_mu=matrix(0),ret_level=1,
is_phylocov_fixed=as.integer(!is.na(phylocov_fixed)[[1]]),phylocov_fixed=phylocov_fixed,is_phenocov_list=length(phenocov_list),phenocov_list=phenocov_list,
is_phenocov_fixed=as.integer(!is.na(phenocov_fixed)[[1]]),phenocov_fixed=phenocov_fixed,OU_len=list())$logl[[1]],silent=TRUE)[[1]]
if(!inherits(temp_ll2,"try-error")) if(!is.na(temp_ll2)) if(abs(temp_ll2-ll[[1]])>100)
{
ll <- em_ll-max_delta
}
if(optim_verbose) cat(c(ll,"\n"))
ll
}
if(!skip_EM | complete_data)
{
# if estimating phenotypic covariance, use Felsenstein 2008 EM algorithm
# if missing data, nest Felsenstein 2008 EM algorithm within missing data EM algorithm
if(pheno_error!=0)
{
pars <- numeric()
if(is.na(phylocov_fixed)[[1]]) pars <- mat_to_pars(phylocov,nvar,as.integer(!phylo_correlated))
if(is.na(phenocov_fixed)[[1]]) pars <- c(pars,mat_to_pars(phenocov,nvar,pheno_error))
imputed <- tp(L=X, R=R, Rmat = as.matrix(Rmat),mL=ncol(X), mR=1, pheno_error=pheno_error, edge_vec=edge_vec,
edge_ind=edge_ind,ind_edge=ind_edge, parent_edges = parent_edges,pars=pars, nvar=nvar,
phylocov_diag=as.integer(!phylo_correlated), nind=nind, nob=nob, nspecies=nspecies, nedge=nedge, anc=anc, des=des, REML=as.integer(REML),
species_subset=species_subset, un_species_subset = un_species_subset,subset_list=subset_list,
ind_list=ind_list, tip_combn=tip_combn,is_edge_ind=is_edge_ind,fixed_mu=mu,ret_level=3,
is_phylocov_fixed=as.integer(!is.na(phylocov_fixed)[[1]]),phylocov_fixed=phylocov_fixed,is_phenocov_list=0,phenocov_list=list(),
is_phenocov_fixed=as.integer(!is.na(phenocov_fixed)[[1]]),phenocov_fixed=phenocov_fixed,OU_len=list())
imputed_dat <- data.frame(species=trait_data$species,imputed$recon_ind)
args <- prep_em(imputed_dat,tree)
AP <- EM_Fels2008(pics=args[[1]],vars=args[[2]],phylocov=phylocov,phenocov=phenocov,nvar=nvar,tol=em_tol,diag_pheno=as.integer(!pheno_correlated),EM_Fels_limit=EM_Fels_limit,REML=as.integer(REML),diag_phylo=as.integer(!phylo_correlated),
is_phylocov_fixed=as.integer(!is.na(phylocov_fixed)[[1]]),phylocov_fixed=phylocov_fixed,is_phenocov_fixed=as.integer(!is.na(phenocov_fixed)[[1]]),phenocov_fixed=phenocov_fixed)
best_phylocov <- phylocov <- AP[1:nvar,1:nvar,drop=FALSE]
best_phenocov <- phenocov <- AP[1:nvar+nvar,1:nvar,drop=FALSE]
pars <- numeric()
if(is.na(phylocov_fixed)[[1]]) pars <- mat_to_pars(phylocov,nvar,as.integer(!phylo_correlated))
if(is.na(phenocov_fixed)[[1]]) pars <- c(pars,mat_to_pars(phenocov,nvar,pheno_error))
ll2 <- tp(L=X, R=R, Rmat = as.matrix(Rmat),mL=ncol(X), mR=1, pheno_error=pheno_error, edge_vec=edge_vec,
edge_ind=edge_ind,ind_edge=ind_edge, parent_edges = parent_edges,pars=pars, nvar=nvar,
phylocov_diag=as.integer(!phylo_correlated), nind=nind, nob=nob, nspecies=nspecies, nedge=nedge, anc=anc, des=des, REML=as.integer(REML),
species_subset=species_subset, un_species_subset = un_species_subset,subset_list=subset_list,
ind_list=ind_list, tip_combn=tip_combn,is_edge_ind=is_edge_ind,fixed_mu=matrix(0),ret_level=2,
is_phylocov_fixed=as.integer(!is.na(phylocov_fixed)[[1]]),phylocov_fixed=phylocov_fixed,is_phenocov_list=0,phenocov_list=list(),
is_phenocov_fixed=as.integer(!is.na(phenocov_fixed)[[1]]),phenocov_fixed=phenocov_fixed,OU_len=list())
mu <- ll2$theta
ll <- ll2[[1]]
for(i in 1:EM_missing_limit)
{
imputed <- tp(L=X, R=R, Rmat = as.matrix(Rmat),mL=ncol(X), mR=1, pheno_error=pheno_error, edge_vec=edge_vec,
edge_ind=edge_ind,ind_edge=ind_edge, parent_edges = parent_edges,pars=pars, nvar=nvar,
phylocov_diag=as.integer(!phylo_correlated), nind=nind, nob=nob, nspecies=nspecies, nedge=nedge, anc=anc, des=des, REML=as.integer(REML),
species_subset=species_subset, un_species_subset = un_species_subset,subset_list=subset_list,
ind_list=ind_list, tip_combn=tip_combn,is_edge_ind=is_edge_ind,fixed_mu=mu,ret_level=3,
is_phylocov_fixed=as.integer(!is.na(phylocov_fixed)[[1]]),phylocov_fixed=phylocov_fixed,is_phenocov_list=0,phenocov_list=list(),
is_phenocov_fixed=as.integer(!is.na(phenocov_fixed)[[1]]),phenocov_fixed=phenocov_fixed,OU_len=list())
imputed_dat <- data.frame(species=trait_data$species,imputed$recon_ind)
args <- prep_em(imputed_dat,tree)
AP <- EM_Fels2008(pics=args[[1]],vars=args[[2]],phylocov=phylocov,phenocov=phenocov,nvar=nvar,tol=em_tol,diag_pheno=as.integer(!pheno_correlated),EM_Fels_limit=EM_Fels_limit,REML=as.integer(REML),diag_phylo=as.integer(!phylo_correlated),
is_phylocov_fixed=as.integer(!is.na(phylocov_fixed)[[1]]),phylocov_fixed=phylocov_fixed,is_phenocov_fixed=as.integer(!is.na(phenocov_fixed)[[1]]),phenocov_fixed=phenocov_fixed)
AP[1:nvar+nvar,1:nvar] <- AP[1:nvar+nvar,1:nvar,drop=FALSE]
phylocov <- AP[1:nvar,1:nvar,drop=FALSE]
phenocov <- AP[1:nvar+nvar,1:nvar,drop=FALSE]
pars <- numeric()
if(is.na(phylocov_fixed)[[1]]) pars <- mat_to_pars(phylocov,nvar,as.integer(!phylo_correlated))
if(is.na(phenocov_fixed)[[1]]) pars <- c(pars,mat_to_pars(phenocov,nvar,pheno_error))
AP <- EM_Fels2008(pics=args[[1]],vars=args[[2]],phylocov=phylocov,phenocov=phenocov,nvar=nvar,tol=em_tol,diag_pheno=as.integer(!pheno_correlated),EM_Fels_limit=EM_Fels_limit,REML=as.integer(REML),diag_phylo=as.integer(!phylo_correlated),
is_phylocov_fixed=as.integer(!is.na(phylocov_fixed)[[1]]),phylocov_fixed=phylocov_fixed,is_phenocov_fixed=as.integer(!is.na(phenocov_fixed)[[1]]),phenocov_fixed=phenocov_fixed)
AP[1:nvar+nvar,1:nvar] <- AP[1:nvar+nvar,1:nvar,drop=FALSE] + imputed$tip_uncertainty/(nind-nvar)
phylocov <- AP[1:nvar,1:nvar,drop=FALSE]
if(is.na(phenocov_fixed)[[1]]) phenocov <- AP[1:nvar+nvar,1:nvar,drop=FALSE]
pars <- numeric()
if(is.na(phylocov_fixed)[[1]]) pars <- mat_to_pars(phylocov,nvar,as.integer(!phylo_correlated))
if(is.na(phenocov_fixed)[[1]]) pars <- c(pars,mat_to_pars(phenocov,nvar,pheno_error))
mu <- tp(L=X_complete, R=matrix(as.double(t(imputed$recon_ind)),ncol=1), Rmat = imputed$recon_ind,mL=ncol(X), mR=1, pheno_error=pheno_error, edge_vec=edge_vec,
edge_ind=edge_ind,ind_edge=ind_edge, parent_edges = parent_edges,pars=pars, nvar=nvar,
phylocov_diag=as.integer(!phylo_correlated), nind=nind, nob=nob, nspecies=nspecies, nedge=nedge, anc=anc, des=des, REML=as.integer(REML),
species_subset=species_subset_complete, un_species_subset = un_species_subset_complete,subset_list=subset_list_complete,
ind_list=ind_list_complete, tip_combn=tip_combn_complete,is_edge_ind=is_edge_ind,fixed_mu=matrix(0),ret_level=2,
is_phylocov_fixed=as.integer(!is.na(phylocov_fixed)[[1]]),phylocov_fixed=phylocov_fixed,is_phenocov_list=0,phenocov_list=list(),
is_phenocov_fixed=as.integer(!is.na(phenocov_fixed)[[1]]),phenocov_fixed=phenocov_fixed,OU_len=list())$theta
ll2 <- tp(L=X, R=R, Rmat = as.matrix(Rmat),mL=ncol(X), mR=1, pheno_error=pheno_error, edge_vec=edge_vec,
edge_ind=edge_ind,ind_edge=ind_edge, parent_edges = parent_edges,pars=pars, nvar=nvar,
phylocov_diag=as.integer(!phylo_correlated), nind=nind, nob=nob, nspecies=nspecies, nedge=nedge, anc=anc, des=des, REML=as.integer(REML),
species_subset=species_subset, un_species_subset = un_species_subset,subset_list=subset_list,
ind_list=ind_list, tip_combn=tip_combn,is_edge_ind=is_edge_ind,fixed_mu=matrix(0),ret_level=1,
is_phylocov_fixed=as.integer(!is.na(phylocov_fixed)[[1]]),phylocov_fixed=phylocov_fixed,is_phenocov_list=0,phenocov_list=list(),
is_phenocov_fixed=as.integer(!is.na(phenocov_fixed)[[1]]),phenocov_fixed=phenocov_fixed,OU_len=list())$logl
if(ll2<ll)
{
phylocov <- best_phylocov
phenocov <- best_phenocov
ll2 <- ll
break
} else
{
best_phylocov <- phylocov
best_phenocov <- phenocov
ll <- ll2
if(EM_verbose) cat(c(ll2,"\n"))
}
}
} else if(is.na(phylocov_fixed)[[1]])
{
# EM algorithm for missing data (no phenotypic error)
pars <- mat_to_pars(phylocov,nvar,as.integer(!phylo_correlated))
mu <- matrix(colMeans(trait_data[,1:nvar+1,drop=FALSE],na.rm = TRUE))
dat <- as.matrix(trait_data[,1:nvar+1,drop=FALSE])
rownames(dat) <- trait_data$species
for(i in 1:ncol(Rmat)) dat[is.na(dat[,i]),i] <- mu[i]
mu <- tp(L=X_complete, R=as.matrix(as.double(dat)), Rmat = as.matrix(dat),mL=ncol(X), mR=1, pheno_error=pheno_error, edge_vec=edge_vec,
edge_ind=edge_ind,ind_edge=ind_edge, parent_edges = parent_edges,pars=pars, nvar=nvar,
phylocov_diag=as.integer(!phylo_correlated), nind=nind, nob=nob, nspecies=nspecies, nedge=nedge, anc=anc, des=des, REML=as.integer(REML),
species_subset=species_subset_complete, un_species_subset = un_species_subset_complete,subset_list=subset_list_complete,
ind_list=ind_list_complete, tip_combn=tip_combn_complete,is_edge_ind=is_edge_ind,fixed_mu=matrix(0),ret_level=2,
is_phylocov_fixed=as.integer(!is.na(phylocov_fixed)[[1]]),phylocov_fixed=phylocov_fixed,is_phenocov_list=length(phenocov_list),phenocov_list=phenocov_list,
is_phenocov_fixed=as.integer(!is.na(phenocov_fixed)[[1]]),phenocov_fixed=phenocov_fixed,OU_len=list())$theta
best_phylocov <- phylocov <- crossprod(apply(dat,2,pic,phy=drop_tree))/(nspecies-REML)
if(!phylo_correlated) best_phylocov <- phylocov <- diag(diag(phylocov))
pars <- mat_to_pars(phylocov,nvar,as.integer(!phylo_correlated))
ll <- ll2 <- tp(L=X, R=R, Rmat = as.matrix(Rmat),mL=ncol(X), mR=1, pheno_error=pheno_error, edge_vec=edge_vec,
edge_ind=edge_ind,ind_edge=ind_edge, parent_edges = parent_edges,pars=pars, nvar=nvar,
phylocov_diag=as.integer(!phylo_correlated), nind=nind, nob=nob, nspecies=nspecies, nedge=nedge, anc=anc, des=des, REML=as.integer(REML),
species_subset=species_subset, un_species_subset = un_species_subset,subset_list=subset_list,
ind_list=ind_list, tip_combn=tip_combn,is_edge_ind=is_edge_ind,fixed_mu=matrix(0),ret_level=1,
is_phylocov_fixed=as.integer(!is.na(phylocov_fixed)[[1]]),phylocov_fixed=phylocov_fixed,is_phenocov_list=length(phenocov_list),phenocov_list=phenocov_list,
is_phenocov_fixed=as.integer(!is.na(phenocov_fixed)[[1]]),phenocov_fixed=phenocov_fixed,OU_len=list())$logl
for(k in 1:EM_missing_limit)
{
imputed <- tp(L=X, R=R, Rmat = as.matrix(Rmat),mL=ncol(X), mR=1, pheno_error=pheno_error, edge_vec=edge_vec,
edge_ind=edge_ind,ind_edge=ind_edge, parent_edges = parent_edges,pars=pars, nvar=nvar,
phylocov_diag=as.integer(!phylo_correlated), nind=nind, nob=nob, nspecies=nspecies, nedge=nedge, anc=anc, des=des, REML=as.integer(REML),
species_subset=species_subset, un_species_subset = un_species_subset,subset_list=subset_list,
ind_list=ind_list, tip_combn=tip_combn,is_edge_ind=is_edge_ind,fixed_mu=mu,ret_level=3,
is_phylocov_fixed=as.integer(!is.na(phylocov_fixed)[[1]]),phylocov_fixed=phylocov_fixed,is_phenocov_list=length(phenocov_list),phenocov_list=phenocov_list,
is_phenocov_fixed=as.integer(!is.na(phenocov_fixed)[[1]]),phenocov_fixed=phenocov_fixed,OU_len=list())
dat <- imputed$recon_ind
rownames(dat) <- trait_data$species
biased_sigma <- crossprod(apply(dat,2,pic,phy=drop_tree))/(nspecies-REML)
biased_pars <- mat_to_pars(biased_sigma,nvar,as.integer(!phylo_correlated))
tip_un <- tp(L=X, R=R, Rmat = as.matrix(Rmat),mL=ncol(X), mR=1, pheno_error=pheno_error, edge_vec=edge_vec,
edge_ind=edge_ind,ind_edge=ind_edge, parent_edges = parent_edges,pars=biased_pars, nvar=nvar,
phylocov_diag=as.integer(!phylo_correlated), nind=nind, nob=nob, nspecies=nspecies, nedge=nedge, anc=anc, des=des, REML=as.integer(REML),
species_subset=species_subset, un_species_subset = un_species_subset,subset_list=subset_list,
ind_list=ind_list, tip_combn=tip_combn,is_edge_ind=is_edge_ind,fixed_mu=matrix(0),ret_level=3,
is_phylocov_fixed=as.integer(!is.na(phylocov_fixed)[[1]]),phylocov_fixed=phylocov_fixed,is_phenocov_list=length(phenocov_list),phenocov_list=phenocov_list,
is_phenocov_fixed=as.integer(!is.na(phenocov_fixed)[[1]]),phenocov_fixed=phenocov_fixed,OU_len=list())$tip_uncertainty
mu <- as.matrix(apply(dat,2,function(X) ace(X,drop_tree,method="pic")$ace[[1]]))
phylocov <- biased_sigma + tip_un/(nspecies-REML)
if(!phylo_correlated) phylocov <- diag(diag(phylocov))
pars <- mat_to_pars(phylocov,nvar,as.integer(!phylo_correlated))
ll2 <- tp(L=X, R=R, Rmat = as.matrix(Rmat),mL=ncol(X), mR=1, pheno_error=pheno_error, edge_vec=edge_vec,
edge_ind=edge_ind,ind_edge=ind_edge, parent_edges = parent_edges,pars=pars, nvar=nvar,
phylocov_diag=as.integer(!phylo_correlated), nind=nind, nob=nob, nspecies=nspecies, nedge=nedge, anc=anc, des=des, REML=as.integer(REML),
species_subset=species_subset, un_species_subset = un_species_subset,subset_list=subset_list,
ind_list=ind_list, tip_combn=tip_combn,is_edge_ind=is_edge_ind,fixed_mu=matrix(0),ret_level=1,
is_phylocov_fixed=as.integer(!is.na(phylocov_fixed)[[1]]),phylocov_fixed=phylocov_fixed,is_phenocov_list=length(phenocov_list),phenocov_list=phenocov_list,
is_phenocov_fixed=as.integer(!is.na(phenocov_fixed)[[1]]),phenocov_fixed=phenocov_fixed,OU_len=list())$logl
if(ll2<ll)
{
phylocov <- best_phylocov
#phenocov <- best_phenocov
ll2 <- ll
break
} else
{
best_phylocov <- phylocov
#best_phenocov <- phenocov
ll <- ll2
if(EM_verbose) cat(c(ll2,"\n"))
}
if(EM_verbose) cat(c(ll2,"\n"))
}
}
}
if(model=="BM" | model=="white" | model=="star")
{
if(!is.na(phylocov_start)[[1]] & is.na(phylocov_fixed)[[1]]) phylocov <- phylocov_start
if(!is.na(phenocov_start)[[1]] & is.na(phenocov_fixed)[[1]]) phenocov <- phenocov_start
}
pars <- numeric()
if(is.na(phylocov_fixed)[[1]]) pars <- mat_to_pars(phylocov,nvar,as.integer(!phylo_correlated))
if(is.na(phenocov_fixed)[[1]]) if(pheno_error!=0) pars <- c(pars,mat_to_pars(phenocov,nvar,pheno_error))
ll2 <- tp(L=X, R=R, Rmat = as.matrix(Rmat),mL=ncol(X), mR=1, pheno_error=pheno_error, edge_vec=edge_vec,
edge_ind=edge_ind,ind_edge=ind_edge, parent_edges = parent_edges,pars=pars, nvar=nvar,
phylocov_diag=as.integer(!phylo_correlated), nind=nind, nob=nob, nspecies=nspecies, nedge=nedge, anc=anc, des=des, REML=as.integer(REML),
species_subset=species_subset, un_species_subset = un_species_subset,subset_list=subset_list,
ind_list=ind_list, tip_combn=tip_combn,is_edge_ind=is_edge_ind,fixed_mu=matrix(0),ret_level=3,
is_phylocov_fixed=as.integer(!is.na(phylocov_fixed)[[1]]),phylocov_fixed=phylocov_fixed,is_phenocov_list=length(phenocov_list),phenocov_list=phenocov_list,
is_phenocov_fixed=as.integer(!is.na(phenocov_fixed)[[1]]),phenocov_fixed=phenocov_fixed,OU_len=list())
mu <- ll2[[2]]
if(do_optim | (!skip_optim))
{
tedge_vec <- edge_vec
# numerical optimization
ll <- ll2[[1]]
if(!is.na(model_par_start)[[1]]) MOD_PAR_START <- model_par_start else
if(!is.na(model_par_fixed)[[1]]) MOD_PAR_START <- model_par_start
if(!nested_optim & (model=="lambda" | model=="OU" | model=="EB" | model=="kappa" | model=="delta")) MOD_PAR <- -log(-(-max(par_bounds)+MOD_PAR_START)/(-min(par_bounds)+MOD_PAR_START))
if((complete_data & (model!="BM" & model!="white" & model!="star" & is.na(model_par_fixed[[1]]))) | pheno_error | !complete_data | force_optim)
for(i in 1:repeat_optim_limit)
{
zRmat <- (trait_data[,1:nvar+1,drop=FALSE] - matrix(1,nind) %*% t(zstand$mu)) / sqrt(matrix(1,nind) %*% diag(zstand$phylocov))
zR <- matrix(na.exclude(as.double(t(zRmat[,1:nvar]))),ncol=1)
pars2 <- numeric()
if(is.na(phylocov_fixed)[[1]]) pars2 <- mat_to_pars(phylocov / (sqrt(diag(zstand$phylocov)) %*% t(sqrt(diag(zstand$phylocov)))),nvar,as.integer(!phylo_correlated))
if(is.na(phenocov_fixed)[[1]]) if(pheno_error!=0) pars2 <- c(pars2,mat_to_pars(phenocov / (sqrt(diag(zstand$phylocov)) %*% t(sqrt(diag(zstand$phylocov)))),nvar,pheno_error))
z_em_ll <- tp(L=X, R=zR, Rmat = as.matrix(zRmat),mL=ncol(X), mR=1, pheno_error=pheno_error, edge_vec=edge_vec,
edge_ind=edge_ind,ind_edge=ind_edge, parent_edges = parent_edges,pars=pars2, nvar=nvar,
phylocov_diag=as.integer(!phylo_correlated), nind=nind, nob=nob, nspecies=nspecies, nedge=nedge, anc=anc, des=des, REML=as.integer(REML),
species_subset=species_subset, un_species_subset = un_species_subset,subset_list=subset_list,
ind_list=ind_list, tip_combn=tip_combn,is_edge_ind=is_edge_ind,fixed_mu=matrix(0),ret_level=1,
is_phylocov_fixed=as.integer(!is.na(phylocov_fixed)[[1]]),phylocov_fixed=zphylocov_fixed,is_phenocov_list=length(phenocov_list),phenocov_list=zphenocov_list,
is_phenocov_fixed=as.integer(!is.na(phenocov_fixed)[[1]]),phenocov_fixed=zphenocov_fixed,OU_len=list())$logl
if(!nested_optim & (model=="lambda" | model=="OU" | model=="EB" | model=="kappa" | model=="delta")) pars2 <- c(pars2,MOD_PAR)
pars2 <- optim(par = pars2,fn = f,control=list(fnscale=-1),method=if(i%%2 == 1) "BFGS" else "Nelder-Mead",em_ll=z_em_ll,R=zR,Rmat=zRmat,phylocov_fixed=zphylocov_fixed,phenocov_fixed=zphenocov_fixed,phenocov_list=zphenocov_list)
if(is.na(phylocov_fixed)[[1]]) phylocov2 <- pars_to_mat(pars2$par[phylocov_pars],nvar,as.integer(!phylo_correlated)) else phylocov2 <- zphylocov_fixed
if(is.na(phenocov_fixed)[[1]])
{
if(pheno_error!=0) phenocov2 <- pars_to_mat(pars2$par[phenocov_pars],nvar,pheno_error) else phenocov2 <- zphenocov_fixed
}
if(!nested_optim & (model=="lambda" | model=="OU" | model=="EB" | model=="kappa" | model=="delta"))
{
MOD_PAR <- pars2$par[length(pars2$par)]
tedge_vec <- evec(((max(par_bounds) - min(par_bounds)) / (1+exp(-(MOD_PAR)))) + min(par_bounds))
} else tedge_vec <- edge_vec
pars2 <- numeric()
if(is.na(phylocov_fixed)[[1]]) pars2 <- mat_to_pars(phylocov2 * (sqrt(diag(zstand$phylocov)) %*% t(sqrt(diag(zstand$phylocov)))),nvar,as.integer(!phylo_correlated))
if(is.na(phenocov_fixed)[[1]]) if(pheno_error!=0) pars2 <- c(pars2,mat_to_pars(phenocov2 * (sqrt(diag(zstand$phylocov)) %*% t(sqrt(diag(zstand$phylocov)))),nvar,pheno_error))
ll2 <- tp(L=X, R=R, Rmat = as.matrix(Rmat),mL=ncol(X), mR=1, pheno_error=pheno_error, edge_vec=tedge_vec,
edge_ind=edge_ind,ind_edge=ind_edge, parent_edges = parent_edges,pars=pars2, nvar=nvar,
phylocov_diag=as.integer(!phylo_correlated), nind=nind, nob=nob, nspecies=nspecies, nedge=nedge, anc=anc, des=des, REML=as.integer(REML),
species_subset=species_subset, un_species_subset = un_species_subset,subset_list=subset_list,
ind_list=ind_list, tip_combn=tip_combn,is_edge_ind=is_edge_ind,fixed_mu=matrix(0),ret_level=1,
is_phylocov_fixed=as.integer(!is.na(phylocov_fixed)[[1]]),phylocov_fixed=phylocov_fixed,is_phenocov_list=length(phenocov_list),phenocov_list=phenocov_list,
is_phenocov_fixed=as.integer(!is.na(phenocov_fixed)[[1]]),phenocov_fixed=phenocov_fixed,OU_len=list())
if(is.na(phenocov_fixed)[[1]]) if(pheno_error!=0) phenocov <- phenocov2 * (sqrt(diag(zstand$phylocov)) %*% t(sqrt(diag(zstand$phylocov))))
if(is.na(phylocov_fixed)[[1]]) phylocov <- phylocov2 * (sqrt(diag(zstand$phylocov)) %*% t(sqrt(diag(zstand$phylocov))))
pars <- numeric()
if(is.na(phylocov_fixed)[[1]]) pars <- mat_to_pars(phylocov,nvar,as.integer(!phylo_correlated))
if(is.na(phenocov_fixed)[[1]]) if(pheno_error!=0) pars <- c(pars,mat_to_pars(phenocov,nvar,pheno_error))
if(i>1) if(abs(ll-ll2$logl)<repeat_optim_tol) break else ll <- ll2$logl
}
ll2 <- tp(L=X, R=R, Rmat = as.matrix(Rmat),mL=ncol(X), mR=1, pheno_error=pheno_error, edge_vec=tedge_vec,
edge_ind=edge_ind,ind_edge=ind_edge, parent_edges = parent_edges,pars=pars, nvar=nvar,
phylocov_diag=as.integer(!phylo_correlated), nind=nind, nob=nob, nspecies=nspecies, nedge=nedge, anc=anc, des=des, REML=as.integer(REML),
species_subset=species_subset, un_species_subset = un_species_subset,subset_list=subset_list,
ind_list=ind_list, tip_combn=tip_combn,is_edge_ind=is_edge_ind,fixed_mu=matrix(0),ret_level=3,
is_phylocov_fixed=as.integer(!is.na(phylocov_fixed)[[1]]),phylocov_fixed=phylocov_fixed,is_phenocov_list=length(phenocov_list),phenocov_list=phenocov_list,
is_phenocov_fixed=as.integer(!is.na(phenocov_fixed)[[1]]),phenocov_fixed=phenocov_fixed,OU_len=list())
}
ret <- list(ll=ll2$logl,phylocov=phylocov,phenocov=phenocov,mu=ll2[[2]],pars=pars,ll2=ll2)
ret$threepoint_calc = list(L=X, R=R, Rmat = as.matrix(Rmat),mL=ncol(X), mR=1, pheno_error=pheno_error, edge_vec=tedge_vec,
edge_ind=edge_ind,ind_edge=ind_edge, parent_edges = parent_edges,pars=pars, nvar=nvar,
phylocov_diag=as.integer(!phylo_correlated), nind=nind, nob=nob, nspecies=nspecies, nedge=nedge, anc=anc, des=des, REML=as.integer(REML),
species_subset=species_subset, un_species_subset = un_species_subset,subset_list=subset_list,
ind_list=ind_list, tip_combn=tip_combn,is_edge_ind=is_edge_ind,fixed_mu=matrix(0),ret_level=3,
is_phylocov_fixed=as.integer(!is.na(phylocov_fixed)[[1]]),phylocov_fixed=phylocov_fixed,is_phenocov_list=length(phenocov_list),phenocov_list=phenocov_list,
is_phenocov_fixed=as.integer(!is.na(phenocov_fixed)[[1]]),phenocov_fixed=phenocov_fixed,OU_len=list())
if(!nested_optim & (model=="lambda" | model=="OU" | model=="EB" | model=="kappa" | model=="delta"))
{
ret[model] <- ((max(par_bounds) - min(par_bounds)) / (1+exp(-(MOD_PAR)))) + min(par_bounds)
}
ret
}
if(nested_optim & model!="BM" & model!="white" & model!="star" & model!="mvOU")
{
max_i <- max_logl <- -Inf
max_par <- mean(par_bounds)
max_phylocov <- phylocov
max_phenocov <- phenocov
max_mu <- mu
if(!is.na(model_par_fixed)[[1]])
{
tree$edge.length <- evec(model_par_fixed)[1:nedge]
ret <- phylopars(trait_data=trait_data,tree=tree,pheno_error=pheno_error>0,pheno_correlated=pheno_error==2,phylo_correlated=phylo_correlated,
REML=REML,EM_Fels_limit=EM_Fels_limit,repeat_optim_limit=repeat_optim_limit,EM_missing_limit=EM_missing_limit,repeat_optim_tol=repeat_optim_tol,phylocov_start=phylocov_start,
phenocov_start=phenocov_start,phylocov_fixed=phylocov_fixed,phenocov_fixed=phenocov_fixed,skip_optim=skip_optim,skip_EM=skip_EM,EM_verbose=EM_verbose,optim_verbose=optim_verbose,phenocov_list=phenocov_list)
if(!is.na(perm_model))
{
if(perm_model=="lambda" | perm_model=="OU" | perm_model=="EB" | perm_model=="kappa" | perm_model=="delta")
{
ret$model <- list(model=perm_model,mod_par=model_par_fixed)
names(ret$model)[2] <- if(ret$model[[1]]=="OU") "alpha" else if(ret$model[[1]]=="EB") "rate" else ret$model[[1]]
ret$npars <- ret$npars + 1
if(perm_model=="OU")
{
stationary_cov <- ret$pars[[1]]/(2*ret$model[[2]])
dimnames(stationary_cov) <- list(colnames(trait_data)[1:nvar+1],colnames(trait_data)[1:nvar+1])
ret$model <- c(ret$model,list(stationary_cov=stationary_cov))
}
return(ret)
}
}
}
temp_edge_vec <- edge_vec
for(j in 1:1)
{
for(i in 1:model_par_evals)
{
temp_edge_vec <- evec(par_bounds[i])
pars <- try(estim_pars(temp_edge_vec,do_optim = FALSE),silent=TRUE)
if(inherits(pars,"try-error"))
pars <- try(estim_pars(temp_edge_vec,do_optim = TRUE,skip_EM=TRUE),silent=TRUE)
if(inherits(pars,"try-error"))
{
next
}
if(is.na(phylocov_fixed)[[1]]) phylocov <- pars$phylocov
if(is.na(phenocov_fixed)[[1]]) phenocov <- pars$phenocov
mu <- pars$mu
if(i==1 | is.infinite(max_logl))
{
max_logl <- pars[[1]]
max_i <- i
max_par <- par_bounds[i]
max_phylocov <- phylocov
max_phenocov <- phenocov
max_mu <- mu
}
if(pars[[1]] > max_logl)
{
max_logl <- pars[[1]]
max_i <- i
max_par <- par_bounds[i]
max_phylocov <- phylocov
max_phenocov <- phenocov
max_mu <- mu
}
}
max_i <- c(max_i-2,max_i+2)
max_i[max_i<1] <- 1
max_i[max_i>model_par_evals] <- model_par_evals
if(max_i[1]==1 & max_i[2]==model_par_evals) break
par_bounds <- seq(min(par_bounds[max_i]),max(par_bounds[max_i]),length=model_par_evals)
if(max_par < min(par_bounds)) par_bounds[which.min(par_bounds)] <- max_par
if(max_par > max(par_bounds)) par_bounds[which.max(par_bounds)] <- max_par
par_bounds <- seq(min(par_bounds),max(par_bounds),length=model_par_evals)
}
if(!is.na(model_par_start)[[1]]) max_par <- model_par_start
a <- -log(-(-max(par_bounds)+max_par)/(-min(par_bounds)+max_par))
if(is.infinite((a))) a <- -log(-(-max(par_bounds)+mean(par_bounds))/(-min(par_bounds)+mean(par_bounds)))
o1 <- optim(par = a,fn = function(X) estim_pars(evec(((max(par_bounds) - min(par_bounds)) / (1+exp(-(X)))) + min(par_bounds)),do_optim = TRUE,phylocov = max_phylocov,phenocov = max_phenocov,mu = max_mu)[[1]],control=list(fnscale=-1),method="BFGS")
#o1 <- optimize(f = function(X) estim_pars(evec(X),do_optim = TRUE,phylocov = phylocov,phenocov = phenocov,mu = mu)[[1]],interval = c(min(par_bounds[max_i]),max(par_bounds[max_i])),maximum = TRUE)
o2 <- estim_pars(evec(min(par_bounds[max_i])),do_optim = TRUE,phylocov=max_phylocov,phenocov=max_phenocov,mu=max_mu)
o3 <- estim_pars(evec(max(par_bounds[max_i])),do_optim = TRUE,phylocov=max_phylocov,phenocov=max_phenocov,mu=max_mu)
w <- which.max(c(o1$value,o2$ll,o3$ll))
if(w==1)
{
model_par <- ((max(par_bounds) - min(par_bounds)) / (1+exp(-(o1$par)))) + min(par_bounds)
pars <- estim_pars(evec(model_par),do_optim = TRUE,phylocov = max_phylocov,phenocov=max_phenocov,mu = max_mu)
ll2 <- tp(L=X, R=R, Rmat = as.matrix(Rmat),mL=ncol(X), mR=1, pheno_error=pheno_error, edge_vec=evec(model_par),
edge_ind=edge_ind,ind_edge=ind_edge, parent_edges = parent_edges,pars=pars$pars, nvar=nvar,
phylocov_diag=as.integer(!phylo_correlated), nind=nind, nob=nob, nspecies=nspecies, nedge=nedge, anc=anc, des=des, REML=as.integer(REML),
species_subset=species_subset, un_species_subset = un_species_subset,subset_list=subset_list,
ind_list=ind_list, tip_combn=tip_combn,is_edge_ind=is_edge_ind,fixed_mu=matrix(0),ret_level=3,
is_phylocov_fixed=as.integer(!is.na(phylocov_fixed)[[1]]),phylocov_fixed=phylocov_fixed,is_phenocov_list=length(phenocov_list),phenocov_list=phenocov_list,
is_phenocov_fixed=as.integer(!is.na(phenocov_fixed)[[1]]),phenocov_fixed=phenocov_fixed,OU_len=list())
} else if(w==2)
{
model_par <- min(par_bounds[max_i])
ll2 <- o2$ll2
} else if(w==3)
{
model_par <- max(par_bounds[max_i])
ll2 <- o3$ll2
}
ll2[model] <- model_par
if((model=="OU" & round(model_par-0,6)==0) | (model=="EB" & round(model_par-0,6)==0) | (model=="lambda" & round(model_par-1,6)==0) | (model=="delta" & round(model_par-1,6)==0) | (model=="kappa" & round(model_par-1,6)==0)) warning(paste("Estimated",model,"parameter is at bounds and is indistinguishable from a Brownian Motion model."))
ret <- list(ll=ll2$logl,phylocov=phylocov,phenocov=phenocov,mu=ll2[[2]],pars=pars,ll2=ll2)
ret$threepoint_calc <- list(L=X, R=R, Rmat = as.matrix(Rmat),mL=ncol(X), mR=1, pheno_error=pheno_error, edge_vec=evec(model_par),
edge_ind=edge_ind,ind_edge=ind_edge, parent_edges = parent_edges,pars=pars$pars, nvar=nvar,
phylocov_diag=as.integer(!phylo_correlated), nind=nind, nob=nob, nspecies=nspecies, nedge=nedge, anc=anc, des=des, REML=as.integer(REML),
species_subset=species_subset, un_species_subset = un_species_subset,subset_list=subset_list,
ind_list=ind_list, tip_combn=tip_combn,is_edge_ind=is_edge_ind,fixed_mu=matrix(0),ret_level=3,
is_phylocov_fixed=as.integer(!is.na(phylocov_fixed)[[1]]),phylocov_fixed=phylocov_fixed,is_phenocov_list=length(phenocov_list),phenocov_list=phenocov_list,
is_phenocov_fixed=as.integer(!is.na(phenocov_fixed)[[1]]),phenocov_fixed=phenocov_fixed,OU_len=list())
if(model=="lambda" | model=="OU" | model=="EB" | model=="kappa" | model=="delta")
{
ret[model] <- ((max(par_bounds) - min(par_bounds)) / (1+exp(-(model_par)))) + min(par_bounds)
}
ll2 <- ret
} else if((model=="BM" | model=="white" | model=="star") | (!nested_optim & model!="mvOU")) ll2 <- estim_pars(edge_vec = edge_vec,do_optim = TRUE,phylocov = phylocov,phenocov = phenocov,mu = mu) else
if(model=="mvOU")
{
hts <- getHeights(tree)
edge_mat <- postorder_tools(tree)
des_order <- edge_mat[[2]]
edge_mat <- edge_mat[[1]]
OU_fun <- function(pars,R,Rmat,phylocov_fixed,phenocov_fixed,phenocov_list,ret_level=1,BM_ll=NA)
{
if(is.na(phylocov_fixed)[[1]]) phylocov <- pars_to_mat(pars[phylocov_pars],nvar,as.integer(!phylo_correlated)) else phylocov <- phylocov_fixed
if(pheno_error>0) if(is.na(phenocov_fixed)[[1]]) phenocov <- pars_to_mat(pars[phenocov_pars],nvar,pheno_error) else phenocov <- phenocov_fixed
if(is.na(model_par_fixed)[[1]]) alpha <- pars_to_mat(pars[alpha_pars],nvar,diag = abs(full_alpha-1))
if(is.na(phylocov_fixed)[[1]])
{
if(npd) phylocov <- try(as.matrix(nearPD(phylocov)$mat),silent=TRUE)
if(inherits(phylocov,"try-error")) return(phylocov)
pars <- mat_to_pars(phylocov,nvar,as.integer(!phylo_correlated))
if(inherits(pars,"try-error")) return(pars)
}
if(pheno_error>0) if(is.na(phenocov_fixed)[[1]])
{
if(npd) phenocov <- try(as.matrix(nearPD(phenocov)$mat),silent=TRUE)
if(inherits(phenocov,"try-error")) return(phenocov)
pars <- c(pars,mat_to_pars(phenocov,nvar,pheno_error))
if(inherits(pars,"try-error")) return(pars)
}
if(npd) alpha <- try(as.matrix(nearPD(alpha)$mat),silent=TRUE)
if(inherits(alpha,"try-error")) return(alpha)
#HOU <- calc_OU_heights(heights = hts,edge_mat = edge_mat,des_order = des_order,nedge = nedge,alpha = alpha,sigma = phylocov)
#len_vec <- apply(HOU,2,function(X) convert_to_tree(heights = X,nspecies = nspecies,nedge = nedge,anc = tree$edge[,1],des = tree$edge[,2]))
#len_vec <- lapply(apply(len_vec,1,function(X) list(matrix(X,nvar,nvar))),function(X) X[[1]])
pars <- numeric()
if(is.na(phylocov_fixed)[[1]]) pars <- mat_to_pars(phylocov,nvar,as.integer(!phylo_correlated))
if(!is.na(BM_ll)) if(inherits(pars,"try-error")) return(BM_ll - max_delta)
if(pheno_error>0) if(is.na(phenocov_fixed)[[1]]) pars <- c(pars,mat_to_pars(phenocov,nvar,pheno_error))
if(is.na(model_par_fixed)[[1]]) pars <- c(pars,mat_to_pars(alpha,nvar,abs(full_alpha-1)))
if(!is.na(BM_ll)) if(inherits(pars,"try-error")) return(BM_ll - max_delta)
ea <- try(eigen(alpha),silent=TRUE)
if(inherits(ea,"try-error")) return(ea)
P <- ea$vectors
lambda <- ea$values
if(any(lambda<=1e-8))
{
class(alpha) <- "try-error"
return(alpha)
}
#temp_phylocov <- phylocov + diag(diag(phylocov)*1e-12)
temp_phylocov <- phylocov + diag(nvar)*1e-6
temp_len_vec <- calc_OU_len(heights=hts,edge_mat=edge_mat,des_order=des_order,nedge=nedge,P=P,lambda=lambda,sigma=temp_phylocov,anc=tree$edge[,1],des=tree$edge[,2],nvar=nvar,nspecies=nspecies)
temp_ll2 <- tp(L=X, R=R, Rmat = as.matrix(Rmat),mL=ncol(X), mR=1, pheno_error=pheno_error, edge_vec=edge_vec,
edge_ind=edge_ind,ind_edge=ind_edge, parent_edges = parent_edges,pars=pars, nvar=nvar,
phylocov_diag=as.integer(!phylo_correlated), nind=nind, nob=nob, nspecies=nspecies, nedge=nedge, anc=anc, des=des, REML=as.integer(REML),
species_subset=species_subset, un_species_subset = un_species_subset,subset_list=subset_list,
ind_list=ind_list, tip_combn=tip_combn,is_edge_ind=is_edge_ind,fixed_mu=matrix(0),ret_level=1,OU_len=temp_len_vec,OU_par=1,use_LL=0,
is_phylocov_fixed=as.integer(!is.na(phylocov_fixed)[[1]]),phylocov_fixed=phylocov_fixed,is_phenocov_list=length(phenocov_list),phenocov_list=phenocov_list,
is_phenocov_fixed=as.integer(!is.na(phenocov_fixed)[[1]]),phenocov_fixed=phenocov_fixed)[[1]]
len_vec <- calc_OU_len(heights=hts,edge_mat=edge_mat,des_order=des_order,nedge=nedge,P=P,lambda=lambda,sigma=phylocov,anc=tree$edge[,1],des=tree$edge[,2],nvar=nvar,nspecies=nspecies)
ll2 <- tp(L=X, R=R, Rmat = as.matrix(Rmat),mL=ncol(X), mR=1, pheno_error=pheno_error, edge_vec=edge_vec,
edge_ind=edge_ind,ind_edge=ind_edge, parent_edges = parent_edges,pars=pars, nvar=nvar,
phylocov_diag=as.integer(!phylo_correlated), nind=nind, nob=nob, nspecies=nspecies, nedge=nedge, anc=anc, des=des, REML=as.integer(REML),
species_subset=species_subset, un_species_subset = un_species_subset,subset_list=subset_list,
ind_list=ind_list, tip_combn=tip_combn,is_edge_ind=is_edge_ind,fixed_mu=matrix(0),ret_level=ret_level,OU_len=len_vec,OU_par=1,use_LL=0,
is_phylocov_fixed=as.integer(!is.na(phylocov_fixed)[[1]]),phylocov_fixed=phylocov_fixed,is_phenocov_list=length(phenocov_list),phenocov_list=phenocov_list,
is_phenocov_fixed=as.integer(!is.na(phenocov_fixed)[[1]]),phenocov_fixed=phenocov_fixed)
if(is.na(ll2)[[1]])
{
class(alpha) <- "try-error"
return(alpha)
}
if(!is.na(temp_ll2)) if(abs(temp_ll2-ll2[[1]])>100)
{
class(alpha) <- "try-error"
return(alpha)
}
temp_alpha <- alpha + diag(nvar)*1e-6
temp_ea <- try(eigen(temp_alpha),silent=TRUE)
if(inherits(temp_ea,"try-error")) return(temp_ea)
temp_P <- temp_ea$vectors
temp_lambda <- temp_ea$values
if(any(temp_lambda<=1e-8))
{
class(temp_alpha) <- "try-error"
return(temp_alpha)
}
temp_len_vec <- calc_OU_len(heights=hts,edge_mat=edge_mat,des_order=des_order,nedge=nedge,P=temp_P,lambda=temp_lambda,sigma=phylocov,anc=tree$edge[,1],des=tree$edge[,2],nvar=nvar,nspecies=nspecies)
temp_ll2 <- tp(L=X, R=R, Rmat = as.matrix(Rmat),mL=ncol(X), mR=1, pheno_error=pheno_error, edge_vec=edge_vec,
edge_ind=edge_ind,ind_edge=ind_edge, parent_edges = parent_edges,pars=pars, nvar=nvar,
phylocov_diag=as.integer(!phylo_correlated), nind=nind, nob=nob, nspecies=nspecies, nedge=nedge, anc=anc, des=des, REML=as.integer(REML),
species_subset=species_subset, un_species_subset = un_species_subset,subset_list=subset_list,
ind_list=ind_list, tip_combn=tip_combn,is_edge_ind=is_edge_ind,fixed_mu=matrix(0),ret_level=1,OU_len=temp_len_vec,OU_par=1,use_LL=0,
is_phylocov_fixed=as.integer(!is.na(phylocov_fixed)[[1]]),phylocov_fixed=phylocov_fixed,is_phenocov_list=length(phenocov_list),phenocov_list=phenocov_list,
is_phenocov_fixed=as.integer(!is.na(phenocov_fixed)[[1]]),phenocov_fixed=phenocov_fixed)[[1]]
if(!is.na(temp_ll2)) if(abs(temp_ll2-ll2[[1]])>100)
{
class(alpha) <- "try-error"
return(alpha)
}
if(optim_verbose) cat(c(ll2[[1]],"\n"))
if(ret_level>1) return(list(ll=ll2$logl,phylocov=phylocov,phenocov=phenocov,alpha=alpha,mu=ll2[[2]],pars=pars,ll2=ll2))
list(ll=ll2[[1]])
}
ret <- phylopars(trait_data=trait_data,tree=tree,model="OU",pheno_error=pheno_error>0,pheno_correlated=pheno_error==2,phylo_correlated=phylo_correlated,
REML=REML,EM_Fels_limit=EM_Fels_limit,repeat_optim_limit=repeat_optim_limit,EM_missing_limit=EM_missing_limit,repeat_optim_tol=repeat_optim_tol,phylocov_start=phylocov_start,
phenocov_start=phenocov_start,phylocov_fixed=phylocov_fixed,phenocov_fixed=phenocov_fixed,skip_optim=skip_optim,skip_EM=skip_EM,EM_verbose=EM_verbose,optim_verbose=optim_verbose,phenocov_list=phenocov_list)
ALPHA <- diag(x = ret$model$alpha + max(c(min(par_bounds),min(c(.001,max(par_bounds))))),nrow = nvar)
ALPHA[ALPHA>max(par_bounds)] <- max(par_bounds)
#phylocov <- ret$pars[[1]]
tr_a <- tr_b <- tr_c <- tree
SIGMA <- matrix(0,nvar,nvar)
for(a in 1:nvar)
{
for(b in 1:nvar)
{
i <- j <- a
tr_a$edge.length <-
(reorder(transf.branch.lengths(tree,model="OUfixedRoot",parameters = list(alpha=(ALPHA[i,i]+ALPHA[j,j])/2))$
tree,"postorder")$edge.length)#*sigma[i,j]#/(alpha[i,i]+alpha[j,j]))
i <- j <- b
tr_b$edge.length <-
(reorder(transf.branch.lengths(tree,model="OUfixedRoot",parameters = list(alpha=(ALPHA[i,i]+ALPHA[j,j])/2))$
tree,"postorder")$edge.length)#*sigma[i,j]#/(alpha[i,i]+alpha[j,j]))
i <- a
j <- b
tr_c$edge.length <-
(reorder(transf.branch.lengths(tree,model="OUfixedRoot",parameters = list(alpha=(ALPHA[i,i]+ALPHA[j,j])/2))$
tree,"postorder")$edge.length)#*sigma[i,j]#/(alpha[i,i]+alpha[j,j]))
SIGMA[a,b] <- three.point.compute(phy = tr_c,ret$anc_recon[1:nspecies,i]-ace(ret$anc_recon[1:nspecies,i],tr_a,method="pic")$ace[[1]],ret$anc_recon[1:nspecies,j]-ace(ret$anc_recon[1:nspecies,j],tr_b,method="pic")$ace[[1]])$QP/length(tree$tip.label)*(ALPHA[i,i]+ALPHA[j,j])
}
}
SIGMA <- as.matrix(nearPD(SIGMA)$mat)
if(pheno_error!=0) phenocov <- ret$pars[[2]]
mu <- ret$mu
if(!is.na(phylocov_start)[[1]]) phylocov <- phylocov_start
if(!is.na(phenocov_start)[[1]]) phenocov <- phenocov_start
pars <- numeric()
if(is.na(phylocov_fixed)[[1]]) pars <- mat_to_pars(phylocov,nvar,as.integer(!phylo_correlated))
if(is.na(phenocov_fixed)[[1]]) if(pheno_error!=0) pars <- c(pars,mat_to_pars(phenocov,nvar,pheno_error))
#pars[alpha_pars] <- mat_to_pars(diag(runif(nvar)),nvar,diag = abs(full_alpha-1))
if(is.na(model_par_start)[[1]] & is.na(model_par_fixed)[[1]])
{
#pars[alpha_pars] <- mat_to_pars(diag(nvar),nvar,diag = abs(full_alpha-1))
pars[alpha_pars] <- mat_to_pars(ALPHA,nvar,diag=abs(full_alpha-1))
} else if(!is.na(model_par_start)[[1]])
{
pars[alpha_pars] <- mat_to_pars(model_par_start,nvar,diag = abs(full_alpha-1))
}
zRmat <- (trait_data[,1:nvar+1,drop=FALSE] - matrix(1,nind) %*% t(zstand$mu)) / sqrt(matrix(1,nind) %*% diag(zstand$phylocov))
zR <- matrix(na.exclude(as.double(t(zRmat[,1:nvar]))),ncol=1)
pars2 <- numeric()
if(is.na(phylocov_fixed)[[1]]) pars2 <- mat_to_pars(phylocov / (sqrt(diag(zstand$phylocov)) %*% t(sqrt(diag(zstand$phylocov)))),nvar,as.integer(!phylo_correlated))
#pars2 <- mat_to_pars(phylocov / (sqrt(diag(zstand$phylocov)) %*% t(sqrt(diag(zstand$phylocov)))),nvar,as.integer(!phylo_correlated))
if(is.na(phenocov_fixed)[[1]]) if(pheno_error!=0) pars2 <- c(pars2,mat_to_pars(phenocov / (sqrt(diag(zstand$phylocov)) %*% t(sqrt(diag(zstand$phylocov)))),nvar,pheno_error))
if(is.na(model_par_fixed)[[1]]) pars2[alpha_pars] <- pars[alpha_pars]
zll2 <- tp(L=X, R=zR, Rmat = as.matrix(zRmat),mL=ncol(X), mR=1, pheno_error=pheno_error, edge_vec=edge_vec,
edge_ind=edge_ind,ind_edge=ind_edge, parent_edges = parent_edges,pars=pars2, nvar=nvar,
phylocov_diag=as.integer(!phylo_correlated), nind=nind, nob=nob, nspecies=nspecies, nedge=nedge, anc=anc, des=des, REML=as.integer(REML),
species_subset=species_subset, un_species_subset = un_species_subset,subset_list=subset_list,
ind_list=ind_list, tip_combn=tip_combn,is_edge_ind=is_edge_ind,fixed_mu=matrix(0),ret_level=3,OU_par=0,use_LL=0,
is_phylocov_fixed=as.integer(!is.na(phylocov_fixed)[[1]]),phylocov_fixed=zphylocov_fixed,
is_phenocov_list=length(phenocov_list),phenocov_list=zphenocov_list,
is_phenocov_fixed=as.integer(!is.na(phenocov_fixed)[[1]]),phenocov_fixed=zphenocov_fixed,OU_len=list())
zBM_ll <- zll2[[1]]
o <- optim(pars2,fn = function(X,BM_ll,R,Rmat,phylocov_fixed,phenocov_fixed,phenocov_list)
{
ll <- try(OU_fun(X,R=R,Rmat=Rmat,phylocov_fixed=phylocov_fixed,phenocov_fixed=phenocov_fixed,phenocov_list,BM_ll=BM_ll),silent=TRUE)
if(inherits(ll,"try-error")) ll <- BM_ll-max_delta
ll <- ll[[1]]
if(is.na(ll)) ll <- BM_ll - max_delta
if(abs(abs(BM_ll)-abs(ll[[1]]))>max_delta) ll <- BM_ll - max_delta
ll[[1]]
},control=list(fnscale=-1),BM_ll=zBM_ll,R=zR,Rmat=zRmat,phylocov_fixed=zphylocov_fixed,phenocov_fixed=zphenocov_fixed,phenocov_list=zphenocov_list)
if(is.na(phylocov_fixed)[[1]]) phylocov2 <- pars_to_mat(o$par[phylocov_pars],nvar,as.integer(!phylo_correlated)) else phylocov2 <- zphylocov_fixed
if(is.na(phenocov_fixed)[[1]]) if(pheno_error>0) phenocov2 <- pars_to_mat(o$par[phenocov_pars],nvar,pheno_error) else phenocov2 <- zphenocov_fixed
if(is.na(model_par_fixed)[[1]]) alpha <- pars_to_mat(o$par[alpha_pars],nvar,diag = abs(full_alpha-1))
if(npd) phylocov2 <- as.matrix(nearPD(phylocov2)$mat)
if(npd) if(pheno_error>0) phenocov2 <- as.matrix(nearPD(phenocov2)$mat)
if(npd) alpha <- as.matrix(nearPD(alpha)$mat)
pars2 <- numeric()
if(is.na(phylocov_fixed)[[1]]) pars2 <- mat_to_pars(phylocov2 * (sqrt(diag(zstand$phylocov)) %*% t(sqrt(diag(zstand$phylocov)))),nvar,as.integer(!phylo_correlated))
if(is.na(phenocov_fixed)[[1]]) if(pheno_error!=0) pars2 <- c(pars2,mat_to_pars(phenocov2 * (sqrt(diag(zstand$phylocov)) %*% t(sqrt(diag(zstand$phylocov)))),nvar,pheno_error))
if(is.na(model_par_fixed)[[1]]) pars2[alpha_pars] <- o$par[alpha_pars]
if(is.na(phylocov_fixed)[[1]]) phylocov <- phylocov2 * (sqrt(diag(zstand$phylocov)) %*% t(sqrt(diag(zstand$phylocov))))
if(is.na(phenocov_fixed)[[1]]) if(pheno_error!=0) phenocov <- phenocov2 * (sqrt(diag(zstand$phylocov)) %*% t(sqrt(diag(zstand$phylocov))))
pars <- pars2
ll2 <- OU_fun(pars,R=R,Rmat=Rmat,phylocov_fixed=phylocov_fixed,phenocov_fixed=phenocov_fixed,phenocov_list=phenocov_list,ret_level=3)
o <- optim(pars,fn = function(X,BM_ll,R,Rmat,phylocov_fixed,phenocov_fixed,phenocov_list)
{
ll <- try(OU_fun(X,R=R,Rmat=Rmat,phylocov_fixed=phylocov_fixed,phenocov_fixed=phenocov_fixed,phenocov_list=phenocov_list,BM_ll=BM_ll),silent=TRUE)
if(inherits(ll,"try-error")) ll <- BM_ll-max_delta
ll <- ll[[1]]
if(is.na(ll)) ll <- BM_ll - max_delta
if(abs(abs(BM_ll)-abs(ll[[1]]))>max_delta) ll <- BM_ll - max_delta
ll[[1]]
},control=list(fnscale=-1),method="BFGS",BM_ll=ll2[[1]],R=R,Rmat=Rmat,phylocov_fixed=phylocov_fixed,phenocov_fixed=phenocov_fixed,phenocov_list=phenocov_list)
if(is.na(phylocov_fixed)[[1]]) phylocov <- pars_to_mat(o$par[phylocov_pars],nvar,as.integer(!phylo_correlated))
if(is.na(phenocov_fixed)[[1]]) if(pheno_error>0) phenocov <- pars_to_mat(o$par[phenocov_pars],nvar,pheno_error)
if(is.na(model_par_fixed)[[1]]) alpha <- pars_to_mat(o$par[alpha_pars],nvar,diag = abs(full_alpha-1))
ll2 <- OU_fun(o$par,R=R,Rmat=Rmat,phylocov_fixed=phylocov_fixed,phenocov_fixed=phenocov_fixed,phenocov_list=phenocov_list,ret_level=3)
pars <- o$par
if(usezscores)
{
f_args$usezscores <- FALSE
raw_ll2 <- do.call(phylopars,f_args)
if(raw_ll2[[1]]>ll2[[1]]) return(raw_ll2)
}
#ll2 <- list(ll=ll2$logl,phylocov=phylocov,phenocov=phenocov,mu=ll2[[2]],pars=pars,ll2=ll2,alpha=alpha)
}
npars <- 0
dimnames(ll2$phylocov) <- list(colnames(trait_data)[1:nvar+1],colnames(trait_data)[1:nvar+1])
if(phylo_correlated) npars <- ((nvar^2-nvar)/2+nvar) else npars <- nvar
if(pheno_error != 0) dimnames(ll2$phenocov) <- dimnames(ll2$phylocov)
if(pheno_error==1) npars <- npars + nvar else if (pheno_error==2) npars <- npars + ((nvar^2-nvar)/2+nvar)
if(pheno_error==0) pars <- list(phylocov=ll2$phylocov) else pars <- list(phylocov=ll2$phylocov,phenocov=ll2$phenocov)
if(model=="mvOU")
{
dimnames(alpha) <- list(colnames(trait_data)[1:nvar+1],colnames(trait_data)[1:nvar+1])
model <- list(model=model,alpha=alpha)
npars <- npars + if(full_alpha) ((nvar^2-nvar)/2+nvar) else nvar
} else if(model!="BM" & model!="white" & model!="star")
{
model <- list(model=model,mod_par=ll2[[model]])
names(model)[2] <- if(model[[1]]=="OU") "alpha" else if(model[[1]]=="EB") "rate" else model[[1]]
npars <- npars + 1
} else
{
model <- list(model=model)
}
logLik <- ll2[[1]]
anc_recon <- ll2$ll2$anc_recon
anc_var <- matrix(0,nrow(anc_recon),ncol(anc_recon))
anc_cov <- vector("list",nrow(anc_recon))
colnames(anc_recon) <- colnames(anc_var) <- colnames(trait_data)[1:nvar+1]
for(i in 1:nrow(anc_recon))
{
anc_cov[[i]] <- ll2$ll2$recon_var[1:nvar+(i-1)*nvar,,drop=FALSE]
dimnames(anc_cov[[i]]) <- list(colnames(trait_data)[1:nvar+1],colnames(trait_data)[1:nvar+1])
anc_var[i,] <- as.matrix(diag(anc_cov[[i]]))
}
rownames(anc_var) <- rownames(anc_recon) <- names(anc_cov) <- 1:length(anc_cov)
rownames(anc_var)[1:nspecies] <- rownames(anc_recon)[1:nspecies] <- names(anc_cov)[1:nspecies] <- tree$tip.label
rownames(anc_var)[(nspecies+1):nrow(anc_var)] <- rownames(anc_recon)[(nspecies+1):nrow(anc_var)] <- names(anc_cov)[(nspecies+1):nrow(anc_var)] <-
tree$node.label #(nspecies+1):nrow(anc_var)
ret_list <- list(logLik=logLik,pars=pars,model=model,mu=anc_recon[nspecies+1,],npars=npars,anc_recon=anc_recon,anc_var=anc_var,anc_cov=anc_cov,tree=tree,trait_data=trait_data,REML=REML)
if(pheno_error!=0)
{
recon_ind <- ll2$ll2$recon_ind
tip_var <- matrix(0,nrow(recon_ind),ncol(recon_ind))
tip_cov <- vector("list",nrow(recon_ind))
colnames(recon_ind) <- colnames(tip_var) <- colnames(trait_data)[1:nvar+1]
recon_ind <- data.frame(trait_data$species,recon_ind)
for(i in 1:nrow(recon_ind))
{
#tip_cov[[i]] <- ll2$ll2$tip_uncertainty[1:nvar+(i-1)*nvar,,drop=FALSE]
#dimnames(tip_cov[[i]]) <- list(colnames(trait_data)[1:nvar+1],colnames(trait_data)[1:nvar+1])
#tip_var[i,] <- as.matrix(diag(tip_cov[[i]]))
}
#rownames(tip_var) <- rownames(recon_ind) <- names(tip_cov) <- 1:length(tip_cov)
#rownames(tip_var)[1:nspecies] <- rownames(recon_ind)[1:nspecies] <- names(tip_cov)[1:nspecies] <- tree$tip.label
#rownames(tip_var)[(nspecies+1):nrow(tip_var)] <- rownames(recon_ind)[(nspecies+1):nrow(tip_var)] <- names(tip_cov)[(nspecies+1):nrow(tip_var)] <-
# tree$node.label #(nspecies+1):nrow(tip_var)
ret_list$ind_recon <- recon_ind
#ret_list$tip_var <- tip_var
#ret_list$tip_cov <- tip_cov
}
ret_list$threepoint_calc = ll2$threepoint_calc
if(model[[1]]=="mvOU" | model[[1]]=="OU")
{
if(model[[1]]=="OU") stationary_cov <- pars[[1]]/(2*model[[2]])
else
{
ea <- eigen(model[[2]])
P <- ea$vectors
lambda <- ea$values
lambda_mat <- matrix(0,nvar,nvar)
for(i in 1:nvar)
for(j in 1:i)
{
lambda_mat[i,j] <- lambda[i] + lambda[j]
}
lambda_mat[upper.tri(lambda_mat)] <- t(lambda_mat)[upper.tri(lambda_mat)]
stationary_cov <- P %*% ((1/lambda_mat)*(solve(P)%*%pars[[1]]%*%t(solve(P)))) %*% t(P)
}
dimnames(stationary_cov) <- list(colnames(trait_data)[1:nvar+1],colnames(trait_data)[1:nvar+1])
ret_list$model <- c(ret_list$model,list(stationary_cov=stationary_cov))
}
class(ret_list) <- "phylopars"
if(get_cov_CIs) ret_list$cov_CIs <- get_cov_CIs(ret_list)
#g <- grad(f,pars2[[1]],em_ll=z_em_ll[[1]],R=R,Rmat=Rmat)
#pars_to_mat(g[phylocov_pars],nvar,as.integer(!phylo_correlated))
ret_list
}
prep_em <- function(trait_data,tree)
{
trait_data <- as.data.frame(trait_data)
nvar <- ncol(trait_data)-1
p1 <- pic.ortho(x = setNames(lapply(tree$tip.label,function(X) trait_data[as.character(trait_data$species)==X,2]),tree$tip.label),phy = multi2di(tree,random=FALSE),var.contrasts = TRUE,intra = TRUE)
pp1 <- cbind(c(unlist(attributes(p1)$intra),p1[,1]))
vars <- c(unlist(attributes(p1)$intra)*0,p1[,2])
if(ncol(trait_data)>2)
for(i in 2:nvar)
{
p1 <- pic.ortho(x = setNames(lapply(tree$tip.label,function(X) trait_data[as.character(trait_data$species)==X,i+1]),tree$tip.label),phy = multi2di(tree,random=FALSE),intra = TRUE)
pp1 <- cbind(pp1,c(unlist(attributes(p1)$intra),p1))
}
list(pics=pp1,vars=vars)
}
postorder_tools <- function(tree)
{
# this function is linear in time with the number of species
# this function returns a matrix called "edge_mat" and a vector called "des_order"
###
### edge_mat has 3 columns, constructed as follows:
####### the node number of the most recent
####### common ancestor of the species number in
####### column 1 and the species number in column 2
####### is found in column 3. The order is
####### in reverse postorder tree transversal
####### (from the root to the tip).
#######
###
### des_order returns the vector of indexes that would put
### tree$edge[,2] in order, with the exception that a root edge
### is inserted at position (nspecies+1)
tree <- reorder(tree,"postorder")
nedge <- nrow(tree$edge)
nspecies <- length(tree$tip.label)
des <- tree$edge[,2] # descendant nodes for edge i
anc <- tree$edge[,1] # ancestral nodes for edge i
des_order <- order(des)
# the next steps reorder the edge matrix so that
# the descendant nodes are in order
# A root edge is temporarily inserted at row (nspecies+1)
new_edge <- tree$edge[des_order,1:2]
new_edge <- rbind(new_edge[1:nspecies,],c(nspecies+1,nspecies+1),new_edge[(nspecies+1):nedge,])
# 'edges' is the matrix to be returned
### first column is the low species number
### second column is the high species number
### third column is the descendant number
edges <- cbind(new_edge*0,new_edge[,2])
edges[,1] <- nspecies^2 # make sure the low species number initially contains a number >(nedge+1)
edges[1:nspecies,1] <- edges[1:nspecies,2] <- 1:nspecies # set the mrca for species as themselves
for(i in 1:nedge)
{
des_node <- des[i]
anc_node <- anc[i]
if(des_node<=nspecies)
{
edges[anc_node,1] <- min(edges[anc_node,1],des_node) # the lowest "low species" so far to represent anc[i]
edges[anc_node,2] <- max(edges[anc_node,2],des_node) # the highest "high species" so far to represent anc[i]
} else
{
# the lowest "low species" so far to represent anc[i]
edges[anc_node,1] <- min(edges[des_node,1],edges[anc_node,1])
# the highest "high species" so far to represent anc[i], but also make sure it's not >nspecies
edges[anc_node,2] <- max(edges[des_node,2],ifelse(edges[anc_node,1]<=nspecies,edges[anc_node,1],0))
}
}
new_des_order <- numeric(nedge+1)
new_des_order[1:nspecies] <- des_order[1:nspecies]
new_des_order[nspecies+1] <- nedge+1 # add in a root edge for des_order
new_des_order[(nspecies+2):(nedge+1)] <- des_order[(nspecies+1):(nedge)]
edges <- edges[tree$edge[,2],c(1,2,3)] # sort the edge_mat in postorder, and remove root edge
list(edge_mat=edges,des_order=new_des_order)
}
getHeights <- function(tree) # identical to phylolm::pruningwise.distFromRoot
{
tree <- reorder(tree,"postorder")
nt = length(tree$tip.label)
xx <- numeric(tree$Nnode + nt)
for (i in length(tree$edge.length):1) xx[tree$edge[i, 2]] <- xx[tree$edge[i,1]] + tree$edge.length[i]
names(xx) <- if (is.null(tree$node.label))
1:(nt + tree$Nnode)
else c(tree$tip.label, tree$node.label)
return(xx)
}
convert_to_tree <- function(heights,nspecies,nedge,anc,des)
{
new_len_vec <- numeric(nedge+1)
edge_i <- nedge
for(i in seq_along(des))
{
des_i <- des[edge_i]
anc_i <- anc[edge_i]
new_len_vec[edge_i] <- heights[des_i] - if(anc_i>(nspecies+1)) heights[anc_i] else 0
edge_i <- edge_i - 1
}
new_len_vec[nedge+1] <- heights[nspecies+1] # root edge
new_len_vec
}
calc_OU_heights <- function(heights,edge_mat,des_order,nedge,alpha,sigma)
{
ntraits <- dim(alpha)[1]
P <- eigen(alpha)$vectors
tP <- t(P)
invP <- solve(P)
tinvP <- t(invP)
len_mat <- matrix(0,nedge+1,ntraits^2)
lambda <- eigen(alpha)$values
lambda_mat <- matrix(0,ntraits,ntraits)
for(i in 1:ntraits)
for(j in 1:i)
{
lambda_mat[i,j] <- lambda[i] + lambda[j]
}
lambda_mat[upper.tri(lambda_mat)] <- t(lambda_mat)[upper.tri(lambda_mat)]
for(i in 1:nedge)
{
temp_edge <- edge_mat[i,]
t1 <- heights[temp_edge[1]]
t2 <- heights[temp_edge[2]]
t12 <- heights[temp_edge[3]]
if(ntraits>1)
{
eAC1 <- P%*%diag(exp(-lambda*(t1-t12)))%*%invP
teAC2 <- P%*%diag(exp(-lambda*(t2-t12)))%*%invP
} else
{
eAC1 <- P*(exp(-lambda*(t1-t12)))*invP
teAC2 <- P*(exp(-lambda*(t2-t12)))*invP
}
M <- P %*% (((1/lambda_mat)*(1-exp(-lambda_mat*t12)))*(invP%*%sigma%*%tinvP)) %*% tP
temp <- eAC1 %*% M %*% teAC2
len_mat[i,] <- temp[1:length(temp)]
}
#if(!is.ultrametric(tree))
#{
# flag <- 1
# stop("Non-ultrametric trees not supported for OU model.")
#externalEdge <- tree$edge[,2] <= nspecies
#u <- c(mean(hts[1:nspecies]) - hts[1:nspecies],rep(0,nedge-nspecies+1))
#tree$edge.length[externalEdge] <- tree$edge.length[externalEdge] + u[des[externalEdge]]
#hts <- getHeights(tree)
#HOU <- calc_OU_heights(heights = hts,edge_mat = edge_mat,des_order = des_order,nedge = nedge,alpha = alpha,sigma = sigma)
#transf_heights <-
# calc_OU_heights(heights = u,edge_mat = edge_mat,des_order = des_order,nedge = nedge,alpha = alpha,sigma = sigma)
#HOU <- HOU - transf_heights
#} else
#{
# flag <- 0
# hts <- getHeights(tree)
# edge_mat <- postorder_tools(tree)
# des_order <- edge_mat[[2]]
# edge_mat <- edge_mat[[1]]
# HOU <- calc_OU_heights(heights = hts,edge_mat = edge_mat,des_order = des_order,nedge = nedge,alpha = alpha,sigma = sigma)
# len_vec <- apply(HOU,2,function(X) convert_to_tree(heights = X,nspecies = nspecies,nedge = nedge,anc = tree$edge[,1],des = tree$edge[,2]))
#
#}
#len_vec <- apply(HOU,2,function(X) convert_to_tree(heights = X,nspecies = nspecies,nedge = nedge,anc = tree$edge[,1],des = tree$edge[,2]))
#if(flag==1)
#{
#ealpha <- eigen(alpha)
#ea_vectors <- ealpha$vectors
#inv_ea_vectors <- solve(ea_vectors)
#ea_values <- ealpha$values
#enalpha <- eigen(-alpha)
#ena_vectors <- enalpha$vectors
#inv_ena_vectors <- solve(ena_vectors)
#ena_values <- enalpha$values
#invL <- lapply(u[1:nspecies],function(X) ea_vectors %*% diag(exp(-ea_values*X)) %*% inv_ea_vectors)
#L <- t(matrix(as.double(simplify2array(invL)),nrow=nvar))
#R <- L %*% as.double(Y)
#}
len_mat[des_order,]
}
mat_to_pars <- function(M,nvar,diag,log_chol=TRUE,mod_chol=TRUE)
{
len <- (nvar*nvar-nvar)/2+nvar
if(diag==1)
{
ret <- if(log_chol) log(sqrt(diag(M))) else sqrt(diag(M))
return(ret)
} else
{
M <- try(chol(M),silent=TRUE)
if(inherits(M,"try-error")) return(M)
diag(M) <- if(log_chol) log(diag(M)) else diag(M)
}
if(diag!=1 & nvar>1)
{
if(log_chol & mod_chol) M[1,2] <- M[1,2]/exp(M[1,1])
}
vec_count = 1
ret <- numeric(len)
for(j in 1:nvar)
{
for(i in 1:j)
{
ret[vec_count] <- M[i,j]
vec_count <- vec_count + 1
}
}
ret
}
print.phylopars <- function(x, ...)
{
PPE <- x
cat("Phylogenetic trait variance-covariance\n")
print(PPE$pars[[1]])
cat("\n")
if(length(x$pars)>1)
{
cat("Phenotypic trait variance-covariance\n")
print(PPE$pars[[2]])
nvar <- ncol(PPE$pars[[1]])
cat("\n% variance explained by phylogeny\n")
percent_var <- (1-diag(PPE$pars[[2]])/apply(PPE$trait_data[,1:nvar+1,drop=FALSE],2,function(X) var(X,na.rm=TRUE)))*100
names(percent_var) <- colnames(PPE$pars[[1]])
print(percent_var)
}
if(length(PPE$model)>1)
{
cat(paste("",PPE$model[[1]],if(names(PPE$model)[2]!="lambda" & names(PPE$model)[2]!="delta" & names(PPE$model)[2]!="kappa") paste(" ",names(PPE$model)[2],sep="")," = ",sep=""))
if(PPE$model[[1]]!="mvOU") cat((PPE$model[[2]])) else
{
cat("\n")
print(PPE$model[[2]])
cat("\n")
}
if(PPE$model[[1]]=="OU" | PPE$model[[1]]=="mvOU")
{
cat("\n")
cat("Stationary covariance = ")
cat("\n")
print(PPE$model[[3]])
}
} else if(PPE$model=="BM") cat("Brownian motion model") else if(PPE$model=="white" | PPE$model=="star") cat("Star phylogeny model")
cat("\n")
}
summary.phylopars <- function(object, ...)
{
PPE <- object
if(length(PPE$model)>1)
{
cat(paste("",PPE$model[[1]],if(names(PPE$model)[2]!="lambda" & names(PPE$model)[2]!="delta" & names(PPE$model)[2]!="kappa") paste(" ",names(PPE$model)[2],sep="")," = ",sep=""))
if(PPE$model[[1]]!="mvOU") cat((PPE$model[[2]])) else
{
cat("\n")
print(PPE$model[[2]])
}
} else if(PPE$model=="BM") cat("Brownian motion model") else if(PPE$model=="white" | PPE$model=="star") cat("Star phylogeny model")
cat("\n")
ncol <- ifelse(length(PPE$pars)>1,4,2)
ret <- matrix(0,nrow(PPE$pars[[1]]),ncol,dimnames = list(colnames(PPE$pars[[1]]),
c("phylogenetic mean","phylogenetic sd","phenotypic sd",
"% variance explained by phylogeny")[1:ncol]))
ret[,1] <- PPE$mu
ret[,2] <- round(sqrt(diag(PPE$pars[[1]])),4)
if(length(PPE$pars)>1) {
ret[,3] <- round(sqrt(diag(PPE$pars[[2]])),4)
nvar <- ncol(PPE$pars[[1]])
percent_var <- (1-diag(PPE$pars[[2]])/apply(PPE$trait_data[,1:nvar+1,drop=FALSE],2,function(X) var(X,na.rm=TRUE)))*100
names(percent_var) <- colnames(PPE$pars[[1]])
ret[,4] <- round(percent_var,2)
}
ret
}
pval <- function(r,tree)
{
N <- length(tree$tip.label)
r <- abs(r)
t <- r*sqrt((N-2)/(1-r^2))
p <- 2*pt(t,N-2-1,lower.tail = FALSE)
p[p>1] <- 1
p
}
phylopars.lm <- function()
{
args <- as.list(match.call())
args <- args[3:length(args)]
#trait_data$species <- factor(trait_data$species, levels=tree$tip.label)
trait_data <- as.data.frame(trait_data)
if(colnames(trait_data)[1]!="species")
{
stop("First column name of trait_data MUST be 'species' (all lower case).")
}
trait_data[,1] <- as.character(trait_data[,1])
if(!all(trait_data[,1] %in% tree$tip.label))
{
mismatch <- unique(trait_data[,1])[which(!(unique(trait_data[,1]) %in% tree$tip.label))]
stop(paste(length(mismatch)," species name(s) in the first column of trait_data did not match any names in tree$tip.label:\n",paste(unique(trait_data[,1])[which(!(unique(trait_data[,1]) %in% tree$tip.label))],collapse="\n"),sep=""))
}
trait_data <- trait_data[,c(which(colnames(trait_data)=="species"),which(colnames(trait_data)!="species"))]
original_data <- trait_data
original_option <- getOption("na.action")
options(na.action="na.pass")
mod.mat <- model.matrix(object = formula,data = trait_data)
intercept <- attr(terms(formula,data = trait_data),"intercept")==1
if(!intercept) stop("Intercept-free PGLS not currently supported.")
y_var <- model.frame(formula,data=trait_data)
var_name <- colnames(y_var)[1]
y_var <- y_var[,1,drop=FALSE]
mod.mat <- cbind(mod.mat,y_var)
colnames(mod.mat)[ncol(mod.mat)] <- var_name
if(intercept)
{
trait_data <- data.frame(species=trait_data$species,mod.mat[,2:ncol(mod.mat)])
colnames(trait_data) <- c("species",colnames(mod.mat)[2:ncol(mod.mat)])
} else
{
trait_data <- data.frame(species=trait_data$species,mod.mat[,1:ncol(mod.mat)])
colnames(trait_data) <- c("species",colnames(mod.mat)[1:ncol(mod.mat)])
}
options(na.action = original_option)
args$trait_data <- trait_data
PPE <- do.call(phylopars,args)
n <- nspecies <- length(PPE$tree$tip.label)
means <- PPE$anc_recon[nspecies+1,]
trait_data <- PPE$trait_data
df.int <- as.integer(intercept)
k <- ncol(PPE$pars[[1]])
rdf <- n - k
if(PPE$model[[1]]!="mvOU" & PPE$model[[1]]!="OU") covX <- PPE$pars[[1]] else covX <- PPE$model[[3]]
npred <- ncol(covX)-1
y_pos <- ncol(covX)
if(ncol(covX)==1 & intercept)
{
R2 <- 0
ts <- ps <- SEs <- NA
} else
{
coefs <- solve(covX[1:npred,1:npred,drop=FALSE])%*%covX[1:npred,y_pos,drop=FALSE]
R2 <- as.double(sum(covX[1:npred,y_pos,drop=FALSE] * coefs) / covX[y_pos,y_pos,drop=FALSE])
}
R2adj <- 1-(1-R2)*(n-df.int)/(rdf)
SST <- as.double(covX[y_pos,y_pos]) * (n-1)
SSreg <- SST * R2
SSres <- SST - SSreg
MSres <- SSres / ((rdf))
sigma <- sqrt(MSres)
if(!(ncol(covX)==1 & intercept))
{
SEs <- sqrt(diag(solve((covX)[1:npred,1:npred,drop=FALSE]) * MSres / (n-1) ))
ts <- coefs / SEs
ps <- 2*(1-pt(abs(ts),rdf))
}
if(intercept==1)
{
if(ncol(covX)==1 & intercept) coefs <- setNames(means[y_pos],"(Intercept)") else
{
coefs <- as.double(c(Intercept=means[y_pos] - means[1:npred] %*% solve(covX[1:npred,1:npred,drop=FALSE])%*%covX[1:npred,y_pos,drop=FALSE],coefs))
SEs <- c(NA,as.double(SEs))
ts <- c(NA,as.double(ts))
ps <- c(NA,as.double(ps))
names(coefs) <- c("(Intercept)",colnames(covX)[1:npred])
}
} else names(coefs) <- colnames(covX)[1:npred]
Fstat <- rdf / (k-df.int)*R2 / (1-R2)
pval <- as.double(pf(Fstat,k-df.int,rdf,lower.tail = FALSE))
logdet <- three.point.compute(tree,cbind(setNames(rep(1,n),tree$tip.label)))$logd
ll <- -n/2 * log(2*pi) - n/2 * log((n-k) * MSres/n) - logdet/2 - n/2
if(any(is.na(trait_data))) ll <- NA
ret <- list(coefficients=coefs,SEs=SEs,ts=ts,ps=ps,R2=R2,R2adj=R2adj,sigma=sigma,Fstat=Fstat,pval=pval,df1=k,df2=rdf,dims=list(N=n,p=npred,REML=PPE$REML,df.int=df.int),model=formula,SST=SST,SSres=SSres,SSreg=SSreg,logLik=ll,PPE=PPE,original_data=original_data,covX=covX)
class(ret) <- "phylopars.lm"
ret
}
formals(phylopars.lm) <- c(alist(formula = ),formals(phylopars))
anova.phylopars.lm <- function(object,...)
{
trait_data <- object$original_data
covX <- object$covX
SST <- object$SST
tlabels <- attr(terms(object$model),"term.labels")
k <- length(tlabels)
n <- object$dims$N
y_pos <- ncol(covX)
NR <- length(tlabels) + 1
rss <- resdf <- rep(NA, NR)
rss[1] <- object$SST
resdf[1] <- n - 1
coefs <- coef(object)
vars <- double()
accum <- 0
for (i in 1:(k)) {
fmla <- as.formula(paste(colnames(object$PPE$pars[[1]])[ncol(object$PPE$pars[[1]])],"~",paste(tlabels[1:i],collapse="+")))
rdf <- length(colnames(model.matrix(fmla,data=trait_data))[-1]) - length(vars)
accum <- accum + rdf
vars <- colnames(model.matrix(fmla,data=trait_data))[-1]
coefs <- solve(covX[1:accum,1:accum,drop=FALSE])%*%covX[1:accum,y_pos,drop=FALSE]
R2 <- as.double(sum(covX[1:accum,y_pos,drop=FALSE] * coefs) / covX[y_pos,y_pos,drop=FALSE])
SSreg <- SST * R2
SSres <- SST - SSreg
rss[i + 1] <- SSres
resdf[i + 1] <- n - rdf
resdf[i+1] <- resdf[i]-rdf
}
ss <- c(abs(diff(rss)), object$SSres)
df <- c(abs(diff(resdf)), n - object$df1)
ms <- ss/df
fval <- ms/ms[NR]
P <- pf(fval, df, df[NR], lower.tail = FALSE)
table <- data.frame(df, ss, ms, f = fval, P)
table[length(P), 4:5] <- NA
dimnames(table) <- list(c(tlabels, "Residuals"), c("Df",
"Sum Sq", "Mean Sq", "F value", "Pr(>F)"))
structure(table, heading = c("Analysis of Variance Table\nSequential SS",
paste("Response:", deparse(object$model[[2L]]))),
class = c("anova", "data.frame"))
}
coef.phylopars.lm <- function(object,...)
{
object$coefficients
}
print.phylopars.lm <- function(x,...)
{
PPE <- x
dd <- x$dims
mCall <- x$call
cat("Generalized least squares fit by ")
cat(ifelse(dd$REML == 1, "REML\n", "maximum likelihood\n"))
cat(" Model:", deparse(x$model), "\n\n")
if(length(PPE$model)>1)
{
cat(paste("",PPE$model[[1]],names(PPE$model)[2]," = "))
if(PPE$model[[1]]!="mvOU") cat((PPE$model[[2]])) else
{
cat("\n")
print(PPE$model[[2]])
cat("\n")
}
if(PPE$model[[1]]=="OU" | PPE$model[[1]]=="mvOU")
{
cat("\n")
cat("Stationary covariance = ")
cat("\n")
print(PPE$model[[3]])
}
} else if(PPE$model=="BM") cat("Brownian motion model") else if(PPE$model=="white" | PPE$model=="star") cat("Star phylogeny model")
cat("\n")
cat(" Log-", ifelse(dd$REML == 1, "restricted-",
""), "likelihood: ", (x$logLik), "\n", sep = "")
cat("\nCoefficients:\n")
print(coef(x))
cat("\n")
cat("Degrees of freedom:", dd[["N"]] - dd[["p"]]-dd[["df.int"]], "\n")
cat("Residual standard error:", (x$sigma), "\n")
invisible(x)
}
logLik.phylopars <- function(object,...)
{
val <- object$logLik
attr(val,"nobs") <- attr(val,"nall") <- length(na.exclude(as.double(unlist(object$trait_data[,2:ncol(object$trait_data)]))))
attr(val,"df") <- object$npars
class(val) <- "logLik"
val
}
logLik.phylopars.lm <- function(object,...)
{
val <- object$logLik
attr(val,"nall") <- object$dims$N
attr(val,"nobs") <- object$dims$N
attr(val,"df") <- object$df1 + 1
class(val) <- "logLik"
val
}
summary.phylopars.lm <- function(object,...)
{
PPE <- object
tTable <- data.frame(coef(object), object$SEs, object$ts, object$ps)
dimnames(tTable) <- list(names(coef(object)), c("Value", "Std.Error",
"t-value", "p-value"))
dd <- object$dims
mCall <- object$call
cat("Generalized least squares fit by ")
cat(ifelse(dd$REML == 1, "REML\n", "maximum likelihood\n"))
cat(" Model:", deparse(object$model), "\n")
if(length(PPE$model)>1)
{
cat(paste("",PPE$model[[1]],names(PPE$model)[2]," = "))
if(PPE$model[[1]]!="mvOU") cat((PPE$model[[2]])) else
{
cat("\n")
print(PPE$model[[2]])
cat("\n")
}
if(PPE$model[[1]]=="OU" | PPE$model[[1]]=="mvOU")
{
cat("\n")
cat("Stationary covariance = ")
cat("\n")
print(PPE$model[[3]])
}
} else if(PPE$model=="BM") cat("Brownian motion model") else if(PPE$model=="white" | PPE$model=="star") cat("Star phylogeny model")
cat("\n")
aux <- logLik(object)
object$BIC <- BIC(aux)
object$AIC <- AIC(aux)
print(data.frame(AIC = object$AIC, BIC = object$BIC, logLik = as.vector(object$logLik),
row.names = " "))
cat("\nCoefficients:\n")
colnames(tTable) <- c("Estimate", "Std.Err", "Z value", "Pr(>z)")
printCoefmat(tTable,P.values = TRUE,has.Pvalue = TRUE,na.print = "")
cat("\n")
cat("Residual standard error:", (object$sigma), "\n")
cat("Degrees of freedom:", dd[["N"]] -
dd[["p"]] - dd[["df.int"]], "\n")
cat("Multiple R-squared:", (object$R2), "Adjusted R-squared:", (object$R2adj), "\n")
cat("F-statistic:", (object$Fstat), "on", (object$df1-1), "and", (object$df2), "DF, p-value:", (object$pval), "\n")
invisible(object)
}
ll_wrapper <- function(pars,args)
{
args$pars <- pars
do.call(tp,args)[[1]]
}
get_cov_CIs <- function(p,lower=.025,upper=.975,nsim=5000,verbose=TRUE)
{
args <- p$threepoint_calc
h <- hessian(ll_wrapper,p$threepoint_calc$pars,args = args)
m <- mvrnorm(n = nsim,mu = p$threepoint_calc$pars,Sigma = solve(-h))
nvar <- p$threepoint_calc$nvar
ret_list1 <- list(NULL)
ret_list2 <- list(NULL)
all_mats <- lower_CI <- upper_CI <- vector("list",length = length(p$pars))
for(i in 1:length(all_mats))
{
if(i==1)
{
if(!is.na(p$threepoint_calc$phylocov_fixed)[[1]])
{
phylocov_pars <- numeric()
} else
{
phylocov_pars <- 1:length(which(upper.tri(matrix(NA,nvar,nvar),diag = 1 - p$threepoint_calc$phylocov_diag)))
all_mats[[i]] <- t(apply(m,1,function(X) pars_to_mat(pars = X,nvar = nvar,diag=p$threepoint_calc$phylocov_diag)))
bad_mats <- which(apply(all_mats[[i]],1,function(X) any(is.na(X) | !is.finite(X))))
if(length(bad_mats)>0)
{
#warning(length(bad_mats)," simulated phylogenetic trait covariance matrices were singular. Re-running.")
}
while(length(bad_mats)>0)
{
m[which(apply(all_mats[[i]],1,function(X) any(is.na(X) | !is.finite(X)))),] <- mvrnorm(n = length(bad_mats),mu = p$threepoint_calc$pars,Sigma = solve(-h))
all_mats[[i]] <- t(apply(m,1,function(X) pars_to_mat(pars = X,nvar = nvar,diag=p$threepoint_calc$phylocov_diag)))
bad_mats <- which(apply(all_mats[[i]],1,function(X) any(is.na(X) | !is.finite(X))))
}
}
try(lower_CI[[i]] <- matrix(t(apply(all_mats[[i]],2,function(X) quantile(X,c(lower)))),nvar,nvar,dimnames=dimnames(p$pars[[i]])))
try(upper_CI[[i]] <- matrix(t(apply(all_mats[[i]],2,function(X) quantile(X,c(upper)))),nvar,nvar,dimnames=dimnames(p$pars[[i]])))
ret_list1 <- list(estimate = p$pars[[1]],lowerCI = lower_CI[[1]],upperCI = upper_CI[[1]])
} else if(i==2)
{
#if(!is.na(p$threepoint_calc$phenocov_fixed)[[1]]) next
#if(p$threepoint_calc$pheno_error>0) if(is.na(p$threepoint_calc$phenocov_fixed)[[1]]) phenocov <- pars_to_mat(pars = p$threepoint_calc$pars[phenocov_pars],nvar,p$threepoint_calc$pheno_error) else phenocov <- phenocov_fixed
all_mats[[i]] <- t(apply(m,1,function(X) pars_to_mat(pars = X,nvar = nvar,diag=p$threepoint_calc$pheno_error)))
if(!is.na(p$threepoint_calc$phenocov_fixed)[[1]])# | length(p$threepoint_calc$phenocov_list)!=0)
{
phenocov_pars <- numeric()
} else
{
if(length(phylocov_pars)==0)
{
phenocov_pars <- 1:ncol(h)
} else
{
phenocov_pars <- (1:ncol(h))[-phylocov_pars]
}
all_mats[[i]] <- t(apply(m,1,function(X) pars_to_mat(pars = X[phenocov_pars],nvar = nvar,diag=p$threepoint_calc$pheno_error)))
bad_mats <- which(apply(all_mats[[i]],1,function(X) any(is.na(X) | !is.finite(X))))
if(length(bad_mats)>0)
{
#warning(length(bad_mats)," simulated phenotypic trait covariance matrices were singular. Re-running.")
}
while(length(bad_mats)>0)
{
m[which(apply(all_mats[[i]],1,function(X) any(is.na(X) | !is.finite(X)))),] <- mvrnorm(n = length(bad_mats),mu = p$threepoint_calc$pars,Sigma = solve(-h))
all_mats[[i]] <- t(apply(m,1,function(X) pars_to_mat(pars = X[phenocov_pars],nvar = nvar,diag=p$threepoint_calc$pheno_error)))
bad_mats <- which(apply(all_mats[[i]],1,function(X) any(is.na(X) | !is.finite(X))))
}
}
try(lower_CI[[i]] <- matrix(t(apply(all_mats[[i]],2,function(X) quantile(X,c(lower)))),nvar,nvar,dimnames=dimnames(p$pars[[i]])))
try(upper_CI[[i]] <- matrix(t(apply(all_mats[[i]],2,function(X) quantile(X,c(upper)))),nvar,nvar,dimnames=dimnames(p$pars[[i]])))
ret_list2 <- list(estimate = p$pars[[2]],lowerCI = lower_CI[[2]],upperCI = upper_CI[[2]])
}
if(verbose)
{
cat(ifelse(i==1,"\nPhylogenetic","\nPhenotypic")," covariance and lower/upper 95% confidence intervals:\n",sep="")
print(p$pars[[i]])
cat("\nLower CI:\n")
print(lower_CI[[i]])
cat("\nUpper CI:\n")
print(upper_CI[[i]])
cat("\n")
}
}
ret_list <- list(phylocov = ret_list1,phenocov = ret_list2)
ret_list
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.