Nothing
# Fit a Brownian Motion model of continuous trait evolution for one or more traits
# If more than one trait is modelled, then this function estimates the full diffusivity matrix D (=sigma^2/2)
# D is a non-negative definite symmetric matrix of size Ntraits x Ntraits,
# such that exp(-X^T*D^{-1}*X/(4*L))/sqrt(det(2*pi*D)) is the probability density for the multidimensional trait vector X after phylogenetic distance L, if initially located at the origin.
# The main inputs are a list of trees (thought to be generated by the same BM process) and the corresponding numerical tip states
fit_bm_model = function(trees, # either a single tree in phylo format, or a list of trees
tip_states, # Numerical trait values for the tips of each tree. If trees[] is a single tree, then tip_states[] should be either a 1D vector of size Ntips or a 2D matrix of size Ntips x Ntraits. If trees[] is a list of trees, then tip_states should either be a list of 1D vectors or a 2D matrix of size Ntips x Ntraits.
isotropic = FALSE, # logical, specifying whether diffusivity is assumed to be isotropic (i.e., independent of the direction). Hence, if isotropic=TRUE, then the diffusivity matrix is forced to be diagonal, with all entries being equal.
Nbootstraps = 0, # integer, number of bootstraps to perform for estimating confidence intervals. Set to <=0 for no bootstrapping.
check_input = TRUE){
# basic input checking
if("phylo" %in% class(trees)){
# trees[] is actually a single tree
trees = list(trees)
Ntrees = 1
if(!(("list" %in% class(tip_states)) && (length(tip_states)==1))){
tip_states = list(tip_states)
}
}else if("list" %in% class(trees)){
# trees[] is a list of trees
Ntrees = length(trees)
if("list" %in% class(tip_states)){
if(length(tip_states)!=Ntrees) return(list(success=FALSE,error=sprintf("Input list of tip_states has length %d, but should be of length %d (number of trees)",length(tip_states),Ntrees)))
}else if("numeric" %in% class(tip_states)){
if(Ntrees!=1) return(list(success=FALSE,error=sprintf("Input tip_states was given as a single vector, but expected a list of %d vectors (number of trees)",Ntrees)))
if(length(tip_states)!=length(trees[[1]]$tip.label)) return(list(success=FALSE,error=sprintf("Input tip_states was given as a single vector of length %d, but expected length %d (number of tips in the input tree)",length(tip_states),length(trees[[1]]$tip.label))))
tip_states = list(tip_states)
}else if("matrix" %in% class(tip_states)){
if(Ntrees!=1) return(list(success=FALSE,error=sprintf("Input tip_states was given as a single matrix, but expected a list of %d matrixes (number of trees)",Ntrees)))
if(nrow(tip_states)!=length(trees[[1]]$tip.label)) return(list(success=FALSE,error=sprintf("Input tip_states was given as a matrix with %d rows, but expected %d rows (number of tips in the input tree)",nrow(tip_states),length(trees[[1]]$tip.label))))
tip_states = list(tip_states)
}
}else{
return(list(success=FALSE,error=sprintf("Unknown data format '%s' for input trees[]: Expected a list of phylo trees or a single phylo tree",class(trees)[1])))
}
if(is.null(Nbootstraps) || is.na(Nbootstraps) || (Nbootstraps<0)) Nbootstraps = 0;
scalar = is.vector(tip_states[[1]])
Ntraits = (if(scalar) 1 else ncol(tip_states[[1]]))
if(check_input){
# check tree quality
for(i in seq_len(Ntrees)){
if(any(trees[[i]]$edge.length<0)) return(list(success=FALSE,error=sprintf("Tree #%d includes negative edge lengths",i)))
}
}
# loop through the trees and extract independet contrasts
# X will be a matrix of size Ncontrasts x Ntraits, each row of which lists a single independent contrast, i.e. geodistance divided by the square root of the corresponding phylodistance
phylodistances = numeric()
X = matrix(nrow=0, ncol=Ntraits)
for(i in seq_len(Ntrees)){
pic_results = get_independent_contrasts(trees[[i]], tip_states[[i]], scaled = TRUE, only_bifurcations = FALSE, include_zero_phylodistances=FALSE, check_input = check_input);
phylodistances = c(phylodistances, pic_results$distances)
if(scalar){
X = rbind(X, matrix(pic_results$PICs,ncol=1))
}else{
X = rbind(X, pic_results$PICs)
}
}
Ncontrasts = length(phylodistances)
# estimate diffusivity based on vectorial increments
if(scalar){
diffusivity = 0.5 * mean(X**2)
loglikelihood = -0.25 * sum((X**2)/diffusivity) - 0.5*Ncontrasts*log(2*pi) - 0.5*sum(log(2*phylodistances)) - 0.5*Ncontrasts*log(diffusivity)
Nparams = 1
}else{
if(isotropic){
# fit a single diffusivity for all directions
diffusivity = (0.5/Ntraits) * mean(sapply(1:Ncontrasts, FUN=function(k) sum(X[k,]**2)), na.rm=TRUE)
diffusivity = diag(diffusivity, Ntraits, Ntraits)
Nparams = 1
}else{
# fit an arbitrary diffusivity matrix (constrained to be symmetric and non-negative definite)
# maximum-likelihood estimator on the intrinsic geometry of positive-definite matrices
diffusivity = matrix(0, ncol=Ntraits, nrow=Ntraits);
for(t1 in 1:Ntraits){
for(t2 in t1:Ntraits){
diffusivity[t1,t2] = 0.5 * mean(X[,t1]*X[,t2])
diffusivity[t2,t1] = diffusivity[t1,t2];
}
}
Nparams = (Ntraits*(Ntraits+1))/2
}
inverse_diffusivity = solve(diffusivity)
loglikelihood = -0.25 * sum(sapply(1:Ncontrasts, function(p) X[p,,drop=FALSE] %*% inverse_diffusivity %*% t(X[p,,drop=FALSE]))) - 0.5*Ntraits*Ncontrasts*log(2*pi) - 0.5*Ntraits*sum(log(2*phylodistances)) - 0.5*Ncontrasts*determinant(diffusivity,logarithm=TRUE)$modulus
}
#######################################################################
# estimate confidence intervals if needed, via parametric bootstrapping
if(Nbootstraps>0){
bootstrap_Ds = matrix(NA,nrow=Nbootstraps,ncol=Ntraits*Ntraits)
bootstrap_LLs = rep(NA,times=Nbootstraps)
for(b in 1:Nbootstraps){
# simulate model with fitted diffusivity, along each tree
bootstrap_tip_states = vector(mode="list", Ntrees)
for(i in 1:Ntrees){
bootstrap_tip_states[[i]] = castor::simulate_bm_model( tree = trees[[i]],
diffusivity = diffusivity,
include_tips = TRUE,
include_nodes = FALSE,
root_states = NULL,
Nsimulations = 1,
drop_dims = TRUE)$tip_states
}
# fit diffusivity using bootstrap-simulation
fit = fit_bm_model( trees,
tip_states = bootstrap_tip_states,
Nbootstraps = 0,
check_input = FALSE)
if(fit$success){
bootstrap_Ds[b,] = as.vector(fit$diffusivity)
bootstrap_LLs[b] = fit$loglikelihood
}
}
# calculate standard errors and confidence intervals from distribution of bootstrapped parameters
standard_errors = sqrt(pmax(0, colMeans(bootstrap_Ds^2, na.rm=TRUE) - colMeans(bootstrap_Ds, na.rm=TRUE)^2))
quantiles = sapply(1:ncol(bootstrap_Ds), FUN=function(p) quantile(bootstrap_Ds[,p], probs=c(0.25, 0.75, 0.025, 0.975), na.rm=TRUE, type=8))
CI50lower = matrix(quantiles[1,],ncol=Ntraits)
CI50upper = matrix(quantiles[2,],ncol=Ntraits)
CI95lower = matrix(quantiles[3,],ncol=Ntraits)
CI95upper = matrix(quantiles[4,],ncol=Ntraits)
mean_BLL = mean(bootstrap_LLs,na.rm=TRUE)
consistency = sum(abs(bootstrap_LLs-mean_BLL)>=abs(loglikelihood-mean_BLL),na.rm=TRUE)/sum(!is.nan(bootstrap_LLs))
}
# return results
results = list( success = TRUE,
diffusivity = diffusivity,
loglikelihood = loglikelihood,
AIC = 2*Nparams - 2*loglikelihood,
BIC = Nparams * log(Ncontrasts) - 2*loglikelihood,
Ncontrasts = Ncontrasts,
phylodistances = phylodistances)
if(Nbootstraps>0){
results$standard_errors = standard_errors
results$CI50lower = CI50lower
results$CI50upper = CI50upper
results$CI95lower = CI95lower
results$CI95upper = CI95upper
results$consistency = consistency
}
return(results)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.