Nothing
#' Bootstrap likelihood ratio test of diversity-dependent diversification model
#'
#' This function computes the maximum likelihood and the associated estimates
#' of the parameters of a diversity-dependent diversification model for a given
#' set of phylogenetic branching times. It then performs a bootstrap likelihood
#' ratio test of the diversity-dependent (DD) model against the constant-rates
#' (CR) birth-death model. Finally, it computes the power of this test.
#'
#' The output is a list with 3 elements:
#'
#' @param brts A set of branching times of a phylogeny, all positive
#' @param initparsoptDD The initial values of the parameters that must be
#' optimized for the diversity-dependent (DD) model: lambda_0, mu and K
#' @param initparsoptCR The initial values of the parameters that must be
#' optimized for the constant-rates (CR) model: lambda and mu
#' @param missnumspec The number of species that are in the clade but missing
#' in the phylogeny
#' @param outputfilename The name (and location) of the file where the output
#' will be saved. Default is no save.
#' @param seed The seed for the pseudo random number generator for simulating
#' the bootstrap data
#' @param endmc The number of bootstraps
#' @param alpha The significance level of the test
#' @param plotit Boolean to plot results or not
#' @param res Sets the maximum number of species for which a probability must
#' be computed, must be larger than 1 + length(brts)
#' @param ddmodel Sets the model of diversity-dependence: \cr \code{ddmodel ==
#' 1} : linear dependence in speciation rate with parameter K (= diversity
#' where speciation = extinction)\cr \code{ddmodel == 1.3} : linear dependence
#' in speciation rate with parameter K' (= diversity where speciation = 0)\cr
#' \code{ddmodel == 2} : exponential dependence in speciation rate with
#' parameter K (= diversity where speciation = extinction)\cr \code{ddmodel ==
#' 2.1} : variant of exponential dependence in speciation rate with offset at
#' infinity\cr \code{ddmodel == 2.2} : 1/n dependence in speciation rate\cr
#' \code{ddmodel == 2.3} : exponential dependence in speciation rate with
#' parameter x (= exponent)\cr \code{ddmodel == 3} : linear dependence in
#' extinction rate \cr \code{ddmodel == 4} : exponential dependence in
#' extinction rate \cr \code{ddmodel == 4.1} : variant of exponential
#' dependence in extinction rate with offset at infinity \cr \code{ddmodel ==
#' 4.2} : 1/n dependence in extinction rate with offset at infinity \cr
#' \code{ddmodel == 5} : linear dependence in speciation and extinction rate
#' \cr
#' @param cond Conditioning: \cr cond == 0 : conditioning on stem or crown age
#' \cr cond == 1 : conditioning on stem or crown age and non-extinction of the
#' phylogeny \cr cond == 2 : conditioning on stem or crown age and on the total
#' number of extant taxa (including missing species) \cr cond == 3 :
#' conditioning on the total number of extant taxa (including missing species)
#' \cr Note: cond == 3 assumes a uniform prior on stem age, as is the standard
#' in constant-rate birth-death models, see e.g. D. Aldous & L. Popovic 2004.
#' Adv. Appl. Prob. 37: 1094-1115 and T. Stadler 2009. J. Theor. Biol. 261:
#' 58-66.
#' @param btorph Sets whether the likelihood is for the branching times (0) or
#' the phylogeny (1)
#' @param soc Sets whether stem or crown age should be used (1 or 2)
#' @param tol Sets the tolerances in the optimization. Consists of: \cr reltolx
#' = relative tolerance of parameter values in optimization \cr reltolf =
#' relative tolerance of function value in optimization \cr abstolx = absolute
#' tolerance of parameter values in optimization
#' @param maxiter Sets the maximum number of iterations in the optimization
#' @param changeloglikifnoconv if TRUE the loglik will be set to -Inf if ML
#' does not converge
#' @param optimmethod Method used in optimization of the likelihood. Current
#' default is 'subplex'. Alternative is 'simplex' (default of previous
#' versions)
#' @param methode The method used to solve the master equation, default is
#' 'analytical' which uses matrix exponentiation; alternatively numerical ODE
#' solvers can be used, such as 'odeint::runge_kutta_cash_karp54'. These were used in the
#' package before version 3.1.
#' @return \item{treeCR}{a list of trees generated under the constant-rates
#' model using the ML parameters under the CR model} \item{treeDD}{a list of
#' trees generated under the diversity-dependent model using the ML parameters
#' under the diversity-dependent model} \item{out}{a dataframe with the
#' parameter estimates and maximum likelihoods for diversity-dependent and
#' constant-rates models \code{$model} - the model used to generate the data. 0
#' = unknown (for real data), 1 = CR, 2 = DD \cr \code{$mc} - the simulation
#' number for each model \cr \code{$lambda_CR} - speciation rate estimated
#' under CR \cr \code{$mu_CR} - extinction rate estimated under CR \cr
#' \code{$LL_CR} - maximum likelihood estimated under CR \cr \code{$conv_CR} -
#' convergence code for likelihood optimization; conv = 0 means convergence \cr
#' \code{$lambda_DD1} - initial speciation rate estimated under DD for first
#' set of initial values\cr \code{$mu_DD1} - extinction rate estimated under DD
#' for first set of initial values \cr \code{$K_DD1} - clade-wide
#' carrying-capacity estimated under DD for first set of initial values \cr
#' \code{$LL_DD1} - maximum likelihood estimated under DD for first set of
#' initial values \cr \code{$conv_DD1} - convergence code for likelihood
#' optimization for first set of initial values; conv = 0 means convergence \cr
#' \code{$lambda_DD2} - initial speciation rate estimated under DD for second
#' set of initial values\cr \code{$mu_DD2} - extinction rate estimated under DD
#' for second set of initial values \cr \code{$K_DD2} - clade-wide
#' carrying-capacity estimated under DD for second set of initial values \cr
#' \code{$LL_DD2} - maximum likelihood estimated under DD for second set of
#' initial values \cr \code{$conv_DD2} - convergence code for likelihood
#' optimization for second set of initial values; conv = 0 means convergence
#' \cr \code{$LR} - likelihood ratio between DD and CR } \item{pvalue}{p-value
#' of the test} \item{LRalpha}{Likelihood ratio at the signifiance level alpha}
#' \item{poweroftest}{power of the test for significance level alpha}
#' @author Rampal S. Etienne & Bart Haegeman
#' @seealso \code{\link{dd_loglik}}, \code{\link{dd_ML}}
#' @references - Etienne, R.S. et al. 2016. Meth. Ecol. Evol. 7: 1092-1099,
#' doi: 10.1111/2041-210X.12565 \cr - Etienne, R.S. et al. 2012, Proc. Roy.
#' Soc. B 279: 1300-1309, doi: 10.1098/rspb.2011.1439 \cr - Etienne, R.S. & B.
#' Haegeman 2012. Am. Nat. 180: E75-E89, doi: 10.1086/667574
#' @keywords models
#' @export dd_LR
dd_LR = function(
brts,
initparsoptDD,
initparsoptCR,
missnumspec,
outputfilename = NULL,
seed = 42,
endmc = 1000,
alpha = 0.05,
plotit = TRUE,
res = 10 * (1 + length(brts) + missnumspec),
ddmodel = 1,
cond = 1,
btorph = 1,
soc = 2,
tol = c(1E-3,1E-4,1E-6),
maxiter = 2000,
changeloglikifnoconv = FALSE,
optimmethod = 'subplex',
methode = 'analytical'
)
{
if(!is.null(seed))
{
set.seed(roundn(seed))
}
if(cond > 1)
{
cat("Conditioning on number of tips is not possible.\n")
return(NULL)
}
age = max(brts)
cat("\nEstimating parameters under the constant-rate model ...\n")
outCRO = dd_ML(brts = brts,initparsopt = initparsoptCR,idparsopt = 1:2,idparsfix = 3,parsfix = Inf,res = res,ddmodel = ddmodel,missnumspec = missnumspec,cond = cond,btorph = btorph,soc = soc,tol = tol,maxiter = maxiter,changeloglikifnoconv = changeloglikifnoconv, optimmethod = optimmethod, methode = methode)
cat("\nEstimating parameters under the diversity-dependent model ...\n")
outDDO = dd_ML(brts = brts,initparsopt = initparsoptDD,idparsopt = 1:3,res = res,ddmodel = ddmodel,missnumspec = missnumspec,cond = cond,btorph = btorph,soc = soc,tol = tol,maxiter = maxiter,changeloglikifnoconv = changeloglikifnoconv, optimmethod = optimmethod, methode = methode)
LRO = outDDO$loglik - outCRO$loglik
out = cbind(NA,NA,outCRO,outDDO,NA,NA,NA,NA,NA,LRO)
out = out[,-c(5,7,13)]
newnames = c("model","mc","lambda_CR","mu_CR","LL_CR","conv_CR","lambda_DD1","mu_DD1","K_DD1","LL_DD1","conv_DD1","lambda_DD2","mu_DD2","K_DD2","LL_DD2","conv_DD2","LR")
names(out) = newnames
if(!is.null(outputfilename))
{
save(seed,brts,out,file = outputfilename)
}
parsCR = as.numeric(outCRO[1:2])
parsDD = as.numeric(outDDO[1:3])
treeCR = list()
treeDD = list()
cat('\nSimulating trees under CR and DD models ...\n')
for(mc in 1:endmc)
{
treeCR[[mc]] = dd_sim(pars = c(parsCR,Inf),age = age,ddmodel = ddmodel)
treeDD[[mc]] = dd_sim(pars = parsDD,age = age,ddmodel = ddmodel)
}
if(!is.null(outputfilename))
{
save(seed,brts,out,treeCR,treeDD,file = outputfilename)
}
cat('\nPerforming bootstrap to determine critical LR ...\n')
for(mc in 1:endmc)
{
cat('\nAnalyzing simulation:',mc,'\n')
brtsCR = ape::branching.times(treeCR[[mc]][[1]])
outCR = dd_ML(brtsCR,initparsopt = parsCR,idparsopt = 1:2,idparsfix = 3,parsfix = Inf,res = res,ddmodel = ddmodel,missnumspec = 0,cond = cond,btorph = btorph,soc = soc,tol = tol,maxiter = maxiter,changeloglikifnoconv = changeloglikifnoconv, optimmethod = optimmethod, methode = methode)
outDD1 = dd_ML(brtsCR,initparsopt = parsDD,idparsopt = 1:3,res = res,ddmodel = ddmodel,missnumspec = 0,cond = cond,btorph = btorph,soc = soc,tol = tol,maxiter = maxiter,changeloglikifnoconv = changeloglikifnoconv, optimmethod = optimmethod, methode = methode)
outDD2 = dd_ML(brtsCR,initparsopt = c(parsCR + 0.05,length(brts) + 1000),idparsopt = 1:3,res = res,ddmodel = ddmodel,missnumspec = 0,cond = cond,btorph = btorph,soc = soc,tol = tol,maxiter = maxiter,changeloglikifnoconv = changeloglikifnoconv, optimmethod = optimmethod, methode = methode)
if(outDD1$conv == -1 & outDD2$conv == -1)
{
maxLLDD = outCR$loglik
} else if(outDD1$conv == -1 & outDD2$conv != -1)
{
maxLLDD = outDD2$loglik
} else if(outDD1$conv != -1 & outDD2$conv == -1)
{
maxLLDD = outDD1$loglik
} else {
maxLLDD = max(outDD1$loglik,outDD2$loglik)
}
LR = pmax(0,maxLLDD - outCR$loglik)
outff = cbind(1,mc,outCR,outDD1,outDD2,LR)
outff = outff[,-c(5,7,13,19)]
names(outff) = newnames
out = rbind(out,outff)
if(!is.null(outputfilename))
{
save(seed,brts,out,treeCR,treeDD,file = outputfilename)
}
}
opt = rep(0,endmc)
cat('\nPerforming bootstrap to determine power ...\n')
for(mc in 1:endmc)
{
cat('\nAnalyzing simulation:',mc,'\n')
brtsDD = ape::branching.times(treeDD[[mc]][[1]])
outCR = dd_ML(brtsDD,initparsopt = parsCR,idparsopt = 1:2, idparsfix = 3,parsfix = Inf,res = res,ddmodel = ddmodel,missnumspec = 0,cond = cond,btorph = btorph,soc = soc,tol = tol,maxiter = maxiter,changeloglikifnoconv = changeloglikifnoconv, optimmethod = optimmethod, methode = methode)
outDD1 = dd_ML(brtsDD,initparsopt = parsDD,idparsopt = 1:3,res = res,ddmodel = ddmodel,missnumspec = 0,cond = cond,btorph = btorph,soc = soc,tol = tol,maxiter = maxiter,changeloglikifnoconv = changeloglikifnoconv, optimmethod = optimmethod, methode = methode)
outDD2 = dd_ML(brtsDD,initparsopt = c(parsCR + 0.05,length(brts) + 1000), idparsopt = 1:3,res = res,ddmodel = ddmodel,missnumspec = 0,cond = cond,btorph = btorph,soc = soc,tol = tol,maxiter = maxiter,changeloglikifnoconv = changeloglikifnoconv, optimmethod = optimmethod, methode = methode)
if(outDD1$conv == -1 & outDD2$conv == -1)
{
maxLLDD = outCR$loglik
opt[mc] = 1
} else if(outDD1$conv != -1 & outDD2$conv == -1)
{
maxLLDD = outDD1$loglik
opt[mc] = 2
} else if(outDD1$conv == -1 & outDD2$conv != -1)
{
maxLLDD = outDD2$loglik
opt[mc] = 3
} else {
maxLLDD = max(outDD1$loglik,outDD2$loglik)
opt[mc] = 1 + min(which(c(outDD1$loglik,outDD2$loglik) == maxLLDD))
}
LR = pmax(0,maxLLDD - outCR$loglik)
outff = cbind(1,mc,outCR,outDD1,outDD2,LR)
outff = outff[,-c(5,7,13,19)]
names(outff) = newnames
out = rbind(out,outff)
if(!is.null(outputfilename))
{
save(seed,brts,out,opt,treeCR,treeDD,file = outputfilename)
}
}
inverse_quantile = function(samples,x)
{
samplessort = sort(samples)
pup = which(samplessort > x)
if(length(pup) > 0)
{
if(length(pup) < length(samplessort))
{
pup = min(pup)
invquant = (pup + (x - samplessort[pup])/(samplessort[pup - 1] - samplessort[pup]))/length(samples)
} else {
invquant = 0
}
} else {
invquant = 1
}
return(invquant)
}
funpvalue = function(samples,x)
{
samplessort = sort(samples)
pup = which(samplessort > x)
pvalue = (length(pup) + 1)/ (length(samples) + 1)
return(pvalue)
}
funpoweroftest = function(samples,x)
{
samplessort = sort(samples)
pup = which(samplessort > x)
poweroftest = length(pup)/(length(samples) + 1)
return(poweroftest)
}
#pvalue = 1 - inverse_quantile(out$LR[2:(endmc + 1)],out$LR[1])
pvalue = funpvalue(out$LR[2:(endmc + 1)],out$LR[1])
LRalpha = as.numeric(stats::quantile(out$LR[2:(endmc + 1)],1 - alpha,type = 4))
#poweroftest = 1 - inverse_quantile(out$LR[(endmc + 2):(2 * endmc + 1)],LRalpha)
poweroftest = funpoweroftest(out$LR[(endmc + 2):(2 * endmc + 1)],LRalpha)
if(plotit == TRUE)
{
try(grDevices::dev.off())
try(grDevices::dev.off())
pdffilename = paste(getwd(),'/LR.pdf',sep = '')
grDevices::pdf(pdffilename,paper = "a4r", width = 29.7, height = 21)
al = 0.03
alw = 2
alw2 = 1.7
aa = 45
graphics::par(mfrow = c(2,2),cex = 1, mar = c(5, 4, 3, 1) + 0.1)
graphics::hist(out$LR[2:(1 + endmc)],main = 'Distribution of LLR under CR',xlab = 'LLR', ylab = 'Frequency', col = 'red',probability = T,nclass = 30, xlim = c(0,max(out$LR[1:(endmc + 1)])))
graphics::arrows(out$LR[1],-1E+120, x1 = out$LR[1],y1 = 0, length = al, angle = aa,lwd = alw, col = 'black')
graphics::arrows(LRalpha,-1E+120, x1 = LRalpha,y1 = 0, length = al, angle = aa,lwd = alw, col = 'blue')
graphics::box()
graphics::plot(0, 0, type = "n", bty = "n", xaxt = "n", yaxt = "n", col = "white", col.lab = 'white', col.axis = 'white')
graphics::hist(out$LR[(endmc + 2):(1 + 2 * endmc)], main = 'Distribution of LLR under DD',xlab = 'LLR', ylab = 'Frequency', col = 'red',probability = T,nclass = 30)
graphics::box()
graphics::arrows(out$LR[1],-1E+120, x1 = out$LR[1],y1 = 0, length = al, angle = aa,lwd = alw, col = 'black')
graphics::arrows(LRalpha,-1E+120, x1 = LRalpha,y1 = 0, length = al, angle = aa,lwd = alw, col = 'blue')
graphics::par(mfrow = c(2,3),cex = 1, mar = c(5, 4, 3, 1) + 0.1)
lambda = out$lambda_CR[(endmc + 2):(2 * endmc + 1)] * (opt == 1) + out$lambda_DD1[(endmc + 2):(2 * endmc + 1)] * (opt == 2) + out$lambda_DD2[(endmc + 2):(2 * endmc + 1)] * (opt == 3)
mu = out$mu_CR[(endmc + 2):(2 * endmc + 1)] * (opt == 1) + out$mu_DD1[(endmc + 2):(2 * endmc + 1)] * (opt == 2) + out$mu_DD2[(endmc + 2):(2 * endmc + 1)] * (opt == 3)
K = 1E+120 * (opt == 1) + pmin(1E+120,out$K_DD1[(endmc + 2):(2 * endmc + 1)]) * (opt == 2) + pmin(1E+120,out$K_DD2[(endmc + 2):(2 * endmc + 1)]) * (opt == 3)
graphics::hist(lambda,main = NULL, xlab = expression(lambda), ylab = 'Frequency', col = 'red',probability = T,nclass = 30, xlim = c(0,max(lambda)))
graphics::arrows(out$lambda_DD1[1],-1E+120, x1 = out$lambda_DD1[1],y1 = 0, length = al, angle = aa,lwd = alw2, col = 'black')
graphics::box()
graphics::hist(mu,main = NULL, xlab = expression(mu), ylab = 'Frequency', col = 'red',probability = T,nclass = 30, xlim = c(0,max(mu)))
graphics::arrows(out$mu_DD1[1],-1E+120, x1 = out$mu_DD1[1],y1 = 0, length = al, angle = aa,lwd = alw2, col = 'black')
graphics::box()
graphics::hist(K,main = NULL, xlab = 'K', ylab = 'Frequency', col = 'red',probability = T,nclass = 30, xlim = c(min(K),max(K)))
graphics::arrows(out$K_DD1[1],-1E+120, x1 = out$K_DD1[1],y1 = 0, length = al, angle = aa,lwd = alw2, col = 'black')
graphics::box()
try(grDevices::dev.off())
try(grDevices::dev.off())
os = .Platform$OS.type
if(os == "windows")
{
shell.exec(pdffilename)
}
if(os == "unix")
{
system(paste("open",pdffilename,sep = " "))
}
}
if(!is.null(outputfilename))
{
save(seed,brts,out,opt,treeCR,treeDD,pvalue,LRalpha,poweroftest,file = outputfilename)
}
return(list(treeCR = treeCR,treeDD = treeDD,out = out,pvalue = pvalue,LRalpha = LRalpha,poweroftest = poweroftest))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.