# depricated functions that still have dependencies in the demos
# brownie, OUCH, wright, release_constraint
#multiou can try and take lca as a parameter option rather than calculating each time, for efficiency
old_update.multiOU <- function(model, data){
switch(model$submodel,
wright = do.call(wright,
c(list(data=data, tree=model$tree,
regimes=model$regimes,
Xo=model$Xo, alpha=model$alpha,
sigma=model$sigma, theta=model$theta),
model$opts)),
ouch = do.call(ouch,
c(list(data=data, tree=model$tree,
regimes=model$regimes, Xo=model$Xo,
alpha=model$alpha, sigma=model$sigma),
model$opts)),
brownie = do.call(brownie,
c(list(data=data, tree=model$tree,
regimes=model$regimes, sigma=model$sigma),
model$opts)),
release_constraint =
do.call(release_constraint,
c(list(data=data, tree=model$tree,
regimes=model$regimes, Xo=model$Xo,
alpha=model$alpha, sigma=model$sigma,
theta=model$theta),
model$opts)))
}
# Likelihood as a function of optimizable parameters
llik.release = function(data, tree, regimes, lca=NULL){
# returns a likelihood funciton of pars: {Xo, alpha, sigma, theta}
n_regimes <- length(levels(regimes))
if(is.null(lca))
lca <- lca_calc(tree)
f <- function(par){
Xo <- par[1]
alpha <- par[2:(1+n_regimes)]
sigma <- rep(par[2+n_regimes], n_regimes)
theta <- rep(par[3+n_regimes], n_regimes)
if (any(alpha < 0)){
llik <- -Inf
}
else if (any(sigma<0)){
llik <- -Inf
} else {
llik<-multiOU_lik_lca(data, tree, regimes, alpha=alpha,
sigma=sigma, theta=theta, Xo=Xo, lca)
}
-llik
}
f
}
release_constraint <- function(data, tree, regimes, alpha=NULL,
sigma=NULL, theta=NULL, Xo=NULL, ...){
opts <- list(...)
# alpha varies by regime, theta and sigma are global
# par is Xo, all alphas, theta, sigma
## Probably a much better way to handle this. could also be functionalized...
n_regimes <- length(levels(regimes))
par <- numeric(n_regimes+3)
if(is.null(Xo))
Xo <- mean(data, na.rm=TRUE)
if(is.null(theta))
theta <- Xo
par[1] <- Xo
if(length(alpha) == n_regimes){
par[2:(1+n_regimes)] <- alpha
} else {
par[2:(1+n_regimes)] <- rep(alpha, n_regimes)
}
par[2+n_regimes] <- sigma
par[3+n_regimes] <- theta
lca <- lca_calc(tree)
f <- llik.release(data, tree, regimes, lca)
print(par)
print(paste("starting loglik = ", -f(par)))
optim_output <- optim(par,f, ...)
# optim(par,f, method="L", lower=c(-Inf, rep(0,n_regimes), rep(-Inf, n_regimes), rep(0, n_regimes)))
output <- list(data=data, tree=tree, regimes=regimes,
loglik=-optim_output$value, Xo=optim_output$par[1],
alpha=optim_output$par[2:(1+n_regimes)],
sigma=optim_output$par[2+n_regimes],
theta=optim_output$par[3+n_regimes],
optim_output=optim_output, submodel="release_constraint",
convergence=optim_output$convergence,
opts=opts)
class(output) = "multiOU"
output
}
# Likelihood as a function of optimizable parameters
llik.wright <- function(data, tree, regimes, lca=NULL){
n_regimes <- length(levels(regimes))
if(is.null(lca))
lca <- lca_calc(tree)
f <- function(par){
Xo <- par[1]
alpha <- par[2:(1+n_regimes)]
sigma <- par[(2+n_regimes):(1+2*n_regimes)]
theta <- par[(2+2*n_regimes):(1+3*n_regimes)]
if (any(alpha < 0)){
llik <- -Inf
}
else if (any(sigma<0)){
llik <- -Inf
} else {
llik<-multiOU_lik_lca(data, tree, regimes, alpha=alpha,
sigma=sigma, theta=theta, Xo=Xo, lca)
}
-llik
}
f
}
wright <- function(data, tree, regimes, alpha=1, sigma=1, Xo=NULL, theta=NULL, ...){
opts <- list(...)
# all are regime dependent
# intialize a parameter vector to optimize:
# par = {Xo, alphas, sigmas, thetas}
n_regimes <- length(levels(regimes))
par <- numeric(1+3*n_regimes)
# Create par vector from the given starting conditions
# (or guess if not given)
## Specify the indices of each. Xo is always 1
alpha_i <- 2:(1+n_regimes)
sigma_i <- (2+n_regimes):(1+2*n_regimes)
theta_i <- (2+2*n_regimes):(1+3*n_regimes)
if(is.null(Xo)) Xo <- mean(data, na.rm=TRUE)
par[1] <- Xo
if(length(alpha) == n_regimes){
par[alpha_i] <- alpha
} else {
par[alpha_i] <- rep(alpha, n_regimes)
}
if(length(sigma) == n_regimes){
par[sigma_i] <- sigma
} else {
par[sigma_i] <- rep(sigma, n_regimes)
}
if(is.null(theta)){
par[theta_i] <- rep(Xo, n_regimes)
} else if(length(theta) == n_regimes){
par[theta_i] <- theta
} else {
par[theta_i] <- rep(theta, n_regimes)
}
lca <- lca_calc(tree)
# Likelihood as a function of optimizable parameters
f <- llik.wright(data, tree, regimes, lca)
print(par)
print(paste("starting loglik = ", -f(par)))
optim_output <- optim(par,f, ...)
# optim(par,f, method="L", lower=c(-Inf, rep(0,n_regimes), rep(-Inf, n_regimes), rep(0, n_regimes)))
output <- list(data=data, tree=tree, regimes=regimes,
loglik=-optim_output$value, Xo=optim_output$par[1],
alpha=optim_output$par[alpha_i],
sigma=optim_output$par[sigma_i],
theta=optim_output$par[theta_i],
optim_output=optim_output, submodel="wright",
convergence=optim_output$convergence, opts=opts)
class(output) = "multiOU"
output
}
# OUCH
ouch <- function(data, tree, regimes, alpha=1, sigma=1, Xo=NULL, ...){
opts <- list(...)
# alpha is fixed at ~zero, sigma is regime dependent, theta is global
# intialize a parameter vector to optimize:
# Xo, alpha, sigma, and the n_regime thetas
n_regimes <- length(levels(regimes))
par <- numeric(3+n_regimes)
if(length(alpha) > 1){
alpha <- alpha[1]
}
if(length(sigma) > 1){
sigma <- sigma[1]
}
# Some starting conditions
if(is.null(Xo)) Xo <- mean(data, na.rm=TRUE)
par[1] <- Xo
par[2] <- alpha
par[3] <- sigma
par[4:(3+n_regimes)] <- rep(Xo, n_regimes)
lca <- lca_calc(tree)
# Likelihood as a function of optimizable parameters
f <- function(par){
Xo <- par[1]
alpha <- rep(par[2], n_regimes)
theta <- par[4:(3+n_regimes)]
sigma <- rep(par[3], n_regimes) # everything else
if (any(alpha < 0)){
llik <- -Inf
}
else if (any(sigma<0)){
llik <- -Inf
} else {
llik <- multiOU_lik_lca(data, tree, regimes, alpha=alpha,
sigma=sigma, theta=theta, Xo=Xo, lca)
}
-llik
}
optim_output <- optim(par,f, ...)
output <- list(data=data, tree=tree, regimes=regimes,
loglik=-optim_output$value, Xo=optim_output$par[1],
alpha=optim_output$par[2],
theta=optim_output$par[4:(3+n_regimes)],
sigma=optim_output$par[3],
optim_output=optim_output,
submodel="ouch",
convergence=optim_output$convergence, opts=opts)
class(output) = "multiOU"
output
}
# Brownie
# should take Xo
brownie <- function(data, tree, regimes, sigma=1, ...){
opts <- list(...)
# intialize a parameter vector to optimize:
# Xo, followed by the n_regime sigmas
n_regimes <- length(levels(regimes))
pars <- numeric(1+n_regimes)
# Some starting conditions
pars[1] <- mean(data, na.rm=TRUE) #Xo
if(length(sigma) == n_regimes){
pars[2:(1+n_regimes)] <- sigma # sigmas
} else {
pars[2:(1+n_regimes)] <- rep(sigma, n_regimes) # sigmas
}
lca <- lca_calc(tree)
# Likelihood as a function of optimizable parameters
f <- function(pars){
Xo <- pars[1]
sigma <- pars[2:(1+n_regimes)] # everything else
alpha <- rep(1e-12, n_regimes) ## all alphas approx 0
theta <- rep(Xo, n_regimes)
if (any(sigma<0)){
llik <- -Inf
} else {
llik <- multiOU_lik_lca(data, tree, regimes, alpha=alpha, sigma=sigma,
theta=theta, Xo=Xo, lca)
}
-llik
}
optim_output <- optim(pars,f, ...)
# optim(par,f, method="L", lower=c(-Inf, rep(0,n_regimes), rep(-Inf, n_regimes), rep(0, n_regimes)))
output <- list(data=data, tree=tree, regimes=regimes,
loglik=-optim_output$value, Xo=optim_output$par[1],
alpha=rep(1e-12, n_regimes),
theta=rep(pars[1], n_regimes),
sigma=optim_output$par[2:(1+n_regimes)],
optim_output=optim_output,
submodel="brownie",
convergence=optim_output$convergence, opts=opts)
class(output) = "multiOU"
output
}
## These should be part of an independent phylogenetic bootstrapping library (or at least file)
## Consider extending to some other functions, such as ape's ace fn, geiger's ancestral states, etc
LR_bootstrap <- function(true_model, test_model, nboot = 200){
# Bootstraps the likelihood ratio statistic using boot function
# Args:
# true_model -- is used to generated the simulated data. Must be a fitted hansentree or browntree
# test_model -- is another fitted model whose likeihood will also be evaluated on the data
# nboot -- is the number of bootstrap replicates to do. Defaults to 200
# Returns:
# boot object that can be fed into boot.ci to
# generate confidence intervals, etc using the boot package
get_loglik <- function(model){
if(is(model, "ouchtree") ) loglik = model@loglik
else loglik = model$loglik
loglik
}
get_data <- function(model){
if(is(model, "ouchtree") ) data = model@data
else data = model$data
data
}
orig_diff <- -2*( get_loglik(true_model) - get_loglik(test_model))
orig_data <- get_data(true_model)
statisticfn <- function(data, ...){
# function required by boot fn that will be boostrapped
# Args:
# data is data generated by a simulation,
# ... is the test_model
# Returns
# the likelihood ratio statistic: 2*(log(null) - log(test) )
test <- update(test_model, data=data)
true <- update(true_model,data=data)
-2*(get_loglik(true) - get_loglik(test))
}
rangendat <- function(d, p){
# simulate using model specified as mle, the true model
out <- simulate(p)
out$rep.1
}
boot.out <- boot( data=orig_data,
statistic=statisticfn,
R=nboot,
sim="parametric",
ran.gen=rangendat,
mle=true_model,
object=test_model)
}
fast_boot <- function(model, nboot=200, cpus=1){
require(snowfall)
sfInit(parallel=TRUE, cpus=cpus)
sfExportAll()
sfLibrary(wrightscape)
fits <- sfLapply(1:nboot, function(i) update(model, data=simulate(model)$rep.1 ) )
if(is(fits[[1]], "wrighttree") ){
n_regimes <- length( fits[[1]]$sigma )
n_pars <- 3*n_regimes+2
regime_names <- levels(fits[[1]]$regimes[[1]])
alpha_names <- sfSapply(1:n_regimes, function(i) paste("alpha.", regime_names[i]) )
sigma_names <- sfSapply(1:n_regimes, function(i) paste("sigma.", regime_names[i]) )
theta_names <- sfSapply(1:n_regimes, function(i) paste("theta.", regime_names[i]) )
X <- sapply(1:nboot, function(i) c(fits[[i]]$loglik, fits[[i]]$Xo, fits[[i]]$alpha, fits[[i]]$sigma, fits[[i]]$theta ) )
rownames(X) <- c("loglik", "Xo", alpha_names, sigma_names, theta_names)
}
out <- list(bootstrap_values = X, model=model, nboot=nboot)
class(out) <- "wrightboot"
out
}
plot.wrightboot <- function(input, CHECK_OUTLIERS=FALSE){
object <- input$bootstrap_values
par(mfrow=c(1,3) )
n_regimes <- (dim(object)[1]-2)/3
alphas <- 3:(2+n_regimes)
sigmas <- (3+n_regimes):(2*n_regimes+2)
thetas <- (3+2*n_regimes):(3*n_regimes+2)
nboot <- dim(object)[2]
outliers <- numeric(nboot)
xlim <- c(0, 3*median(object[alphas,]) )
ylim <- c(0, max(sapply(alphas, function(i) max(density(object[i,])$y))))
plot(density(object[alphas[1], ]), xlim=xlim, ylim=ylim, xlab="Alpha values", type='n', main="", cex.lab=1.6, cex.axis = 1.6)
k <- 1
for(i in alphas){
if(CHECK_OUTLIERS) outliers <- object[i,] > xlim[2]
if( sum(outliers) > 0 ) print(paste(sum(outliers), " outliers in alpha ", k))
lines(density(object[i,!outliers]), lwd = 3, lty=k)
k <- k+1
}
xlim <- c(0, 3*median(object[sigmas,] ) )
ylim <- c(0, max(sapply(sigmas, function(i) max(density(object[i,])$y))))
plot(density(object[sigmas[1], ]), xlim=xlim, ylim=ylim, xlab="Sigma values", type='n', main="", cex.lab=1.6, cex.axis = 1.6)
k <- 1
for(i in sigmas){
if(CHECK_OUTLIERS) outliers <- object[i,] > xlim[2]
if( sum(outliers) > 0 ) print(paste(sum(outliers), " outliers in sigma ", k))
lines(density(object[i,!outliers]), lwd = 3, lty=k)
k <- k+1
}
xlim <- c(0, 3*median(object[thetas,] ) )
ylim <- c(0, max(sapply(thetas, function(i) max(density(object[i,])$y))))
plot(density(object[thetas[1], ]), xlim=xlim, ylim=ylim, xlab="Theta values", type='n', main="", cex.lab=1.6, cex.axis = 1.6)
k <- 1
for(i in thetas){
if(CHECK_OUTLIERS) outliers <- object[i,] > xlim[2]
if( sum(outliers) > 0 ) print(paste(sum(outliers), " outliers in theta ", k))
lines(density(object[i,!outliers]), lwd = 3, lty=k)
k <- k+1
}
legend("topright", levels(input$model$regimes), lty=1:length(levels(input$model$regimes)), lwd=2)
}
bootstrap.wrighttree <- function(model, nboot = 200, fit=TRUE)
{
# Bootstraps the likelihood ratio statistic using boot function. Should give bootstraps for all parameters!!!!
# Args:
# model -- is used to generated the simulated data. Must be a fitted hansentree or browntree
# nboot -- is the number of bootstrap replicates to do. Defaults to 200
# Returns:
# boot object -- can be fed into boot.ci to generate confidence intervals, etc
# using the boot package
simdata <- simulate(model)
refit_model <- update(model, data=simdata)
}
choose_model <- function(model_list, nboot=200, cpus=1){
require(snowfall)
sfInit(parallel=TRUE, cpus=cpus)
sfExportAll()
sfLibrary(wrightscape)
LR <- sfLapply( 1:(length(model_list)-1),
function(i) LR_bootstrap( model_list[[i]], model_list[[i+1]], nboot )
)
p_vals <- sfSapply( 1:(length(model_list)-1),
function(i) sum( LR[[i]]$t < LR[[i]]$t0 )/length(LR[[i]]$t)
)
print(p_vals)
LR
}
pretty_plot <- function(LR, main=""){
xlim = 1.1*c(min( LR$t, LR$t0), max( LR$t, LR$t0) )
hist(LR$t, col="lightblue", border="white", xlab="Likelihood Ratio", cex.axis=1.6, cex.lab=1.6, main=main, xlim=xlim)
abline(v=LR$t0, lwd=4, lty=2, col="darkblue")
p_val <- 1-sum(LR$t < LR$t0)/length(LR$t)
text(LR$t0, 0.5*par()$yaxp[2], paste("p = ", round(p_val, digits=3)), cex=1.6) # halfway up the vert line
}
# plot the wrightscape tree using the ouch plotting function
plot.wrighttree <- function(object)
{
plot(object$tree, regimes=object$regimes)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.