Nothing
medusaVersion = 1.41;
medusa <- function (phy, richness = NULL, criterion = c("aicc", "aic"), partitions = NA, threshold = NA, model = c("mixed", "bd", "yule"),
cut = c("both", "stem", "node"), stepBack = TRUE, init = c(r = 0.05, epsilon = 0.5), ncores = NULL, verbose = FALSE, ...) {
## CHECK ARGUMENTS
initialE <- init[["epsilon"]];
initialR <- init[["r"]];
sp = c(initialR, initialE);
## Medusa requires cladewise trees, so:
phy<-reorder(phy, "cladewise")
## Determine whether multiple cores can be used (i.e. non-Windows, non-GUI).
## This is apparently breaking the build. Fix!
fx <- .get.parallel(ncores);
criterion <- match.arg(criterion, choices = c("aicc", "aic"), several.ok = FALSE);
model <- match.arg(model, choices = c("mixed", "bd", "yule"), several.ok = FALSE);
shiftCut <- match.arg(cut, choices = c("both", "stem", "node"), several.ok = FALSE);
## Before determining model.limit, prune tree as necessary (from 'taxon' information in 'richness')
if (!any(c("phylo", "multiPhylo") %in% class(phy))) {
stop("'phy' must either be a phylo or multiPhylo object");
}
richness <- .check.richness(phy = phy, richness = richness);
phyData <- .treedata.medusa(phy = phy, richness = richness, warnings = FALSE); ## modified prune.tree.merge.data for multiple trees (jme)
## Determine correct AICc threshold from tree size (based on simulations), or use user-provided threshold
threshold_N <- .threshold.medusa(threshold = threshold, phy = phyData$phy, criterion = criterion);
## Limit on number of piecewise models fitted; based on tree size, aicc correction factor,
## and flavour of model fitted (i.e. # parameters estimated; birth-death or pure-birth)
model.limit <- .get.max.model.limit(richness = richness, partitions = partitions, model = model, verbose = verbose);
# internal function for running a single tree and richness data.frame (jme)
medusa_runner <- function (phy, richness) {
## Keep track of all nodes, internal and pendant (for keeping track of breakpoints)
pend.nodes <- seq_len(length(phy$tip.label)); # Calculate pendant splits just once, keep track through various models
int.nodes <- unique(phy$edge[,1])[-1]; # Omit root node
root.node <- length(phy$tip.label) + 1;
all.nodes <- c(pend.nodes, root.node, int.nodes);
## Store pertinent information: branch times, richness, descendants
#cat("Preparing data for analysis... ");
obj <- .make.cache.medusa(phy = phy, richness = richness, fx = fx, shiftCut = shiftCut);
#cat("done.\n");
desc <- list(stem = obj$desc.stem, node = obj$desc.node);
# z.orig is no longer used, but helpful when debugging
z <- z.orig <- obj$z;
## Fit single model to entire tree
fit <- .fit.base.medusa(z = z, sp = sp, model = model, criterion = criterion);
# If only one model is desired (i.e. base model), don't bother with all of the precalculations.
prefit <- NULL;
if (model.limit != 1) {
## Needed downstream; do not recalculate
## Gives the number of tips associated with an internal node; determines whether a node is 'virgin' or not
num.tips <- fx(all.nodes, function (x) length(obj$tips[[x]]));
## Pre-fit pendant edges so these values need not be re(re(re))calculated; amounts to ~25% of all calculations
## Will show particular performance gain for edges with many fossil observations
#cat("Optimizing parameters for pendant edges... ");
# Will always be shiftCut="stem"; if mixed model, keep only best fit and throw out other in .prefit.medusa
tips <- fx(pend.nodes, .prefit.tip.medusa, z = z, sp = sp, model = model, criterion = criterion);
#cat("done.\n");
## Pre-fit virgin internal nodes; should deliver performance gain for early models, and especially for large trees
## Remain useful until a split is accepted within the clade
virgin.stem <- list();
virgin.node <- list();
if (shiftCut == "stem" || shiftCut == "both") {
virgin.stem <- fx(int.nodes, .prefit.medusa, z = z, desc = desc, sp = sp,
model = model, shiftCut = "stem", criterion = criterion);
}
if (shiftCut == "node" || shiftCut == "both") {
virgin.node <- fx(int.nodes, .prefit.medusa, z = z, desc = desc, sp = sp,
model = model, shiftCut = "node", criterion = criterion);
}
virgin.nodes <- list(stem = virgin.stem, node = virgin.node);
#cat("done.\n\n");
prefit <- list(tips = tips, virgin.nodes = virgin.nodes);
}
## this will stop in one of two ways:
## 1) model doesn't improve AIC by 'threshold' (determined by MEDUSA, or set by user)
## 2) hit the maximum allowed number of models (almost certainly only by user specified value of 'partitions')
runStepwiseAIC <- function (fit) {
optModel <- fit;
i <- 1;
done <- FALSE;
cat("Step 1: lnLik=", round(optModel$lnLik, digits=7), "; ", criterion, "=",
round(as.numeric(optModel[criterion]), digits=7), "; model=", optModel$model[1], "\n", sep="");
while (!done & i < model.limit) {
i <- i + 1;
node.list <- all.nodes[-fit$split.at];
res <- fx(node.list, .update.fit.medusa, z = z, desc = desc, fit = fit, prefit = prefit, num.tips = num.tips,
root.node = root.node, model = model, criterion = criterion, shiftCut = shiftCut);
# Select model with best score according to the specific criterion employed (default aicc)
fit <- res[[which.min(unlist(lapply(res, "[[", criterion)))]];
z.best <- .split.z.at.node.medusa(node = tail(fit$split.at, 1), z = z, desc = desc, shiftCut = tail(fit$cut.at, 1))$z;
step <- rbind(optModel$step, c("add", tail(fit$split.at, 1)));
# Consider parameter removal
if (stepBack) {
backFit <- .back.step.medusa(currentModel = fit, z = z.best, step = step, model = model, criterion = criterion);
fit <- backFit$fit;
z.best <- backFit$z;
step <- backFit$step;
}
fit$step <- step;
# Compare last accepted model to current best model
if (as.numeric(optModel[criterion]) - as.numeric(fit[criterion]) < threshold_N) {
cat("\nNo significant increase in ", criterion, " score. Disregarding subsequent piecewise models.\n\n", sep="");
done <- TRUE;
break;
} else {
if (stepBack && !is.null(backFit$remove)) {.print.removed.shifts(remove=backFit$remove);}
optModel <- fit;
z <- z.best;
cat("Step ", i, ": lnLik=", round(fit$lnLik, digits=7), "; ", criterion, "=",
round(as.numeric(optModel[criterion]), digits=7), "; shift at node ", tail(fit$split.at,1), "; model=",
tail(fit$model,1), "; cut=", tail(fit$cut.at,1), "; # shifts=", length(fit$split.at) - 1, "\n", sep="");
}
}
return(list(optModel=optModel, z=z));
}
res <- runStepwiseAIC(fit = fit);
optModel <- res$optModel;
z <- res$z;
modelSummary <- .summary.modelfit.medusa(optModel);
summarizeZ <- function (z) {
opt.model <- data.frame(cut = optModel$cut.at, split = optModel$split.at, optModel$par, lnLp = optModel$lnLik.part, stringsAsFactors = FALSE);
break.pts <- as.numeric(optModel$split.at);
cut.at <- optModel$cut.at;
rownames(z) <- phy$hash[z[, "dec"]]; # identify identical edges among trees
# temp hack. old format used NA for split.at for root
root <- min(z[,"anc"]);
break.pts[which(break.pts == root)] <- NA;
# collect shift times
internal <- is.na(z[, "n.t"]);
res <- numeric(nrow(z));
res[] <- NA;
check <- !is.na(break.pts);
cut.at <- cut.at[check];
break.pts <- break.pts[check];
if (length(break.pts)) {
for (i in 1:length(break.pts)) {
idx <- which(z[, "dec"] == break.pts[i]);
if (cut.at[i] == "node" && internal[idx]) {
res[idx] <- z[idx, "t.1"];
} else if (cut.at[i] == "stem" || !internal[idx]) { # pendant node
res[idx] <- z[idx, "t.0"];
} else {
stop("'cut' should either be stem or node");
}
}
}
anc <- match(z[, "anc"], z[, "dec"]);
z <- cbind(z, shift = match(z[, "dec"], optModel$split), t.shift = res);
z <- cbind(z, r = optModel$par[z[, "partition"],"r"], epsilon = optModel$par[z[, "partition"],"epsilon"]);
z <- cbind(z, ancestral.r = z[anc, "r"], ancestral.epsilon = z[anc, "epsilon"]);
return(z);
}
zSummary <- summarizeZ(z);
control <- list(threshold = structure(threshold_N, names = criterion), partitions = partitions);
results <- list(control = control, cache = list(desc = desc, phy = phy), model = optModel,
summary = modelSummary, zSummary = zSummary, medusaVersion = medusaVersion);
class(results) <- c("medusa", class(results));
return(results);
}
## I don't like this format. Why store N (possibly 10,000) copies of richness?
# seems like hash is ONLY useful with mulitple trees.
if ("multiPhylo" %in% class(phy)) { ## deal with multiple trees
#res <- lapply(phyData, function (x) medusa_runner(x$phy, x$richness));
res <- lapply(phyData$phy, function (x) medusa_runner(phy = x, richness = phyData$richness));
names(res) <- names(phy);
res$richness <- phyData$richness;
class(res) <- c("multimedusa", class(res));
} else {
res <- medusa_runner(phyData$phy, phyData$richness);
cat("Calculating profile likelihoods on parameter values.\n\n");
res$model$prof.par <- .get.profile.likelihoods(res); # add profile likelihoods on parameter values
res$summary <- cbind(res$summary, res$model$prof.par);
res$richness <- phyData$richness;
print(res$summary);
}
invisible(res);
}
##############################################################################
##############################################################################
# make sure things are in the correct order and of correct format
.check.richness <- function (phy, richness = NULL) {
if (is.null(richness)) {
if ("multiPhylo" %in% class(phy)) {
phy <- phy[[1]];
}
richness <- data.frame(taxon = phy$tip.label, n.taxa = 1);
} else {
richness = data.frame(richness, stringsAsFactors = FALSE);
if (length(richness[1, ]) == 2) {
if (colnames(richness)[1] != "taxon" || colnames(richness)[2] != "n.taxa") {
if (inherits(richness[, 1], "factor") && inherits(richness[, 2], "integer")) {
colnames(richness) = c("taxon", "n.taxa");
} else if (inherits(richness[, 1], "integer") && inherits(richness[, 2], "factor")) {
colnames(richness) = c("n.taxa", "taxon");
} else {
stop("'richness' data appear incorrectly formated: see medusa()");
}
}
}
}
return(richness);
}
## Function to prune tree using 'richness' information, assumed to have minimally two columns, "taxon" and "n.taxa"
## Perhaps relax on these column names, may cause too many problems
## May also include 'exemplar' column; in that case, rename relevant tip.label before pruning. this is currently broke.
#prune.tree.merge.data
.treedata.medusa <- function (phy, richness = NULL, ...) {
## MODIFIED -- jme
if ("multiPhylo" %in% class(phy)) { ## deal with multiple trees
res = lapply(phy, .treedata.medusa, richness, ...);
tips = c();
for (i in 1:length(res)) {
tips = union(tips, res[[i]]$phy$tip.label);
}
nn = sapply(res, function (x) Ntip(x$phy));
if (!all(nn == length(tips))) {
stop("'phy' appears to have trees with incompletely overlapping sets of tips");
}
trees = lapply(res, "[[", "phy");
class(trees) = "multiPhylo";
trees = hashes.phylo(phy = trees, tips = tips) ## FROM PHYLO package [returns trees with $hash object -- a 'key' for each split]
#for (i in 1:length(res)) res[[i]]$phy = trees[[i]];
#return(res);
return(list(phy = trees, richness = richness));
}
## END -- jme
# Rename exemplar taxa with taxon name in richness file. Doesn't work in current implementation
if (!is.null(richness$exemplar)) {
# Change relevant tip.labels in phy; individual 'exemplar' may be NA, use original tip.label.
# Ordering in richness file should NOT be assumed to match order of tip.labels
i.na <- is.na(richness$exemplar);
phy$tip.label[match(richness$exemplar[!i.na], phy$tip.label)] <- as.character(richness$taxon[!i.na]);
}
# checking for typo; if same size, nothing should be dropped
check <- FALSE;
if (length(phy$tip.label) == length(richness[, 1])) {
check <- TRUE;
}
# Prune tree down to lineages with assigned richnesses
temp <- richness[, "n.taxa"];
names(temp) <- richness[, "taxon"];
pruned <- treedata(phy, temp, ...); # geiger function calling ape (namecheck)
if (check) {
if (length(phy$tip.label) != length(pruned$phy$tip.label)) {
stop("'richness' data and tip labels in 'phy' do not appear to match fully");
}
}
phy <- pruned$phy;
rr <- pruned$data;
richness <- structure(data.frame(taxon = rownames(rr), n.taxa = rr[, 1], row.names = NULL));
# Check the tree
# plotNN(phy); # Node numbers (ape-style) plotted
return(list(phy = phy, richness = richness));
}
## Original default was to fit 20 models (or less if the tree was small).
## Changing to a stop-criterion ("model.limit") e.g. when k = n-1 (i.e. when denominator of aicc correction is undefined).
## k <- (3*i-1) # when both birth and death are estimated, where i is the number of piecewise models
## This occurs when i = n/3
## If Yule, max i = n/2
## n <- (2*num.taxa - 1) == (2*length(richness[,1]) - 1) # i.e. total number of nodes in tree (internal + pendant)
## Alternatively use aicc threshold itself as a stopping criterion.
# AICc = AIC + 2*k*(k+1)/(n-k-1);
.get.max.model.limit <- function (richness, partitions, model, verbose) {
samp.size <- (2 * nrow(richness) - 1);
if (model == "bd" || model == "mixed") {
max.model.limit <- as.integer(samp.size/3) - ((!(samp.size%%3)) * 1);
} else {
max.model.limit <- as.integer(samp.size/2) - ((!(samp.size%%2)) * 1);
}
if (!is.na(partitions)) {
flag <- "'partitions' should either be NA or a positive integer specifying the maximum number of piecewise models to consider";
if (!is.numeric(partitions) | partitions <= 0) {
stop(flag);
}
if (partitions > max.model.limit) {
model.limit <- max.model.limit;
warning("Supplied 'partitions' is in excess of the maximal number that can be considered");
} else {
model.limit <- partitions;
}
} else {
model.limit <- max.model.limit;
}
if (verbose) {
cat("Limiting consideration to a maximum of ", model.limit, " piecewise", sep = "");
if (model == "bd") {
cat(" birth-death models");
} else if (model == "mixed") {
cat(" mixed models");
} else {
cat(" pure-birth (Yule) models");
}
cat(" (or until threshold is not satisfied).\n\n");
}
return(model.limit);
}
## Fitted curve from random b-d simulations
## Value corresponds to 95th percentile of AICc(split) - AICc(no-split) for no-split simulations
## x-shifted power function
## User may override this with 'threshold' argument
.threshold.medusa <- function (threshold, phy, criterion) {
if ("multiPhylo" %in% class(phy)) {
phy <- phy[[1]];
}
N <- Ntip(phy);
a <- -35.9410523803326;
b <- 6.7372587299747;
c <- -0.100615083407549;
Offset <- 27.5166786643334;
y <- a * (N - b)^c + Offset;
if (y < 0 | is.na(y)) {
y <- 0;
}
# check uer-specified value
if (!is.na(threshold)) {
if (!is.numeric(threshold)) {
stop("'threshold' should be numeric, specifying the improvement in AIC model fit that is deemed significant");
} else if (threshold < 0) {
stop("'threshold' should be a positive, specifying the improvement in AIC model fit that is deemed significant");
} else {
cat("Using user-specified ", criterion, "-threshold of ", threshold,
" (rather than the default value for a tree of ", N, " tips: ", y, ").\n\n", sep="");
return(threshold);
}
} else {
cat("Appropriate ", criterion, "-threshold for a tree of ", N, " tips is: ", y, ".\n\n", sep="");
return(y);
}
}
## The make.cache.medusa function is like the first half of the original splitEdgeMatrix().
## It works through and reorders the edges, then works out start and end times of these
## based on the phylogeny's branching times.
##
## In addition, every node's descendants are also calculated. The element 'desc' is a list.
## $desc[i] contains the indices within $edge, $t.start, etc., of all descendants of node 'i'
## (in ape node numbering format).
## f: either mclapply or lapply -- see .get.parallel()
.make.cache.medusa <- function (phy, richness, fx, shiftCut) {
n.tips <- length(phy$tip.label);
n.int <- nrow(phy$edge) - n.tips;
## Ape numbers the tips first
i.int <- seq_len(n.int);
interior <- phy$edge[, 2] %in% phy$edge[, 1];
bt <- branching.times(phy);
# Consider only internal edges first. may be zero if only 2 tips.
if (n.int > 0) {
edges.int <- matrix(phy$edge[interior,], nrow = n.int, ncol = 2);
colnames(edges.int) <- c("anc", "dec");
t.0 <- bt[match(edges.int[,1], (n.tips + 1):max(edges.int))];
t.1 <- c(t.0[i.int] - phy$edge.length[interior]);
z.internal <- cbind(edges.int, t.0, t.1, t.len = t.0 - t.1,
n.0 = rep(1, n.int), n.t = rep(NA, n.int));
}
# Now, pendant edges;
edges.pendant <- phy$edge[match(seq_len(n.tips), phy$edge[,2]),];
colnames(edges.pendant) <- c("anc", "dec");
t.0 <- bt[match(edges.pendant[, 1], (n.tips + 1):max(edges.pendant))];
t.1 <- rep(0, n.tips);
# cannot assume richness ordering necessarily matches that of tip labels
ext.richness <- richness$n.taxa[match(phy$tip.label, richness$taxon)];
z.pendant <- cbind(edges.pendant, t.0, t.1, t.len = t.0 - t.1,
n.0 = rep(1, n.tips), n.t = ext.richness);
if (n.int > 0) {
z <- rbind(z.internal, z.pendant);
} else { # case with only 2 pendant edges.
z <- z.pendant;
}
z <- cbind(z, partition = rep(1, length(z[, 1]))) # Stores piecewise model structure
rownames(z) <- NULL;
# Used for identifying descendant nodes below i.e. tracking breakpoints
all.edges <- as.matrix(z[, c("anc", "dec")]);
desc.stem <- list();
desc.node <- list();
# only calculate what is needed given value of shiftCut
if (shiftCut == "both" || shiftCut == "stem") {
desc.stem <- fx(seq_len(max(all.edges)), .descendants.cutAtStem.idx, all.edges = all.edges);
}
if (shiftCut == "both" || shiftCut == "node") {
if (length(desc.stem) != 0) {
root <- min(z[,"anc"]);
desc.node <- fx(desc.stem, .strip.stem); # much faster
desc.node[root] <- desc.stem[root];
} else {
desc.node <- fx(seq_len(max(all.edges)), .descendants.cutAtNode.idx, all.edges = all.edges);
# for the case of tips (no descendants), add themselves as their descendant
for (i in 1:length(desc.node)) {
if (length(desc.node[[i]]) == 0) { # tips
desc.node[[i]] <- .descendants.cutAtStem.idx(node.list = i, all.edges = all.edges);
}
}
}
}
tips=NULL;
#tips <- .cache.descendants(phy)$tips;
res <- list(z = z, desc.stem = desc.stem, desc.node = desc.node, tips = tips);
return(res);
}
## This generates the indices of all descendants of a node, using ape's edge matrix.
## Deals with row numbers of the edge matrix rather than node numbers of the tree.
.descendants.cutAtStem <- function (node, all.edges) {
ans <- numeric();
ans <- node;
repeat {
node <- all.edges[all.edges[, 1] %in% node, 2];
if (length(node) > 0) {
ans <- c(ans, node);
} else {
break;
}
}
return(unlist(ans));
}
## The function 'descendants' returns the indices of all descendants within the edge matrix.
.descendants.cutAtStem.idx <- function (node.list, all.edges) {
which(all.edges[, 1] == node.list | all.edges[, 2] %in% .descendants.cutAtStem(node.list, all.edges));
}
## This generates the indices of all descendants of a node, using ape's edge matrix.
## Deals with row numbers of the edge matrix rather than node numbers of the tree.
.descendants.cutAtNode <- function (node, all.edges) {
ans <- numeric();
repeat {
node <- all.edges[all.edges[, 1] %in% node, 2]
if (length(node) > 0) {
ans <- c(ans, node);
} else {
break;
}
}
return(unlist(ans));
}
## The function 'descendants' returns the indices of all descendants within the edge matrix.
.descendants.cutAtNode.idx <- function (node.list, all.edges) {
which(all.edges[, 1] == node.list | all.edges[, 2] %in% .descendants.cutAtNode(node.list, all.edges));
}
# Remove stem node from previously calculated set of stem descendants; about a billion times faster than descendants.cutAtNode
.strip.stem <- function (x) {
if (length(x) == 1) { # if it is a tip, leave it alone
return(x);
}
y <- unlist(x);
return(y[-1]);
}
## Only used for base model
#.fit.base.medusa <- function (z, sp, model, fixPar, criterion) {
.fit.base.medusa <- function (z, sp, model, criterion) {
#fit <- .get.optimal.model.flavour(z = z, sp = sp, model = model, fixPar = fixPar, criterion = criterion);
fit <- .get.optimal.model.flavour(z = z, sp = sp, model = model, criterion = criterion);
model.fit <- .calculate.modelfit.medusa(fit = fit, z = z);
steps <- c("add", as.numeric(min(z[,"anc"])));
return(list(par=matrix(fit$par, nrow=1, dimnames=list(NULL,c("r", "epsilon"))), lnLik.part=fit$lnLik, lnLik=fit$lnLik,
split.at=min(z[,"anc"]), aic=round(model.fit$aic, digits=7), aicc=round(model.fit$aicc, digits=7), num.par=model.fit$k,
cut.at="node", model=fit$model, step=matrix(steps, nrow=1, dimnames=list(NULL,c("step", "node")))));
}
## When model == mixed, fit both and find optimal flavour
#.get.optimal.model.flavour <- function (z, sp, model, fixPar, criterion) {
.get.optimal.model.flavour <- function (z, sp, model, criterion) {
fit.bd <- NULL;
fit.yule <- NULL;
fit <- NULL;
if (model == "yule" | model == "mixed") {
fit.yule <- .fit.partition.medusa(z = z, sp = sp, model = "yule");
fit.yule$model <- "yule";
}
if (model == "bd" | model == "mixed") {
if (is.na(sp[2])) {sp[2] <- 0.5;}
fit.bd <- .fit.partition.medusa(z = z, sp = sp, model = "bd");
fit.bd$model <- "bd";
}
# if (model != "mixed" && model != "bd" && model != "yule") { # i.e. the constrained models
# fit <- medusaMLFitPartition(z=z, sp=sp, model=model, fixPar=fixPar);
# fit$model <- model;
# return(fit);
# }
## Figure out which model fits best
if (is.null(fit.bd)) {
fit <- fit.yule;
} else if (is.null(fit.yule)) {
fit <- fit.bd;
} else {
## Considering both models
fit <- .get.best.partial.model(fit1 = fit.yule, fit2 = fit.bd, z = z, criterion = criterion);
}
return(fit);
}
## Used for comparing models fit to the same partition
.get.best.partial.model <- function (fit1, fit2, z, criterion) {
n <- (length(z[,1]) + 1); # the number of nodes involved
## Add '1' to parameters to account for break
k1 <- 1 + sum(!is.na(fit1$par));
k2 <- 1 + sum(!is.na(fit2$par));
if (n - k1 <= 1 || n - k2 <= 1) { # deals with single edges, where AICc correction becomes undefined. use AIC.
if (.get.AIC(fit1$lnLik, k1) < .get.AIC(fit2$lnLik, k2)) {
return(fit1);
} else {
return(fit2);
}
} else {
if (criterion == "aicc") {
if (.get.AICc(fit1$lnLik, k1, n) < .get.AICc(fit2$lnLik, k2 ,n)) {
return(fit1);
} else {
return(fit2);
}
} else {
if (.get.AIC(fit1$lnLik, k1) < .get.AIC(fit2$lnLik, k2)) {
return(fit1);
} else {
return(fit2);
}
}
}
}
# replace these at some point with a general function
.get.AIC <- function (lnLik, k) {
return(-2 * lnLik + 2*k);
}
.get.AICc <- function (lnLik, k, n) {
return(-2 * lnLik + 2*k*n/(n-k-1));
}
#.prefit.tip.medusa <- function (node, z, sp, model, fixPar, criterion) {
.prefit.tip.medusa <- function (node, z, sp, model, criterion) {
z.tip <- z[z[,"dec"] == node,,drop=FALSE];
# tips are always better fit by yule. don't bother with BD.
if (model == "yule" || model == "mixed") {
if (z.tip[,"n.t"] == 1) { # single tip. nothing has happened along edge, so r = 0
return(list(par=c(0, NA), lnLik=0, model="yule"));
} else { # unresolved clade
# no t.len information available, only depth
# MLE birth rate is: log(n.t) / depth
r <- as.numeric(log(z.tip[,"n.t"]) / z.tip[,"t.len"]);
lik <- as.numeric(-z.tip[,"t.len"] * r + (z.tip[,"n.t"] - 1) * log(1 - exp(-z.tip[,"t.len"] * r)));
return(list(par=c(r, NA), lnLik=lik, model="yule"));
}
} else { # at the moment, only BD will get through. but eventually constrained models also
return(.get.optimal.model.flavour(z=z.tip, sp=sp, model=model, criterion=criterion));
}
}
# prefitting ensure that clades are intact. this can enable shortcuts for certain configurations/models.
.prefit.medusa <- function (node, z, desc, sp, model, shiftCut, criterion) {
zz <- .split.z.at.node.medusa(node = node, z = z, desc = desc, shiftCut = shiftCut, extract = TRUE)$z;
if (all(zz[,"n.t"] == 1, na.rm=T)) {
fit <- .get.optimal.simple(z = zz, sp = sp, model = model, criterion = criterion);
} else {
fit <- .get.optimal.model.flavour(z = zz, sp = sp, model = model, criterion = criterion);
}
return(fit);
}
.get.optimal.simple <- function (z, sp, model, criterion) {
fit.bd <- NULL;
fit.yule <- NULL;
fit <- NULL;
if (model == "yule" | model == "mixed") {
fit.yule <- .simple.yule.medusa(z = z);
fit.yule$model <- "yule";
}
if (model == "bd" | model == "mixed") {
if (is.na(sp[2])) {sp[2] <- 0.5;}
fit.bd <- .fit.partition.medusa(z = z, sp = sp, model = "bd");
fit.bd$model <- "bd";
}
## Figure out which model fits best
if (is.null(fit.bd)) {
fit <- fit.yule;
} else if (is.null(fit.yule)) {
fit <- fit.bd;
} else {
## Considering both models
fit <- .get.best.partial.model(fit1 = fit.yule, fit2 = fit.bd, z = z, criterion = criterion);
}
return(fit);
}
# solution known when subtree is completely sampled
.simple.yule.medusa <- function (z) {
n.int <- sum(is.na(z[,"n.t"])); # the number of speciation events
if (n.int == 0) {
return(list(par=c(0, NA), lnLik=0));
}
sum.t <- sum(z[,"t.len"]);
r <- (n.int) / sum.t;
lik <- n.int * log(r) - r * sum.t;
par <- c(r, NA);
return(list(par=c(r, NA), lnLik=lik));
}
## Split the edge matrix 'z' by adding a partition rooted at node 'node'.
## Note: in original MEDUSA parlance, this is cutAtStem=T.
## The list 'desc' is a list of descendants (see make.cache.medusa, above).
## Returns a list with elements:
## z: new medusa matrix, with the new partition added
## affected: indices of the partitions affected by the split (n == 2).
## This is where 'shiftCut' matters
## Arg 'extract' determines whether only the new clade should be returned. Used in prefitting.
#medusa.split
.split.z.at.node.medusa <- function (node, z, desc, shiftCut, extract=FALSE) {
descendants <- NULL;
if (shiftCut == "stem") {
descendants <- desc$stem;
} else {
descendants <- desc$node;
}
part <- z[, "partition"];
base <- min(part[z[, 1] == node | z[, 2] == node]);
tag <- max(part) + 1;
i <- descendants[[node]];
idx <- i[part[i] == base];
z[idx, "partition"] <- tag;
#z[which(z["dec"] == node), "partition"] <- tag; # Possible to have several edges to consider
if (extract) {z <- z[idx,,drop = FALSE];}
return(list(z = z, affected = c(unique(part[idx]), tag)));
}
## 'fit' contains parameter values from previous model, used to initialize subsequent model.
## Pass in pre-fitted values for pendant edges and virgin nodes (in 'prefit'); DON'T recalculate.
## Need to consider the possibility of birth-death, yule, or mixed models.
## Need to consider where shft is placed (shiftCut). Placement affects not only new clade, but
## also the size of the split clade. Only relevant if shiftCut = "both".
## fit1 model is already logged; only need to record fit2 model, and only non-prefitted nodes
#medusa.ml.update
.update.fit.medusa <- function (node, z, desc, fit, prefit, num.tips, root.node, model, criterion, shiftCut) {
## various combinations possible
fit1.stem <- NULL;
fit1.node <- NULL;
fit2.stem <- NULL;
fit2.node <- NULL;
cut.at <- NULL;
sp <- NULL;
aff <- NULL;
op <- fit$par;
cut.at <- NULL;
fit1 <- NULL;
fit2 <- NULL;
if (shiftCut == "stem" || shiftCut == "both") {
## First, diminshed clade
obj.stem <- .split.z.at.node.medusa(node = node, z = z, desc = desc, shiftCut = "stem");
z.stem <- obj.stem$z;
aff <- obj.stem$affected;
## Ensure that neither partition is empty; can occur with "node" or "both" cutting. If so, kill it.
if (sum(z.stem[,"partition"] == aff[1]) == 0 || sum(z.stem[,"partition"] == aff[2]) == 0) {
fit$lnLik <- -Inf;
fit$aic <- Inf;
fit$aicc <- Inf;
return(fit);
}
## Everything is cool; proceed.
sp <- op[aff[1], ]; # Use previously fit parameter values from clade that is currently being split
dimClade <- z.stem[z.stem[,"partition"] == aff[1],,drop = FALSE];
## first, consider diminshed clade. may result in a clade that has been cached previously
## check if diminished clade conforms to a cached clade
x <- which(dimClade[,"anc"] == min(dimClade[,"anc"]));
if (length(x) == 1) { # a cut-at-stem scenario
y <- as.numeric(dimClade[x, "dec"]);
if (length(unique(dimClade[(dimClade[,"dec"] < root.node),"dec"])) == num.tips[[y]]) {
if (y < root.node) {
fit1.stem <- prefit$tips[[y]];
} else {
fit1.stem <- prefit$virgin.nodes$stem[[y - root.node]];
}
}
}
if (is.null(fit1.stem)) { # not cached
fit1.stem <- .get.optimal.model.flavour(z = dimClade, sp = sp, model = model, criterion = criterion);
}
## Second, new clade
if (node < root.node) { # tip, already calculated
fit2.stem <- prefit$tips[[node]];
} else if (length(unique(z.stem[(z.stem[, "partition"] == aff[2] & z.stem[, "dec"] < root.node), "dec"])) == num.tips[[node]]) { # virgin node, already calculated
fit2.stem <- prefit$virgin.nodes$stem[[node - root.node]];
} else { # novel shift
newClade <- z.stem[z.stem[,"partition"] == aff[2],,drop = FALSE]; # only subset if necessary
fit2.stem <- .get.optimal.model.flavour(z=newClade, sp=sp, model=model, criterion=criterion);
}
}
if ((shiftCut == "node" || shiftCut == "both") && (node > root.node)) {
## First, diminshed clade
obj.node <- .split.z.at.node.medusa(node = node, z = z, desc = desc, shiftCut = "node");
z.node <- obj.node$z;
aff <- obj.node$affected;
sp <- op[aff[1], ]; # Use previously fit parameter values from clade that is currently being split
dimClade <- z.node[z.node[,"partition"] == aff[1],,drop = FALSE];
fit1.node <- .get.optimal.model.flavour(z = dimClade, sp = sp, model = model, criterion = criterion);
if (length(unique(z.node[(z.node[, "partition"] == aff[2] & z.node[, "dec"] < root.node), "dec"])) == num.tips[[node]]) { # virgin node
fit2.node <- prefit$virgin.nodes$node[[node - root.node]];
} else { # novel shift
newClade <- z.node[z.node[,"partition"] == aff[2],,drop = FALSE];
fit2.node <- .get.optimal.model.flavour(z=newClade, sp=sp, model=model, criterion=criterion);
}
}
## Now, figure out which shift position is optimal
if (is.null(fit2.node)) {
fit1 <- fit1.stem;
fit2 <- fit2.stem;
cut.at <- "stem";
} else if (is.null(fit1.stem)) {
fit1 <- fit1.node;
fit2 <- fit2.node;
cut.at <- "node";
} else {
## Considering both places for a shift
stem.lik <- (fit1.stem$lnLik + fit2.stem$lnLik);
stem.par <- rbind(fit1.stem$par, fit2.stem$par);
stem.val <- list(lnLik = stem.lik, par = stem.par);
stem.fit <- .calculate.modelfit.medusa(fit = stem.val, z = z);
node.lik <- (fit1.node$lnLik + fit2.node$lnLik);
node.par <- rbind(fit1.node$par, fit2.node$par);
node.val <- list(lnLik = node.lik, par = node.par);
node.fit <- .calculate.modelfit.medusa(fit = node.val, z = z);
if (stem.fit[[criterion]] < node.fit[[criterion]]) {
fit1 <- fit1.stem;
fit2 <- fit2.stem;
cut.at <- "stem";
} else {
fit1 <- fit1.node;
fit2 <- fit2.node;
cut.at <- "node";
}
}
op[aff[1], ] <- fit1$par # Replace parameters with new values for diminished clade
fit$model[aff[1]] <- fit1$model; # update altered model
fit$par <- rbind(op, fit2$par);
fit$lnLik.part[aff] <- c(fit1$lnLik, fit2$lnLik); # Replace parameters with new values for diminished clade
fit$split.at <- c(fit$split.at, node);
fit$lnLik <- sum(fit$lnLik.part);
model.fit <- .calculate.modelfit.medusa(fit = fit, z = z);
fit$aic <- model.fit$aic;
fit$aicc <- model.fit$aicc;
fit$num.par <- model.fit$k;
fit$cut.at <- c(fit$cut.at, cut.at);
fit$model <- c(fit$model, fit2$model);
return(fit);
}
### NEED TO PUT IN ALL OTHER MODELS HERE!!! ###
## sp = initializing values for r & epsilon
## Default values should never be used (except for first model), as the values from the previous model are passed in
#medusa.ml.fit.partition
#.fit.partition.medusa <- function (partition, z, sp = c(0.1, 0.05), model) {
.fit.partition.medusa <- function (z, sp = c(0.05, 0.5), model) {
# Construct likelihood function:
lik <- .lik.partition.medusa(partition = z, model = model);
foo <- function (x) {-lik(pars = exp(x));} # work with parameters in log-space to preserve precision
# starting parameters for bd come from previously-fit models (or default for base model)
if (model == "bd") {
fit <- optim(fn = foo, par = log(sp), method = "N"); # last argument connotes maximization
return(list(par = exp(fit$par), lnLik = -fit$value));
}
## grab values to get an intelligent upper bound on b or r
node.richness <- sum(z[,"n.t"], na.rm=TRUE);
depth <- max(z[,"t.0"]);
if (model == "yule") {
#fit <- optimize(f = foo, interval = c(-25, 1));
#par <- c(exp(fit$minimum), NA);
n.int <- sum(is.na(z[,"n.t"]));
if ((n.int == 0) && all(z[,"n.t"] == 1, na.rm = TRUE)) {
return(list(par=c(0, NA), lnLik=-Inf));
}
maxVal <- (log(node.richness) / depth) * 5;
if (node.richness <= 1) {maxVal <- 1e-5;}
suppressWarnings(fit <- optimize(f=foo, interval=c(-25, log(maxVal)))); # don't really need to suppress here
par <- c(exp(fit$minimum), NA);
#test <- FALSE;
#if (par[1]/maxVal > 0.95) {
# print(z);
# cat("node.richness = ", node.richness, "; depth = ", depth, "\n", sep="");
# test <- TRUE;
#}
while (par[1]/maxVal > 0.95) { # crash against boundary; rarely used
maxVal <- par[1] * 3;
# cat("Hit boundary. Increasing maxVal from ", par[1], " to ", maxVal, "\n", sep="");
suppressWarnings(fit <- optimize(f=foo, interval=c(log(par[1]/2), log(maxVal)))); # don't really need to suppress here
par <- c(exp(fit$minimum), NA);
}
#if (test) cat("Final par = ", par[1], "\n", sep="");
return(list(par = par, lnLik = -fit$objective));
}
# and all of the constrained models ...
}
## make.lik.medusa.part: generate a likelihood function for a single partition.
#make.lik.medusa.part
## lots of alternative constrained models are missing - add back in
.lik.partition.medusa <- function (partition, model) {
# Handle internal and pendant edges separately
is.int <- is.na(partition[, "n.t"]);
is.pend <- !is.int;
n.int <- sum(is.int);
n.pend <- sum(is.pend);
if (n.int + n.pend != length(partition[, 1])) {
stop("You messed up, yo.");
}
## Internal and pendant calculations differ; split'em up
int <- partition[is.int, , drop = FALSE];
pend <- partition[is.pend, , drop = FALSE];
sum.int.t.len <- sum(int[, "t.len"]); # Simply sum all internal edges
int.t.0 <- int[, "t.0"];
# 'n.0' = Foote's 'a', initial diversity; 'n.t' = Foote's 'n', final diversity
pend.n.0 <- pend[, "n.0"]; # Foote's 'a': initial diversity
pend.n.t <- pend[, "n.t"]; # Foote's 'n': final diversity
pend.t.len <- pend[, "t.len"];
# User may pass in epsilon; don't change it, just estimate r
f <- function (pars) {
if (model == "bd") {
r <- pars[1];
epsilon <- pars[2];
if (r < 0 || epsilon <= 0 || epsilon >= 1) {
return(-Inf);
}
} else {
r <- pars[1];
epsilon <- 0;
if (r < 0) {
return(-Inf);
}
}
l.int <- numeric();
l.pend <- numeric();
if (n.int == 0) {
l.int <- 0;
} else {
## Likelihood of internal edges from Rabosky et al. (2007) equation (2.3):
l.int <- n.int * log(r) - r * sum.int.t.len - sum(log(1 - (epsilon * exp(-r * int.t.0))));
}
if (n.pend == 0) {
l.pend <- 0;
} else {
## Calculations are from the following:
## Rabosky et al. 2007. Proc. Roy. Soc. 274: 2915-2923.
## Foote et al. 1999. Science. 283: 1310-1314
## Raup. 1985. Paleobiology 11: 42-52 [Foote et al. correct the equation [A18] where a > 1]
## Bailey. 1964. The Elements Of Stochastic Processes, With Applications To The Natural Sciences
## Kendall. 1948. Ann. Math. Stat. 19: 1-15.
##
## A = probability of extinction of one lineage over time 't'
## B = A * (lambda/mu)
##
## When there is a single lineage at time 0 (a = 1), the calculation is
## log(1 - A) + log(1 - B) + (n - 1)*log(B)
## but this is conditioned on survival by dividing by (1-A)
## (subtracting log(1 - A) on a log scale) which cancels to give:
## log(1 - B) + (n - 1)*log(B)
## - for n.t == 1, reduces further to log(1-B)
##
## A = mu*(exp((lambda - mu)*t) - 1)) / (lambda*exp((lambda - mu)*t) - mu)
## let r = (lambda - mu); ert = exp((lambda - mu)*t)
## A = mu*(ert - 1)/(lambda*ert - mu)
##
## B = A * (lambda/mu)
## = [mu*(ert - 1)/(lambda*ert - mu)] * (lambda/mu)
## = (lambda*(ert - 1))/(lambda*ert - mu)
## = (lambda*(ert - 1))/(lambda(ert - mu/lambda))
## = (ert - 1) / (ert - epsilon)
## All pendant nodes begin with richness '1'; calculations simple.
# i.pend.n.t.1 <- which(pend.n.t == 1) # calculations even simpler: log(1-B)
# i.pend.n.t.n1 <- which(pend.n.t != 1)
ert <- exp(r * pend.t.len);
B <- (ert - 1)/(ert - epsilon); # Equivalently: B <- (bert - b) / (bert - d)
l.pend <- sum(log(1 - B) + (pend.n.t - 1) * log(B));
}
return(l.int + l.pend);
}
}
## 'fit' contains '$par' and '$lnlik'
.calculate.modelfit.medusa <- function (fit, z) {
## Sample size taken (for now) as the total num.nodes in the tree (internal + pendant)
# Since each edge defines a node (i.e. an 'observation'), need only add root node as final obervation
n <- (length(z[, 1]) + 1);
# Includes both formal parameters AND number of breaks. Note: first model does not involve a break.
## Models where all parameters are estimated (i.e. BD model):
# 2 parameters for base model (no breakpoint) + 3 parameters (r, eps, breakpoint) for each subsequent model
# Determine number of piecewise models currently involved
if (length(fit$par) < 3) { # i.e. base model
num.models <- 1;
} else {
num.models <- length(fit$par[, 1]);
}
# Updated for more general models: check how many parameter values != NA
k <- sum(!is.na(fit$par)) + (num.models - 1); # number of estimated parameters + number of breaks
ll <- list(k = k, lnL = fit$lnLik);
return(.aic(ll, n));
#return(.aic(ll$lnL, n, k));
}
## Functions for model removal ##
# ## Consider removing previously-fit rate shifts
# #.back.step.medusa <- function (currentModel, z, step, model, fixPar, criterion) {
.back.step.medusa <- function (currentModel, z, step, model, criterion) {
## As a first step, only consider removing entire shifts. Later deal with individual parameters.
z.opt <- z;
bestModel <- currentModel;
bestScore <- as.numeric(bestModel[criterion]);
allDeletedShifts <- NULL;
bestRemoved <- NULL;
improve <- T;
while (improve) { # may be possible to remove > 1 previously fit shift
allDeletedShifts <- c(allDeletedShifts, bestRemoved);
currentModel <- bestModel;
z <- z.opt;
cuts <- bestModel$cut.at;
nodes <- bestModel$split.at;
pars <- bestModel$par;
numModels <- length(bestModel$par)/2;
improve <- F;
if (numModels > 2) {
for (i in 2:(numModels - 1)) { # don't waste time removing last shift
fitModel <- currentModel;
obj <- .dissolve.split.medusa(z, cut = cuts[i], node = nodes[i], aff = i);
aff <- obj$affected;
z.temp <- obj$z[obj$z[,"partition"] == aff,,drop=FALSE];
## set par to weighted mean of 2 affected partitions
## updated to reflect total path length rather than number of tips
## really only influences weighted parameter starting values
weights <- c(sum(z[which(z[,"partition"] == aff),"t.len"]), sum(z[which(z[,"partition"] == i),"t.len"]));
sp <- c(weighted.mean(pars[c(aff,i),1], weights), weighted.mean(pars[c(aff,i),2], weights, na.rm=T));
#fit <- getOptimalModelFlavour(z=z.temp, sp=sp, model=model, fixPar=fixPar, criterion=criterion);
fit <- .get.optimal.model.flavour(z=z.temp, sp=sp, model=model, criterion=criterion);
## Update fit values
fitModel$par[aff,] <- fit$par;
fitModel$par <- fitModel$par[-i,];
fitModel$lnLik.part[aff] <- fit$lnLik;
fitModel$lnLik.part <- fitModel$lnLik.part[-i];
fitModel$lnLik <- sum(fitModel$lnLik.part);
model.fit <- .calculate.modelfit.medusa(fit=fitModel, z=z);
fitModel$aic <- model.fit$aic;
fitModel$aicc <- model.fit$aicc;
fitModel$num.par <- model.fit$k;
if (fitModel[criterion] < bestScore) {
#cat("Found a better fit by removing a model (split at node #", fitModel$split.at[i], ")!\n", sep="");
fitModel$split.at <- fitModel$split.at[-i];
fitModel$model[aff] <- fit$model;
fitModel$model <- fitModel$model[-i];
fitModel$cut.at <- fitModel$cut.at[-i];
bestModel <- fitModel;
bestScore <- as.numeric(fitModel[criterion]);
z.opt <- .updateZ(z=obj$z, deletedPart=i);
bestRemoved <- nodes[i];
improve <- T;
}
}
if (improve) {step <- rbind(step, c("remove", bestRemoved));}
}
}
return(list(fit=bestModel, z=z.opt, step=step, remove=bestRemoved));
}
## Remove previously-fit rate shift
.dissolve.split.medusa <- function (z, cut, node, aff) {
## Grab ancestral branch partition membership
anc <- z[which(z[,"dec"] == node)];
root <- min(z[,"anc"]);
tag <- NULL;
if (cut == "node") {
tag <- as.numeric(z[which(z[,"dec"] == node),"partition"]);
} else if (cut == "stem" && anc > root) {
tag <- as.numeric(z[which(z[,"dec"] == anc),"partition"]);
} else if (cut == "stem" && anc == root) { # need to take other side of root
dec <- z[which(z[,"anc"] == root),"dec"];
tag <- as.numeric(z[which(z[,"dec"] == dec[which(dec != node)]),"partition"]); # ug. li.
}
idx <- which(z[,"partition"] == aff);
z[idx,"partition"] <- tag;
return(list(z=z, affected=tag));
}
## Only print if model improves AIC score
.print.removed.shifts <- function (remove) {
for (i in 1:length(remove)) {
cat(" Removing shift at node #", remove[i], "\n", sep="");
}
}
## reduce partition IDs to reflect dissolved split
.updateZ <- function (z, deletedPart) {
idx <- z[,"partition"] > deletedPart;
z[idx,"partition"] <- z[idx,"partition"] - 1;
return(z);
}
##########################################################################################
# Printing / plotting / summary
##########################################################################################
## Prints out a table of likelihoods, and parameters.
.summary.modelfit.medusa <- function (optModel) {
modelSize <- length(optModel$split.at);
summ <- data.frame(cbind(seq(1:modelSize), optModel$split.at, optModel$cut.at, optModel$model,
signif(optModel$lnLik.part, digits=7)), signif(optModel$par, digits=6), stringsAsFactors = FALSE);
colnames(summ) <- c("Model.ID", "Shift.Node", "Cut.At", "Model", "Ln.Lik.part", "r", "epsilon");
return(summ);
}
## Print out summary matrix with liks, parameter vals, and confidence intervals (if present)
print.medusa <- function (x, ...) {
n.taxa <- sum(x$zSummary[,"n.t"], na.rm=TRUE);
n.tips <- length(x$cache$phy$tip.label);
if (n.taxa == n.tips) {
cat("\nOptimal MEDUSA model for tree with ", n.tips, " taxa.\n\n", sep="");
} else {
cat("\nOptimal MEDUSA model for tree with ", n.tips, " tips representing ", n.taxa, " taxa.\n\n", sep="");
}
print(x$summary);
cat("\n");
if (!is.null(x$summary$r.low)) {
cat("95% confidence intervals on parameter values calculated from profile likelihoods\n");
}
}
print.multimedusa <- function (x) {
n.trees <- length(x) - 1;
n.taxa <- sum(x[[1]]$zSummary[,"n.t"], na.rm=TRUE);
n.tips <- length(x[[1]]$cache$phy$tip.label);
if (n.taxa == n.tips) {
cat("\nMEDUSA results for ", n.trees, " trees, each with ", n.taxa, " taxa.\n\n", sep="");
} else {
cat("\nMEDUSA results for ", n.trees, " trees, each with ", n.tips, " tips representing ", n.taxa, " taxa.\n\n", sep="");
}
}
# should add in the ability to plot terminal richnesses (if any != 1)
#plot.medusa <- function (x, partitions = list(cex = 2, bg = "gray", alpha = 0.75, col = "black", lwd = 1), ...) {
#plot.medusa <- function (x, cex = 0.5, time = TRUE, bg = "gray", alpha = 0.75, col = "black", lwd = 1, ...) {
# TODO: (1) come up with custom colour-scheme, (2) plot richnesses if any > 1.
plot.medusa <- function (x, cex = 0.5, time = TRUE, ...) {
z <- x$zSummary;
shift <- list(cex = 1, bg = "gray", alpha = 0.75, col = "black", lwd = 1);
phy <- x$cache$phy;
mm <- match(phy$edge[,2], z[,"dec"]);
edge.color <- z[mm, "partition"];
plot(phy, edge.color=edge.color, cex=0.5, ...);
if (time) axisPhylo();
shifts <- z[(idx <- !is.na(z[, "shift"])), "dec"];
if (length(shifts)) {
ss <- z[idx, "shift"];
ww <- character(nrow(phy$edge));
ww[match(shifts, phy$edge[, 2])] <- ss;
xx <- numeric(nrow(phy$edge));
xx[phy$edge[, 2] %in% shifts] <- 1;
edgelabels.auteur(NULL, frame = "circle", cex = ifelse(xx == 1, shift$cex, 1e-10), pch = ifelse(xx == 1, 21, NA),
bg = .transparency(shift$bg, shift$alpha), col = shift$col, lwd = shift$lwd);
edgelabels.auteur(ww, frame = "none", cex = ifelse(xx == 1, shift$cex/3, 1e-10), pch = NA, col = shift$col);
}
}
## Get confidence intervals on parameter values using profile likelihoods
## This is only used for single-tree analyses. Could be added for multimedusa, but I don't really see the point.
## passed-in parameters parm are MLEs stored in a matrix
.get.profile.likelihoods <- function (res, crit=1.92) {
parm <- res$model$par;
models <- res$model$model;
#z <- res$model$z;
z <- res$zSummary;
inc <- 0.05;
prof.par <- matrix(nrow=length(parm[,1]), ncol=4);
colnames(prof.par) <- c("r.low", "r.high", "eps.low", "eps.high")
# get this out of for-loop format
for (i in 1:length(parm[,1])) {
#cat("Model",i,"\n")
model <- models[i];
sp <- as.numeric(parm[i,]);
new.part <- z[z[,"partition"] == i,,drop=FALSE];
lik <- .lik.partition.medusa(partition=new.part, model=model);
if (model == "yule") {
par <- sp[1];
maxLik <- NULL;
if (par == 0) {
maxLik <- 0;
} else {
maxLik <- lik(par);
if (maxLik == -Inf) maxLik <- 0; # correct for -Inf at boundary lambda == 0
}
threshold <- function (x) lik(x) - maxLik + crit; # find roots on either side of maxLik
## need intelligent bounds
if (par != 0) {
low.bound <- par - par/2;
up.bound <- par + par/2;
} else {
low.bound <- par;
up.bound <- par + inc/2;
}
if (low.bound != 0) {
while (threshold(low.bound) > 0) {
#low.bound <- low.bound - inc;
low.bound <- low.bound * 0.95;
}
}
while (threshold(up.bound) > 0) {
up.bound <- up.bound + inc;
}
if (low.bound <= 0) {
prof.par[i,1] <- 0;
} else {
prof.par[i,1] <- uniroot(threshold, lower=low.bound, upper=par)$root;
}
if (par == 0) par <- 1e-10; # avoid -Inf at boundary
prof.par[i,2] <- uniroot(threshold, lower=par, upper=up.bound)$root;
} else if (model == "bd") {
par1 <- sp[1]; par2 <- sp[2];
maxLik <- lik(sp);
## first, r
thresholdR <- function (x) lik(c(x, par2)) - maxLik + crit;
low.bound <- par1 - par1/2;
up.bound <- par1 + par1/2;
while (thresholdR(low.bound) > 0) {
#low.bound <- low.bound - inc;
low.bound <- low.bound * 0.95;
}
while (thresholdR(up.bound) > 0) {
up.bound <- up.bound + inc;
}
if (low.bound <= 0) low.bound <- 0;
prof.par[i,1] <- uniroot(thresholdR, lower=low.bound, upper=par1)$root;
prof.par[i,2] <- uniroot(thresholdR, lower=par1, upper=up.bound)$root;
# test
## now, epsilon
thresholdE <- function (x) lik(c(par1, x)) - maxLik + crit;
low.bound <- par2 - par2/2;
up.bound <- par2 + par2/2;
while (thresholdE(low.bound) > 0) {
low.bound <- low.bound - inc;
}
while (thresholdE(up.bound) > 0) {
up.bound <- up.bound + inc;
}
if (low.bound < 0) low.bound <- 0;
if (up.bound > 1) up.bound <- 1;
if (low.bound == 0) {
prof.par[i,3] <- 0;
} else {
prof.par[i,3] <- uniroot(thresholdE, lower=0, upper=par2)$root;
}
if (up.bound == 1) {
prof.par[i,4] <- 1;
} else {
prof.par[i,4] <- uniroot(thresholdE, lower=par2, upper=up.bound)$root;
}
if (prof.par[i,3] == par2) prof.par[i,3] <- 0; # precision problem?!? check optimization ***
}
}
# remove columns that are not used
idx <- which(apply(prof.par, MARGIN=2, function (x) all(is.na(x))));
if (length(idx) > 0) {
prof.par <- prof.par[,-idx];
}
prof.par <- as.data.frame(round(prof.par, digits=7));
return(prof.par);
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.