Nothing
## chronopl.R (2012-02-09)
## Molecular Dating With Penalized Likelihood
## Copyright 2005-2012 Emmanuel Paradis
## This file is part of the R-package `ape'.
## See the file ../COPYING for licensing issues.
chronopl <-
function(phy, lambda, age.min = 1, age.max = NULL,
node = "root", S = 1, tol = 1e-8,
CV = FALSE, eval.max = 500, iter.max = 500, ...)
{
n <- length(phy$tip.label)
ROOT <- n + 1L
if (identical(node, "root")) node <- ROOT
if (any(node <= n))
stop("node numbers should be greater than the number of tips")
zerobl <- which(phy$edge.length <= 0)
if (length(zerobl)) {
if (any(phy$edge[zerobl, 2] <= n))
stop("at least one terminal branch is of length zero:
you should remove it to have a meaningful estimation.")
else {
warning("at least one internal branch is of length zero:
it was collapsed and some nodes have been deleted.")
if (length(node) == 1 && node == ROOT)
phy <- di2multi(phy)
else {
tmp <- FALSE
if (is.null(phy$node.label)) {
tmp <- !tmp
phy$node.label <- paste("node", 1:phy$Nnode)
}
node.lab <- phy$node.label[node - n]
phy <- di2multi(phy)
node <- match(node.lab, phy$node.label) + n
if (tmp) phy$node.label <- NULL
}
}
}
m <- phy$Nnode
el <- phy$edge.length
e1 <- phy$edge[, 1L]
e2 <- phy$edge[, 2L]
N <- length(e1)
TIPS <- 1:n
EDGES <- 1:N
ini.rate <- el
el <- el/S
## `basal' contains the indices of the basal edges
## (ie, linked to the root):
basal <- which(e1 == ROOT)
Nbasal <- length(basal)
## `ind' contains in its 1st column the index of all nonbasal
## edges, and in its second column the index of the edges
## where these edges come from (ie, this matrix contains pairs
## of contiguous edges), eg:
## ___b___ ind:
## | | | |
## ___a___| | b | a |
## | | c | a |
## |___c___ | | |
ind <- matrix(0L, N - Nbasal, 2)
ind[, 1] <- EDGES[-basal]
ind[, 2] <- match(e1[EDGES[-basal]], e2)
age <- numeric(n + m)
#############################################################################
### This bit sets 'ini.time' and should result in no negative branch lengths
seq.nod <- .Call("seq_root2tip", phy$edge, n, phy$Nnode, PACKAGE = "ape")
ini.time <- age
ini.time[ROOT:(n + m)] <- NA
ini.time[node] <- if (is.null(age.max)) age.min else (age.min + age.max) / 2
## if no age given for the root, find one approximately:
if (is.na(ini.time[ROOT]))
ini.time[ROOT] <- if (is.null(age.max)) 3 * max(age.min) else 3 * max(age.max)
ISnotNA.ALL <- unlist(lapply(seq.nod, function(x) sum(!is.na(ini.time[x]))))
o <- order(ISnotNA.ALL, decreasing = TRUE)
for (y in seq.nod[o]) {
ISNA <- is.na(ini.time[y])
if (any(ISNA)) {
i <- 2L # we know the 1st value is not NA, so we start at the 2nd one
while (i <= length(y)) {
if (ISNA[i]) { # we stop at the next NA
j <- i + 1L
while (ISNA[j]) j <- j + 1L # look for the next non-NA
nb.val <- j - i
by <- (ini.time[y[i - 1L]] - ini.time[y[j]]) / (nb.val + 1)
ini.time[y[i:(j - 1L)]] <- ini.time[y[i - 1L]] - by * seq_len(nb.val)
i <- j + 1L
} else i <- i + 1L
}
}
}
real.edge.length <- ini.time[e1] - ini.time[e2]
if (any(real.edge.length <= 0))
stop("some initial branch lengths are zero or negative;
maybe you need to adjust the given dates -- see '?chronopl' for details")
#############################################################################
## because if (!is.null(age.max)), 'node' is modified,
## so we copy it in case CV = TRUE:
node.bak <- node
## `unknown.ages' will contain the index of the nodes of unknown age:
unknown.ages <- n + 1:m
## define the bounds for the node ages:
lower <- rep(tol, length(unknown.ages))
upper <- rep(1/tol, length(unknown.ages))
if (!is.null(age.max)) { # are some nodes known within some intervals?
lower[node - n] <- age.min
upper[node - n] <- age.max
## find nodes known within an interval:
interv <- which(age.min != age.max)
## drop them from the 'node' since they will be estimated:
node <- node[-interv]
if (length(node)) age[node] <- age.min[-interv] # update 'age'
} else age[node] <- age.min
if (length(node)) {
unknown.ages <- unknown.ages[n - node] # 'n - node' is simplification for '-(node - n)'
lower <- lower[n - node]
upper <- upper[n - node]
}
## `known.ages' contains the index of all nodes (internal and
## terminal) of known age:
known.ages <- c(TIPS, node)
## concatenate the bounds for the rates:
lower <- c(rep(tol, N), lower)
upper <- c(rep(1 - tol, N), upper)
minusploglik.gr <- function(rate, node.time) {
grad <- numeric(N + length(unknown.ages))
age[unknown.ages] <- node.time
real.edge.length <- age[e1] - age[e2]
if (any(real.edge.length < 0)) {
grad[] <- 0
return(grad)
}
## gradient for the rates:
## the parametric part can be calculated without a loop:
grad[EDGES] <- real.edge.length - el/rate
if (Nbasal == 2) { # the simpler formulae if there's a basal dichotomy
grad[basal[1]] <-
grad[basal[1]] + lambda*(rate[basal[1]] - rate[basal[2]])
grad[basal[2]] <-
grad[basal[2]] + lambda*(rate[basal[2]] - rate[basal[1]])
} else { # the general case
for (i in 1:Nbasal)
grad[basal[i]] <- grad[basal[i]] +
lambda*(2*rate[basal[i]]*(1 - 1/Nbasal) -
2*sum(rate[basal[-i]])/Nbasal)/(Nbasal - 1)
}
for (i in EDGES) {
ii <- c(which(e2 == e1[i]), which(e1 == e2[i]))
if (!length(ii)) next
grad[i] <- grad[i] + lambda*(2*length(ii)*rate[i] - 2*sum(rate[ii]))
}
## gradient for the 'node times'
for (i in 1:length(unknown.ages)) {
nd <- unknown.ages[i]
ii <- which(e1 == nd)
grad[i + N] <-
sum(rate[ii] - el[ii]/real.edge.length[ii])#, na.rm = TRUE)
if (nd != ROOT) {
ii <- which(e2 == nd)
grad[i + N] <- grad[i + N] -
rate[ii] + el[ii]/real.edge.length[ii]
}
}
grad
}
minusploglik <- function(rate, node.time) {
age[unknown.ages] <- node.time
real.edge.length <- age[e1] - age[e2]
if (any(real.edge.length < 0)) return(1e50)
B <- rate*real.edge.length
loglik <- sum(-B + el*log(B) - lfactorial(el))
-(loglik - lambda*(sum((rate[ind[, 1]] - rate[ind[, 2]])^2)
+ var(rate[basal])))
}
out <- nlminb(c(ini.rate, ini.time[unknown.ages]),
function(p) minusploglik(p[EDGES], p[-EDGES]),
function(p) minusploglik.gr(p[EDGES], p[-EDGES]),
control = list(eval.max = eval.max, iter.max = iter.max, ...),
lower = lower, upper = upper)
attr(phy, "ploglik") <- -out$objective
attr(phy, "rates") <- out$par[EDGES]
attr(phy, "message") <- out$message
age[unknown.ages] <- out$par[-EDGES]
if (CV) ophy <- phy
phy$edge.length <- age[e1] - age[e2]
if (CV) attr(phy, "D2") <-
chronopl.cv(ophy, lambda, age.min, age.max, node.bak,
n, S, tol, eval.max, iter.max, ...)
phy
}
chronopl.cv <- function(ophy, lambda, age.min, age.max, nodes,
n, S, tol, eval.max, iter.max, ...)
### ophy: the original phylogeny
### n: number of tips
### Note that we assume here that the order of the nodes
### in node.label are not modified by the drop.tip operation
{
cat("Doing cross-validation\n")
BT <- branching.times(ophy)
D2 <- numeric(n)
for (i in 1:n) {
cat("\r dropping tip ", i, " / ", n, sep = "")
tr <- drop.tip(ophy, i)
j <- which(ophy$edge[, 2] == i)
if (ophy$edge[j, 1] %in% nodes) {
k <- which(nodes == ophy$edge[j, 1])
node <- nodes[-k]
agemin <- age.min[-k]
agemax <- age.max[-k]
} else node <- nodes
if (length(node)) {
chr <- chronopl(tr, lambda, age.min, age.max, node,
S, tol, FALSE, eval.max, iter.max, ...)
tmp <-
if (Nnode(chr) == Nnode(ophy)) BT else BT[-(ophy$edge[j, 1] - n)]
D2[i] <- sum((tmp - branching.times(chr))^2 / tmp)
} else D2[i] <- 0
}
cat("\n")
D2
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.