#' Multi Loci Bayesian nonparametric phylodynamic reconstruction.
#'
#' @param data \code{multiPhylo} object or list containing vectors of coalescent
#' times \code{coal_times}, sampling times \code{samp_times}, and number
#' sampled per sampling time \code{n_sampled}.
#' @param lengthout numeric specifying number of grid points.
#' @param pref logical. Should the preferential sampling model be used?
#' @param prec_alpha,prec_beta numerics specifying gamma prior for precision
#' \eqn{\tau}.
#' @param beta1_prec numeric specifying precision for normal prior on
#' \eqn{\beta_1}.
#' @param simplify logical whether to fully bucket all Poisson points.
#' @param derivative logical whether to calculate estimates of the
#' log-derivative.
#' @param forward logical whether to use the finite difference approximations of
#' the log-derivative as a forward or backward derivative.
#'
#' @return Phylodynamic reconstruction of effective population size from multiple independent genealogies
#' at grid points. \code{result} contains the INLA output, \code{data} contains the
#' information passed to INLA, \code{grid} contains the grid end points,
#' \code{x} contains the grid point centers, \code{effpop} contains a vector
#' of the posterior median effective population size estimates,
#' \code{effpop025} and \code{effpop975} contain the 2.5th and 97.5th
#' posterior percentiles, \code{summary} contains a data.frame of the
#' estimates, and \code{derivative} (if \code{derivative = TRUE}) contains a
#' data.frame summarizing the log-derivative.
#' @export
#'
#' @examples
#' res = BNPR_Multiple(simulate.tree(n=10,N=2,simulator="ms",args="-T -G 0.1")$out)
#' phylo::plot_BNPR(res)
BNPR_Multiple <- function(data, lengthout = 100, pref=FALSE, prec_alpha=0.01,
prec_beta=0.01, beta1_prec = 0.001, simplify = TRUE, ntrees=0,
derivative = FALSE, forward = TRUE,constr = FALSE)
{
if (class(data) != "multiPhylo")
{
return(phylodyn::BNPR(data,lengthout,pref,prec_alpha,prec_beta,beta1_prec,simplify,derivative,forward))
}
else {
if (ntrees==0){ntrees<-length(data)}
phy <- phylodyn::summarize_phylo(data[[1]])
range.upp<-max(phy$coal_times)
range.low<-min(phy$samp_times)
if (ntrees>1){
for (j in 2:ntrees){
phy <- phylodyn::summarize_phylo(data[[j]])
range.upp<-max(range.upp,max(phy$coal_times))
range.low<-min(range.low,min(phy$samp_times))
}}
grid <- seq(range.low, range.upp, length.out = lengthout+1)
result <- infer_coal_multiple(data=data, grid=grid, ntrees=ntrees,
prec_alpha = prec_alpha, prec_beta = prec_beta,
simplify = simplify, derivative = derivative,constr = constr)
result$effpop <- exp(-result$result$summary.random$time$`0.5quant`)[1:lengthout]
result$effpop975 <- exp(-result$result$summary.random$time$`0.025quant`)[1:lengthout]
result$effpop025 <- exp(-result$result$summary.random$time$`0.975quant`)[1:lengthout]
result$summary <- with(result$result$summary.random$time[1:lengthout,],
data.frame(time = ID[1:lengthout], mean = exp(-mean)[1:lengthout],
sd = sd[1:lengthout] * exp(-mean)[1:lengthout],
quant0.025 = exp(-`0.975quant`)[1:lengthout],
quant0.5 = exp(-`0.5quant`)[1:lengthout],
quant0.975 = exp(-`0.025quant`)[1:lengthout]))
if (derivative)
{
if (forward)
ind <- c(1:(lengthout-1), (lengthout-1))
else
ind <- c(1, 1:(lengthout-1))
result$derivative <- with(result$result$summary.lincomb,
data.frame(time = result$x, mean = -mean[ind], sd = sd[ind],
quant0.025 = -`0.975quant`[ind],
quant0.5 = -`0.5quant`[ind],
quant0.975 = -`0.025quant`[ind]))
}
return(result)
}
}
infer_coal_multiple <- function(data=data, grid=grid,ntrees=ntrees,
prec_alpha = 0.01, prec_beta = 0.01, simplify = TRUE,
derivative = FALSE,constr = FALSE)
{
phy <- phylodyn::summarize_phylo(data[[1]])
if (min(phy$coal_times) < min(phy$samp_times))
stop("First coalescent time occurs before first sampling time")
if (max(phy$samp_times) > max(phy$coal_times))
stop("Last sampling time occurs after last coalescent time")
if (is.null(phy$n_sampled))
phy$n_sampled <- rep(1, length(samp_times))
coal_data <- phylodyn::coal_stats(grid = grid, samp_times = phy$samp_times, n_sampled = phy$n_sampled,
coal_times = phy$coal_times)
#check whether having 0 is a good idea
coal_data<-coal_data[coal_data[,4]!=-100L,]
coal_times_list<-phy$coal_times
samp_times_list<-phy$samp_times
n_sampled_list<-phy$n_sampled
coal_data$tree<-rep(1,nrow(coal_data))
if (ntrees>1){
for (j in 2:ntrees){
phy <- phylodyn::summarize_phylo(data[[j]])
if (min(phy$coal_times) < min(phy$samp_times))
stop("First coalescent time occurs before first sampling time")
if (max(phy$samp_times) > max(phy$coal_times))
stop("Last sampling time occurs after last coalescent time")
if (is.null(phy$n_sampled))
phy$n_sampled <- rep(1, length(samp_times))
coal_data_temp <- phylodyn::coal_stats(grid = grid, samp_times = phy$samp_times, n_sampled = phy$n_sampled,
coal_times = phy$coal_times)
#check whether having 0 is a good idea
# coal_data_temp<-coal_data_temp[coal_data_temp[,4]!=-100L,]
coal_data_temp$tree<-rep(j,nrow(coal_data_temp))
coal_data<-rbind(coal_data,coal_data_temp)
coal_times_list<-rbind(coal_times_list,phy$coal_times)
samp_times_list<-rbind(samp_times_list,phy$samp_times)
n_sampled_list<-rbind(n_sampled_list,phy$n_sampled)
}
}
if (simplify)
coal_data <- with(coal_data, phylodyn::condense_stats(time = time, event = event, E=E))
data <- with(coal_data, data.frame(y = event, time = time, E_log = E_log))
hyper <- list(prec = list(param = c(prec_alpha, prec_beta)))
formula <- y ~ -1 + f(time, model="rw1", hyper = hyper, constr = constr)
#formula <- y ~ -1 + f(time, model="rw1", hyper = hyper, constr = constr,replicate=tree)
if (derivative)
{
Imat <- diag(lengthout)
A <- head(Imat, -1) - tail(Imat, -1)
field <- grid[-1] - diff(grid)/2
A <- diag(1/diff(field)) %*% A
A[A == 0] <- NA
lc_many <- INLA::inla.make.lincombs(time = A)
mod <- INLA::inla(formula, family = "poisson", data = data, lincomb = lc_many,
control.predictor = list(compute=TRUE),
control.inla = list(lincomb.derived.only=FALSE))
}
else
{
mod <- INLA::inla(formula, family = "poisson", data = data, offset = data$E_log,
control.predictor = list(compute=TRUE))
}
return(list(result = mod, data = data, grid = grid, x = unique(coal_data$time),samp_times=unique(samp_times_list),n_sampled=unique(n_sampled_list),coal_times=unique(coal_times_list)))
}
#'@title calculate.moller.hetero
#'@description Approximates the posterior distribution of Ne from a single genealogy at a regular grid of points using INLA package
#'@param coal.factor is a vector with coalescent times in increasing order
#'@param s sampling times and coalescent times in increasing order
#'@param event and indicator vector with 1 for coalescent times and 0 for sampling times that correspond to the s vector
#'@param lengthout number of grid points
#'@param prec_alpha alpha gamma hyperparameter of precision parameter for GP prior
#'@param prec_beta beta gamma hyperparameter of precision parameter for GP prior
#'@param E.log.zero internal
#'@param alpha TODO
#'@param beta TODO
#'@author Julia Palacios \email{julia.pal.r@@gmail.com}
calculate.moller.hetero<-function (coal.factor, s, event, lengthout, prec_alpha = 0.01,
prec_beta = 0.01, E.log.zero = -100, alpha = NULL, beta = NULL)
{
if (prec_alpha == 0.01 & prec_beta == 0.01 & !is.null(alpha) &
!is.null(beta)) {
prec_alpha = alpha
prec_beta = beta
}
grid <- seq(0, max(s), length.out = lengthout + 1)
u <- diff(grid)
field <- grid[-1] - u/2
sgrid <- grid
event_new <- 0
time <- 0
where <- 1
E.factor <- 0
for (j in 1:lengthout) {
count <- sum(s > sgrid[j] & s <= sgrid[j + 1])
if (count > 1) {
points <- s[s > sgrid[j] & s <= sgrid[j + 1]]
u <- diff(c(sgrid[j], points))
event_new <- c(event_new, event[(where):(where +
count - 1)])
time <- c(time, rep(field[j], count))
E.factor <- c(E.factor, coal.factor[where:(where +
count - 1)] * u)
where <- where + count
if (max(points) < sgrid[j + 1]) {
event_new <- c(event_new, 0)
time <- c(time, field[j])
E.factor <- c(E.factor, coal.factor[where] *
(sgrid[j + 1] - max(points)))
}
}
if (count == 1) {
event_new <- c(event_new, event[where])
points <- s[s > sgrid[j] & s <= sgrid[j + 1]]
if (points == sgrid[j + 1]) {
E.factor <- c(E.factor, coal.factor[where] *
(sgrid[j + 1] - sgrid[j]))
time <- c(time, field[j])
where <- where + 1
}
else {
event_new <- c(event_new, 0)
E.factor <- c(E.factor, coal.factor[where] *
(points - sgrid[j]))
E.factor <- c(E.factor, coal.factor[where + 1] *
(sgrid[j + 1] - points))
time <- c(time, rep(field[j], 2))
where <- where + 1
}
}
if (count == 0) {
event_new <- c(event_new, 0)
E.factor <- c(E.factor, coal.factor[where] * (sgrid[j +
1] - sgrid[j]))
time <- c(time, field[j])
}
}
time2 <- time
event_new2 <- event_new
E.factor2 <- E.factor
for (j in 1:lengthout) {
count <- sum(time2 == field[j])
if (count > 1) {
indic <- seq(1:length(event_new2))[time2 == field[j]]
if (sum(event_new2[indic]) == 0) {
event_new2 <- event_new2[-indic[-1]]
time2 <- time2[-indic[-1]]
temp <- sum(E.factor2[indic])
E.factor2[indic[1]] <- temp
E.factor2 <- E.factor2[-indic[-1]]
}
}
}
E.factor2.log = log(E.factor2)
E.factor2.log[E.factor2 == 0] = E.log.zero
data <- list(y = event_new2[-1], event = event_new2[-1],
time = time2[-1], E = E.factor2.log[-1])
formula <- y ~ -1 + f(time, model = "rw1", hyper = list(prec = list(param = c(prec_alpha,
prec_beta))), constr = FALSE)
mod4 <- inla(formula, family = "poisson", data = data, offset = E,
control.predictor = list(compute = TRUE))
return(list(result = mod4, grid = grid, data = data, E = E.factor2.log))
}
#'@title .calculate.moller1
#'@description Approximates the posterior distribution of Ne from a single genealogy at a regular grid of points using INLA package
#'@param tree is a phylo object with a single tree
#'@param lengthout number of grid points
#'@param L the length for the definition of the grid
#'@author Julia Palacios \email{julia.pal.r@@gmail.com}
.calculate.moller1<-function(tree,lengthout,L=1){
ci<-ape::coalescent.intervals(tree)
data1<-cbind(ci$interval.length,ci$lineages)
s<-cumsum(data1[,1])
coal.factor<-data1[,2]*(data1[,2]-1)*.5
length.min<-min(data1[,1])
#maxval<-sum(data1[,1])
grid<-seq(0,L,length.out=lengthout+1)
#grid<-c(grid[grid<=maxval],maxval)
u<-diff(grid)
field<-grid[-1]-u/2
sgrid<-grid
event<-0
time<-0
E.factor<-0
where<-1
for (j in 1:lengthout){
count<-sum(s>sgrid[j] & s<=sgrid[j+1])
if (count>1){
points<-s[s>sgrid[j] & s<=sgrid[j+1]]
u<-diff(c(sgrid[j],points))
event<-c(event,rep(1,count))
time<-c(time,rep(field[j],count))
E.factor<-c(E.factor,coal.factor[where:(where+count-1)]*u)
where<-where+count
if (max(points)<sgrid[j+1]){
event<-c(event,0)
time<-c(time,field[j])
E.factor<-c(E.factor,coal.factor[where]*(sgrid[j+1]-max(points)))
}
}
if (count==1){
event<-c(event,1)
points<-s[s>sgrid[j] & s<=sgrid[j+1]]
if (points==sgrid[j+1]){
E.factor<-c(E.factor,coal.factor[where]*(sgrid[j+1]-sgrid[j]))
time<-c(time,field[j])
where<-where+1
}else {
event<-c(event,0)
E.factor<-c(E.factor,coal.factor[where]*(points-sgrid[j]))
E.factor<-c(E.factor,coal.factor[where+1]*(sgrid[j+1]-points))
time<-c(time,rep(field[j],2))
where<-where+1}
}
if (count==0){
event<-c(event,0)
E.factor<-c(E.factor,coal.factor[where]*(sgrid[j+1]-sgrid[j]))
time<-c(time,field[j])
}
}
##Fixing for when there are no observations
combine<-unique(sort(c(grid[-1],s)))
find2<-max(seq(1,length(combine))[combine<=max(s)])
event<-event[-1]
time<-time[-1]
E.factor<-E.factor[-1]
grid<-c(grid[grid<=max(s)],max(s))
data<-list(y=event[1:find2],event=event[1:find2],time=time[1:find2],E=log(E.factor[1:find2]))
# data<-list(y=event[-1],event=event[-1],time=time[-1],E=log(E.factor[-1]))
formula<-y~-1+f(time,model="rw1",hyper=list(prec = list(param = c(.001, .001))),constr=FALSE)
mod.moller.constant<-inla(formula,family="poisson",data=data,offset=E,control.predictor=list(compute=TRUE))
return(stan_output(list(result=mod.moller.constant,grid=grid)))
}
#'@title calculate.moller
#'@description Approximates the posterior distribution of Ne from a single genealogy at a regular grid of points using INLA package
#'@param tree is a phylo object with a single tree
#'@param lengthout number of grid points
#'@param L the length for the definition of the grid
#'@author Julia Palacios \email{julia.pal.r@@gmail.com}
calculate.moller<-function(tree,lengthout,L=1){
if (class(tree)=="phylo") {return(.calculate.moller1(tree,lengthout,L))}
else {
L<-ape::coalescent.intervals(tree[[1]])$total.depth
for (j in 2:length(tree)){L<-min(L,ape::coalescent.intervals(tree[[j]])$total.depth)}
result<-matrix(NA,nrow=lengthout,ncol=length(tree)+1)
result[,1:2]<-.calculate.moller1(tree[[1]],lengthout,L)[,1:2]
for (j in (2:length(tree))){
result[,j+1]<-.calculate.moller1(tree[[j]],lengthout,L)[,2]
}
return(result)
}
}
#'@title plot_INLA
#'@description Plots the output from the inla functions for Ne
#'@param INLA_out Otput from the inla functions
#'@param traj the true trajectory
#'@param xlim the limits of the x-axis, as standard in plot()
#'@author Julia Palacios \email{julia.pal.r@@gmail.com}
plot_INLA = function(INLA_out, traj=NULL, xlim=NULL, ...)
{
mod = INLA_out$result$summary.random$time
grid = mod$"ID"
if (is.null(xlim))
{
xlim=c(max(grid),0)
}
plot(grid,exp(-mod$"0.5quant"),type="l",lwd=2.5,col="blue",log="y",
xlab="Time (past to present)",ylab="Scaled Effective Pop. Size",
xlim=xlim,ylim=c(min(exp(-mod$"0.975quant"[grid > min(xlim) & grid < max(xlim)])),
max(exp(-mod$"0.025quant"[grid > min(xlim) & grid < max(xlim)]))), ...)
lines(grid,exp(-mod$"0.975quant"),lwd=2.5,col="blue",lty=2)
lines(grid,exp(-mod$"0.025quant"),lwd=2.5,col="blue",lty=2)
if (!is.null(traj))
lines(grid, traj(grid))
}
#'@title plot_INLA2
#'@description Plots a general output with 4 columns: time, median, lower and upper bound
#'@param result Matrix with four columns
#'@param traj the true trajectory
#'@param xlim (optional)
#'@author Julia Palacios \email{julia.pal.r@@gmail.com}
plot_INLA2<-function(result, traj=NULL, xlim=NULL, ...){
grid = result[,1]
if (is.null(xlim)) {
xlim = c(max(grid), 0)
}
plot(grid,result[,2],type="l",lwd=2.5,col="blue",log="y",
xlab="Time (past to present)",ylab="Scaled Effective Pop. Size",
xlim=xlim, ylim=c(min(result[grid > min(xlim) & grid < max(xlim),4]),
max(result[grid > min(xlim) & grid < max(xlim),3])))
lines(grid,result[,3],lwd=2.5,col="blue",lty=2)
lines(grid,result[,4],lwd=2.5,col="blue",lty=2)
if (!is.null(traj)) lines(grid, traj(grid))
}
#'@title plot_INLA3
#'@description Plots bunch of trees
#'@param result Matrix with many columns
#'@param traj the true trajectory
#'@param xlim (optional)
#'@author Julia Palacios \email{julia.pal.r@@gmail.com}
plot_INLA3<-function(result, traj=NULL, xlim=NULL, ...){
grid = result[,1]
if (is.null(xlim)) {
xlim = c(max(grid), 0)
}
result2<-matrix(NA,nrow=nrow(result),ncol=2)
result2[,2]<-apply(result[,2:ncol(result)],1,mean)
plot(grid,result2[,2],type="l",lwd=2.5,col="blue",log="y",
xlab="Time (past to present)",ylab="Scaled Effective Pop. Size",
xlim=xlim, ylim=c(min(result[,2:ncol(result)]), max(result[,2:ncol(result)])))
for (j in 2:(ncol(result))){
lines(grid,result[,j],lwd=1.0,col="gray")}
if (!is.null(traj)) lines(grid, traj(grid))
}
stan_output<-function(INLA_out){
mod = INLA_out$result$summary.random$time
grid = mod$"ID"
results<-matrix(NA,nrow=length(grid),ncol=4)
results[,1]<-grid
results[,4]<-exp(-mod$"0.975quant")
results[,3]<-exp(-mod$"0.025quant")
results[,2]<-exp(-mod$"0.5quant")
return(results)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.