# R/ABC_SMC.R In nLTT: Calculate the NLTT Statistic

#### Documented in abc_smc_nlttcalculate_weightmcmc_nltt

```################################################################################
#
# @brief calculate the weight of a parameter combination
#
# @author Thijs Janzen
# @since 2014-20-09, version 1.0
#
# @param    weights                 vector       Vector of weights
# @param    particles               list         List of parameter combinations
# @param    current                 vector       Current parameter combination for which we are determining the weight
# @param    sigma                   scalar       standard deviation of the perturbation
# @param    prior_density_function  function     Function to calculate the prior probability of a set of parameters
# @return                           scalar       Estimated weight
#
################################################################################

calculate_weight <- function(weights, particles,
current, sigma, prior_density_function) {
vals <- c()
for (i in seq_along(particles)) {
vals[i] <- weights[i]
for (j in seq_along(current)) {
diff <- log(current[j]) - log(particles[[i]][j])
vals[i] <- vals[i] * stats::dnorm(diff, mean = 0, sd = sigma)
}
}

numerator <- prior_density_function(current)

return(numerator / sum(vals))
}

################################################################################
#
# @brief apply the ABC routine used in our Methods in Ecology and Evolution Paper
#
# @author Thijs Janzen
# @since 2014-20-09, version 1.0
#
# @param    tree                          phylo       Phylogenetic tree
# @param    statistics                    vector      A vector containing functions that take a tree as an argument and return a single scalar value (the statistic).
# @param    simulation_function           vector      A function that implements the diversification model and returns an object of class phylo
# @param    init_epsilon_values           vector      A vector containing the initial threshold values for the summary statistics from the vector statistics.
# @param    prior_generating_function     function    Function to generate parameters from the prior distribution of these parameters (e.g. a function returning lambda and mu in case of the birth-death model)
# @param    prior_density_function        function    Function to calculate the prior probability of a set of parameters (should match prior_generating_function in shape)
# @param    number_of_particles           scalar      Number of particles to be used per iteration of the ABC-SMC algorithm.
# @param    sigma                         scalar      Standard deviation of the perturbance distribution (perturbance distribution is a gaussian with mean 0).
# @param    stop_rate                     scalar      If the acceptance rate drops below \code{stopRate}, stop the ABC-SMC algorithm  and assume convergence.
# @return                                 matrix      A matrix with n columns, where n is the number of parameters you are trying to estimate.
#
################################################################################
#' A function to perform Approximate Bayesian Computation within a
#' Sequential Markov Chain (ABC-SMC), for diversification analysis
#' of phylogenetic trees.
#' @description This function performs ABC-SMC as described in Toni 2009
#'   for given diversification model, provided a phylogenetic tree.
#'   ABC-SMC is not limited to only using the normalized LTT as statistic.
#' @usage
#'   abc_smc_nltt(
#'     tree, statistics, simulation_function, init_epsilon_values,
#'     prior_generating_function, prior_density_function,
#'     number_of_particles = 1000, sigma = 0.05, stop_rate = 1e-05
#'   )
#' @param tree an object of class \code{"phylo"}; the tree upon which we want
#'   to fit our diversification model
#' @param statistics A vector containing functions that take a tree
#'   as an argument and return a single scalar value (the statistic).
#' @param simulation_function A function that implements the
#'   diversification model and returns an object of class \code{"phylo"}.
#' @param init_epsilon_values A vector containing the initial threshold values
#'   for the summary statistics from the vector \code{statistics}.
#' @param prior_generating_function Function to generate parameters
#'   from the prior distribution of these parameters (e.g. a function returning lambda and mu in case of the birth-death model)
#' @param prior_density_function Function to calculate the prior probability
#'   of a set of parameters.
#' @param number_of_particles Number of particles to be used
#'   per iteration of the ABC-SMC algorithm.
#' @param sigma Standard deviation of the perturbance distribution
#'   (perturbance distribution is a gaussian with mean 0).
#' @param stop_rate If the acceptance rate drops below \code{stopRate},
#'   stop the ABC-SMC algorithm  and assume convergence.
#' @return A matrix with \code{n} columns,
#'   where \code{n} is the number of parameters you are trying to estimate.
#' @references  Toni, T., Welch, D., Strelkowa, N., Ipsen, A.,
#'   & Stumpf, M.P.H. (2009). Approximate Bayesian computation scheme for
#'   parameter inference and model selection in dynamical systems.
#'   Journal of the Royal Society Interface, 6(31), 187-202.
#' @export
#' @author Thijs Janzen
#' @examples
#'   \dontrun{
#'
#'   prior_gen <- function() {
#'     return( rexp(n=2, rate=0.1) )
#'   }
#'
#'   prior_dens <- function(val) {
#'     return( dexp( val[1], rate = 0.1) * dexp( val[2], rate = 0.1) )
#'   }
#'
#'   require(TESS)
#'
#'   treeSim <- function(params) {
#'     t <- TESS.sim.age(n=1, lambda = params[1], mu = params[2], age = 10)[[1]]
#'     return(t)
#'   }
#'
#'   obs <- treeSim(c(0.5,0.1))
#'
#'   statWrapper <- function(tree1) {
#'     return( nLTTstat_exact(tree1, obs, "abs"))
#'   }
#'
#'   stats <- c(statWrapper)
#'
#'   results <-  abc.smc.nltt(
#'     obs, stats, treeSim, init_epsilon_values = 0.2,
#'     prior_generating_function = prior_gen,
#'     prior_density_function = prior_dens,
#'     number_of_particles = 1000, sigma = 0.05, stop_rate = 1e-5
#'   )
#'
#'   } # end of dontrun
abc_smc_nltt <- function(tree,
statistics,
simulation_function,
init_epsilon_values,
prior_generating_function,
prior_density_function,
number_of_particles = 1000,
sigma = 0.05,
stop_rate = 1e-5) {

if (!inherits(tree, "phylo")) {
# Just checking
stop("abc_smc_nltt: ",
"tree must be of class 'phylo', ",
"but was of type '", class(tree), "' instead")
}

#statistics has to be a vector of functions
if (!inherits(statistics, "list")) {
stop("abc_smc_nltt: ",
"the statistics function has to be given in vector style, ",
}

#just to get the number of parameters to be estimated.
parameters <- prior_generating_function()

# compute the observed statistics
obs_statistics <- c()
for (i in seq_along(statistics)) {
obs_statistics[i] <- statistics[[i]](tree)
}

stats <- c()

#generate a matrix with epsilon values
#we assume that the SMC algorithm converges within 50 iterations
epsilon <- matrix(nrow = 50, ncol = length(init_epsilon_values))
for (j in seq_along(init_epsilon_values)) {
if (init_epsilon_values[j] < 0) {
stop("abc_smc_nltt: ",
"epsilon values have to be positive,",
}

for (i in 1:50) {
epsilon[i, j] <- init_epsilon_values[j] * exp(-0.5 * (i - 1))
}
}

#store weights
new_weights <- c()
new_params <- list(c(seq_along(parameters)))
previous_weights <- c()
previous_params  <- list(c(seq_along(parameters)))
indices <- 1:number_of_particles

#convergence is expected within 50 iterations
#usually convergence occurs within 20 iterations
for (i in 1:50) {
cat("\nGenerating Particles for iteration\t", i, "\n")
cat("0--------25--------50--------75--------100\n")
cat("*")
utils::flush.console()

print_frequency <- 20
tried <- 0
number_accepted <- 0

#replace all vectors
if (i > 1) {
#normalize the weights and store them as previous weights.
previous_weights <- new_weights / sum(new_weights)
new_weights <- c() #remove all currently stored weights
previous_params <- new_params #store found params
new_params <- list(c(seq_along(parameters))) #clear new params
}

stoprate_reached <- FALSE

while (number_accepted < number_of_particles) {
#in this initial step, generate parameters from the prior
if (i == 1) {
parameters <- prior_generating_function()
} else {
#if not in the initial step, generate parameters
#from the weighted previous distribution:
index <- sample(x = indices, size = 1,
replace = TRUE, prob = previous_weights)

for (p_index in seq_along(parameters)) {
parameters[p_index] <- previous_params[[index]][p_index]
}

#only perturb one parameter, to avoid extremely
#low acceptance rates due to simultaneous perturbation
to_change <- sample(seq_along(parameters), 1)

# perturb the parameter a little bit,
#on log scale, so parameter doesn't go < 0
eta <- log(parameters[to_change]) + stats::rnorm(1, 0, sigma)
parameters[to_change] <- exp(eta)
}

#reject if outside the prior
if (prior_density_function(parameters) > 0) {
#simulate a new tree, given the proposed parameters
new_tree <- simulation_function(parameters)
accept <- TRUE

#calculate the summary statistics for the simulated tree
for (k in seq_along(statistics)) {
stats[k] <- statistics[[k]](new_tree)
if (is.na(stats[k])) stats[k] <- Inf
}

#check if the summary statistics are sufficiently
#close to the observed summary statistics
for (k in seq_along(statistics)) {
if (abs(stats[k] - obs_statistics[k]) > epsilon[i, k]) {
accept <- FALSE
#the first step always accepts
if (i == 1) accept <- TRUE
break
}
}

if (accept) {
number_accepted <- number_accepted + 1
new_params[[number_accepted]] <- parameters
accepted_weight <- 1
#calculate the weight
if (i > 1) {
accepted_weight <- calculate_weight(previous_weights,
previous_params, parameters,
sigma, prior_density_function)
}
new_weights[number_accepted] <- accepted_weight

if ((number_accepted) %%
(number_of_particles / print_frequency) == 0) {
cat("**")
utils::flush.console()
}
}
}

#convergence if the acceptance rate gets too low
tried <- tried + 1
if (tried > (1 / stop_rate)) {
if ( (number_accepted / tried) < stop_rate) {
stoprate_reached <- TRUE
break
}
}
}

if (stoprate_reached) {
break
}
}

output <- c()
for (k in seq_along(previous_params)) {
for (m in seq_along(parameters)) {
}
}
return(output)
}

################################################################################
#
# @brief Estimate the likelihood of a given tree, provided a likelihood function, using a Monte Carlo Markov Chain
#
# @author Thijs Janzen
# @since 2014-20-09, version 1.0
#
# @param    phy                   phylo       Vector of weights
# @param    likelihood_function   function    Function that calculates the likelihood of our diversification model, given the tree.
#                                             function should be of the format function(parameters, phy).
# @param    parameters            vector      Initial parameters to start the chain.
# @param    logtransforms         scalar      Whether to perform jumps on logtransformed parameters (TRUE) or not (FALSE)
# @param    iterations            scalar      Length of the chain
# @param    burnin                scalar      Length of the burnin, default is 30% of iterations
# @param    thinning              scalar      Size of thinning, default = 1
# @param    sigma                 scalar      Standard deviation of the jumping distribution, which is N(0, sigma).
# @return                         mcmc        An MCMC object, as used by the package "coda".
#
################################################################################
#' @export
mcmc_nltt <- function(phy, likelihood_function,
parameters, logtransforms, iterations,
burnin = round(iterations / 3), thinning = 1, sigma = 1) {

#check data type of phy
if (!inherits(phy, "phylo")) {
# Just checking
stop("mcmc_nltt: ",
"phy must be of class 'phylo', ",
"but was of type '", class(phy), "' instead")
}

# create a list for the samples & reserve memory for the chain
chain <- array(dim = c(floor(iterations / thinning) + 1,
length(parameters)))

for (j in seq_along(parameters)) {
if (parameters[j] < 0) {
#Just checking
stop("mcmc_nltt: ",
"initial parameter values have to be above zero\n",
"but one was ", parameters[j], " instead")
}
}
# pre-compute current posterior probability
pp <- likelihood_function(parameters, phy)

cat("\nGenerating Chain\n")
cat("0--------25--------50--------75--------100\n")
cat("*")
utils::flush.console()
print_frequency <- 20

for (i in seq_len(burnin + iterations)) {
#propose new values
for (j in seq_along(parameters)) {
if (logtransforms[j] == TRUE) {
if (parameters[j] == 0) {
stop("Cannot propose new value for a parameter with value 0.0.")
}

eta           <- log(parameters[j])
new_eta       <- eta + stats::rnorm(1, 0, sigma)
new_val       <- exp(new_eta)
# calculate the Hastings ratio
hr            <- log(new_val / parameters[j])
parameters[j] <- new_val
new_pp        <- likelihood_function(parameters, phy)

#accept or reject
if (is.finite(new_pp) &&
is.finite(hr) &&
new_pp - pp + hr > log(stats::runif(1, 0, 1))) {
pp <- new_pp
} else {
parameters[j] <- exp(eta)
}
} else {

eta           <- parameters[j]
new_val       <- eta + stats::rnorm(1, 0, sigma)
#calculate the Hastings ratio
hr            <- 0.0
parameters[j] <- new_val

if (parameters[j] >= 0 & parameters[1] > 0) {
new_pp        <- likelihood_function(parameters, phy)

#accept or reject
if (is.finite(new_pp) &&
is.finite(hr) &&
new_pp - pp + hr > log(stats::runif(1, 0, 1))) {
pp <- new_pp
} else {
parameters[j] <- eta
}
} else {
parameters[j] <- eta
}
}
}

# sample the parameter
if (i >= burnin) {
if ((i) %% ((iterations - burnin) / print_frequency) == 0) {
cat("**")
utils::flush.console()
}
if ((i - burnin) %% thinning == 0) {
chain[(i - burnin) / thinning + 1, ] <- parameters
}
}
}
cat("\nFinished MCMC.\n")
#return a mcmc object, used by coda to plot
return(coda::as.mcmc(chain))
}
```

## Try the nLTT package in your browser

Any scripts or data that you put into this service are public.

nLTT documentation built on Jan. 13, 2020, 9:06 a.m.