Legend

Abbreviation|Full ---|--- nLTT|normalized lineages through time BD|Birth Death Model MBD|Multiple Birth Death Model

This is the pipeline to follow for the Razzo project.

Our aim is to compare two nLTT distributions related to two tree posteriors: one descending from MBD alignments and another one from BD alignments. Both posteriors are generated using a BD prior in BEAST2.

Overview

Load the libraries needed:

library(razzo)

We will work in this folder:

project_folder_name <- file.path(peregrine::get_pff_tempfile(), "razzo_project")

Step 1: Create parameter files

Here we create all testing parameter files, which are only a few:

if (1 == 2) {
  parameters_filenames <- create_parameters_files(
    project_folder_name = project_folder_name,
    experiment_type = "test"
  )
}

Step 2: Run the parameter files

Then we run razzo for each parameter setting

if (1 == 2) {
  for (parameters_filename in parameters_filenames) {
    razzo::run_razzo_from_file(parameters_filename)
  }
}

Step 3: figure 1

if (1 == 2) {
  if (rappdirs::app_dir()$os != "win") {
    plots <- create_fig_1(project_folder_name)
    for (plot in plots) {
      print(plot)
    }
  }
}

Step 4: what is the effect of MBD on the error?

# create_fig_2(project_folder_name)

The nLTT statistics of the BD tree shows the baseline error: the Bayesian analysis assumes BD, and that tree is BD.

mbd_nltts <- utils::read.csv(mbd_nltt_filenames[1])$x
bd_nltts <- utils::read.csv(bd_nltt_filenames[1])$x

df <- data.frame(
  model = c(rep("MBD", length(mbd_nltts)), rep("BD", length(bd_nltts))),
  nltt = c(mbd_nltts, bd_nltts),
  stringsAsFactors = TRUE
)

plot <- ggplot(
  data = df,
  aes(x = nltt, fill = model)
) + ggplot2::scale_x_continuous(limits = c(0.0, 1.0))

plot + geom_histogram(binwidth = 0.01, alpha = 0.5)
plot + geom_density(alpha = 0.5)

Step 5: measure Effective Sample Sizes

We already had a sneak-peek at the ESSes. In this step, we measure the ESSes of all files and combine it into one file:

esses_filename <- create_esses_file(project_folder_name = project_folder_name)
knitr::kable(utils::read.csv(esses_filename)[-1])

Step 6: compare marginal likelihoods

We already had a sneak-peek at the ESSes. In this step, we measure the ESSes of all files and combine it into one file:

if (rappdirs::app_dir()$os != "win") {
  marg_liks_file <- create_marg_liks_file(
    project_folder_name = project_folder_name
  )
  knitr::kable(utils::read.csv(marg_liks_file)[-1])
}

Appendix

Here we show the results of the first experiment only:

parameters_filename <- parameters_filenames[1]
razzo_params <- readRDS(parameters_filename)

First MBD tree:

ape::plot.phylo(ape::read.tree(file = razzo_params$misc_params$tree_filename))

First twin tree:

ape::plot.phylo(
  ape::read.tree(
    file = razzo_params$pir_params$twinning_params$twin_tree_filename
  )
)

First MBD alignment

image(
  ape::read.FASTA(
    file = razzo_params$pir_params$alignment_params$fasta_filename
  )
)

First twin alignment

image(
  ape::read.FASTA(
    file = razzo_params$pir_params$twinning_params$twin_alignment_filename
  )
)

First MBD posterior's trees of generative model

babette::plot_densitree(
  tracerer::parse_beast_trees(
    razzo_params$pir_params$experiments[[1]]$
      inference_model$mcmc$treelog$filename
  )
)

First MBD posterior's trees of best candidate model

babette::plot_densitree(
  tracerer::parse_beast_trees(
    razzo_params$pir_params$experiments[[2]]$
      inference_model$mcmc$treelog$filename
  )
)

First MBD posterior's ESSes

first_experiment <- razzo_params$pir_params$experiments[[1]]
knitr::kable(
  tracerer::calc_esses(
    tracerer::parse_beast_log(
      first_experiment$inference_model$mcmc$tracelog$filename
    ),
    sample_interval = first_experiment$inference_model$mcmc$store_every
  )
)

First twin posterior's trees of generative model

babette::plot_densitree(
  tracerer::parse_beast_trees(
    pirouette::to_twin_filename(
      razzo_params$pir_params$experiments[[1]]$
        inference_model$mcmc$treelog$filename
    )
  )
)

First twin posterior's trees of best candidate model

babette::plot_densitree(
  tracerer::parse_beast_trees(
    pirouette::to_twin_filename(
      razzo_params$pir_params$experiments[[2]]$
        inference_model$mcmc$treelog$filename
    )
  )
)

First BD posterior's ESSes

Plot the Effective Sample Sizes (must be at least 200 in manuscript):

knitr::kable(
  tracerer::calc_esses(
    tracerer::parse_beast_log(
      pirouette::to_twin_filename(
        razzo_params$pir_params$experiments[[2]]$
          inference_model$mcmc$tracelog$filename)
    ),
    sample_interval = razzo_params$pir_params$experiments[[2]]$
      inference_model$mcmc$store_every
  )
)

First MBD errors

ggplot(
  data = data.frame(nltt = utils::read.csv(mbd_nltt_filenames[1])$x),
  aes(x = nltt)
) + geom_histogram(binwidth = 0.01) +
  ggplot2::scale_x_continuous(limits = c(0.0, 1.0))

First BD errors

ggplot(
  data = data.frame(nltt = utils::read.csv(bd_nltt_filenames[1])$x),
  aes(x = nltt)
) + geom_histogram(binwidth = 0.01) +
  ggplot2::scale_x_continuous(limits = c(0.0, 1.0))

First MBD marginal likelihoods

if (rappdirs::app_dir()$os != "win") {
  bd_marg_lik_filenames <- rep(NA, length(parameters_filenames))
  for (i in seq_along(parameters_filenames)) {
    bd_marg_lik_filenames[i] <- create_bd_marg_lik_file(
      parameters_filename = parameters_filenames[i]
    )
  }
  knitr::kable(utils::read.csv(bd_marg_lik_filenames[1])[-1])
}

First BD marginal likelihoods

if (rappdirs::app_dir()$os != "win") {
  bd_marg_lik_filenames <- rep(NA, length(parameters_filenames))
  for (i in seq_along(parameters_filenames)) {
    bd_marg_lik_filenames[i] <- create_bd_marg_lik_file(
      parameters_filename = parameters_filenames[i]
    )
  }
  knitr::kable(utils::read.csv(bd_marg_lik_filenames[1])[-1])
}


richelbilderbeek/razzo documentation built on March 10, 2021, 1:17 p.m.