#' Estimate the marginal likelihoods for one or more inference models
#'
#' Estimate the marginal likelihoods (aka evidence)
#' for one or more inference models, based on a single alignment.
#' Also, the marginal likelihoods are compared, resulting in a
#' relative weight for each model, where a relative weight of a model
#' close to \code{1.0} means that that model is way likelier than
#' the others.
#'
#' In the process, multiple (temporary) files are created (where
#' \code{[x]} denotes the index in a list)
#'
#' \itemize{
#' \item \code{beast2_optionses[x]$input_filename}
#' path to the the BEAST2 XML input file
#' \item \code{beast2_optionses[x]$output_state_filename}
#' path to the BEAST2 XML state file
#' \item \code{inference_models[x]$mcmc$tracelog$filename}
#' path to the BEAST2 trace file with parameter estimates
#' \item \code{inference_models[x]$mcmc$treelog$filename}
#' path to the BEAST2 \code{trees} file with the posterior trees
#' \item \code{inference_models[x]$mcmc$screenlog$filename}
#' path to the BEAST2 screen output file
#' }
#'
#' These file can be deleted manually by \link[babette]{bbt_delete_temp_files},
#' else these will be deleted automatically by the operating system.
#' @inheritParams default_params_doc
#' @return a \link{data.frame} showing the estimated marginal likelihoods
#' (and its estimated error) per combination of models. Columns are:
#' \itemize{
#' \item \code{site_model_name}: name of the site model
#' \item \code{clock_model_name}: name of the clock model
#' \item \code{tree_prior_name}: name of the tree prior
#' \item \code{marg_log_lik}: estimated marginal (natural) log likelihood
#' \item \code{marg_log_lik_sd}: estimated error of \code{marg_log_lik}
#' \item \code{weight}: relative model weight, a value from 1.0 (all
#' evidence is in favor of this model combination) to 0.0 (no
#' evidence in favor of this model combination)
#' \item \code{ess}: effective sample size of the marginal likelihood
#' estimation
#' }
#' @seealso
#' \itemize{
#' \item \link{can_run_mcbette}: see if 'mcbette' can run
#' \item \link{est_marg_liks}: estimate multiple marginal likelihood of a
#' single inference mode
#' }
#' @examples
#' if (can_run_mcbette()) {
#'
#' # Use an example FASTA file
#' fasta_filename <- system.file("extdata", "simple.fas", package = "mcbette")
#'
#' # Create two inference models
#' inference_model_1 <- beautier::create_ns_inference_model(
#' site_model = beautier::create_jc69_site_model()
#' )
#' inference_model_2 <- beautier::create_ns_inference_model(
#' site_model = beautier::create_hky_site_model()
#' )
#'
#' # Shorten the run, by doing a short (dirty, unreliable) MCMC
#' inference_model_1$mcmc <- beautier::create_test_ns_mcmc()
#' inference_model_2$mcmc <- beautier::create_test_ns_mcmc()
#'
#' # Combine the inference models
#' inference_models <- list(inference_model_1, inference_model_2)
#'
#' # Create the BEAST2 options, that will write the output
#' # to different (temporary) filanems
#' beast2_options_1 <- beastier::create_mcbette_beast2_options()
#' beast2_options_2 <- beastier::create_mcbette_beast2_options()
#'
#' # Combine the two BEAST2 options sets,
#' # use reduplicated plural
#' beast2_optionses <- list(beast2_options_1, beast2_options_2)
#'
#' # Compare the models
#' marg_liks <- est_marg_liks(
#' fasta_filename,
#' inference_models = inference_models,
#' beast2_optionses = beast2_optionses
#' )
#'
#' # Interpret the results
#' interpret_marg_lik_estimates(marg_liks)
#'
#' beastier::remove_beaustier_folders()
#' beastier::check_empty_beaustier_folders()
#' }
#' @author Richèl J.C. Bilderbeek
#' @export
est_marg_liks <- function(
fasta_filename,
inference_models = list(
beautier::create_inference_model(
mcmc = beautier::create_ns_mcmc()
)
),
beast2_optionses = rep(
list(beastier::create_mcbette_beast2_options()),
times = length(inference_models)
),
verbose = FALSE,
os = rappdirs::app_dir()$os
) {
if (!file.exists(fasta_filename)) {
stop(
"'fasta_filename' must be the name of an existing FASTA file.\n",
"File '", fasta_filename, "' not found"
)
}
beautier::check_inference_models(inference_models)
if (length(inference_models) == 0) {
stop(
"'inference_models' must be a list of at least 1 inference model.\n",
"Tip: use 'create_inference_model'"
)
}
beastier::check_beast2_optionses(beast2_optionses)
for (inference_model in inference_models) {
if (!beautier::is_nested_sampling_mcmc(inference_model$mcmc)) {
stop(
"'inference_models' must have 'mcmc's for nested sampling.\n",
"Tip: use 'beautier::create_ns_mcmc'"
)
}
}
if (os == "win") {
stop(
"mcbette must run on Linux or Mac.\n",
"\n",
"It is not yet supported to call BEAST2 with packages installed\n",
"in a scripted way"
)
}
testit::assert(file.exists(fasta_filename))
testit::assert(beastier::is_beast2_installed())
testit::assert(mauricer::is_beast2_ns_pkg_installed())
n_rows <- length(inference_models)
site_model_names <- rep(NA, n_rows)
clock_model_names <- rep(NA, n_rows)
tree_prior_names <- rep(NA, n_rows)
marg_log_liks <- rep(NA, n_rows)
marg_log_lik_sds <- rep(NA, n_rows)
esses <- rep(NA, n_rows)
# Iterate over all inference models and BEAST2 optionses
for (i in seq(1, n_rows)) {
if (verbose == TRUE) {
message(
paste0(
"Estimating evidence for model ", i, "/", n_rows, " with ",
inference_models[[i]]$site_model$name, " site model, ",
inference_models[[i]]$clock_model$name, " clock model and ",
inference_models[[i]]$tree_prior$name, " tree prior"
)
)
}
inference_model <- inference_models[[i]]
beast2_options <- beast2_optionses[[i]]
beautier::check_inference_model(inference_model)
beastier::check_beast2_options(beast2_options)
beautier::check_nested_sampling_mcmc(inference_model$mcmc)
ns <- mcbette::est_marg_lik(
fasta_filename = fasta_filename,
inference_model = inference_model,
beast2_options = beast2_options,
os = os
)
if (verbose) message(ns)
testit::assert("marg_log_lik" %in% names(ns))
testit::assert("marg_log_lik_sd" %in% names(ns))
marg_log_liks[i] <- ns$marg_log_lik
marg_log_lik_sds[i] <- ns$marg_log_lik_sd
esses[i] <- ns$ess
site_model_names[i] <- inference_model$site_model$name
clock_model_names[i] <- inference_model$clock_model$name
tree_prior_names[i] <- inference_model$tree_prior$name
}
weights <- as.numeric(
mcbette::calc_weights(marg_liks = exp(Rmpfr::mpfr(marg_log_liks, 512)))
)
df <- data.frame(
site_model_name = site_model_names,
clock_model_name = clock_model_names,
tree_prior_name = tree_prior_names,
marg_log_lik = marg_log_liks,
marg_log_lik_sd = marg_log_lik_sds,
weight = weights,
ess = esses,
stringsAsFactors = TRUE
)
testthat::expect_true(
abs(1.0 - sum(weights)) < 0.01,
info = "Sum of weights should be 1.0. Maybe forgot to install libmpfr-dev?"
)
testthat::expect_true(is.factor(df$site_model_name))
df
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.