# Batch Processing of Parameter Estimates in R
#' Batch Processing of Parameter Estimates
#'
#' Calls out to `bbi nonmem params` and returns a tibble of parameter estimates.
#' The primary reason for this function is for things like bootstraps or
#' simulations where parameter estimates for a large number of models (hundreds
#' or thousands) need to be pulled in together. In those cases, this function is
#' _much_ faster than using [model_summaries()] and [param_estimates()],
#' because it does _not_ parse all of the other information contained in a
#' `bbi_nonmem_summary` object.
#'
#' @details
#'
#' The tibble will always have columns `absolute_model_path`, `run`,
#' `error_msg`, and `termination_code`. All other column names are those of the
#' parameter estimates found in the `.ext` files detected in the directory
#' passed. Note: the passed directory is _not_ searched recursively. Passed
#' directory will be searched for model files and output directories for found
#' models will be searched for `.ext` files.
#'
#' `error_msg`: column will be `NA` if error is not detected. Column will contain message if known error is detected. The known errors
#' are
#' - `"no ext file contents"`: `.ext` file detected, but contains nothing
#' - `"no estimation output detected"`: `.ext` file detected, has contents, but is missing line `-1000000000` which are the final parameter estimates
#'
#' `termination_code`: parsed from line `-1000000007` of the `.ext`
#'
#' **parameter columns:** naming will be the same as names found in `.ext` file.
#' If `.ext` files within the directory contain differing parameter names, the
#' output tibble will show all parameter names across files and NA values for
#' the files where a given parameter is missing.
#'
#' @importFrom dplyr rename select mutate everything across
#' @importFrom data.table fread
#' @importFrom tibble as_tibble
#'
#' @param .path a path to a directory containing model sudirectories for batch parameter processing.
#' @param ... args passed through to [bbi_exec()]
#' @param .dry_run show what the command would be without actually running it
#' @export
param_estimates_batch <- function(.path,
...,
.dry_run = FALSE) {
assert_bbi_version(.min_version = "3.1.0", .function = "param_estimates_batch")
.path <- fs::path_abs(.path)
if (!dir_exists(.path)) {
err_msg <-
glue("param_estimates_batch('{.path}') failed; unable to locate {.path}")
stop(err_msg, call. = FALSE)
}
# path containing .ext file
ext_file_dir <- basename(.path)
# for bbi nonmem params, need to pass the directory containing .ext
# cd to the root directory containing directory with .ext files
.path <- dirname(.path)
cmd_args <- c("nonmem", "params", "--dir", ext_file_dir)
# if .dry_run return output call
if (.dry_run) {
res <- bbi_dry_run(cmd_args, .path)
return(res)
}
res <- tryCatch(
bbi_exec(cmd_args, .dir = .path, ..., .wait = TRUE),
error = function(e) {
err_msg <-
glue(
"param_estimates_batch('{.path}') failed with the following error. This may be because the modeling run has not finished successfully.\n\nERROR: \n{e}"
)
stop(err_msg, call. = FALSE)
}
)
df <- as_tibble(fread(text = res$stdout, header = TRUE))
# throw out extra column that gets created if no models succeeded
if ("V4" %in% names(df)) df <- select(df, -"V4")
# reformat parameter names to conform with `param_estimates()` output
names(df) <- gsub("\\(([0-9]+)_([0-9]+)\\)", "(\\1,\\2)", names(df))
# format and return
df %>%
rename(
absolute_model_path = "dir",
error_msg = "error",
termination_code = "termination"
) %>%
mutate(
across(everything(), ~ ifelse(. == "", NA, .)),
absolute_model_path = file.path(.path, .data$absolute_model_path),
run = basename(.data$absolute_model_path)
) %>%
select(
"absolute_model_path",
"run",
"error_msg",
"termination_code",
everything()
)
}
#' Compare parameter estimates
#'
#' Summarizes the parameter estimates from tibble like what's returned from
#' [param_estimates_batch()], showing you the quantiles passed to `probs`,
#' likely interpreted as confidence intervals for your parameter estimates.
#' Optionally takes an "original model" to compare these quantiles against.
#' Originally conceived for comparing a model to multiple runs of the "same"
#' model, for example in a bootstrap or simulation.
#'
#' @return A tibble with the first column containing the parameter names (that
#' were column names in `.param_df`), the second column optionally containing
#' original parameter estimates (if `.orig_mod` is passed), and subsequent
#' columns containing the quantiles specified in `probs`. You can think of
#' this as essentially `.param_df`, summarized by quantiles across all rows,
#' and then pivoted.
#'
#'
#' @param .param_df A tibble with columns for each parameter and rows for each
#' model (like what's returned from [param_estimates_batch()])
#' @param .orig_mod `bbi_model` object to compare `.param_df` against. If
#' `NULL`, the default, only returns quantiles from `.param_df`. If passed,
#' will display an additional `original_estimate` column.
#' @param .compare_cols An expression that can be passed to [dplyr::select()] to
#' select which columns in `.param_df` will be pivoted and summarized.
#' Defaults to `dplyr::starts_with(c("THETA", "SIGMA", "OMEGA"))` (designed
#' for NONMEM runs), but consider using [dplyr::matches()] to select columns
#' using regex. Also see
#' [?tidyselect::language](https://tidyselect.r-lib.org/reference/language.html)
#' for more details and options.
#' @param probs Numeric vector with values between 0 and 1 to be passed through to
#' [stats::quantile()]. Represents the quantiles to calculate for parameter
#' estimates in `.param_df`.
#' @param na.rm Logical scalar, passed through to [stats::quantile()].
#'
#' @importFrom tibble rownames_to_column
#' @importFrom dplyr summarise across starts_with select rename left_join
#' @importFrom stats quantile
#' @export
param_estimates_compare <- function(
.param_df,
.orig_mod = NULL,
.compare_cols = starts_with(c("THETA", "SIGMA", "OMEGA")),
probs = c(.5, 0.025, 0.975),
na.rm = FALSE
) {
comp_df <- .param_df %>%
select({{ .compare_cols }})
if (!is.null(.orig_mod)) {
if (!inherits(.orig_mod, NM_SUM_CLASS)) {
.orig_mod <- model_summary(.orig_mod)
}
mod_df <- param_estimates(.orig_mod)
# check that param names match
n1 <- sort(mod_df$parameter_names)
n2 <- sort(names(comp_df))
.same <- suppressSpecificWarning(
all(n1 == n2),
"is not a multiple"
)
if (!.same) {
stop(paste(
glue("The models passed to `.param_df` and `.orig_mod` do not have the same parameters."),
glue(" `.param_df` parameter names: {paste(n2, collapse = ', ')}"),
glue(" `.orig_mod` parameter names: {paste(n1, collapse = ', ')}"),
sep = "\n"
))
}
}
# summarize comp to quantiles
quantile_fn <- function(x) {
quantile(x, probs = probs, na.rm = na.rm)
}
comp_df <- comp_df %>%
reframe(across(.cols = everything(), .fns = quantile_fn)) %>%
t() %>%
as.data.frame() %>%
rownames_to_column()
colnames(comp_df) <- c("parameter_names", paste0(probs * 100, "%"))
# join quantiles to original
if (!is.null(.orig_mod)) {
comp_df <- mod_df %>%
select("parameter_names", "estimate") %>%
rename(original_estimate = "estimate") %>%
left_join(comp_df, by = "parameter_names")
}
return(comp_df)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.