Nothing
#' Fit a lineage frequency model
#'
#' Unified interface for modeling lineage frequency dynamics. Supports
#' multiple engines that share the same input/output contract.
#'
#' @param data An [lfq_data] object.
#' @param engine Model engine to use:
#' * `"mlr"` (default): Multinomial logistic regression. Fast,
#' frequentist, no external dependencies.
#' * `"hier_mlr"`: Hierarchical MLR with partial pooling across
#' locations. Requires `.location` column in data.
#' * `"piantham"`: Piantham approximation converting MLR growth
#' rates to relative reproduction numbers. Requires
#' `generation_time` argument.
#' * `"fga"`: Fixed growth advantage model (Bayesian via CmdStan).
#' Requires 'CmdStan'; check with [lfq_stan_available()].
#' * `"garw"`: Growth advantage random walk model (Bayesian via
#' CmdStan). Allows fitness to change over time.
#' @param pivot Reference lineage name. Growth rates are reported
#' relative to this lineage (fixed at 0). Default: the lineage with
#' the highest count at the earliest time point.
#' @param horizon Forecast horizon in days (stored for later use by
#' [forecast()]). Default 28.
#' @param ci_level Confidence level for intervals. Default 0.95.
#' @param ... Engine-specific arguments passed to the internal engine
#' function. For `engine = "mlr"`: `window`, `ci_method`,
#' `laplace_smooth`. For `engine = "piantham"`: `generation_time`
#' (required). For `engine = "hier_mlr"`: `shrinkage_method`.
#'
#' @return An `lfq_fit` object (S3 class), a list containing:
#' \describe{
#' \item{engine}{Engine name (character).}
#' \item{growth_rates}{Named numeric vector of growth rates per
#' `time_scale` days (pivot = 0).}
#' \item{intercepts}{Named numeric vector of intercepts.}
#' \item{pivot}{Name of pivot lineage.}
#' \item{lineages}{Character vector of all lineage names.}
#' \item{fitted_values}{Tibble of fitted frequencies.}
#' \item{residuals}{Tibble with observed, fitted, Pearson residuals.}
#' \item{vcov_matrix}{Variance-covariance matrix.}
#' \item{loglik, aic, bic}{Model fit statistics.}
#' \item{nobs, n_timepoints, df}{Sample and model sizes.}
#' \item{ci_level, horizon}{As specified.}
#' \item{call}{The matched call.}
#' }
#'
#' @seealso [growth_advantage()] to extract fitness estimates,
#' [forecast()] for frequency prediction, [backtest()] for
#' rolling-origin evaluation.
#'
#' @details
#' The MLR engine models the frequency of lineage \eqn{v} at time
#' \eqn{t} as:
#' \deqn{p_v(t) = \frac{\exp(\alpha_v + \delta_v t)}{\sum_k \exp(\alpha_k + \delta_k t)}}
#' where \eqn{\alpha_v} is the intercept, \eqn{\delta_v} is the
#' growth rate per `time_scale` days (default 7), and the pivot
#' lineage has \eqn{\alpha = \delta = 0}. Parameters are estimated
#' by maximum likelihood via `optim(method = "BFGS")` with the
#' Hessian used for asymptotic Wald confidence intervals.
#'
#' The constant growth rate assumption is appropriate for monotonic
#' variant replacement periods (typically 2--4 months). For longer
#' periods or non-monotonic dynamics, use the `window` argument to
#' restrict the estimation window, or consider the `"garw"` engine
#' which allows time-varying growth advantages.
#'
#' @references
#' Abousamra E, Figgins M, Bedford T (2024). Fitness models provide
#' accurate short-term forecasts of SARS-CoV-2 variant frequency.
#' \emph{PLoS Computational Biology}, 20(9):e1012443.
#' \doi{10.1371/journal.pcbi.1012443}
#'
#' Piantham C, Linton NM, Nishiura H (2022). Predicting the
#' trajectory of replacements of SARS-CoV-2 variants using relative
#' reproduction numbers. \emph{Viruses}, 14(11):2556.
#' \doi{10.3390/v14112556}
#'
#' @examples
#' sim <- simulate_dynamics(
#' n_lineages = 3,
#' advantages = c("JN.1" = 1.3, "KP.3" = 0.9),
#' n_timepoints = 15, seed = 42
#' )
#' fit <- fit_model(sim, engine = "mlr")
#' fit
#'
#' @export
fit_model <- function(data,
engine = c("mlr", "hier_mlr", "piantham", "fga", "garw"),
pivot = NULL,
horizon = 28L,
ci_level = 0.95,
...) {
# Allow both built-in and registered engines
builtin <- c("mlr", "hier_mlr", "piantham", "fga", "garw")
# If default vector passed (length > 1), use match.arg
if (length(engine) > 1L) {
engine <- match.arg(engine, builtin)
}
registered_fn <- .get_registered_engine(engine)
if (is.null(registered_fn) && !engine %in% builtin) {
cli::cli_abort(c(
"Unknown engine {.val {engine}}.",
"i" = "Available: {.val {c(builtin, names(.get_registry()))}}"
))
}
if (!is_lfq_data(data)) {
cli::cli_abort(
"{.arg data} must be an {.cls lfq_data} object. Use {.fn lfq_data} first."
)
}
assert_prob(ci_level, "ci_level")
if (!is.null(registered_fn)) {
result <- registered_fn(data, pivot = pivot, ci_level = ci_level, ...)
} else {
result <- switch(engine,
mlr = .engine_mlr(data, pivot = pivot, ci_level = ci_level, ...),
hier_mlr = .engine_hier_mlr(data, pivot = pivot, ci_level = ci_level, ...),
piantham = .engine_piantham(data, pivot = pivot, ci_level = ci_level, ...),
fga = .engine_fga(data, pivot = pivot, ci_level = ci_level, ...),
garw = .engine_garw(data, pivot = pivot, ci_level = ci_level, ...)
)
}
result$engine <- engine
result$ci_level <- ci_level
result$horizon <- as.integer(horizon)
result$call <- match.call()
structure(result, class = c(paste0("lfq_fit_", engine), "lfq_fit"))
}
#' List available modeling engines
#'
#' Returns information about all modeling engines available in
#' lineagefreq. Core engines (mlr, hier_mlr, piantham) are always
#' available. Bayesian engines (fga, garw) require 'CmdStan'.
#'
#' @return A tibble with columns: `engine`, `type`, `time_varying`,
#' `available`, `description`.
#'
#' @examples
#' lfq_engines()
#'
#' @export
lfq_engines <- function() {
stan_ok <- lfq_stan_available()
builtin <- tibble::tibble(
engine = c("mlr", "hier_mlr", "piantham", "fga", "garw"),
type = c("frequentist", "frequentist", "frequentist",
"bayesian", "bayesian"),
time_varying = c(FALSE, FALSE, FALSE, FALSE, TRUE),
available = c(TRUE, TRUE, TRUE, stan_ok, stan_ok),
description = c(
"Multinomial logistic regression (MLE)",
"Hierarchical MLR with empirical Bayes shrinkage",
"Piantham Rt approximation from MLR growth rates",
"Fixed growth advantage (Stan)",
"Growth advantage random walk (Stan)"
)
)
# Append registered engines
reg <- .get_registry()
if (length(reg) > 0) {
custom <- tibble::tibble(
engine = vapply(reg, `[[`, "", "name"),
type = vapply(reg, `[[`, "", "type"),
time_varying = vapply(reg, `[[`, FALSE, "time_varying"),
available = TRUE,
description = vapply(reg, `[[`, "", "description")
)
builtin <- dplyr::bind_rows(builtin, custom)
}
builtin
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.