#' Estimate Infections, the Time-Varying Reproduction Number and the Rate of
#' Growth
#'
#' @description `r lifecycle::badge("maturing")`
#' Uses a non-parametric approach to reconstruct cases by date of infection
#' from reported cases. It uses either a generative Rt model or non-parametric
#' back calculation to estimate underlying latent infections and then maps
#' these infections to observed cases via uncertain reporting delays and a
#' flexible observation model. See the examples and function arguments for the
#' details of all options. The default settings may not be sufficient for your
#' use case so the number of warmup samples (`stan_args = list(warmup)`) may
#' need to be increased as may the overall number of samples. Follow the links
#' provided by any warnings messages to diagnose issues with the MCMC fit. It
#' is recommended to explore several of the Rt estimation approaches supported
#' as not all of them may be suited to users own use cases. See
#' [here](https://gist.github.com/seabbs/163d0f195892cde685c70473e1f5e867)
#' for an example of using `estimate_infections` within the `epinow` wrapper to
#' estimate Rt for Covid-19 in a country from the ECDC data source.
#'
#' @param data A `<data.frame>` of disease reports (confirm) by date (date).
#' `confirm` must be numeric and `date` must be in date format. Optionally,
#' `data` can also have a logical `accumulate` column which indicates whether
#' data should be added to the next data point. This is useful when modelling
#' e.g. weekly incidence data. See also the [fill_missing()] function which
#' helps add the `accumulate` column with the desired properties when dealing
#' with non-daily data. If any accumulation is done this happens after
#' truncation as specified by the `truncation` argument. If all entries
#' of `confirm` are missing (`NA`) the returned estimates will represent the
#' prior distributions.
#'
#' @param generation_time A call to [gt_opts()] (or its alias
#' [generation_time_opts()]) defining the generation time distribution used.
#' For backwards compatibility a list of summary parameters can also be passed.
#'
#' @param delays A call to [delay_opts()] defining delay distributions and
#' options. See the documentation of [delay_opts()] and the examples below for
#' details.
#'
#' @param truncation A call to [trunc_opts()] defining the truncation of
#' the observed data. Defaults to [trunc_opts()], i.e. no truncation. See the
#' [estimate_truncation()] help file for an approach to estimating this from
#' data where the `dist` list element returned by [estimate_truncation()] is
#' used as the `truncation` argument here, thereby propagating the uncertainty
#' in the estimate.
#'
#' @param forecast A list of options as generated by [forecast_opts()] defining
#' the forecast opitions. Defaults to [forecast_opts()]. If NULL then no
#' forecasting will be done.
#'
#' @param horizon Deprecated; use `forecast` instead to specify the predictive
#' horizon
#'
#' @param weigh_delay_priors Deprecated; this is now specified at the
#' distribution level in `generation_time_opts()`, `delay_opts()` and
#' `trunc_opts()` using the `weight_prior` argument.
#'
#' @param verbose Logical, defaults to `TRUE` when used interactively and
#' otherwise `FALSE`. Should verbose debug progress messages be printed.
#' Corresponds to the "DEBUG" level from `futile.logger`. See `setup_logging`
#' for more detailed logging options.
#'
#' @param filter_leading_zeros Logical, defaults to TRUE. Should zeros at the
#' start of the time series be filtered out.
#'
#' @param zero_threshold `r lifecycle::badge("experimental")` Numeric defaults
#' to Inf. Indicates if detected zero cases are meaningful by using a threshold
#' number of cases based on the 7-day average. If the average is above this
#' threshold then the zero is replaced using `fill`.
#'
#' @export
#' @return A list of output including: posterior samples, summarised posterior
#' samples, data used to fit the model, and the fit object itself.
#'
#' @seealso [epinow()] [regional_epinow()] [forecast_infections()]
#' [estimate_truncation()]
#' @inheritParams create_stan_args
#' @inheritParams create_stan_data
#' @inheritParams create_rt_data
#' @inheritParams create_backcalc_data
#' @inheritParams create_gp_data
#' @inheritParams create_obs_model
#' @inheritParams fit_model_with_nuts
#' @inheritParams calc_CrIs
#' @importFrom data.table data.table copy merge.data.table as.data.table
#' @importFrom data.table setorder rbindlist melt .N setDT
#' @importFrom lubridate days
#' @importFrom futile.logger flog.threshold flog.warn flog.debug
#' @importFrom checkmate assert_class assert_numeric assert_logical
#' assert_string
#' @examples
#' \donttest{
#' # set number of cores to use
#' old_opts <- options()
#' options(mc.cores = ifelse(interactive(), 4, 1))
#'
#' # get example case counts
#' reported_cases <- example_confirmed[1:60]
#'
#' # set an example generation time. In practice this should use an estimate
#' # from the literature or be estimated from data
#' generation_time <- Gamma(
#' shape = Normal(1.3, 0.3),
#' rate = Normal(0.37, 0.09),
#' max = 14
#' )
#' # set an example incubation period. In practice this should use an estimate
#' # from the literature or be estimated from data
#' incubation_period <- LogNormal(
#' meanlog = Normal(1.6, 0.06),
#' sdlog = Normal(0.4, 0.07),
#' max = 14
#' )
#' # set an example reporting delay. In practice this should use an estimate
#' # from the literature or be estimated from data
#' reporting_delay <- LogNormal(mean = 2, sd = 1, max = 10)
#'
#' # for more examples, see the "estimate_infections examples" vignette
#' def <- estimate_infections(reported_cases,
#' generation_time = gt_opts(generation_time),
#' delays = delay_opts(incubation_period + reporting_delay),
#' rt = rt_opts(prior = LogNormal(mean = 2, sd = 0.1))
#' )
#' # real time estimates
#' summary(def)
#' # summary plot
#' plot(def)
#' options(old_opts)
#' }
estimate_infections <- function(data,
generation_time = gt_opts(),
delays = delay_opts(),
truncation = trunc_opts(),
rt = rt_opts(),
backcalc = backcalc_opts(),
gp = gp_opts(),
obs = obs_opts(),
forecast = forecast_opts(),
stan = stan_opts(),
CrIs = c(0.2, 0.5, 0.9),
weigh_delay_priors = TRUE,
id = "estimate_infections",
verbose = interactive(),
filter_leading_zeros = TRUE,
zero_threshold = Inf,
horizon) {
if (!missing(filter_leading_zeros)) {
lifecycle::deprecate_stop(
"1.7.0",
"estimate_infections(filter_leading_zeros)",
"filter_leading_zeros()"
)
}
if (!missing(zero_threshold)) {
lifecycle::deprecate_stop(
"1.7.0",
"estimate_infections(zero_threshold)",
"apply_zero_threshold()"
)
}
if (!missing(weigh_delay_priors)) {
lifecycle::deprecate_stop(
"1.8.0",
"estimate_infections(weigh_delay_priors)",
detail = "Weighting of priors is now done when defining them in
`generation_time_opts()`, `delay_opts()` or `trunc_opts()` using the
`weight_prior` argument."
)
}
if (!missing(horizon)) {
lifecycle::deprecate_stop(
"1.7.0",
"estimate_infections(horizon)",
"estimate_infections(forecast)",
details = "The `horizon` argument passed to `estimate_infections()` will
override any `horizon` argument passed via `forecast_opts()`."
)
}
# Validate inputs
check_reports_valid(data, model = "estimate_infections")
assert_class(generation_time, "generation_time_opts")
assert_class(delays, "delay_opts")
assert_class(truncation, "trunc_opts")
assert_class(rt, "rt_opts", null.ok = TRUE)
assert_class(backcalc, "backcalc_opts")
assert_class(gp, "gp_opts", null.ok = TRUE)
assert_class(obs, "obs_opts")
if (is.null(forecast)) {
forecast <- forecast_opts(horizon = 0)
}
assert_class(forecast, "forecast_opts")
assert_class(stan, "stan_opts")
assert_numeric(CrIs, lower = 0, upper = 1)
assert_logical(weigh_delay_priors)
assert_string(id)
assert_logical(verbose)
set_dt_single_thread()
if (!is.null(rt) && !rt$use_rt) {
rt <- NULL
}
# Check verbose settings and set logger to match
if (verbose) {
futile.logger::flog.threshold(futile.logger::DEBUG,
name = "EpiNow2.epinow.estimate_infections"
)
}
# Fill missing dates (deprecated)
reported_cases <- default_fill_missing_obs(data, obs, "confirm")
## add forecast horizon if forecasting is required
if (forecast$horizon > 0) {
horizon_args <- list(
data = reported_cases,
horizon = forecast$horizon
)
if (!is.null(forecast$accumulate)) {
horizon_args$accumulate <- forecast$accumulate
}
reported_cases <- do.call(add_horizon, horizon_args)
}
# Add breakpoints column
reported_cases <- add_breakpoints(reported_cases)
# Determine seeding time
seeding_time <- get_seeding_time(delays, generation_time, rt)
# Add initial zeroes
reported_cases <- pad_reported_cases(reported_cases, seeding_time)
# Define stan model parameters
stan_data <- create_stan_data(
reported_cases,
seeding_time = seeding_time,
rt = rt,
gp = gp,
obs = obs,
backcalc = backcalc,
forecast = forecast
)
stan_data <- c(stan_data, create_stan_delays(
gt = generation_time,
delay = delays,
trunc = truncation,
time_points = stan_data$t - stan_data$seeding_time - stan_data$horizon
))
# Set up default settings
stan_args <- create_stan_args(
stan = stan,
data = stan_data,
init = create_initial_conditions(stan_data),
verbose = verbose
)
# Fit model
fit <- fit_model(stan_args, id = id)
# Extract parameters of interest from the fit
out <- extract_parameter_samples(fit, stan_data,
reported_inf_dates = reported_cases$date,
reported_dates = reported_cases$date[-(1:stan_data$seeding_time)],
imputed_dates =
reported_cases$date[-(1:stan_data$seeding_time)][stan_data$imputed_times]
)
# Format output
format_out <- format_fit(
posterior_samples = out,
horizon = stan_data$horizon,
shift = stan_data$seeding_time,
CrIs = CrIs
)
## Join stan fit if required
if (stan$return_fit) {
format_out$fit <- fit
format_out$args <- stan_data
}
format_out$observations <- reported_cases
class(format_out) <- c("estimate_infections", class(format_out))
return(format_out)
}
#' Format Posterior Samples
#'
#' @description `r lifecycle::badge("stable")`
#' Summaries posterior samples and adds additional custom variables.
#'
#' @param posterior_samples A list of posterior samples as returned by
#' [extract_parameter_samples()].
#'
#' @param horizon Numeric, forecast horizon.
#'
#' @param shift Numeric, the shift to apply to estimates.
#'
#' @param burn_in Deprecated; this functionality is no longer available.
#'
#' @param start_date Deprecated; this functionality is no longer available.
#'
#' @inheritParams calc_summary_measures
#' @importFrom data.table fcase rbindlist
#' @importFrom lubridate days
#' @importFrom futile.logger flog.info
#' @return A list of samples and summarised posterior parameter estimates.
#' @keywords internal
format_fit <- function(posterior_samples, horizon, shift, burn_in, start_date,
CrIs) {
if (!missing(burn_in)) {
lifecycle::deprecate_stop(
"1.8.0",
"format_fit(burn_in)",
detail = "This functionality is no longer available."
)
}
if (!missing(start_date)) {
lifecycle::deprecate_stop(
"1.8.0",
"format_fit(start_date)",
detail = "This functionality is no longer available."
)
}
format_out <- list()
# bind all samples together
format_out$samples <- data.table::rbindlist(
posterior_samples,
fill = TRUE, idcol = "variable"
)
if (is.null(format_out$samples$strat)) {
format_out$samples <- format_out$samples[, strat := NA]
}
# add type based on horizon
format_out$samples <- format_out$samples[
,
type := data.table::fcase(
date > (max(date, na.rm = TRUE) - horizon),
"forecast",
date > (max(date, na.rm = TRUE) - horizon - shift),
"estimate based on partial data",
is.na(date), NA_character_,
default = "estimate"
)
]
# summarise samples
format_out$summarised <- calc_summary_measures(format_out$samples,
summarise_by = c("date", "variable", "strat", "type"),
order_by = c("variable", "date"),
CrIs = CrIs
)
format_out
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.