Nothing
#' @title Get dates for which to use (trust) inferred daily reports
#'
#' @inheritParams smooth_cl
#' @inheritParams estimate_R_cl
#' @param dates.only Logical. Return use dates only or all columns of `cl.daily`.
#' @keywords internal
#' @return Data frame or vector, depending on `dates.only`
#'
get_use_dates <- function(
cl.daily,
cl.data,
prm.daily.check,
dates.only = TRUE
){
cl.daily <- cl.daily |>
summarise_by_date_iters()
# make unified df with observed data
cl.data <- (cl.data
# make date of report column to track aggregation periods
|> dplyr::mutate(date.report = date)
# attach start date based on time index column
|> attach_startdate_agg()
|> dplyr::select(-t)
# complete date sequence
|> tidyr::complete(
date = seq(min(date), max(date), by = "day")
)
# fill report date up to make grouping variable to aggregate
# inferred daily reports
|> tidyr::fill(date.report, .direction = "up")
# rename value as obs to make col clearer in unified data below
|> dplyr::rename(obs = value)
)
# unified data with aggregates and relative differences
res = (cl.daily
|> dplyr::full_join(cl.data, by = "date")
|> summarise_report_counts(prm.daily.check = prm.daily.check)
)
if(dates.only) {
res = res |>
dplyr::filter(use) |>
dplyr::pull(date)
}
return(res)
}
# helpers -----------------------------------------------------------------
#' @title Summarise observations by date for raw iterations from an ensemble
#'
#' @param df Data frame. Must have `date` and `value` columns.
#' @keywords internal
#'
summarise_by_date_iters <- function(df){
res = df |>
dplyr::group_by(date) |>
dplyr::summarise(
mean = mean(value),
lwr = stats::quantile(value, probs = 0.025),
upr = stats::quantile(value, probs = 0.975),
.groups = "drop" )
return(res)
}
#' @title Summarise daily inferred reports
#' based on original reporting schedule and calculate error
#'
#' @param df Data frame. As output by [`get_use_dates()`].
#' @inheritParams estimate_R_cl
#' @keywords internal
#' @return Data frame
#'
summarise_report_counts <- function(df, prm.daily.check){
agg.reldiff.tol <- prm.daily.check$agg.reldiff.tol
df <- (df
# aggregated fitted reports
|> dplyr::group_by(date.report)
|> dplyr::mutate(
dplyr::across(c(mean, lwr, upr), sum,
.names = "{.col}.agg"))
|> dplyr::ungroup()
|> dplyr::mutate(
# relative diffs with observed
dplyr::across(dplyr::ends_with("agg"), ~ (. - obs)/obs*100,
.names = "{.col}.reldiff")
)
)
# figure out which daily inferred reports should actually be used
# we want to filter out start of inferred daily reports until
# estimates have converged to below a specified tolerance
use.dates <- (df
|> dplyr::select(date.report, mean.agg.reldiff)
|> tidyr::drop_na()
|> dplyr::mutate(
# set "use" flag for fitted aggregated reports within a
# 10% tolerance
use = abs(mean.agg.reldiff) < agg.reldiff.tol
)
# figure out first date where fitted aggregated reports fall
# within above threshold for relative error
# drop values before that point in time
|> dplyr::mutate(use.cumm = cumsum(use))
|> dplyr::filter(use.cumm > 0)
|> dplyr::pull(date.report)
)
# TODO: show use.dates here?
# attach use flag to output data
(df |> dplyr::mutate(use = date.report %in% use.dates))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.