#' @title Estimate a severity measure that varies over time
#'
#' @description Calculates how the severity of a disease changes over time
#' while optionally correcting for reporting delays using an epidemiological
#' delay distribution of the time between symptom onset and outcome
#' (e.g. onset-to-death for the fatality risk).
#'
#' @inheritParams cfr_static
#' @param burn_in A single integer-like value for the number of time-points
#' (typically days) to disregard at the start of the time-series, if a burn-in
#' period is desired.
#'
#' Defaults to 7, which disregards the first week of cases and deaths,
#' assuming daily data.
#'
#' To consider all case data including the start of the time-series, set this
#' argument to 0.
#'
#' @param smoothing_window An _odd_ number determining the smoothing window size
#' to use when smoothing the case and death time-series, using a rolling median
#' procedure (as the `k` argument to [stats::runmed()]) before calculating the
#' time-varying severity.
#'
#' The default behaviour is to apply no smoothing. The minimum value of this
#' argument is 1.
#'
#' @return A `<data.frame>` with one row for each day in the input data,
#' giving the date, the maximum likelihood estimate, and the 95% confidence
#' interval of the daily severity in the columns "date", "severity_estimate",
#' "severity_low", and "severity_high".
#'
#' @details
#' # Details: Adjusting for delays between two time series
#'
#' This function estimates the number of cases which have a known outcome over
#' time, following Nishiura et al. (2009).
#' The function calculates a quantity \eqn{k_t} for each day within the input
#' data, which represents the number of cases estimated to have a known outcome,
#' on day \eqn{t}. \eqn{k_t} is calculated in the following way:
#' \deqn{k_t = \sum_{j = 0}^t c_j f_{t - j}}
#'
#' where \eqn{c_j} is the number of cases reported on day \eqn{j}, and \eqn{f}
#' is the probability mass function of the delay between symptom onset and
#' outcome.
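#'
#' To make the convolution concrete, here is a minimal sketch in base R,
#' assuming `cases` is a vector of daily case counts and `delay_density` is
#' the delay distribution's density function (both hypothetical here):
#'
#' ```r
#' # probability mass of an outcome becoming known 0, 1, ... days after onset
#' pmf_vals <- delay_density(seq(0, length(cases) - 1))
#' # k_t for each day t: daily cases convolved with the delay distribution
#' known_outcomes <- vapply(seq_along(cases), function(t) {
#'   sum(cases[seq_len(t)] * rev(pmf_vals[seq_len(t)]))
#' }, FUN.VALUE = numeric(1))
#' ```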
#'
#' We then model the daily deaths \eqn{d_t} as binomially distributed,
#' conditional on the severity measure of interest (for example, the CFR):
#'
#' \deqn{d_t \sim {\sf Binomial}(k_t, \theta_t)}
#'
#' We use maximum likelihood estimation to determine the value of \eqn{\theta_t}
#' for each \eqn{t}, where \eqn{\theta} represents the severity measure of
#' interest.
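#'
#' For a single day, this maximum likelihood estimate and its confidence
#' interval can be obtained from a binomial test; a minimal sketch with
#' hypothetical counts of 8 deaths among 40 cases with known outcomes:
#'
#' ```r
#' fit <- stats::binom.test(8, 40)
#' fit$estimate # MLE of the severity: 0.2
#' fit$conf.int # 95% confidence interval
#' ```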
#'
#' The epidemiological delay distribution passed to `delay_density` is used to
#' obtain a probability mass function parameterised by time, \eqn{f(t)}, which
#' gives the probability that a case's binary outcome (survival or death)
#' becomes known \eqn{t} days after symptom onset. The delay distribution must
#' be parameterised with disease-specific parameters before it is supplied
#' here.
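#'
#' For example, an onset-to-death distribution for COVID-19 could be passed as
#' a lognormal density with published parameters, as in the examples below:
#'
#' ```r
#' delay_density <- function(x) dlnorm(x, meanlog = 2.577, sdlog = 0.440)
#' ```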
#'
#' **Note** that the function arguments `burn_in` and `smoothing_window` do
#' not enter this calculation directly. `burn_in` controls how many estimates
#' at the beginning of the outbreak are replaced with `NA`s; the calculation
#' above is not applied to the first `burn_in` data points. If a
#' `smoothing_window` is specified, the calculation is applied to the smoothed
#' data.
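#'
#' A minimal sketch of the smoothing step, with a hypothetical 7-day window
#' applied to illustrative daily counts:
#'
#' ```r
#' cases <- c(3, 40, 5, 6, 7, 9, 12, 10, 13, 14)
#' # rolling median; end values are kept as-is ("keep"), as in this function
#' stats::runmed(cases, k = 7, endrule = "keep")
#' ```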
#'
#' @references
#' Nishiura, H., Klinkenberg, D., Roberts, M., & Heesterbeek, J. A. P. (2009).
#' Early Epidemiological Assessment of the Virulence of Emerging Infectious
#' Diseases: A Case Study of an Influenza Pandemic. PLOS ONE, 4(8), e6852.
#' \doi{10.1371/journal.pone.0006852}
#'
#' @export
#'
#' @examples
#' # get data pre-loaded with the package
#' data("covid_data")
#' df_covid_uk <- covid_data[covid_data$country == "United Kingdom" &
#'   covid_data$date <= as.Date("2020-09-01"), ]
#'
#' # estimate time varying severity without correcting for delays
#' cfr_time_varying <- cfr_time_varying(
#'   data = df_covid_uk,
#'   burn_in = 7L
#' )
#' # View
#' tail(cfr_time_varying)
#'
#' # estimate time varying severity while correcting for delays
#' # obtain onset-to-death delay distribution parameters from Linton et al. 2020
#' # J. Clinical Medicine: <https://doi.org/10.3390/jcm9020538>
#' cfr_time_varying <- cfr_time_varying(
#'   data = df_covid_uk,
#'   delay_density = function(x) dlnorm(x, meanlog = 2.577, sdlog = 0.440),
#'   burn_in = 7L
#' )
#' # view the last few values
#' tail(cfr_time_varying)
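#'
#' # estimate time-varying severity while correcting for delays and smoothing
#' # the case and death data; a 7-day window is an illustrative choice,
#' # assuming daily data
#' cfr_time_varying <- cfr_time_varying(
#'   data = df_covid_uk,
#'   delay_density = function(x) dlnorm(x, meanlog = 2.577, sdlog = 0.440),
#'   burn_in = 7L,
#'   smoothing_window = 7L
#' )
#' tail(cfr_time_varying)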
#'
cfr_time_varying <- function(data,
                             delay_density = NULL,
                             burn_in = 7,
                             smoothing_window = NULL) {
  # input checking
  # zero count allowed to include all data
  checkmate::assert_count(burn_in)
  # expect more rows than the burn-in value
  checkmate::assert_data_frame(data, min.cols = 3, min.rows = burn_in + 1)
  # check that input `<data.frame>` has columns date, cases, and deaths
  checkmate::assert_names(
    colnames(data),
    must.include = c("date", "cases", "deaths")
  )
  # check for any NAs among data
  checkmate::assert_data_frame(
    data[, c("date", "cases", "deaths")],
    any.missing = FALSE
  )
  # check that data$date is a date column
  checkmate::assert_date(data$date, any.missing = FALSE, all.missing = FALSE)
  checkmate::assert_count(smoothing_window, null.ok = TRUE)
  stopifnot(
    "Input data must have sequential dates with none missing or duplicated" =
      identical(unique(as.numeric(diff(data$date))), 1), # use numeric 1
    "`smoothing_window` must be an odd number greater than 0" =
      (smoothing_window %% 2 != 0),
    "`delay_density` must be a function with a single required argument,
    and evaluating distribution density at a vector of values and returning a
    numeric vector of the same length.
    E.g. function(x) stats::dgamma(x = x, shape = 5, scale = 1)" =
      (test_fn_req_args(delay_density) &&
        test_fn_num_out(delay_density)) || is.null(delay_density)
  )

  # prepare a new dataframe with smoothed columns if requested;
  # all temporary operations are performed on df_temp, and data is returned
  # with only three new columns added, and no other changes
  df_temp <- data

  # smooth cases and deaths if requested
  if (is.null(smoothing_window)) {
    # set to 0 for internal use only; see below
    smoothing_window <- 0
  } else {
    df_temp$cases <- stats::runmed(
      data$cases,
      k = smoothing_window,
      endrule = "keep"
    )
    df_temp$deaths <- stats::runmed(
      data$deaths,
      k = smoothing_window,
      endrule = "keep"
    )
  }

  #### prepare matrix for severity estimation ####
  # when not correcting for delays, set the estimated no. of known outcomes
  # to the cases; this avoids if-else ladders
  df_temp$estimated_outcomes <- df_temp$cases

  # assign columns for severity estimate and intervals
  severity_estimates <- matrix(
    data = NA_real_, nrow = nrow(data), ncol = 3,
    dimnames = list(NULL, sprintf("severity_%s", c("estimate", "low", "high")))
  )

  # calculate the indices to modify: start from the final index to be
  # smoothed and end at the first row after the burn-in number of rows (days)
  indices <- seq(nrow(data) - smoothing_window, burn_in + 1, -1)

  if (!is.null(delay_density)) {
    pmf_vals <- delay_density(seq(from = 0, to = nrow(data) - 1L))
    df_temp[["estimated_outcomes"]] <- round(.convolve_cases_pmfs(
      cases = df_temp$cases, pmf_vals = pmf_vals
    ))
  }

  #### get severity estimates ####
  # keep only days on which deaths do not exceed the non-zero estimated
  # (known) outcomes, and which are not smoothed or excluded by the burn-in;
  # this reduces the number of indices over which to run the binomial test
  # NOTE: estimated_outcomes are not allowed to be NA
  indices <- intersect(
    indices,
    which(df_temp$deaths <= df_temp$estimated_outcomes &
      df_temp$estimated_outcomes > 0)
  )

  # binomial test at indices
  estimates_tmp <- vapply(indices, FUN = function(i) {
    severity_estimate <- stats::binom.test(
      df_temp$deaths[i],
      df_temp$estimated_outcomes[i]
    )
    # return a vector of the estimate and confidence interval limits
    c(
      severity_estimate$estimate[[1]],
      severity_estimate$conf.int[[1]],
      severity_estimate$conf.int[[2]]
    )
  }, numeric(3))

  # replace the values at indices
  severity_estimates[indices, ] <- t(estimates_tmp)

  # create data frame and add the date
  severity_estimates <- as.data.frame(severity_estimates)
  severity_estimates$date <- data$date

  # return severity estimates with names in the correct order
  severity_estimates[, c(
    "date", "severity_estimate", "severity_low", "severity_high"
  )]
}