#' Probability case remains undetected over time
#'
#' This is effectively a survival function, which shows the
#' probability that an actual case originating on day t_onset
#' remains unreported at t_1.
#'
#' @param t_1 observation time subsequent to time when case originates: t1>=t0
#' @param t_onset time when case originates
#' @param reporting_parameters named list of 'mean' and 'sd' of gamma distribution
#' characterising the reporting delay distribution
#'
#' @return a probability
#' @examples
#' incidenceinflation:::undetected_prob(2, 1, list(mean=10, sd=5))
undetected_prob <- function(t_1, t_onset, reporting_parameters){
if(t_1 < t_onset)
stop("t_1 must equal or exceed t_onset")
delay_mean <- reporting_parameters$mean
delay_sd <- reporting_parameters$sd
1 - pgamma_mean_sd(t_1 - t_onset, delay_mean, delay_sd)
}
#' Probability a case is detected by a later time given it was unobserved
#' earlier
#'
#' This is the probability that a case originating on day day_onset is detected
#' between days day_1 and day_2 given that it was unobserved on day day_1. This
#' probability is calculated using the survival functions (which indicate the
#' probability a case remains undetected by a certain time):
#' \deqn{prob(detect) = \frac{S(day_1|day_onset) - S(day_2|day_onset)}{S(day_1|day_onset)}}
#'
#' @param day_2 day of last observation: day_2 > day_1 >= day_onset
#' @param day_1 day of first observation: day_1 >= day_onset
#' @param day_onset day when case originates
#' @inheritParams undetected_prob
#'
#' @return a probability
detected_after_unobserved_prob <- function(day_2, day_1, day_onset,
reporting_parameters){
if(day_2 < day_1)
stop("second observation date must be at same time or after first")
(undetected_prob(day_1 + 0.5, day_onset, reporting_parameters) -
undetected_prob(day_2 + 0.5, day_onset, reporting_parameters)) /
undetected_prob(day_1 + 0.5, day_onset, reporting_parameters)
}
#' Cases arising on a given day which are reported between two subsequent days
#'
#' The number of cases originating on day_onset which are subsequently reported
#' between day_1 and day_2 is modelled as a binomial distribution:
#' \deqn{cases_reported\sim binomial(cases_true-cases_observed, p_detect)}
#' where cases_observed is the number of cases originating on day_onset reported on
#' day_1; p_detect is the probability a case was undetected on day_1 but
#' is detected by day_2.
#'
#' @inheritParams detected_after_unobserved_prob
#' @param cases_true true case count originating on day_onset
#' @param cases_observed observed case count for cases originating on day_onset on
#' day_1
#'
#' @return a count representing number of cases reported between day_1 and day_2
observed_cases_single <- function(cases_observed, cases_true, day_2, day_1, day_onset,
reporting_parameters){
if(cases_true < cases_observed)
stop("true case count must exceed reported")
I_remaining <- cases_true - cases_observed
if(I_remaining == 0)
cases <- 0
else
cases <- stats::rbinom(1, I_remaining,
detected_after_unobserved_prob(day_2, day_1, day_onset,
reporting_parameters))
cases
}
#' Generate trajectory of reported cases for a given count originating on a
#' particular day
#'
#' @inheritParams observed_cases_single
#' @param days_reporting a vector of days on which cases were counted
#'
#' @return a vector of reported case counts of same length as days_reporting
observed_cases_trajectory <- function(cases_true, days_reporting, day_onset,
reporting_parameters){
I_observed <- vector(length = length(days_reporting))
for(i in 1:length(days_reporting)){
if(i == 1) {
I_previous_obs <- 0
day_previous_report <- day_onset
} else {
I_previous_obs <- I_observed[i - 1]
day_previous_report <- days_reporting[i - 1]
}
new_cases <- observed_cases_single(I_previous_obs, cases_true,
days_reporting[i],
day_previous_report,
day_onset,
reporting_parameters)
I_observed[i] <- I_previous_obs + new_cases
}
I_observed
}
#' Generates true cases for a single day
#'
#' Cases are assumed to be generated by a negative-binomial renewal
#' process:
#' \deqn{cases_t \sim neg-binomial(Rt \sum_s=1^\infty w(s) cases_t-s, kappa)}
#' where kappa is the over-dispersion parameter. Here, we truncate this
#' sum at a user specified lag (determined by the length of the weight vector).
#'
#' @param Rt effective reproduction number
#' @param kappa over-dispersion parameter of negative binomial
#' @param cases_history a vector containing history of cases arranged from
#' recent to past
#' @param weights a vector containing weights representing the serial interval
#' distribution
#'
#' @return a count of cases
true_cases_single <- function(Rt, kappa, cases_history, weights){
if(length(cases_history) != length(weights))
stop("case history vector must be same length as weights")
mean_cases <- expected_cases(Rt=Rt, weights=weights, cases_history=cases_history)
stats::rnbinom(1, mu=mean_cases, size=kappa)
}
#' Generate true case series
#'
#' Cases are assumed to be generated by a negative-binomial renewal
#' process:
#' \deqn{cases_t \sim neg-binomial(Rt \sum_s=1^\infty w(s) cases_t-s, kappa)}
#' where kappa is the over-dispersion parameter. Here, we truncate this
#' sum at a user specified lag (determined by the length of the weight vector).
#'
#' @param days_total number of days to simulate process for (post-initial
#' period)
#' @param Rt_function takes day as an input and outputs an Rt value
#' @inheritParams true_cases_single
#' @inheritParams gamma_discrete_pmf
#' @param initial_parameters a named list of 'mean' and 'length'
#' which is used to generate seed cases by sampling from a negative binomial
#' distribution
#' @param serial_max maximum point at which to truncate sum in renewal process
#'
#' @return vector of true cases of length days_total
#' @export
#'
#' @examples
#' library(incidenceinflation)
#' # generate case series for 100 days with time-varying Rt
#' days_total <- 100
#'
#' # make Rt interpolation function
#' v_Rt <- c(rep(1.3, 25), rep(1, 25), rep(2, 50))
#' Rt_function <- stats::approxfun(1:days_total, v_Rt)
#'
#' # serial interval parameters
#' s_params <- list(mean=5, sd=1)
#'
#' # renewal parameter
#' kappa <- 2
#' cases <- true_cases(days_total, Rt_function, kappa, s_params)
true_cases <- function(days_total, Rt_function, kappa, serial_parameters,
initial_parameters=list(mean=3, length=5),
serial_max=40){
w <- weights_series(serial_max, serial_parameters)
initial_length <- initial_parameters$length
I_true <- vector(length=(days_total + initial_length))
# seed epidemic with initial values
I_true[1:initial_length] <- stats::rnbinom(n=initial_length,
mu=initial_parameters$mean,
size=kappa)
# generate further incidences using state equation
for(d in 1:days_total){
d1 <- d + initial_length
if(d1 > serial_max) {
I_history <- I_true[(d1 - serial_max):(d1 - 1)]
} else {
I_history <- I_true[1:d1]
d_remaining <- serial_max - d1
I_history <- c(rep(0, d_remaining), I_history)
}
I_history_rev <- rev(I_history)
Rt <- Rt_function(d)
I_true[d1] <- true_cases_single(Rt, kappa, I_history_rev, w)
}
I_true[-(1:initial_length)]
}
#' Creates a reporting parameter tibble with a single set of reporting
#' parameters
#'
#' @param time_onsets a vector of symptom onset times
#' @inheritParams undetected_prob
#'
#' @return a tibble with three columns: 'time_onset', 'mean', 'sd'
create_reporting_from_single_parameters_df <- function(time_onsets,
reporting_parameters) {
dplyr::tibble(time_onset=time_onsets,
mean=reporting_parameters$mean,
sd=reporting_parameters$sd
)
}
#' Generate reported case trajectories for each day when cases appear
#'
#' @param cases_true a vector of true cases originating each day
#' @param reporting_parameters either a named list of 'mean' and 'sd' of gamma
#' distribution indicating a single reporting delay distribution for the whole period
#' or a tibble with columns: 'time_onset', 'mean', 'sd' indicating the reporting
#' delay distribution for each time period
#' @param days_max_follow_up max days at which to simulate reporting case
#' trajectory
#'
#' @return a tibble with observed case trajectories for each time of onset
#' @export
#' @importFrom magrittr "%>%"
#' @examples
#' library(incidenceinflation)
#' observed_cases(stats::rpois(5, 5), list(mean=5, sd=1))
observed_cases <- function(cases_true, reporting_parameters,
days_max_follow_up=30){
if(methods::is(reporting_parameters, "list"))
reporting_parameters <- create_reporting_from_single_parameters_df(
seq_along(cases_true),
reporting_parameters)
if(nrow(reporting_parameters) != length(cases_true))
stop("Number of rows in reporting parameters tibble must match number of cases.")
d_max <- length(cases_true)
for(t in 1:d_max){
a_max <- min(c(d_max, t + days_max_follow_up))
obs_time <- seq(t, a_max, 1)
reporting_parameters_current <- list(mean=reporting_parameters$mean[t],
sd=reporting_parameters$sd[t])
cases_obs_trajec <- observed_cases_trajectory(
cases_true=cases_true[t],
days_reporting=obs_time,
day_onset=t,
reporting_parameters_current)
# stack into data frame containing identifying info
I_obs_single_onset <- dplyr::tibble(
time_onset=rep(t, length(cases_obs_trajec)),
time_reported=obs_time,
cases_reported=cases_obs_trajec,
cases_true=cases_true[t])
if(t == 1)
cases_obs <- I_obs_single_onset
else
cases_obs <- cases_obs %>%
dplyr::bind_rows(I_obs_single_onset)
}
cases_obs
}
#' Thins cases for a single onset time
#'
#' Only those data points around where the case count changes contain
#' information. This method searches and keeps only those data points.
#'
#' @param case_obs_single a tibble produced by observed_cases but subsetted to
#' a single onset time
#'
#' @return a thinned tibble
thin_single_series <- function(case_obs_single) {
if(dplyr::n_distinct(case_obs_single$time_onset) > 1)
stop("Only single onset time allowed.")
changepoints <- which(diff(case_obs_single$cases_reported) > 0)
postchangepoints <- changepoints + 1
num_points <- nrow(case_obs_single)
interesting_points <- sort(unique(c(1, postchangepoints, changepoints,
num_points)))
case_obs_single[interesting_points, ]
}
#' Thins cases
#'
#' Only those data points around where the case count changes contain
#' information. This method searches and keeps only those data points.
#'
#' @param case_obs a tibble produced by observed_cases
#'
#' @return a thinned tibble
#' @importFrom magrittr "%>%"
#' @export
thin_series <- function(case_obs) {
onset_times <- unique(case_obs$time_onset)
for(i in seq_along(onset_times)) {
single_df <- case_obs %>%
dplyr::filter(case_obs$time_onset == onset_times[i])
thinned_single_df <- thin_single_series(single_df)
if(i == 1)
thinned_df <- thinned_single_df
else
thinned_df <- thinned_df %>%
dplyr::bind_rows(thinned_single_df)
}
thinned_df
}
#' Generates case data with snapshots of reported cases at each onset day
#'
#' @inheritParams true_cases
#' @inheritParams observed_cases
#' @param thinned a Boolean indicating whether to thin cases to only informative time points
#'
#' @return a tibble with observed case trajectories for each time of onset
#' @export
#' @examples
#' library(incidenceinflation)
#' # generate case series for 100 days with time-varying Rt
#' days_total <- 100
#'
#' # make Rt interpolation function
#' v_Rt <- c(rep(1.3, 25), rep(1, 25), rep(2, 50))
#' Rt_function <- stats::approxfun(1:days_total, v_Rt)
#'
#' # serial interval parameters
#' s_params <- list(mean=5, sd=1)
#'
#' # renewal over-dispersion parameter
#' kappa <- 2
#'
#' # reporting delays
#' r_params <- list(mean=10, sd=5)
#' reported_cases <- generate_snapshots(
#' days_total, Rt_function,
#' s_params, r_params)
generate_snapshots <- function(days_total, Rt_function, serial_parameters,
reporting_parameters,
thinned=FALSE,
kappa=1000,
days_max_follow_up=30,
initial_parameters=list(mean=30, length=20),
serial_max=40) {
check_parameter_names(reporting_parameters,
serial_parameters)
real_cases <- true_cases(
days_total=days_total, Rt_function=Rt_function,
kappa=kappa, serial_parameters=serial_parameters,
initial_parameters=initial_parameters,
serial_max=serial_max
)
reported_cases <- observed_cases(
cases_true=real_cases,
reporting_parameters=reporting_parameters,
days_max_follow_up=days_max_follow_up)
if(thinned)
reported_cases <- thin_series(reported_cases)
reported_cases
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.