R/simulate_outbreak.R

Defines functions simulate_outbreak

Documented in simulate_outbreak

#' Simulate outbreaks with transmission trees
#'
#' This function implements an individual-based outbreak simulator which
#' generates transmission trees. A Poisson branching process is used to generate
#' new cases in time, using the distribution of the reproduction number (R) and
#' of the duration of the infectious period to determine rates of infection. A
#' density-dependence term is used so that individual infectiousness decreases
#' with the proportion of susceptible individuals in the population (see
#' details). For each simulated case, the simulator generates an individual
#' reproduction number, date of infection, symptom onset, and reporting using
#' user-specified distributions. Distributions can be provided either as
#' `distcrete` objects, as functions computing the probability mass functions
#' (PMF), or as vectors of numbers taken as the PMF for values on 0, 1, ...,
#' `length(input) - 1`
#'
#' @param duration the number of days to run the simulation for
#'
#' @param population_size the number of susceptible hosts to use in the
#'   simulation; defaults to 100
#'
#' @param R_values a vector of values to be used as reproduction number; values
#'   will be drawn at random from this vector to determine the expected numbero
#'   of secondary cases for each new case
#'
#' @param dist_incubation the distribution of the incubation period, i.e. the
#'   time interval between infection and onset of symtpoms
#'
#' @param dist_infectious_period the distribution of the infectious period,
#'   i.e. the time interval between the moment a case starts showing symptoms
#'   (onset) and the moment they infect new secondary cases
#'
#' @param dist_reporting (optional) the distribution of the reporting delay,
#'   i.e. the time interval between symptom onset and the date at which the case
#'   is notified; if `NULL` (default) reporting dates will not be simulated
#'
#' @details
#' Individual infectiousness is determined as \eqn{R x w(t - t_{onset})} where
#' \eqn{R} is the individual reproduction number, \eqn{w} is the PMF of the
#' duration of the infectious period, \eqn{t} is current time, and
#' \eqn{t_{onset}} is the date of symptom onset for the considered individual.
#' The number of new cases at time *t* is then taken from a Poisson distribution
#' with a rate of infection \eqn{\lambda_t n_s / n} where \eqn{lambda_t} is the
#' sum of all individual infectiousness at time *t*, \eqn{n_s} is the number of
#' susceptible individuals in the population, and *n* is the total population
#' size.
#' 
#' @export
#' 
#' @author Thibaut Jombart \email{thibautjombart@@gmail.com}
#'
#' @examples
#' 
#' ## make toy distributions (showing the 3 possible input types)
#' incubation <- c(0, 1, 1, 1, 1) # numbers = unscaled PMF
#' infectious_period <- make_disc_gamma(10, 7) # distcrete object
#' reporting <- function(x) dpois(x, 5) # PMF function
#' set.seed(1)
#' x <- simulate_outbreak(R = runif(100, 1, 3), # random values on [1;3]
#'                        dist_incubation= incubation,
#'                        dist_infectious_period = infectious_period,
#'                        dist_reporting = reporting)
#' dim(x)
#' head(x)
#' tail(x)
#' if (require(epicontacts)) {
#'   plot(x)
#' }

simulate_outbreak <- function(duration = 100, # duration of the simulation
                              population_size = 100,
                              R_values, # average secondary cases
                              dist_incubation, # infection -> onset
                              dist_infectious_period, # onset -> new infections
                              dist_reporting = NULL  # onset -> reporting
                              ) {

  ## General strategy to handle the distribution inputs

  ## We want to be a bit flexible as to how inputs are provided for the
  ## distributions. Inputs should always correspond to a probability mass
  ## function (pmf), from which we derive random number generators if
  ## needed. The function `make_pmf` will accept `distcrete` objects, functions
  ## computing the pmf, or a vector of numbers taken as the relative
  ## probabilities on 0:(n-1) (rescaled to sum to 1 if needed). To create random
  ## number generators, we evaluate the pmf on 0:1000, and pass this on to
  ## make_number_generator.
  
  ## determine if we need to simulate reporting dates
  simulate_reporting <- !is.null(dist_reporting)
  
  ## make random distribution from inputs, from pmf to random numbers
  x_values <- 0:1000
  pmf_incubation <- make_pmf(dist_incubation)
  pmf_infectious_period <- make_pmf(dist_infectious_period)
  r_incubation <- make_number_generator(pmf_incubation(x_values))
  r_infectious_period <- make_number_generator(pmf_infectious_period(x_values))
  
  assert_R(R_values)
  r_R <- function(n = 1) sample_(R_values, n, replace = TRUE)


  ## make index case
  index_case <- make_index_case(id = draw_labels(1),
                                date_infection = 0,
                                date_onset = r_incubation(1),
                                R = r_R(1))

  if (simulate_reporting) {
    pmf_reporting <- make_pmf(dist_reporting)
    r_reporting <- make_number_generator(pmf_reporting(x_values))
    index_case$date_report <- index_case$date_onset + r_reporting(1)
  }

  n_susceptibles <- population_size - 1
  

  ## make vectors for the outputs
  out <- index_case

  for (t in seq_len(duration)) {
    
    ## Individual infectiousness at time 't' is defined as the infectious period
    ## pmf multiplied by the individual R; the global force of infection is a
    ## rate defined as the sum of individual infectiousnesses. This rate is
    ## weighted by the proportion of susceptible individuals in the population
    ## to introduce density-dependence, the result being used to draw the number
    ## of secondary cases from a Poisson distribution
    
    if (n_susceptibles > 0) {
      individual_infectiousness <- pmf_infectious_period(t - out$date_onset) * out$R
      rate_infection <- sum(individual_infectiousness) * (n_susceptibles / population_size)
      n_new_cases <- stats::rpois(1, lambda = rate_infection)
      n_new_cases <- min(n_susceptibles, n_new_cases)
      ## proba_infection <- 1 - exp(-rate_infection)
      ## n_new_cases <- rbinom(1, size = n_susceptibles, prob = proba_infection)
    } else {
      n_new_cases <- 0
    }

    ## handle new cases
    if(n_new_cases > 0){

      ## remove susceptibles
      n_susceptibles <- n_susceptibles - n_new_cases

      
      ## build new cases: id, different dates, etc.

      ## infectors are picked from infectious cases, weighted by their
      ## respective infectiousness
      new_infectors <- sample(out$id,
                              size = n_new_cases,
                              replace = TRUE,
                              prob = individual_infectiousness)
      out$source <- c(out$source,
                      new_infectors)

      ## id
      out$id <- c(out$id,
                  draw_labels(n_new_cases))
      
      ## new dates of new infections are the current day
      new_date_infection <- rep(t, n_new_cases)
      out$date_infection <- c(out$date_infection,
                              new_date_infection)

      ## new dates of onset picked using the incubation period
      new_date_onset <- t + r_incubation(n_new_cases)
      out$date_onset <- c(out$date_onset,
                          new_date_onset)

      if (simulate_reporting) {
        ## new dates of report picked using the delay to reporting
        new_date_report <- new_date_onset + r_reporting(n_new_cases)
        out$date_report <- c(out$date_report,
                             new_date_report)
      }
      
      ## new R values are drawn at random
      new_R <- r_R(n_new_cases)
      out$R <- c(out$R,
                 new_R)
      
    }
  }


  ## output will be a data.frame with a special type for e.g. printing and
  ## conversion
  if (!simulate_reporting) {
    out$date_report <- NULL
  }
  out <- data.frame(out, stringsAsFactors = FALSE)
  class(out) <- c("outbreak", class(out))
  attr(out, "has_contacts") <- FALSE
  out
}
reconhub/simulacr documentation built on Dec. 9, 2020, 7:57 p.m.