R/coarse2estim.R

Defines functions coarse2estim

Documented in coarse2estim

#' Link coarseDataTools and EpiEstim
#'
#' \code{coarse2estim} Transforms outputs of
#' \code{coarseDataTools::dic.fit.mcmc} to right format for input into
#' \code{estimate_R}
#'
#' @param x An object generated by function
#' \code{coarseDataTools::dic.fit.mcmc}, containing posterior estimates of the
#' serial interval distribution.
#' @param dist The parametric distribution used when estimating the serial
#' interval.
#' #' Should be one of "G" (Gamma), "W" (Weibull), "L" (Lognormal), "off1G"
#' (Gamma shifted by 1), "off1W" (Weibull shifted by 1), or "off1L" (Lognormal
#' shifted by 1).  If not present, computed automatically from \code{x}.
#' @param samples A dataframe containing the posterior samples of serial
#' interval parameters corresponding to the parametric choice specified in
#' \code{dist}. If not present, computed automatically from \code{x}.
#' @param thin A positive integer corresponding to thinning parameter; of the
#' posterior sample of serial interval distributions in x, only 1 in \code{thin}
#'  will be kept, the rest will be discarded.
#' @return A list with two elements:
#' \itemize{
#' \item{si_sample: a matrix where each column gives one distribution of the
#' serial interval to be explored, obtained from x by thinning the MCMC chain.}
#' \item{si_parametric_distr: the parametric distribution used when estimating
#' the serial interval stored in x. }
#' }
#' @seealso \code{\link{estimate_R}}
#' @author The Hackout3 Parameter Estimation team.
#'
#' @importFrom stats pgamma plnorm pweibull qlnorm qweibull rgamma rmultinom
#'
#' @export
#' @examples
#' \dontrun{
#' ## Note the following examples use an MCMC routine
#' ## to estimate the serial interval distribution from data,
#' ## so they may take a few minutes to run
#'
#' ## load data on rotavirus
#' data("MockRotavirus")
#'
#' ## estimate the serial interval from data
#' SI.fit <- coarseDataTools::dic.fit.mcmc(dat = MockRotavirus$si_data,
#'                      dist = "G",
#'                      init.pars = init_mcmc_params(MockRotavirus$si_data, "G"),
#'                      burnin = 1000,
#'                      n.samples = 5000)
#'
#' ## use coarse2estim to turn this in the right format for estimate_R
#' si_sample <- coarse2estim(SI.fit, thin = 10)$si_sample
#'
#' ## use estimate_R to estimate the reproduction number
#' ## based on these estimates of the serial interval
#' R_si_from_sample <- estimate_R(MockRotavirus$incidence,
#'                             method="si_from_sample",
#'                             si_sample=si_sample,
#'                             config = make_config(list(n2 = 50)))
#' plot(R_si_from_sample)
#' }
#'
coarse2estim <- function(x = NULL, dist = x@dist, samples = x@samples,
                         thin = 10) {
  if (is.null(x)) # then check that dist and samples are what we expect
  {
    if (!(dist %in% c("G", "W", "L", "off1G", "off1W", "off1L"))) {
      stop("The supported distributions are 'G' (Gamma), 'W' (Weibull),
           'L' (Lognormal), 'off1G' (Gamma shifted by 1),
           'off1W' (Weibull shifted by 1),
           or 'off1L' (Lognormal shifted by 1). ")
    }
    if (!is.data.frame(samples)) {
      stop("samples should be a dataframe, e.g. as produced in x@samples,
           where x is the output of coarseDataTools::dic.fit.mcmc.")
    }
  }

  if (thin > 1) {
    index <- seq(1, nrow(samples), thin)
    samples <- samples[index, ]
  }
  n_samples <- nrow(samples)

  ##  Probability matrix that will be used in EpiEstim based on which
  ## distribution is specified by the user
  if (dist == "G") {
    ## For each input parameter set, find the 99th percentile, and take the
    ## maximum of these as the maximum
    ## serial interval that we need to consider
    maxValue <- max(vnapply(seq_len(n_samples), function(i) 
      ceiling(qgamma(0.999, shape = samples[i, 1], scale = samples[i, 2]))))
    max_interval <- seq_len(maxValue)
    prob_matrix <- apply(samples, 1, function(x) 
      pgamma(max_interval + 0.5, shape = x[1], scale = x[2]) - 
        pgamma(max_interval - 0.5, shape = x[1], scale = x[2]))
  } else if (dist == "W") {
    maxValue <- max(vnapply(seq_len(n_samples), function(i) 
      ceiling(qweibull(0.999, shape = samples[i, 1], scale = samples[i, 2]))))
    max_interval <- seq_len(maxValue)
    prob_matrix <- apply(samples, 1, function(x) 
      pweibull(max_interval + 0.5, shape = x[1], scale = x[2]) - 
        pweibull(max_interval - 0.5, shape = x[1], scale = x[2]))
  } else if (dist == "L") {
    maxValue <- max(vnapply(seq_len(n_samples), function(i) 
      ceiling(qlnorm(0.999, meanlog = samples[i, 1], sdlog = samples[i, 2]))))
    max_interval <- seq_len(maxValue)
    prob_matrix <- apply(samples, 1, function(x) 
      plnorm(max_interval + 0.5, meanlog = x[1], sdlog = x[2]) - 
        plnorm(max_interval - 0.5, meanlog = x[1], sdlog = x[2]))
  } else if (dist == "off1G") {
    ## offset gamma distribution with shifted min and max value of max
    ## serial interval
    maxValue <- max(vnapply(seq_len(n_samples), function(i) 
      ceiling(qgamma(0.999, shape = samples[i, 1], scale = samples[i, 2]))))
    max_interval <- seq(0, maxValue)
    prob_matrix <- apply(samples, 1, function(x) 
      pgamma(max_interval + 0.5, shape = x[1], scale = x[2]) - 
        pgamma(max_interval - 0.5, shape = x[1], scale = x[2]))
  } else if (dist == "off1W") {
    ## offset weibull distribution with shifted min and max value of max
    ## serial interval
    maxValue <- max(vnapply(seq_len(n_samples), function(i) 
      ceiling(qweibull(0.999, shape = samples[i, 1], scale = samples[i, 2]))))
    max_interval <- seq(0, maxValue)
    prob_matrix <- apply(samples, 1, function(x) 
      pweibull(max_interval + 0.5, shape = x[1], scale = x[2]) - 
        pweibull(max_interval - 0.5, shape = x[1], scale = x[2]))
  } else if (dist == "off1L") {
    ## offset lognormal distribution with shifted min and max value of max
    ## serial interval
    maxValue <- max(vnapply(seq_len(n_samples), function(i) 
      ceiling(qlnorm(0.999, meanlog = samples[i, 1], sdlog = samples[i, 2]))))
    max_interval <- seq(0, maxValue)
    prob_matrix <- apply(samples, 1, function(x) 
      plnorm(max_interval + 0.5, meanlog = x[1], sdlog = x[2]) - 
        plnorm(max_interval - 0.5, meanlog = x[1], sdlog = x[2]))
  } else {
    stop(sprintf("Distribution (%s) not supported", dist))
  }
  # adding initial 0 for P(SI=0)
  prob_matrix <- rbind(rep(0, n_samples), prob_matrix)
  # renormalising
  prob_matrix <- apply(prob_matrix, 2, function(x) x / sum(x))

  out <- list(si_sample = prob_matrix, si_parametric_distr = dist)

  return(out)
}
annecori/EpiEstim documentation built on Oct. 14, 2023, 1:54 a.m.