treerabid: Reconstruct Rabies Transmission Trees

Documented in fit_sims_pi si_fun_lnorm sim_generations sim_times_pi

# Functions to estimate detection probabilities ----
# Per Cori et al 2019

#' Simulate the number of generations between two linked cases based on their
#' serial interval
#'
#' This is based on Cori et al. 2019
#' (https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1006554),
#' which works out analytical expectations of the number of unobserved generations
#' between linked cases given a detection probaility.
#'
#' @param t_diff the observed time differences between linked cases
#' @param si_fun a function for the serial interval with arguments N (the number
#'  to draw) and params (a list of parameters for the function), see si_fun_lnorm for
#'  an example.
#' @param params a list with parameters for the si_fun function to draw serial intervals
#' @param max_kappa integer, the constrained maximum number of generatios between two linked cases
#' @param kappa_weights boolean, whether to return the simulated generations for each case or
#'  the weights (i.e. the proportion of cases separated by 1:max_kappa generations)
#' @param known_kappas vector of known kappas (i.e. if some cases are traced, you know
#'  kappa = 1 for these cases)
#' @param sort whether to sort the first column and t_diff to weight towards kappa = 1
#'  to deal with higher sensitivity at higher reporting thresholds
#'
#' @return either a vector of the simulated generations or a vector of proportions of 1:max_kappa
#' @importFrom matrixStats rowCumsums
#' @importFrom Rfast rowMins
#' @export
#'
#'
sim_generations <- function(t_diff, si_fun, params, max_kappa = 100,
                            kappa_weights = TRUE, known_kappas = NULL,
                            sort = TRUE) {

  if(!is.null(known_kappas)) {
    t_diff <- t_diff[known_kappas == 0]
  }

  nr <- length(t_diff)
  vals <- si_fun(nr * max_kappa, params)
  out <- matrix(vals, nrow = nr)

  # starting one sorted from min to max (closest poss match in high rep scenario)
  if(sort) {
    t_diff <- sort(t_diff)
    out[, 1] <- sort(out[, 1])
  }

  # get the difference
  out_sum <- abs(rowCumsums(out) - t_diff) # get the diff
  gens <- rowMins(out_sum) # get the closest one to the diff

  if(!is.null(known_kappas)) {
    known_kappas[known_kappas == 0] <- gens
    gens <- known_kappas
  }

  if(kappa_weights) {
    gens <- tabulate(gens, nbins = max_kappa)/length(gens)
  }

  return(gens)
}


#' Example serial interval function for input
#'
#' @param N the number to draw
#' @param params a list with the parameters
#'
#' @return a vector of serial intervals
#' @export
#'
si_fun_lnorm <- function(N, params) {

  rlnorm(N, meanlog = params$SI_meanlog, sdlog = params$SI_sdlog)

}

#' Simulate times given generation function & pi, for validating detection estimation
#'
#' @inheritParams sim_generations
#' @param nobs the number of observations to simulation
#' @param alpha probability, the value at which to constrain kappa (i.e. to determine max_kappa
#'  for sim_generations), i.e. the probability of observing this kappa for a given pi is < alpha
#' @param pi detection probability (the proportion of cases which are detected)
#'
#' @return a vector of time differences between linked cases
#' @export
#'
sim_times_pi <- function(si_fun, nobs, params, alpha = 0.001, pi) {

  max_kappa <- get_kappa(alpha, pi)
  out <- matrix(si_fun(nobs * max_kappa, params), ncol = max_kappa)
  out <- t(apply(out, 1, cumsum))
  weights <- dgeom(seq_len(max_kappa) - 1, pi)
  kappas <- sample(seq_len(max_kappa), nobs, prob = weights, replace = TRUE)
  t_diff <- out[cbind(seq_len(nobs), kappas)]

  return(t_diff)
}

#' Fit detection probability given observed time differences between linked cases
#' and a serial interval distribution
#'
#' @inheritParams sim_generations
#' @inheritParams sim_times_pi
#' @param nsims the number of estimates to generate for the observed time differences
#' @param candidate_pis the candidate values of the detection probability to evaluate
#'
#' @return a vector of estimates of the detection probability generated by minimizing
#'  the sum of squares between the observed and the expected (only looks at values passed
#'  into candidate_pis)
#' @export
#'
#' @importFrom foreach foreach
#' @importFrom doRNG %dorng%
#'
#' @examples
#' # This example shows how to generate simulated data based on a detection estimate
#' # and a serial interval distribution and see whether the values can be recovered
#'
#' \dontrun{
#' system.time({
#' tt <- rbindlist(lapply(runif(1000), function(z) {
#'     t_diff <- sim_times_pi(si_fun_lnorm, nobs = 500, params = treerabid::params_treerabid, alpha = 0.01,
#'                           pi = z)
#'     ests <- fit_sims_pi(t_diff, nsims = 5, candidate_pis = seq(0.01, 0.99, by = 0.01),
#'                        si_fun_lnorm, params = treerabid::params_treerabid, alpha = 0.01)
#'     data.table(true = z, estimated = ests)}))
#'  })
#'
#' plot(tt$true, tt$estimated)
#' abline(a = 0, b = 1, col = "red") # the 1:1 line
#' }
#'
#'
fit_sims_pi <- function(t_diff, nsims = 1000,
                        candidate_pis, si_fun, params, alpha = 0.001,
                        known_kappas = NULL,
                        seed = 132, sort = TRUE) {

  max_max_kappa <- get_kappa(alpha, pi = min(candidate_pis))

  candidate_weights <- lapply(candidate_pis,
                              function(x) {
                                dgeom(seq_len(max_max_kappa) - 1, x)
                              })
  candidate_weights <- do.call(cbind, candidate_weights)

  out <-
    foreach(i =  seq_len(nsims), .combine = c, .options.RNG = seed,
            .packages = "treerabid") %dorng% {
              weights_sim <- sim_generations(t_diff, si_fun, params,
                                             max_kappa = max_max_kappa,
                                             kappa_weights = TRUE, known_kappas,
                                             sort)
              candidate_pis[which.min(colSums((weights_sim - candidate_weights)^2))]

            }

    return(out)

}

get_kappa <- function(alpha, pi, min_kappa = 2) {
  pmax(qgeom(1 - alpha, pi) + 1L, min_kappa)
}