# Functions to estimate detection probabilities ----
# Per Cori et al 2019
#' Simulate the number of generations between two linked cases based on their
#' serial interval
#'
#' This is based on Cori et al. 2019
#' (https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1006554),
#' which works out analytical expectations of the number of unobserved generations
#' between linked cases given a detection probaility.
#'
#' @param t_diff the observed time differences between linked cases
#' @param si_fun a function for the serial interval with arguments N (the number
#' to draw) and params (a list of parameters for the function), see si_fun_lnorm for
#' an example.
#' @param params a list with parameters for the si_fun function to draw serial intervals
#' @param max_kappa integer, the constrained maximum number of generatios between two linked cases
#' @param kappa_weights boolean, whether to return the simulated generations for each case or
#' the weights (i.e. the proportion of cases separated by 1:max_kappa generations)
#' @param known_kappas vector of known kappas (i.e. if some cases are traced, you know
#' kappa = 1 for these cases)
#' @param sort whether to sort the first column and t_diff to weight towards kappa = 1
#' to deal with higher sensitivity at higher reporting thresholds
#'
#' @return either a vector of the simulated generations or a vector of proportions of 1:max_kappa
#' @importFrom matrixStats rowCumsums
#' @importFrom Rfast rowMins
#' @export
#'
#'
sim_generations <- function(t_diff, si_fun, params, max_kappa = 100,
kappa_weights = TRUE, known_kappas = NULL,
sort = TRUE) {
if(!is.null(known_kappas)) {
t_diff <- t_diff[known_kappas == 0]
}
nr <- length(t_diff)
vals <- si_fun(nr * max_kappa, params)
out <- matrix(vals, nrow = nr)
# starting one sorted from min to max (closest poss match in high rep scenario)
if(sort) {
t_diff <- sort(t_diff)
out[, 1] <- sort(out[, 1])
}
# get the difference
out_sum <- abs(rowCumsums(out) - t_diff) # get the diff
gens <- rowMins(out_sum) # get the closest one to the diff
if(!is.null(known_kappas)) {
known_kappas[known_kappas == 0] <- gens
gens <- known_kappas
}
if(kappa_weights) {
gens <- tabulate(gens, nbins = max_kappa)/length(gens)
}
return(gens)
}
#' Example serial interval function for input
#'
#' @param N the number to draw
#' @param params a list with the parameters
#'
#' @return a vector of serial intervals
#' @export
#'
si_fun_lnorm <- function(N, params) {
rlnorm(N, meanlog = params$SI_meanlog, sdlog = params$SI_sdlog)
}
#' Simulate times given generation function & pi, for validating detection estimation
#'
#' @inheritParams sim_generations
#' @param nobs the number of observations to simulation
#' @param alpha probability, the value at which to constrain kappa (i.e. to determine max_kappa
#' for sim_generations), i.e. the probability of observing this kappa for a given pi is < alpha
#' @param pi detection probability (the proportion of cases which are detected)
#'
#' @return a vector of time differences between linked cases
#' @export
#'
sim_times_pi <- function(si_fun, nobs, params, alpha = 0.001, pi) {
max_kappa <- get_kappa(alpha, pi)
out <- matrix(si_fun(nobs * max_kappa, params), ncol = max_kappa)
out <- t(apply(out, 1, cumsum))
weights <- dgeom(seq_len(max_kappa) - 1, pi)
kappas <- sample(seq_len(max_kappa), nobs, prob = weights, replace = TRUE)
t_diff <- out[cbind(seq_len(nobs), kappas)]
return(t_diff)
}
#' Fit detection probability given observed time differences between linked cases
#' and a serial interval distribution
#'
#' @inheritParams sim_generations
#' @inheritParams sim_times_pi
#' @param nsims the number of estimates to generate for the observed time differences
#' @param candidate_pis the candidate values of the detection probability to evaluate
#'
#' @return a vector of estimates of the detection probability generated by minimizing
#' the sum of squares between the observed and the expected (only looks at values passed
#' into candidate_pis)
#' @export
#'
#' @importFrom foreach foreach
#' @importFrom doRNG %dorng%
#'
#' @examples
#' # This example shows how to generate simulated data based on a detection estimate
#' # and a serial interval distribution and see whether the values can be recovered
#'
#' \dontrun{
#' system.time({
#' tt <- rbindlist(lapply(runif(1000), function(z) {
#' t_diff <- sim_times_pi(si_fun_lnorm, nobs = 500, params = treerabid::params_treerabid, alpha = 0.01,
#' pi = z)
#' ests <- fit_sims_pi(t_diff, nsims = 5, candidate_pis = seq(0.01, 0.99, by = 0.01),
#' si_fun_lnorm, params = treerabid::params_treerabid, alpha = 0.01)
#' data.table(true = z, estimated = ests)}))
#' })
#'
#' plot(tt$true, tt$estimated)
#' abline(a = 0, b = 1, col = "red") # the 1:1 line
#' }
#'
#'
fit_sims_pi <- function(t_diff, nsims = 1000,
candidate_pis, si_fun, params, alpha = 0.001,
known_kappas = NULL,
seed = 132, sort = TRUE) {
max_max_kappa <- get_kappa(alpha, pi = min(candidate_pis))
candidate_weights <- lapply(candidate_pis,
function(x) {
dgeom(seq_len(max_max_kappa) - 1, x)
})
candidate_weights <- do.call(cbind, candidate_weights)
out <-
foreach(i = seq_len(nsims), .combine = c, .options.RNG = seed,
.packages = "treerabid") %dorng% {
weights_sim <- sim_generations(t_diff, si_fun, params,
max_kappa = max_max_kappa,
kappa_weights = TRUE, known_kappas,
sort)
candidate_pis[which.min(colSums((weights_sim - candidate_weights)^2))]
}
return(out)
}
get_kappa <- function(alpha, pi, min_kappa = 2) {
pmax(qgeom(1 - alpha, pi) + 1L, min_kappa)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.