###########################################################################
#' Function to compute confidence intervals for relatedness and switch rate parameters
#'
#' Given a matrix of marker allele frequencies, a vector of inter-marker
#' distances, and estimates of the relatedness and switch rate parameters,
#' \code{compute_r_and_k_CIs} returns confidence intervals around the parameter
#' estimates. The default confidence is 95\%. The intervals are approximate.
#' They are generated using parametric bootstrap draws of the parameter
#' estimates based on genotype calls for haploid genotype pairs simulated under
#' the HMM described in [1] using the input parameter estimates. The quality of
#' the approximation and compute time increases with the number of parametric
#' bootstrap draws, which are generated in parallel using a specified number of
#' cores.
#'
#' @param fs Matrix of marker allele frequencies, i.e. the \eqn{ft}s in [1].
#' Specifically, a \eqn{m} by \eqn{Kmax} matrix, where \eqn{m} is the marker
#' count and \eqn{Kmax} is the maximum cardinality (per-marker allele count)
#' observed over all \eqn{m} markers. If, for any \eqn{t = 1,...,m}, the
#' maximum cardinality exceeds that of the \eqn{t}-th marker (i.e. if
#' \eqn{Kmax > Kt}), then all \code{fs[t,1:Kt]} are in (0,1] and all
#' \code{fs[t,(Kt+1):Kmax]} are zero. For example, if \eqn{Kt = 2} and
#' \eqn{Kmax = 4} then \code{fs[t,]} might look like \code{[0.3, 0.7, 0, 0]}.
#' @param ds Vector of \eqn{m} inter-marker distances, i.e. the \eqn{dt}s in
#' [1]. The \eqn{t}-th element of the inter-marker distance vector,
#' \code{ds[t]}, contains the distance between marker \eqn{t} and \eqn{t+1}
#' such that \code{ds[m] = Inf}, where \eqn{m} is the marker count. (Note that
#' this differs slightly from [1], where \code{ds[t]} contains the distance
#' between marker \eqn{t-1} and \eqn{t}). Distances between markers on
#' different chromosomes are also considered infinite, i.e. if the chromosome of
#' marker \eqn{t+1} is not equal to the chromosome of the \eqn{t}-th marker,
#' \code{ds[t] = Inf}.
#' @param khat Estimate of the switch rate parameter, i.e. estimate of \eqn{k}
#' in [1].
#' @param rhat Estimate of the relatedness parameter, i.e. estimate of \eqn{r}
#' in [1].
#' @param confidence Confidence level (percentage) of the confidence interval
#' (default 95\%).
#' @param nboot Number of parametric bootstrap draws from which to compute the
#' confidence interval. Larger values provide a better approximation but
#' prolong computation.
#' @param core_count Number of cores to use to do computation. Set to 2 or more
#' for parallel computation. Defaults to the number detected on the machine
#' minus one.
#' @param warn_fs Logical indicating if the function should return warnings
#' following allele frequency checks.
#' @param ... Arguments to be passed to \code{\link{simulate_Ys}} and
#' \code{\link{estimate_r_and_k}}.
#'
#' @importFrom doRNG %dorng%
#' @importFrom foreach %dopar%
#'
#' @return Confidence intervals around input switch rate parameter, \eqn{k}, and
#' relatedness parameter, \eqn{r}.
#'
#' @examples
#' # First, stimulate some data
#' simulated_Ys <- simulate_Ys(fs = frequencies$Colombia, ds = markers$distances, k = 5, r = 0.25)
#'
#' # Second, estimate the switch rate parameter, k, and relatedness parameter, r
#' krhat <- estimate_r_and_k(fs = frequencies$Colombia, ds = markers$distances, Ys = simulated_Ys)
#'
#' # Third, compute confidence intervals (CIs)
#' compute_r_and_k_CIs(fs = frequencies$Colombia, ds = markers$distances, khat = krhat['khat'], rhat = krhat['rhat'])
#'
#' @references \enumerate{ \item Taylor, A.R., Jacob, P.E., Neafsey, D.E. and
#' Buckee, C.O., 2019. Estimating relatedness between malaria parasites.
#' Genetics, 212(4), pp.1337-1351.}
#'
#' @export
###########################################################################
compute_r_and_k_CIs <- function(fs, ds, khat, rhat, confidence = 95, nboot = 100,
core_count = parallel::detectCores() - 1,
warn_fs = TRUE,
...) {
# Check frequencies
fs_checks(fs, warn = warn_fs)
# Retrieve all additional parameters
all_params <- list(...)
# Define a subset of additional parameters to pass to simulate_Ys() using
# base::do.call(); see below. Alternatively, could use R.utils::doCall() with
# .ignoreUnusedArgs=TRUE.
sim_params <- all_params
sim_params$kinit <- NULL
sim_params$rinit <- NULL
# Register available cores in order to run code in parallel Note that
# packages parallel and foreach are available through doParallel, which is
# imported
doParallel::registerDoParallel(cores = core_count)
# Generate parametric bootstrap draw of r and k estimates %dorng% is similar
# to %dopar%, however %dorng% loops are reproducible whereas %dopar% loops
# are not, %dorng% returns the whole sequence of RNG seeds as an attribute
# whereas %dopar% does not.
rkhats_boot <- foreach::foreach(iboot = 1:nboot, .combine = rbind) %dorng% {
simulated_Ys <- do.call(simulate_Ys,
args = c(list(fs, ds, k = khat, r = rhat), sim_params))
rkhats_boot <- estimate_r_and_k(fs, ds, Ys = simulated_Ys, ...)
}
# Calculate CIs as quantiles of the parametric bootstrap draw
alpha <- (1 - confidence / 100)
CI <- apply(rkhats_boot, 2, quantile, probs = c(alpha/2, 1-alpha/2))
if (any(is.na(CI))) stop("Bootstrap CI is NA")
# End of function
return(t(CI))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.