R/RcppExports.R

Defines functions expit logit log_sum_exp_2 log_sum_exp xi_fun xi_double eta_fun eta_double oracle_joint oracle_mis_vec oracle_mis obj_for_weighted_lnorm obj_for_weighted_lbb elbo obj_for_mu_sigma2_wrapper obj_for_mu_sigma2 obj_for_eps obj_for_alpha obj_for_rho pen_seq_error pen_bias compute_all_phifk compute_all_log_bb compute_all_post_prob post_prob grad_for_weighted_lnorm grad_for_weighted_lbb grad_for_eps dlbeta_deps df_deps dxi_df dlbeta_dh dxi_dh dlbeta_dxi dlbeta_dtau dc_dtau dlbeta_dc dpen_deps dpen_dh grad_for_mu_sigma2_wrapper grad_for_mu_sigma2 f1_obj flexdog_obj get_genotype_likelihoods get_wik_mat wem wem_obj wem_fixp rbetabinom rbetabinom_int qbetabinom qbetabinom_double pbetabinom pbetabinom_double dbetabinom dbetabinom_double dbernbinom dbetabinom_alpha_beta_double

Documented in dbetabinom log_sum_exp log_sum_exp_2 oracle_joint oracle_mis oracle_mis_vec pbetabinom qbetabinom rbetabinom wem

# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

#' Density function of betabinomial with the shape parameterizations
#'
#' @inheritParams dbetabinom_double
#' @param alpha The first shape parameter.
#' @param beta The second shape parameter.
#'
#' @return The density of the beta-binomial.
#'
#' @keywords internal
#' @noRd
#'
#' @author David Gerard
dbetabinom_alpha_beta_double <- function(x, size, alpha, beta, log) {
    .Call('_updog_dbetabinom_alpha_beta_double', PACKAGE = 'updog', x, size, alpha, beta, log)
}

#' Special case of betabinomial where the beta is bernoulli mu.
#'
#' @inheritParams dbetabinom_double
#'
#' @return The density of the Bernoulli-binomial.
#'
#' @keywords internal
#' @noRd
#'
#' @author David Gerard
dbernbinom <- function(x, size, mu, log) {
    .Call('_updog_dbernbinom', PACKAGE = 'updog', x, size, mu, log)
}

#' The density function of the beta-binomial distribution.
#'
#' @param x The quantile.
#' @param size The total number of draws.
#' @param mu The mean of the beta.
#' @param rho The overdispersion parameter of the beta.
#' @param log A logical. Should we return the log of the
#'     density \code{TRUE} or not \code{FALSE}?
#'
#' @return The density of the beta-binomial.
#'
#' @keywords internal
#' @noRd
#'
#' @author David Gerard
dbetabinom_double <- function(x, size, mu, rho, log) {
    .Call('_updog_dbetabinom_double', PACKAGE = 'updog', x, size, mu, rho, log)
}

#' @describeIn betabinom Density function.
#'
#' @export
#'
dbetabinom <- function(x, size, mu, rho, log) {
    .Call('_updog_dbetabinom', PACKAGE = 'updog', x, size, mu, rho, log)
}

#' The distribution function of the betabinomial. This is generally
#' only advisable if q is relatively small.
#'
#' @inheritParams dbetabinom_double
#' @param q A quantile.
#' @param log_p A logical. Should return the log-probability
#'     \code{TRUE} or not \code{FALSE}?
#'
#' @return The tail-probability of the beta-binomial.
#'
#' @keywords internal
#' @noRd
#'
#' @author David Gerard
pbetabinom_double <- function(q, size, mu, rho, log_p) {
    .Call('_updog_pbetabinom_double', PACKAGE = 'updog', q, size, mu, rho, log_p)
}

#' @describeIn betabinom Distribution function.
#'
#' @export
#'
pbetabinom <- function(q, size, mu, rho, log_p) {
    .Call('_updog_pbetabinom', PACKAGE = 'updog', q, size, mu, rho, log_p)
}

#' The quantile function of the beta-binomial distribution parameterized
#' by mean and overdispersion parameter.
#'
#' @inheritParams dbetabinom_double
#' @param p The lower tail probability.
#'
#' @return The quantile of the beta-binomial.
#'
#' @keywords internal
#' @noRd
#'
#' @author David Gerard
qbetabinom_double <- function(p, size, mu, rho) {
    .Call('_updog_qbetabinom_double', PACKAGE = 'updog', p, size, mu, rho)
}

#' @describeIn betabinom Quantile function.
#'
#' @export
#'
qbetabinom <- function(p, size, mu, rho) {
    .Call('_updog_qbetabinom', PACKAGE = 'updog', p, size, mu, rho)
}

#' One draw from the beta-binomial distribution parameterized
#' by mean and overdispersion parameter.
#'
#' @inheritParams dbetabinom_double
#'
#' @return A random sample from the beta-binomial.
#'
#' @keywords internal
#' @noRd
#'
#' @author David Gerard
rbetabinom_int <- function(size, mu, rho) {
    .Call('_updog_rbetabinom_int', PACKAGE = 'updog', size, mu, rho)
}

#' @describeIn betabinom Random generation.
#'
#' @export
#'
rbetabinom <- function(n, size, mu, rho) {
    .Call('_updog_rbetabinom', PACKAGE = 'updog', n, size, mu, rho)
}

wem_fixp <- function(pivec, lmat, etamat, nind, nclass, weight_vec, nvec, lambda) {
    invisible(.Call('_updog_wem_fixp', PACKAGE = 'updog', pivec, lmat, etamat, nind, nclass, weight_vec, nvec, lambda))
}

wem_obj <- function(pivec, weight_vec, lmat, lambda) {
    .Call('_updog_wem_obj', PACKAGE = 'updog', pivec, weight_vec, lmat, lambda)
}

#' EM algorithm to fit weighted ash objective.
#'
#' Solves the following optimization problem
#' \deqn{\max_{\pi} \sum_k w_k \log(\sum_j \pi_j \ell_jk).}
#' It does this using a weighted EM algorithm.
#'
#' @param weight_vec A vector of weights. Each element of \code{weight_vec} corresponds
#'     to a column of \code{lmat}.
#' @param lmat A matrix of inner weights. The columns are the "individuals" and the rows are the "classes."
#' @param pi_init The initial values of \code{pivec}. Each element of \code{pi_init}
#'     corresponds to a row of \code{lmat}.
#' @param itermax The maximum number of EM iterations to take.
#' @param obj_tol The objective stopping criterion.
#' @param lambda The penalty on the pi's. Should be greater than 0 and really really small.
#'
#' @return A vector of numerics.
#'
#' @author David Gerard
#'
#' @export
#'
#' @examples
#' set.seed(2)
#' n <- 3
#' p <- 5
#' lmat <- matrix(stats::runif(n * p), nrow = n)
#' weight_vec <- seq_len(p)
#' pi_init <- stats::runif(n)
#' pi_init <- pi_init / sum(pi_init)
#' wem(weight_vec = weight_vec,
#'     lmat       = lmat,
#'     pi_init    = pi_init,
#'     lambda     = 0,
#'     itermax    = 100,
#'     obj_tol    = 10^-6)
#'
#'
wem <- function(weight_vec, lmat, pi_init, lambda, itermax, obj_tol) {
    .Call('_updog_wem', PACKAGE = 'updog', weight_vec, lmat, pi_init, lambda, itermax, obj_tol)
}

#' E-step in \code{\link{flexdog}}.
#'
#' @inheritParams flexdog_full
#' @param probk_vec The vector of current prior probabilities of each genotype.
#'
#'
#' @return A matrix of numerics. The rows index the individuals and the
#'     columns index the genotype. These weights are used in the EM algorithm
#'     (and is indeed the E-step) in \code{\link{flexdog_full}}.
#'
#' @author David Gerard
#'
#' @keywords internal
#' @noRd
#'
#' @seealso \code{\link{flexdog}} for the full EM algorithm.
#'
get_wik_mat <- function(probk_vec, refvec, sizevec, ploidy, seq, bias, od) {
    .Call('_updog_get_wik_mat', PACKAGE = 'updog', probk_vec, refvec, sizevec, ploidy, seq, bias, od)
}

#' Returns genotype log-likelihood matrix
#'
#' @inheritParams get_wik_mat
#'
#' @author David Gerard
#'
#' @noRd
get_genotype_likelihoods <- function(refvec, sizevec, ploidy, seq, bias, od) {
    .Call('_updog_get_genotype_likelihoods', PACKAGE = 'updog', refvec, sizevec, ploidy, seq, bias, od)
}

#' Log-likelihood that \code{\link{flexdog}} maximizes.
#'
#' @inheritParams flexdog_full
#' @param probk_vec The kth element is the prior probability of genotype k (when starting to count from 0).
#'
#' @author David Gerard
#'
#' @keywords internal
#' @noRd
#'
#' @return The objective (marginal log-likelihood) used in
#'     \code{\link{flexdog_full}}.
#'
flexdog_obj <- function(probk_vec, refvec, sizevec, ploidy, seq, bias, od, mean_bias, var_bias, mean_seq, var_seq, mean_od, var_od) {
    .Call('_updog_flexdog_obj', PACKAGE = 'updog', probk_vec, refvec, sizevec, ploidy, seq, bias, od, mean_bias, var_bias, mean_seq, var_seq, mean_od, var_od)
}

#' Objective for mixture of known dist and uniform dist.
#'
#' @param alpha The mixing weight.
#' @param pvec The known distribution (e.g. from assuming an
#'     F1 population).
#' @param weight_vec A vector of weights.
#'
#' @return The objective when updating \code{pivec} when \code{model = "f1"}
#'     or \code{model = "s1"} in \code{\link{flexdog_full}}.
#'
#' @author David Gerard
#'
#' @keywords internal
#' @noRd
#'
f1_obj <- function(alpha, pvec, weight_vec) {
    .Call('_updog_f1_obj', PACKAGE = 'updog', alpha, pvec, weight_vec)
}

#' Gradient for \code{\link{obj_for_mu_sigma2}} with respect for \code{mu} and \code{sigma2}.
#'
#' @inheritParams obj_for_mu_sigma2
#'
#' @return A vector of length 2 * nind of numerics.
#'     The first element n elements are the partial derivatives with respect
#'     to \code{mu} and the second n elements are the
#'     partial derivatives with respect to \code{sigma2}
#'     in \code{\link{obj_for_mu_sigma2}}.
#'
#' @author David Gerard
#'
#' @keywords internal
#' @noRd
#'
grad_for_mu_sigma2 <- function(mu, sigma2, phifk_mat, cor_inv, log_bb_dense) {
    .Call('_updog_grad_for_mu_sigma2', PACKAGE = 'updog', mu, sigma2, phifk_mat, cor_inv, log_bb_dense)
}

#' Gradient for \code{\link{obj_for_mu_sigma2_wrapper}} with respect for \code{muSigma2}
#' and a wrapper for \code{\link{grad_for_mu_sigma2}}
#'
#'
#' @inherit grad_for_mu_sigma2 return
#' @inheritParams obj_for_mu_sigma2
#' @param muSigma2 A vector. The first half are mu and the second half are sigma2.
#'
#' @keywords internal
#' @noRd
#'
#' @author David Gerard
grad_for_mu_sigma2_wrapper <- function(muSigma2, phifk_mat, cor_inv, log_bb_dense) {
    .Call('_updog_grad_for_mu_sigma2_wrapper', PACKAGE = 'updog', muSigma2, phifk_mat, cor_inv, log_bb_dense)
}

#' Derivative of \deqn{-log(h) - (log(h) - \mu_h)^2 / (2\sigma_h^2)} with respect
#' to \eqn{h}.
#'
#' @param h The current bias parameter.
#' @param mu_h The mean of the log-bias.
#' @param sigma2_h The variance of the log-bias.
#'
#' @seealso \code{\link{pen_bias}} which this is a derivative for.
#'
#' @return A double.
#'
#' @keywords internal
#' @noRd
#'
#' @author David Gerard
dpen_dh <- function(h, mu_h, sigma2_h) {
    .Call('_updog_dpen_dh', PACKAGE = 'updog', h, mu_h, sigma2_h)
}

#' Derivative of \deqn{-log(\epsilon(1 - \epsilon)) - (logit(\epsilon) - \mu_{\epsilon})^2 / (2\sigma_{\epsilon}^2)}
#' with respect to \eqn{\epsilon}.
#'
#' @param eps The current sequencing error rate.
#' @param mu_eps The mean of the logit of the sequencing error rate.
#' @param sigma2_eps The variance of the logit of the sequencing error rate.
#'
#' @seealso \code{\link{pen_seq_error}} which this is a derivative for.
#'
#' @return A double.
#'
#' @keywords internal
#' @noRd
#'
#' @author David Gerard
dpen_deps <- function(eps, mu_eps, sigma2_eps) {
    .Call('_updog_dpen_deps', PACKAGE = 'updog', eps, mu_eps, sigma2_eps)
}

#' Derivative of the log-beta density with
#' respect to c where \eqn{c = (1 - \tau)/\tau}
#' where \eqn{\tau} is the overdispersion parameter.
#'
#' @param x The number of successes observed
#' @param n The total number of trials observed.
#' @param xi The mean of the beta-binomial.
#' @param c \eqn{(1 - \tau)/\tau} where \eqn{\tau}
#'     is the overdispersion parameter.
#'
#' @return A double.
#'
#' @keywords internal
#' @noRd
#'
#' @seealso \code{\link{dbetabinom_double}}, \code{\link{dlbeta_dtau}},
#'     \code{\link{dc_dtau}}.
#' @author David Gerard
dlbeta_dc <- function(x, n, xi, c) {
    .Call('_updog_dlbeta_dc', PACKAGE = 'updog', x, n, xi, c)
}

#' Derivative of \eqn{c = (1 - \tau) / \tau} with respect to \eqn{\tau}.
#'
#' @param tau The overdispersion parameter.
#'
#' @return A double.
#'
#' @keywords internal
#' @noRd
#'
#' @seealso \code{\link{dlbeta_dc}}, \code{\link{dlbeta_dtau}}
#'
#' @author David Gerard
dc_dtau <- function(tau) {
    .Call('_updog_dc_dtau', PACKAGE = 'updog', tau)
}

#' Derivative of the log-beta-binomial density with respect to the
#' overdispersion parameter.
#'
#' @param x The number of successes.
#' @param n The number of trials.
#' @param tau The overdispersion parameter.
#' @param p The allele dosage.
#' @param eps The sequencing error rate
#' @param h The bias parameter.
#'
#' @return A double.
#'
#' @keywords internal
#' @noRd
#'
#' @seealso \code{\link{dlbeta_dc}}, \code{\link{dc_dtau}},
#'     \code{\link{dbetabinom_double}}.
#'
#' @author David Gerard
dlbeta_dtau <- function(x, n, p, eps, h, tau) {
    .Call('_updog_dlbeta_dtau', PACKAGE = 'updog', x, n, p, eps, h, tau)
}

#' Derivative of the log-betabinomial density with respect to the
#' mean of the underlying beta.
#'
#' @return A double.
#'
#' @inheritParams dlbeta_dtau
#' @param xi The mean of the underlying beta.
#'
#' @keywords internal
#' @noRd
#'
#' @author David Gerard
dlbeta_dxi <- function(x, n, xi, tau) {
    .Call('_updog_dlbeta_dxi', PACKAGE = 'updog', x, n, xi, tau)
}

#' Derivative of xi-function with respect to bias parameter.
#'
#' @return A double.
#'
#' @param p The dosage (between 0 and 1).
#' @param eps The sequencing error rate.
#' @param h The bias parameter.
#'
#' @keywords internal
#' @noRd
#'
#' @author David Gerard
dxi_dh <- function(p, eps, h) {
    .Call('_updog_dxi_dh', PACKAGE = 'updog', p, eps, h)
}

#' Derivative of log-betabinomial density with respect to bias parameter.
#'
#' @inheritParams dlbeta_dtau
#'
#' @return A double.
#'
#' @keywords internal
#' @noRd
#'
#' @author David Gerard
dlbeta_dh <- function(x, n, p, eps, h, tau) {
    .Call('_updog_dlbeta_dh', PACKAGE = 'updog', x, n, p, eps, h, tau)
}

#' Derivative of xi with respect to f.
#'
#' @param h The bias parameter.
#' @param f The post-sequencing error rate adjusted probability of an A.
#'
#' @return A double.
#'
#' @keywords internal
#' @noRd
#'
#' @author David Gerard
dxi_df <- function(h, f) {
    .Call('_updog_dxi_df', PACKAGE = 'updog', h, f)
}

#' Derivative of f with respect to eps.
#'
#' @param p The allele dosage.
#' @param eps The sequencing error rate.
#'
#' @return A double.
#'
#' @keywords internal
#' @noRd
#'
#' @author David Gerard
df_deps <- function(p, eps) {
    .Call('_updog_df_deps', PACKAGE = 'updog', p, eps)
}

#' Derivative of the log-beta-binomial density with respect to the
#' sequencing error rate.
#'
#' @inheritParams dlbeta_dtau
#'
#' @return A double.
#'
#' @keywords internal
#' @noRd
#'
#' @author David Gerard
dlbeta_deps <- function(x, n, p, eps, h, tau) {
    .Call('_updog_dlbeta_deps', PACKAGE = 'updog', x, n, p, eps, h, tau)
}

#' Gradient for \code{\link{obj_for_eps}}.
#'
#' @return A double.
#'
#' @keywords internal
#' @noRd
#'
#' @inheritParams obj_for_eps
#'
#' @author David Gerard
grad_for_eps <- function(parvec, refvec, sizevec, ploidy, mean_bias, var_bias, mean_seq, var_seq, mean_od, var_od, wmat, update_bias = TRUE, update_seq = TRUE, update_od = TRUE) {
    .Call('_updog_grad_for_eps', PACKAGE = 'updog', parvec, refvec, sizevec, ploidy, mean_bias, var_bias, mean_seq, var_seq, mean_od, var_od, wmat, update_bias, update_seq, update_od)
}

#' Gradient for \code{\link{obj_for_weighted_lbb}}.
#'
#' @inheritParams obj_for_weighted_lbb
#'
#' @return A vector of length 2. The first component is the gradient of the mean of the underlying
#'     beta. The second component is the gradient of the overdispersion parameter of the
#'     underlying beta.
#'
#' @keywords internal
#' @noRd
#'
#' @author David Gerard
grad_for_weighted_lbb <- function(parvec, ploidy, weight_vec) {
    .Call('_updog_grad_for_weighted_lbb', PACKAGE = 'updog', parvec, ploidy, weight_vec)
}

#' Gradient for \code{\link{obj_for_weighted_lnorm}}.
#'
#' @inheritParams obj_for_weighted_lnorm
#'
#' @return A vector of length 2. The first term is the derivative with respect to the mean,
#'     the second term is the derivative with respect to the standard deviation (not variance).
#'
#' @author David Gerard
#'
#' @keywords internal
#' @noRd
grad_for_weighted_lnorm <- function(parvec, ploidy, weight_vec) {
    .Call('_updog_grad_for_weighted_lnorm', PACKAGE = 'updog', parvec, ploidy, weight_vec)
}

#' Variational posterior probability of having \code{dosage} A alleles
#' when the ploidy is \code{ploidy}, the allele frequency is
#' \code{alpha}, the individual-specific overdispersion parameter is
#' \code{rho}, the variational mean is \code{mu}, and the variational
#' variance is \code{sigma2}.
#'
#' @param dosage The number of A alleles.
#' @param ploidy The ploidy of the individual.
#' @param mu The variational mean.
#' @param sigma2 The variational variance (not standard deviation).
#' @param alpha The allele frequency.
#' @param rho The individual's overdispersion parameter.
#'
#' @return The posterior probability of having \code{dosage} A alleles.
#'
#' @author David
#'
#' @keywords internal
#' @noRd
#'
post_prob <- function(dosage, ploidy, mu, sigma2, alpha, rho) {
    .Call('_updog_post_prob', PACKAGE = 'updog', dosage, ploidy, mu, sigma2, alpha, rho)
}

#' Computes every posterior probability for each dosage level for
#' each individual at each SNP.
#'
#' @param ploidy The ploidy of the species.
#' @param mu A matrix of variational posterior means. The rows
#'     index the individuals and the columns index the SNPs.
#' @param sigma2 A matrix of variational posterior variances.
#'     The rows index the individuals and the columns index the SNPs.
#' @param alpha A vector of allele frequencies for all SNPs.
#' @param rho A vector of inbreeding coefficients for all individuals.
#'
#' @return An array. The rows index the individuals, the columns index the
#'     SNPS, and the third dimension indexes the genotypes. Element (i, j, k)
#'     is the return of \code{\link{post_prob}}.
#'
#' @author David Gerard
#'
#' @keywords internal
#' @noRd
#'
compute_all_post_prob <- function(ploidy, mu, sigma2, alpha, rho) {
    .Call('_updog_compute_all_post_prob', PACKAGE = 'updog', ploidy, mu, sigma2, alpha, rho)
}

#' Calculates the log-density for every individual by snp by dosage level.
#'
#' @inheritParams mupdog
#'
#' @keywords internal
#' @noRd
#'
#' @return A three dimensional array. The rows index the individuals, the
#'     columns index the SNPs, and the third dimension indexes the
#'     genotypes. This is the log-likelihood for each individual/snp/genotype
#'     combination.
#'
compute_all_log_bb <- function(refmat, sizemat, ploidy, seq, bias, od) {
    .Call('_updog_compute_all_log_bb', PACKAGE = 'updog', refmat, sizemat, ploidy, seq, bias, od)
}

#' Computes \deqn{\Phi^{-1}(F(k|K,\alpha_j,\rho_i))} for all possible (i,j,k).
#'
#' @param alpha A vector whose jth element is the allele frequency of SNP j.
#' @param rho A vector whose ith element is the inbreeding coefficient of individual i.
#' @param ploidy The ploidy of the species.
#'
#' @return A three dimensional array. The rows index the individuals,
#'     the columns index the SNPs, and the third dimension indexes the
#'     genotypes. Computes the "continuous genotype".
#'
#' @author David Gerard
#'
#' @keywords internal
#' @noRd
compute_all_phifk <- function(alpha, rho, ploidy) {
    .Call('_updog_compute_all_phifk', PACKAGE = 'updog', alpha, rho, ploidy)
}

#' Penalty on bias parameter.
#'
#' @param h The current value of bias parameter. Must be
#'     greater than 0. A value of 1 means no bias.
#' @param mu_h The prior mean of the log-bias parameter.
#' @param sigma2_h The prior variance (not standard deviation)
#'     of the log-bias parameter. Set to to \code{Inf} to return \code{0}.
#'
#' @return A double. The default penalty on the allelic bias parameter.
#'
#' @author David Gerard
#'
#' @keywords internal
#' @noRd
pen_bias <- function(h, mu_h, sigma2_h) {
    .Call('_updog_pen_bias', PACKAGE = 'updog', h, mu_h, sigma2_h)
}

#' Penalty on sequencing error rate.
#'
#' @param eps The current value of sequencing error rate.
#'     Must be between 0 and 1.
#' @param mu_eps The prior mean of the logit sequencing error rate.
#' @param sigma2_eps The prior variance (not standard deviation)
#'     of the logit sequencing error rate. Set this to \code{Inf} to
#'     return \code{0}.
#'
#' @return A double. The default penalty on the sequencing error rate.
#'
#' @author David Gerard
#'
#' @keywords internal
#' @noRd
pen_seq_error <- function(eps, mu_eps, sigma2_eps) {
    .Call('_updog_pen_seq_error', PACKAGE = 'updog', eps, mu_eps, sigma2_eps)
}

#' Objective function when updating a single inbreeding coefficient.
#'
#' @param rho The inbreeding coefficient for the individual.
#' @param mu A vector of posterior means. The jth element is the
#'     posterior mean of SNP j for the individual.
#' @param sigma2 A vector of posterior variances. The jth element
#'     is the posterior variance of SNP j for the individual.
#' @param alpha A vector of allele frequencies. The jth element
#'     is the allele frequency for SNP j.
#' @param log_bb_dense A matrix of log posterior densities. The
#'     rows index the SNPs and the columns index the dosage.
#' @param ploidy The ploidy of the species.
#'
#' @return A double. The objective when updating
#'     \code{rho} in \code{\link{mupdog}}.
#'
#'
#' @author David Gerard
#'
#' @keywords internal
#' @noRd
obj_for_rho <- function(rho, mu, sigma2, alpha, log_bb_dense, ploidy) {
    .Call('_updog_obj_for_rho', PACKAGE = 'updog', rho, mu, sigma2, alpha, log_bb_dense, ploidy)
}

#' Objective function when updating alpha
#'
#' @param mu A vector. The ith element is individual i's variational posterior mean at the SNP.
#' @param sigma2 A vector. The ith element is individual i's variational posterior variance at the SNP.
#' @param alpha The SNP's allele frequency.
#' @param rho A vector. The ith element is individuals i's inbreeding coefficient.
#' @param log_bb_dense A matrix of log-densities of the beta binomial. The rows index the individuals and the columns index the allele dosage.
#' @param ploidy The ploidy of the species.
#'
#'
#' @return A double. The objective when updating \code{alpha} in
#'     \code{\link{mupdog}}.
#'
#'
#' @author David Gerard
#'
#' @keywords internal
#' @noRd
obj_for_alpha <- function(mu, sigma2, alpha, rho, log_bb_dense, ploidy) {
    .Call('_updog_obj_for_alpha', PACKAGE = 'updog', mu, sigma2, alpha, rho, log_bb_dense, ploidy)
}

#' Objective function for updating sequencing error rate, bias, and overdispersion parameters.
#'
#' @param parvec A vector of length three. The first element is the sequencing
#'     error rate, the second element is the allele bias, and the third element
#'     is the overdispersion parameter.
#' @param refvec A vector. The ith element is the reference count for the ith individual in the SNP.
#' @param sizevec A vector. the ith element is the size count for the ith individual in the SNP/
#' @param ploidy The ploidy of the species.
#' @param mean_bias The prior mean of the log-bias.
#' @param var_bias The prior variance of the log-bias
#' @param mean_seq The prior mean of the logit sequencing error rate.
#' @param var_seq The prior variance of the logit sequencing error rate.
#' @param mean_od The prior mean of the logit of the overdispersion parameter
#' @param var_od The prior variance of the logit of the overdispersion parameter.
#' @param wmat The matrix of (variational) posterior probabilities for each dosage.
#'     The rows index the individuals and the columns index the dosage levels.
#' @param update_seq A logical. This is not used in \code{obj_for_eps},
#'     but sets the first element to \code{0.0} in \code{\link{grad_for_eps}}.
#' @param update_bias A logical. This is not used in \code{obj_for_eps},
#'     but sets the second element to \code{0.0} in \code{\link{grad_for_eps}}.
#' @param update_od A logical. This is not used in \code{obj_for_eps},
#'     but sets the third element to \code{0.0} in \code{\link{grad_for_eps}}.
#'
#' @return A double. The objective when updating \code{eps} in
#'     \code{\link{mupdog}}.
#'
#' @author David Gerard
#'
#' @keywords internal
#' @noRd
obj_for_eps <- function(parvec, refvec, sizevec, ploidy, mean_bias, var_bias, mean_seq, var_seq, mean_od, var_od, wmat, update_bias = TRUE, update_seq = TRUE, update_od = TRUE) {
    .Call('_updog_obj_for_eps', PACKAGE = 'updog', parvec, refvec, sizevec, ploidy, mean_bias, var_bias, mean_seq, var_seq, mean_od, var_od, wmat, update_bias, update_seq, update_od)
}

#' Objective function when updating mu and sigma2.
#'
#' @param mu A vector, the ith element is the variational posterior mean of individual i for the SNP.
#' @param sigma2 A vector, the ith element is the variational posterior variance of individual i for the SNP.
#' @param phifk_mat A matrix that contains the standard normal quantile of the beta-binomial cdf at dosage k for individual i.
#'     The rows index the individuals and the columns index the dosages.
#' @param cor_inv The inverse of the underlying correlation matrix.
#' @param log_bb_dense A matrix of log-densities of the beta binomial. The rows index the individuals and the columns index the allele dosage.
#'     Allele dosage goes from -1 to ploidy, so there are ploidy + 2 elements.
#'
#' @return A double. The objective when updating \code{mu} and \code{sigma2}
#'     in \code{\link{mupdog}}.
#'
#' @author David Gerard
#'
#' @keywords internal
#' @noRd
obj_for_mu_sigma2 <- function(mu, sigma2, phifk_mat, cor_inv, log_bb_dense) {
    .Call('_updog_obj_for_mu_sigma2', PACKAGE = 'updog', mu, sigma2, phifk_mat, cor_inv, log_bb_dense)
}

#' Wrapper for \code{\link{obj_for_mu_sigma2}} so that I can use it in \code{optim}.
#'
#' @inheritParams obj_for_mu_sigma2
#' @param muSigma2 A vector. The first half are mu and the second half are sigma2.
#' @inherit obj_for_mu_sigma2 return
#'
#'
#' @author David Gerard
#'
#' @keywords internal
#' @noRd
obj_for_mu_sigma2_wrapper <- function(muSigma2, phifk_mat, cor_inv, log_bb_dense) {
    .Call('_updog_obj_for_mu_sigma2_wrapper', PACKAGE = 'updog', muSigma2, phifk_mat, cor_inv, log_bb_dense)
}

#' The evidence lower bound
#'
#' @param warray An three-way array. The (i,j,k)th entry is the variational posterior probability
#'     that individual i at SNP j has dosage k - 1. See \code{\link{compute_all_post_prob}}.
#' @param lbeta_array A three-way array. The (i,j,k)th entry is the log-density of the betabinomial
#'     for individual i at SNP j and dosage k - 1. See \code{\link{compute_all_log_bb}}.
#' @param cor_inv The inverse of the correlation matrix.
#' @param postmean A matrix. The (i,j)th entry is the variational posterior mean for individual i
#'     at SNP j.
#' @param postvar A matrix. The (i,j)th entry is the variational posterior variance for individual i at SNP j.
#' @param bias A vector. The jth entry is the allele bias for SNP j.
#' @param seq A vector. The jth entry is the sequencing error rate at SNP j.
#' @param mean_bias The prior mean on the log-bias.
#' @param var_bias The prior variance on the log-bias.
#' @param mean_seq The prior mean on the logit of the sequencing error rate.
#' @param var_seq The prior variance on the logit of the sequencing error rate.
#' @param ploidy The ploidy of the species.
#'
#' @return A double. The evidence lower-bound that \code{\link{mupdog}}
#'     maximizes.
#'
#'
#' @author David Gerard
#'
#' @keywords internal
#' @noRd
elbo <- function(warray, lbeta_array, cor_inv, postmean, postvar, bias, seq, mean_bias, var_bias, mean_seq, var_seq, ploidy) {
    .Call('_updog_elbo', PACKAGE = 'updog', warray, lbeta_array, cor_inv, postmean, postvar, bias, seq, mean_bias, var_bias, mean_seq, var_seq, ploidy)
}

#' Objective function for updating the beta-binomial genotype distribution when
#' \code{model = "bb"} in \code{\link{flex_update_pivec}}.
#'
#' @param parvec A vector of length 2. The first term is the current mean of the
#'     underlying beta. The second term is the current overdispersion parameter.
#' @param ploidy The ploidy of the species.
#' @param weight_vec A vector of length \code{ploidy + 1} that contains the weights
#'     for each component beta-binomial.
#'
#' @return A double. The objective when updating the beta-binomial genotype
#'     distribution in \code{\link{mupdog}}.
#'
#' @author David Gerard
#'
#' @keywords internal
#' @noRd
obj_for_weighted_lbb <- function(parvec, ploidy, weight_vec) {
    .Call('_updog_obj_for_weighted_lbb', PACKAGE = 'updog', parvec, ploidy, weight_vec)
}

#' Objective function for updating discrete normal genotype distribution
#' when \code{model = "normal"} in \code{\link{flex_update_pivec}}.
#'
#' @param parvec A vector of length 2. The first term is the current mean of the
#'     underlying normal. The second term is the current standard deviation
#'     (not variance) of the normal.
#' @param ploidy The ploidy of the species.
#' @param weight_vec A vector of length \code{ploidy + 1} that contains the weights
#'     for each component beta-binomial.
#'
#' @return A double. The objective when updating the normal
#'     genotype distribution in \code{\link{mupdog}}.
#'
#' @author David Gerard
#'
#' @keywords internal
#' @noRd
#'
obj_for_weighted_lnorm <- function(parvec, ploidy, weight_vec) {
    .Call('_updog_obj_for_weighted_lnorm', PACKAGE = 'updog', parvec, ploidy, weight_vec)
}

#' Calculate oracle misclassification error rate.
#'
#' Given perfect knowledge of the data generating parameters,
#' \code{oracle_mis} calculates the misclassification error
#' rate, where the error rate is taken over both the data generation
#' process and the allele-distribution.
#' This is an ideal level of the misclassification error rate and
#' any real method will have a larger rate than this. This is a useful
#' approximation when you have a lot of individuals.
#'
#' To come up with \code{dist}, you need some additional assumptions.
#' For example, if the population is in Hardy-Weinberg equilibrium and
#' the allele frequency is \code{alpha} then you could calculate
#' \code{dist} using the R code: \code{dbinom(x = 0:ploidy, size = ploidy, prob = alpha)}.
#' Alternatively, if you know the genotypes of the individual's two parents are, say,
#' \code{ref_count1} and \code{ref_count2}, then you could use the \code{\link[updog]{get_q_array}}
#' function from the updog package: \code{get_q_array(ploidy)[ref_count1 + 1, ref_count2 + 1, ]}.
#'
#' @param n The read-depth.
#' @param ploidy The ploidy of the individual.
#' @param seq The sequencing error rate.
#' @param bias The allele-bias.
#' @param od The overdispersion parameter.
#' @param dist The distribution of the alleles.
#'
#' @return A double. The oracle misclassification error rate.
#'
#' @author David Gerard
#'
#' @references
#' \itemize{
#'   \item{Gerard, D., Ferrão, L. F. V., Garcia, A. A. F., & Stephens, M. (2018). Genotyping Polyploids from Messy Sequencing Data. \emph{Genetics}, 210(3), 789-807. \doi{10.1534/genetics.118.301468}.}
#' }
#'
#' @examples
#' ## Hardy-Weinberg population with allele-frequency of 0.75.
#' ## Moderate bias and moderate overdispersion.
#' ## See how oracle misclassification error rates change as we
#' ## increase the ploidy.
#' ploidy <- 2
#' dist <- stats::dbinom(0:ploidy, ploidy, 0.75)
#' oracle_mis(n = 100, ploidy = ploidy, seq = 0.001,
#'            bias = 0.7, od = 0.01, dist = dist)
#'
#' ploidy <- 4
#' dist <- stats::dbinom(0:ploidy, ploidy, 0.75)
#' oracle_mis(n = 100, ploidy = ploidy, seq = 0.001,
#'            bias = 0.7, od = 0.01, dist = dist)
#'
#' ploidy <- 6
#' dist <- stats::dbinom(0:ploidy, ploidy, 0.75)
#' oracle_mis(n = 100, ploidy = ploidy, seq = 0.001,
#'            bias = 0.7, od = 0.01, dist = dist)
#'
#' @export
#'
oracle_mis <- function(n, ploidy, seq, bias, od, dist) {
    .Call('_updog_oracle_mis', PACKAGE = 'updog', n, ploidy, seq, bias, od, dist)
}

#' Returns the oracle misclassification rates for each genotype.
#'
#' Given perfect knowledge of the data generating parameters,
#' \code{oracle_mis_vec} calculates the misclassification error
#' rate at each genotype. This differs from \code{\link{oracle_mis}}
#' in that this will \emph{not} average over the genotype distribution to
#' get an overall misclassification error rate. That is, \code{oracle_mis_vec}
#' returns a vector of misclassification error rates \emph{conditional} on
#' each genotype.
#'
#' This is an ideal level of the misclassification error rate and
#' any real method will have a larger rate than this. This is a useful
#' approximation when you have a lot of individuals.
#'
#' To come up with \code{dist}, you need some additional assumptions.
#' For example, if the population is in Hardy-Weinberg equilibrium and
#' the allele frequency is \code{alpha} then you could calculate
#' \code{dist} using the R code: \code{dbinom(x = 0:ploidy, size = ploidy, prob = alpha)}.
#' Alternatively, if you know the genotypes of the individual's two parents are, say,
#' \code{ref_count1} and \code{ref_count2}, then you could use the \code{\link[updog]{get_q_array}}
#' function from the updog package: \code{get_q_array(ploidy)[ref_count1 + 1, ref_count2 + 1, ]}.
#'
#'
#' @inheritParams oracle_mis
#'
#' @return A vector of numerics. Element i is the oracle misclassification
#'     error rate when genotyping an individual with actual
#'     genotype i + 1.
#'
#' @export
#'
#' @references
#' \itemize{
#'   \item{Gerard, D., Ferrão, L. F. V., Garcia, A. A. F., & Stephens, M. (2018). Genotyping Polyploids from Messy Sequencing Data. \emph{Genetics}, 210(3), 789-807. \doi{10.1534/genetics.118.301468}.}
#' }
#'
#' @examples
#' ## Hardy-Weinberg population with allele-frequency of 0.75.
#' ## Moderate bias and moderate overdispersion.
#' ploidy <- 4
#' dist <- stats::dbinom(0:ploidy, ploidy, 0.75)
#' om <- oracle_mis_vec(n = 100, ploidy = ploidy, seq = 0.001,
#'                      bias = 0.7, od = 0.01, dist = dist)
#' om
#'
#' ## Get same output as oracle_mis this way:
#' sum(dist * om)
#' oracle_mis(n = 100, ploidy = ploidy, seq = 0.001,
#'            bias = 0.7, od = 0.01, dist = dist)
#'
#' @author David Gerard
oracle_mis_vec <- function(n, ploidy, seq, bias, od, dist) {
    .Call('_updog_oracle_mis_vec', PACKAGE = 'updog', n, ploidy, seq, bias, od, dist)
}

#' The joint probability of the genotype and the genotype estimate
#' of an oracle estimator.
#'
#' This returns the joint distribution of the true genotypes and an oracle
#' estimator given perfect knowledge of the data generating process. This is a useful
#' approximation when you have a lot of individuals.
#'
#' To come up with \code{dist}, you need some additional assumptions.
#' For example, if the population is in Hardy-Weinberg equilibrium and
#' the allele frequency is \code{alpha} then you could calculate
#' \code{dist} using the R code: \code{dbinom(x = 0:ploidy, size = ploidy, prob = alpha)}.
#' Alternatively, if you know the genotypes of the individual's two parents are, say,
#' \code{ref_count1} and \code{ref_count2}, then you could use the \code{\link[updog]{get_q_array}}
#' function from the updog package: \code{get_q_array(ploidy)[ref_count1 + 1, ref_count2 + 1, ]}.
#'
#' See the Examples to see how to reconcile the output of \code{oracle_joint}
#' with \code{\link{oracle_mis}} and \code{\link{oracle_mis_vec}}.
#'
#' @inheritParams oracle_mis
#'
#' @return A matrix. Element (i, j) is the joint probability of estimating
#'     the genotype to be i+1 when the true genotype is j+1. That is, the
#'     estimated genotype indexes the rows and the true genotype indexes
#'     the columns. This is when
#'     using an oracle estimator.
#'
#' @references
#' \itemize{
#'   \item{Gerard, D., Ferrão, L. F. V., Garcia, A. A. F., & Stephens, M. (2018). Genotyping Polyploids from Messy Sequencing Data. \emph{Genetics}, 210(3), 789-807. \doi{10.1534/genetics.118.301468}.}
#' }
#'
#' @examples
#' ## Hardy-Weinberg population with allele-frequency of 0.75.
#' ## Moderate bias and moderate overdispersion.
#' ploidy <- 4
#' dist <- stats::dbinom(0:ploidy, ploidy, 0.75)
#' jd <- oracle_joint(n = 100, ploidy = ploidy, seq = 0.001,
#'                    bias = 0.7, od = 0.01, dist = dist)
#' jd
#'
#' ## Get same output as oracle_mis this way:
#' 1 - sum(diag(jd))
#' oracle_mis(n = 100, ploidy = ploidy, seq = 0.001,
#'            bias = 0.7, od = 0.01, dist = dist)
#'
#' ## Get same output as oracle_mis_vec this way:
#' 1 - diag(sweep(x = jd, MARGIN = 2, STATS = colSums(jd), FUN = "/"))
#' oracle_mis_vec(n = 100, ploidy = ploidy, seq = 0.001,
#'                bias = 0.7, od = 0.01, dist = dist)
#'
#' @export
#'
#' @seealso
#' \describe{
#'   \item{\code{\link{oracle_plot}}}{For visualizing the joint distribution output from \code{oracle_joint}.}
#'   \item{\code{\link{oracle_mis_from_joint}}}{For obtaining the same results as \code{\link{oracle_mis}}
#'       directly from the output of \code{oracle_joint}.}
#'   \item{\code{\link{oracle_mis_vec_from_joint}}}{For obtaining the same results as \code{\link{oracle_mis_vec}}
#'       directly from the output of \code{oracle_joint}.}
#'   \item{\code{\link{oracle_cor_from_joint}}}{For obtaining the same results as \code{\link{oracle_cor}}
#'       directly from the output of \code{oracle_joint}.}
#' }
#'
#' @author David Gerard
oracle_joint <- function(n, ploidy, seq, bias, od, dist) {
    .Call('_updog_oracle_joint', PACKAGE = 'updog', n, ploidy, seq, bias, od, dist)
}

#' Adjusts allele dosage \code{p} by the sequencing error rate \code{eps}.
#'
#' @param p The allele dosage.
#' @param eps The sequencing error rate.
#'
#' @return The probability of a reference reed adjusted by the
#'     sequencing error rate.
#'
#' @author David Gerard
#'
#' @keywords internal
#' @noRd
eta_double <- function(p, eps) {
    .Call('_updog_eta_double', PACKAGE = 'updog', p, eps)
}

#' Adjusts allele dosage \code{p} by the sequencing error rate \code{eps}.
#'
#' @inheritParams xi_fun
#'
#' @return A vector of probabilities of a reference read adjusted
#'     by the sequencing error rate.
#'
#' @author David Gerard
#'
#' @keywords internal
#' @noRd
eta_fun <- function(p, eps) {
    .Call('_updog_eta_fun', PACKAGE = 'updog', p, eps)
}

#' Adjusts allele dosage \code{p} by the sequencing error rate \code{eps} and the allele bias \code{h}.
#'
#' @param p The allele dosage.
#' @param eps The sequencing error rate.
#' @param h The allele bias.
#'
#' @return The probability of a reference read adjusted by both the allele
#'     bias and the sequencing error rate.
#'
#' @author David Gerard
#'
#' @keywords internal
#' @noRd
xi_double <- function(p, eps, h) {
    .Call('_updog_xi_double', PACKAGE = 'updog', p, eps, h)
}

#' Adjusts allele dosage \code{p} by the sequencing error rate \code{eps} and the allele bias \code{h}.
#'
#' @param p A vector of allele dosages.
#' @param eps The sequencing error rate. Must either be of length 1
#'     or the same length as p.
#' @param h The allele bias. Must either be of length 1 or the same length
#'     as p.
#'
#' @return A vector of probabilities of the reference read adjusted
#'     by both the sequencing error rate and the allele bias.
#'
#' @author David Gerard
#'
#' @keywords internal
#' @noRd
xi_fun <- function(p, eps, h) {
    .Call('_updog_xi_fun', PACKAGE = 'updog', p, eps, h)
}

#' Log-sum-exponential trick.
#'
#' @param x A vector to log-sum-exp.
#'
#' @return The log of the sum of the exponential
#'     of the elements in \code{x}.
#'
#' @author David Gerard
#'
#' @export
log_sum_exp <- function(x) {
    .Call('_updog_log_sum_exp', PACKAGE = 'updog', x)
}

#' Log-sum-exponential trick using just two doubles.
#'
#' @param x A double.
#' @param y Another double.
#'
#' @return The log of the sum of the exponential of x and y.
#'
#' @author David Gerard
#'
#' @export
log_sum_exp_2 <- function(x, y) {
    .Call('_updog_log_sum_exp_2', PACKAGE = 'updog', x, y)
}

#' The logit function.
#'
#' @param x A double between 0 and 1.
#'
#' @return The logit of \code{x}.
#'
#' @author David Gerard
#'
#' @keywords internal
#' @noRd
#'
logit <- function(x) {
    .Call('_updog_logit', PACKAGE = 'updog', x)
}

#' The expit (logistic) function.
#'
#' @param x A double.
#'
#' @return The expit (logistic) of \code{x}.
#'
#' @keywords internal
#' @noRd
#'
#' @author David Gerard
expit <- function(x) {
    .Call('_updog_expit', PACKAGE = 'updog', x)
}

# Register entry points for exported C++ functions
methods::setLoadAction(function(ns) {
    .Call('_updog_RcppExport_registerCCallable', PACKAGE = 'updog')
})

Try the updog package in your browser

Any scripts or data that you put into this service are public.

updog documentation built on Nov. 17, 2023, 9:06 a.m.