Nothing
###############################
## Functions for Generalized Method of Moments
###############################
#' Upper bounds on rates of double reduction
#'
#' Calculates the upper bounds of the double reduction parameters
#' according to the complete equation segregation model. See
#' Huang et. al. (2019) for details.
#'
#' @param ploidy The ploidy of the species. Should be even and at least 4.
#'
#' @return A vector of length \code{floor(ploidy/4)}. Element \code{i} is
#' the upper bound on the probability of \code{i} pairs of
#' identical-by-double-reduction alleles being in an individual.
#'
#' @author David Gerard
#'
#' @export
#'
#' @references
#' \itemize{
#' \item{Huang, K., Wang, T., Dunn, D. W., Zhang, P., Cao, X., Liu, R., & Li, B. (2019). Genotypic frequencies at equilibrium for polysomic inheritance under double-reduction. \emph{G3: Genes, Genomes, Genetics}, 9(5), 1693-1706. \doi{10.1534/g3.119.400132}}
#' }
#'
#' @examples
#' drbounds(4)
#' drbounds(6)
#' drbounds(8)
#' drbounds(10)
#' drbounds(12)
#' drbounds(14)
#' drbounds(16)
#'
drbounds <- function(ploidy) {
stopifnot(ploidy > 2)
stopifnot(ploidy %% 2 == 0)
ibdr <- floor(ploidy / 4)
alpha <- rep(NA_real_, length = ibdr)
for (i in seq_along(alpha)) {
jvec <- i:ibdr
alpha[[i]] <-
exp(
log_sum_exp(
(ploidy / 2 - 3 * jvec) * log(2) +
lchoose(jvec, i) +
lchoose(ploidy / 2, jvec) +
lchoose(ploidy / 2 - jvec, ploidy / 2 - 2 * jvec) -
lchoose(ploidy, ploidy / 2)
)
)
}
return(alpha)
}
#' Chi-square divergence between empirical proportions and updated proportions.
#'
#' @param nvec A vector of length ploidy + 1. \code{nvec[i]} is the observed
#' number of individuals with dosage \code{i-1}.
#' @param alpha The double reduction parameter. Should be of length
#' \code{floor(ploidy/4)}. These should sum to less than 1, since
#' \code{1-sum(alpha)} is the assumed probability of no double
#' reduction.
#' @param denom What should we use for the denominator? The expected
#' genotype frequencies (\code{denom = "expected"}) or the
#' observed genotype frequencies (\code{denom = "observed"})?
#' @param ngen The number of generations of random mating of current
#' empirical frequencies to compare against.
#'
#' @author David Gerard
#'
#' @noRd
chisqdiv <- function(nvec,
alpha,
denom = c("expected", "observed"),
ngen = 1) {
## Check parameters ----
ploidy <- length(nvec) - 1
stopifnot(ploidy %% 2 == 0, ploidy > 0)
stopifnot(length(alpha) == floor(ploidy / 4))
stopifnot(alpha >= 0, sum(alpha) <= 1)
stopifnot(nvec >= 0)
stopifnot(length(ngen) == 1, ngen >= 1)
denom <- match.arg(denom)
## calculate divergence ----
n <- sum(nvec)
qhat <- nvec / n
fq <- qhat
for (i in seq_len(ngen)) {
fq <- freqnext(freq = fq, alpha = alpha)
}
notzero <- qhat > 10^-6 | fq > 10^-6
if (denom == "expected") {
chisq <- n * sum((qhat[notzero] - fq[notzero]) ^ 2 / fq[notzero])
} else if (denom == "observed") {
chisq <- n * sum((qhat[notzero] - fq[notzero]) ^ 2 / qhat[notzero])
}
return(chisq)
}
neymandiv <- function(nvec, alpha, ngen = 1) {
chisqdiv(nvec = nvec, alpha = alpha, denom = "observed", ngen = ngen)
}
pearsondiv <- function(nvec, alpha, ngen = 1) {
chisqdiv(nvec = nvec, alpha = alpha, denom = "expected", ngen = ngen)
}
#' G-test statistic. I.e. likelihood ratio of multinomial
#'
#' @inheritParams chisqdiv
#'
#' @author David Gerard
#'
#' @noRd
gdiv <- function(nvec, alpha, ngen = 1) {
## Check parameters ----
ploidy <- length(nvec) - 1
stopifnot(ploidy %% 2 == 0, ploidy > 0)
stopifnot(length(alpha) == floor(ploidy / 4))
stopifnot(alpha >= 0, sum(alpha) <= 1)
stopifnot(nvec >= 0)
## Iterate through ngen generations of random mating ----
n <- sum(nvec)
qhat <- nvec / n
fq <- qhat
for (i in seq_len(ngen)) {
fq <- freqnext(freq = fq, alpha = alpha)
}
## Calculate statistic
ei <- fq * n
notzero <- nvec > 0
gstat <- 2 * sum(nvec[notzero] * log(nvec[notzero] / ei[notzero]))
return(gstat)
}
#' Calculate \code{\link{chisqdiv}()} with real parameterization.
#'
#' Uses \code{\link{real_to_simplex}()} to convert \code{y} to
#' \code{alpha}. Then calculates chi-square divergence with
#' \code{\link{chisqdiv}()}. Transformation is that of
#' Betancourt (2012).
#'
#' @inheritParams chisqdiv
#' @param y A real vector of length at least 1.
#'
#' @author David Gerard
#'
#' @references
#' \itemize{
#' \item{Betancourt, M. (2012, May). Cruising the simplex:
#' Hamiltonian Monte Carlo and the Dirichlet distribution.
#' In AIP Conference Proceedings 31st (Vol. 1443, No. 1, pp. 157-164).
#' American Institute of Physics. \doi{10.1063/1.3703631}}
#' \item{\url{https://mc-stan.org/docs/2_18/reference-manual/simplex-transform-section.html}}
#' }
#'
#' @noRd
obj_reals <- function(y, nvec, denom = c("expected", "observed")) {
ploidy <- length(nvec) - 1
stopifnot(floor(ploidy / 4) == length(y))
denom <- match.arg(denom)
if (length(y) == 0) {
return(chisqdiv(nvec = nvec, alpha = NULL, denom = denom))
} else {
alpha_full <- real_to_simplex(y)
return(chisqdiv(nvec = nvec, alpha = alpha_full[-1], denom = denom))
}
}
#' Estimate double reduction parameter and test for HWE
#'
#' Implements a generalized method-of-moments approach to test for
#' Hardy-Weinberg equilibrium in the presence of double reduction. This
#' also provides an estimate of double reduction given that the population
#' is at equilibrium.
#'
#' For ploides greater than 4, not all gametic frequencies are possible
#' given rates of double reduction. That is, if the model for meiosis
#' from Huang et. al. (2019) is incorrect, then this is not an appropriate
#' test for HWE.
#'
#' @param nvec A vector containing the observed genotype counts,
#' where \code{nvec[[i]]} is the number of individuals with genotype
#' \code{i-1}. This should be of length \code{ploidy+1}.
#' @param obj What chi-square function should we minimize?
#' \describe{
#' \item{\code{pearson}}{\deqn{\sum (o-e)^2/e}}
#' \item{\code{g}}{\deqn{2\sum o \log(o/2)}}
#' \item{\code{neyman}}{\deqn{\sum (o-e)^2/o}}
#' }
#' See Berkson (1980) for a description.
#' @param ngen The number of generations of random mating of current
#' empirical frequencies to compare against.
#'
#' @return A list with the following elements
#' \describe{
#' \item{\code{alpha}}{The estimated double reduction parameter(s).
#' In diploids, this value is \code{NULL}.}
#' \item{\code{chisq_hwe}}{The chi-square test statistic for testing
#' against the null of equilibrium.}
#' \item{\code{df_hwe}}{The degrees of freedom associated with
#' \code{chisq_hwe}.}
#' \item{\code{p_hwe}}{The p-value against the null of equilibrium.}
#' \item{\code{chisq_ndr}}{The chi-square test statistic for
#' testing against the null of no double reduction (conditional
#' on equilibrium). In diploids, this value is \code{NULL}.}
#' \item{\code{df_ndr}}{The degrees of freedom associated with
#' \code{chisq_ndr}.}
#' \item{\code{p_ndr}}{The p-value against the null of no double
#' reduction (conditional on equilibrium). In diploids, this value
#' is \code{NULL}.}
#' }
#'
#' @author David Gerard
#'
#' @references
#' \itemize{
#' \item{Agresti, A., & Coull, B. A. (1998). Approximate is better than "exact" for interval estimation of binomial proportions. The American Statistician, 52(2), 119-126. \doi{10.1080/00031305.1998.10480550}}
#' \item{Berkson, J. (1980). Minimum chi-square, not maximum likelihood!. The Annals of Statistics, 8(3), 457-487. \doi{10.1214/aos/1176345003}}
#' \item{Huang, K., Wang, T., Dunn, D. W., Zhang, P., Cao, X., Liu, R., & Li, B. (2019). Genotypic frequencies at equilibrium for polysomic inheritance under double-reduction. G3: Genes, Genomes, Genetics, 9(5), 1693-1706. \doi{10.1534/g3.119.400132}}
#' }
#'
#' @examples
#' ## Diploid at various levels of deviation from HWE
#' hwemom(c(10, 20, 10))
#' hwemom(c(12, 16, 12))
#' hwemom(c(14, 12, 14))
#'
#' ## Tetraploid with strong deviation from HWE
#' nvec <- c(25, 0, 0, 0, 25)
#' hwemom(nvec)
#'
#' ## Hexaploid with exact frequencies at HWE
#' nvec <- round(hwefreq(r = 0.5,
#' alpha = 0.2,
#' ploidy = 6,
#' niter = 100,
#' tol = -Inf) * 100)
#' hwemom(nvec)
#'
#' ## Octoploid case with exact frequencies at HWE
#' nvec <- round(hwefreq(r = 0.5,
#' alpha = c(0.1, 0.01),
#' ploidy = 8,
#' niter = 100,
#' tol = -Inf) * 1000)
#' hwemom(nvec)
#'
#' @noRd
hwemom <- function(nvec,
obj = c("g", "pearson", "neyman"),
ngen = 1) {
## Check parameters ----
ploidy <- length(nvec) - 1
stopifnot(ploidy %% 2 == 0, ploidy > 0)
stopifnot(nvec >= 0)
stopifnot(is.vector(nvec))
stopifnot(length(ngen) == 1, ngen >= 1)
ibdr <- floor(ploidy / 4)
obj <- match.arg(obj)
## Return NA's if too few counts ----
if (sum(nvec > 0.5) < ibdr + 1) {
retlist <- list()
retlist <- list(chisq_hwe = NA_real_,
df_hwe = NA_real_,
p_hwe = NA_real_,
alpha = rep(NA_real_, times = ibdr),
chisq_ndr = NA_real_,
df_ndr = NA_real_,
p_ndr = NA_real_)
retlist$r <- sum(0:ploidy * nvec) / (ploidy * sum(nvec))
return(retlist)
}
## Choose objective function ----
if (obj == "pearson") {
divfun <- pearsondiv
if (ngen == 1) {
grfun <- dpearsondiv_dalpha
} else {
grfun <- NULL
}
} else if (obj == "g") {
divfun <- gdiv
if (ngen == 1) {
grfun <- dgdiv_dalpha
} else {
grfun <- NULL
}
} else if (obj == "neyman") {
divfun <- neymandiv
if (ngen == 1) {
grfun <- dneymandiv_dalpha
} else {
grfun <- NULL
}
}
## tell folks to use other stuff ----
if (ploidy == 4) {
message("You should use `hwetetra()` for tetraploids")
} else if (ploidy == 2) {
message("Don't use this function. There are far better packages for diploids.")
}
minval <- sqrt(.Machine$double.eps) ## minimum DR to explore
## optimize ----
if (ibdr == 0) {
## Diploid: Just return chi-square
chisq <- divfun(nvec = nvec, alpha = NULL, ngen = ngen)
pval <- stats::pchisq(q = chisq,
df = ploidy - ibdr - 1,
lower.tail = FALSE)
phat <- sum(0:ploidy * nvec) / (ploidy * sum(nvec))
retlist <- list(p = c(1 - phat, phat),
chisq_hwe = chisq,
df_hwe = ploidy - ibdr - 1,
p_hwe = pval,
alpha = NULL,
chisq_ndr = NULL,
df_ndr = NULL,
p_ndr = NULL)
} else if (ibdr == 1) {
## Tetraploid or Hexaploid: Use Brent's method
upper_alpha <- drbounds(ploidy = ploidy)
oout <- stats::optim(par = minval,
fn = divfun,
method = "Brent",
lower = minval,
upper = upper_alpha,
nvec = nvec,
ngen = ngen)
pval_hwe <- stats::pchisq(q = oout$value,
df = ploidy - ibdr - 1,
lower.tail = FALSE)
chisq_null <- divfun(nvec = nvec,
alpha = rep(0, length.out = ibdr),
ngen = ngen)
chisq_alpha <- (chisq_null - oout$value)
pval_alpha <- stats::pchisq(q = chisq_alpha,
df = ibdr,
lower.tail = FALSE) / 2
retlist <- list(chisq_hwe = oout$value,
df_hwe = ploidy - ibdr - 1,
p_hwe = pval_hwe,
alpha = oout$par,
chisq_ndr = chisq_alpha,
df_ndr = ibdr,
p_ndr = pval_alpha)
} else {
## Higher Ploidy: Use L-BFGS-B using boundaries from CES model
upper_alpha <- drbounds(ploidy = ploidy)
oout <- stats::optim(par = rep(minval, ibdr),
fn = divfun,
gr = grfun,
method = "L-BFGS-B",
lower = rep(minval, ibdr),
upper = upper_alpha,
nvec = nvec,
ngen = ngen)
alpha <- oout$par
pval_hwe <- stats::pchisq(q = oout$value,
df = ploidy - ibdr - 1,
lower.tail = FALSE)
chisq_null <- divfun(nvec = nvec,
alpha = rep(0, length.out = ibdr),
ngen = ngen)
chisq_alpha <- (chisq_null - oout$value)
pval_alpha <- stats::pchisq(q = chisq_alpha,
df = ibdr,
lower.tail = FALSE) / 2
retlist <- list(chisq_hwe = oout$value,
df_hwe = ploidy - ibdr - 1,
p_hwe = pval_hwe,
alpha = alpha,
chisq_ndr = chisq_alpha,
df_ndr = ibdr,
p_ndr = pval_alpha)
}
## get estimate of allele frequency ----
retlist$r <- sum(0:ploidy * nvec) / (ploidy * sum(nvec))
return(retlist)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.