R/lrt_gknown.R

Defines functions lrt_dr_npp_g4 obj_dr_npp lrt_ndr_pp_g4 obj_ndr_pp lrt_dr_pp_g4 lrt_init obj_dr_pp lrt_ndr_npp_g4 like_gknown_2 like_gknown_3 get_df onbound nzeros nzeros_old is_impossible lrt_men_g4

Documented in like_gknown_2 like_gknown_3 lrt_men_g4

#####################
## Likelihood methods when genotypes are known
#####################

#' Likelihood ratio test for segregation distortion with known genotypes
#'
#' This will run a likelihood ratio test using the genotypes of an F1 population
#' of tetraploids for the null of Mendelian segregation (accounting for double
#' reduction and preferential pairing) against the alternative of
#' segregation distortion. This is when the genotypes are assumed known.
#'
#' @section Impossible genotypes:
#' Some offspring genotype combinations are impossible given the parental
#' genotypes. If these impossible genotypes combinations show up, we return a
#' p-value of 0, a log-likelihood ratio statistic of Infinity, and missing
#' values for all other return items. The impossible genotypes are:
#' \describe{
#'   \item{\code{g1 = 0 && g2 = 0}}{Only offspring genotypes of 0 are possible.}
#'   \item{\code{g1 = 4 && g2 = 4}}{Only offspring genotypes of 4 are possible.}
#'   \item{\code{g1 = 0 && g2 = 4 || g1 == 4 && g2 == 0}}{Only offspring genotypes of 2 are possible.}
#'   \item{\code{g1 = 0 && g2 \%in\% c(1, 2, 3) || g1 = \%in\% c(1, 2, 3) && g2 == 0}}{Only offspring genotypes of 0, 1, and 2 are possible.}
#'   \item{\code{g1 = 4 && g2 \%in\% c(1, 2, 3) || g1 = \%in\% c(1, 2, 3) && g2 == 4}}{Only offspring genotypes of 2, 3, and 4 are possible.}
#' }
#'
#' @section Unidentified parameters:
#' When \code{g1 = 2} or \code{g2 = 2} (or both), the model is not identified
#' and those estimates (\code{alpha}, \code{xi1}, and \code{xi2}) are
#' meaningless. Do NOT interpret them.
#'
#' The estimate of \code{alpha} (double reduction rate) IS identified as
#' long as at least one parent is simplex, and no parent is duplex.
#' However, the estimates of the double reduction rate have extremely high
#' variance.
#'
#' @param x A vector of genotype counts. \code{x[i]} is the number of
#'    offspring with genotype \code{i-1}.
#' @param g1 The genotype of parent 1.
#' @param g2 The genotype of parent 2.
#' @param drbound The maximum rate of double reduction. A default of 1/6
#'     is provided, which is the rate under the complete equational
#'     segregation model of meiosis.
#' @param pp A logical. Should we account for preferential pairing
#'     (\code{TRUE}) or not (\code{FALSE})?
#' @param dr A logical. Should we account for double reduction
#'     (\code{TRUE}) or not (\code{FALSE})?
#' @param alpha If \code{dr = FALSE}, this is the known rate of double
#'     reduction.
#' @param xi1 If \code{pp = FALSE}, this is the known preferential pairing
#'     parameter of parent 1.
#' @param xi2 If \code{pp = FALSE}, this is the known preferential pairing
#'     parameter of parent 2.
#'
#' @return A list with the following elements
#' \describe{
#'   \item{\code{statistic}}{The log-likelihood ratio test statistic.}
#'   \item{\code{df}}{The degrees of freedom.}
#'   \item{\code{p_value}}{The p-value.}
#'   \item{\code{alpha}}{The estimated double reduction rate.}
#'   \item{\code{xi1}}{The estimated preferential pairing parameter of parent 1.}
#'   \item{\code{xi2}}{The estimated preferential pairing parameter of parent 2.}
#' }
#'
#' @author David Gerard
#'
#' @examples
#' set.seed(100)
#' gf <- offspring_gf_2(alpha = 1/12, xi1 = 0.2, xi2 = 0.6, p1 = 1, p2 = 0)
#' x <- offspring_geno(gf = gf, n = 100)
#' lrt_men_g4(x = x, g1 = 1, g2 = 0)
#'
#' @export
lrt_men_g4 <- function(
    x,
    g1,
    g2,
    drbound = 1/6,
    pp = TRUE,
    dr = TRUE,
    alpha = 0,
    xi1 = 1/3,
    xi2 = 1/3) {
  ## check input
  stopifnot(length(x) == 5,
            x >= 0,
            g1 >= 0,
            g1 <= 4,
            g2 >= 0,
            g2 <= 4,
            drbound >= 0,
            drbound <= 1,
            alpha >= 0,
            alpha <= 1,
            xi1 >= 0,
            xi1 <= 1,
            xi2 >= 0,
            xi2 <= 1,
            is.logical(pp),
            is.logical(dr),
            length(g1) == 1,
            length(g2) == 1,
            length(drbound) == 1,
            length(pp) == 1,
            length(dr) == 1,
            length(alpha) == 1,
            length(xi1) == 1,
            length(xi2) == 1)

  TOL <- sqrt(.Machine$double.eps)
  if (is_impossible(x = x, g1 = g1, g2 = g2, dr = dr || alpha > TOL)) {
    ## Check if data are possible under null
    ret <- list(
      statistic = Inf,
      p_value = 0,
      df = NA_real_,
      alpha = NA_real_,
      xi1 = NA_real_,
      xi2 = NA_real_)
    return(ret)
  } else if (sum(x > 0.5) == 1) {
    ## only one genotype, and data conform to it
    ret <- list(
      statistic = 0,
      p_value = 1,
      df = 0,
      alpha = NA_real_,
      xi1 = NA_real_,
      xi2 = NA_real_)
    return(ret)
  }

  if (pp && dr) {
    ret <- lrt_dr_pp_g4(x = x, g1 = g1, g2 = g2, drbound = drbound)
  } else if (pp && !dr) {
    ret <- lrt_ndr_pp_g4(x = x, g1 = g1, g2 = g2, alpha = alpha)
  } else if (!pp && dr) {
    ret <- lrt_dr_npp_g4(x = x, g1 = g1, g2 = g2, drbound = drbound, xi1 = xi1, xi2 = xi2)
  } else {
    ret <- lrt_ndr_npp_g4(x = x, g1 = g1, g2 = g2, alpha = alpha, xi1 = xi1, xi2 = xi2)
  }

  return(ret)
}

#' Checks to see if x is impossible given parental genotypes
#'
#' @param x The offspring genotype counts
#' @param g1 Parent 1's genotype
#' @param g2 Parent 2's genotype
#' @param df Is double reduction possible?
#'
#' @return TRUE if impossible, FALSE if possible
#'
#' @author David Gerard
#'
#' @examples
#' is_impossible(x = c(1, 2, 3, 0, 0), g1 = 0, g2 = 3, dr = TRUE)
#'
#'
#' @noRd
is_impossible <- function(x, g1, g2, dr = TRUE) {
  if (g1 == 0 && g2 == 0) {
    return(!all(x[2:5] == 0))
  } else if (g1 == 4 & g2 == 4) {
    return(!all(x[1:4] == 0))
  } else if (g1 == 0 && g2 == 4 || g1 == 4 && g2 == 0) {
    return(!all(x[c(1, 2, 4, 5)] == 0))
  } else if (dr) {
    if (g1 == 0 && g2 %in% c(1, 2, 3) || g1 %in% c(1, 2, 3) && g2 == 0) {
      return(!all(x[4:5] == 0))
    } else if (g1 == 4 && g2 %in% c(1, 2, 3) || g1 %in% c(1, 2, 3) && g2 == 4) {
      return(!all(x[1:2] == 0))
    }
  } else if (!dr) {
    if (g1 == 0 && g2 == 1 || g1 == 1 && g2 == 0) {
      return(!all(x[3:5] == 0))
    } else if (g1 == 0 && g2 == 2 || g1 == 2 && g2 == 0 || g1 == 1 && g2 == 1) {
      return(!all(x[4:5] == 0))
    } else if (g1 == 0 && g2 == 3 || g1 == 3 && g2 == 0) {
      return(!all(x[c(1, 4, 5)] == 0))
    } else if (g1 == 1 && g2 == 4 || g1 == 4 && g2 == 1) {
      return(!all(x[c(1, 2, 5)] == 0))
    } else if (g1 == 2 && g2 == 4 || g1 == 4 && g2 == 2 || g1 == 3 && g2 == 3) {
      return(!all(x[1:2] == 0))
    } else if (g1 == 3 && g2 == 4 || g1 == 4 && g2 == 3) {
      return(!all(x[1:3] == 0))
    } else if (g1 == 1 && g2 == 2 || g1 == 2 && g2 == 1) {
      return(!all(x[5] == 0))
    } else if (g1 == 1 && g2 == 3 || g1 == 3 && g2 == 1) {
      return(!all(x[c(1, 5)] == 0))
    } else if (g1 == 2 && g2 == 3 || g1 == 3 && g2 == 2) {
      return(!all(x[1] == 0))
    }
  }

  return(FALSE)
}

#' Returns number of theoretical zeros, to adjust df in lrt when genotypes are known
#'
#' @inheritParams is_impossible
#'
#' @noRd
nzeros_old <- function(g1, g2, dr = TRUE) {
  if (g1 == 0 && g2 == 0) {
    return(4)
  } else if (g1 == 4 & g2 == 4) {
    return(4)
  } else if (g1 == 0 && g2 == 4 || g1 == 4 && g2 == 0) {
    return(4)
  } else if (dr) {
    if (g1 == 0 && g2 %in% c(1, 2, 3) || g1 %in% c(1, 2, 3) && g2 == 0) {
      return(2)
    } else if (g1 == 4 && g2 %in% c(1, 2, 3) || g1 %in% c(1, 2, 3) && g2 == 4) {
      return(2)
    }
  } else if (!dr) {
    if (g1 == 0 && g2 == 1 || g1 == 1 && g2 == 0) {
      return(3)
    } else if (g1 == 0 && g2 == 2 || g1 == 2 && g2 == 0 || g1 == 1 && g2 == 1) {
      return(2)
    } else if (g1 == 0 && g2 == 3 || g1 == 3 && g2 == 0) {
      return(3)
    } else if (g1 == 1 && g2 == 4 || g1 == 4 && g2 == 1) {
      return(3)
    } else if (g1 == 2 && g2 == 4 || g1 == 4 && g2 == 2 || g1 == 3 && g2 == 3) {
      return(2)
    } else if (g1 == 3 && g2 == 4 || g1 == 4 && g2 == 3) {
      return(3)
    } else if (g1 == 1 && g2 == 2 || g1 == 2 && g2 == 1) {
      return(1)
    } else if (g1 == 1 && g2 == 3 || g1 == 3 && g2 == 1) {
      return(2)
    } else if (g1 == 2 && g2 == 3 || g1 == 3 && g2 == 2) {
      return(1)
    }
  }

  return(0)
}

#' Unlike nzeros_old, this one will check empirically for zeros
#'
#' @param g1 parent 1's genotype
#' @param g2 parent 2's genotype
#' @param alpha The double reduction rate (estimated or otherwise)
#' @param xi1 Parent 1's preferential pairing parameter (estimated or otherwise).
#' @param xi1 Parent 2's preferential pairing parameter (estimated or otherwise).
#'
#' @author David Gerard
#'
#' @noRd
nzeros <- function(g1, g2, alpha, xi1, xi2) {
  gf <- offspring_gf_2(alpha = alpha, xi1 = xi1, xi2 = xi2, p1 = g1, p2 = g2)
  TOL <- 1e-6
  return(sum(gf < TOL))
}


#' Tests if null parameters are estimated on boundary of parameterspace
#'
#' @inheritParams get_df
#'
#' @author David Gerard
#'
#' @return Either \code{TRUE} if the parameter is on the boundary, and
#' \code{FALSE} otherwise.
#'
#' @noRd
onbound <- function(g1, g2, alpha, xi1, xi2, dr, pp, drbound = 1/6, TOL = 1e-5) {
  ob <- TRUE
  if (dr && (g1 %in% c(1, 3)) && (g2 %in% c(1, 3))) {
    if (alpha > TOL && alpha < drbound - TOL) {
      ob <- FALSE
    } else {
      ## do nothing
    }
  } else if (dr && ((g1 %in% c(0, 4) && g2 %in% c(1, 3)) || (g1 %in% c(1, 3) && g2 %in% c(0, 4)))) {
    if (alpha > TOL && alpha < drbound - TOL) {
      ob <- FALSE
    } else {
      ## do nothing
    }
  } else if (dr && !pp && (g1 == 2 || g2 == 2)) {
    if (alpha > TOL && alpha < drbound - TOL) {
      ob <- FALSE
    } else {
      ## do nothing
    }
  } else if (pp && !dr && g1 == 2 && g2 != 2) {
    if (xi1 > TOL && xi1 < 1 - TOL) {
      ob <- FALSE
    } else {
      ## do nothing
    }
  } else if (pp && !dr && g1 != 2 && g2 == 2) {
    if (xi2 > TOL && xi2 < 1 - TOL) {
      ob <- FALSE
    } else {
      ## do nothing
    }
  } else if (pp && !dr && g1 == 2 && g2 == 2) {
    if ((xi1 < TOL || xi1 > 1 - TOL) && (xi2 < TOL || xi2 > 1 - TOL)) {
      ## do nothing
    } else {
      ob <- FALSE
    }
  } else if (pp && dr && g1 == 2 && g2 %in% c(0, 1, 3, 4)) {
    if (alpha < TOL && (xi1 < TOL || xi1 > 1 - TOL)) {
      ## do nothing
    } else if (alpha > drbound - TOL) {
      ## do nothing
    } else {
      ob <- FALSE
    }
  } else if (pp && dr && g1 %in% c(0, 1, 3, 4) && g2 == 2) {
    if (alpha < TOL && (xi2 < TOL || xi2 > 1 - TOL)) {
      ## do nothing
    } else if (alpha > drbound - TOL) {
      ## do nothing
    } else {
      ob <- FALSE
    }
  } else if (pp && dr && g1 == 2 && g2 == 2) {
    if (alpha < TOL && (xi1 < TOL || xi1 > 1 - TOL)) {
      ## do nothing
    } else if (alpha < TOL && (xi2 < TOL || xi2 > 1 - TOL)) {
      ## do nothing
    } else if (alpha > drbound - TOL) {
      ## do nothing
    } else {
      ob <- FALSE
    }
  } else {
    ## do nothing
    ## should only get here is !pp && !dr
  }

  return(ob)
}

#' Experimental calculation of degrees of freedom
#'
#' The complicated non-identifiability actually makes calculating the degrees
#' of freedom really hard. This function was created by experimentally running
#' through every possible scenario to see what is the empirical null
#' distribution. We are effectively using the strategy of Susko (2013), where
#' we estimate the degrees of freedom. The estimated number of degrees of freedom
#' under the alternative is the number of non-zero elements minus 1. The number
#' under the null is 1 if the parameter is not estimated on the boundary, and
#' zero otherwise.
#'
#' @param g1 Parent 1's genotype
#' @param g2 Parent 2's genotype
#' @param alpha The estimated double reduction rate
#' @param xi1 The estimated preferential pairing parameter for parent 1.
#' @param xi2 The estimated preferential pairing parmaeter for parent 2.
#' @param TOL tolerance to check boundary of parameter space.
#' @param dr Was double reduction being estimated?
#' @param pp Was preferential pairing being estimated?
#'
#' @author David Gerard
#'
#' @references
#' \itemize{
#'   \item{Susko, E. (2013). Likelihood ratio tests with boundary constraints using data-dependent degrees of freedom. \emph{Biometrika}, 100(4), 1019-1023. \doi{https://doi.org/10.1093/biomet/ast032}}
#' }
#'
#' @noRd
get_df <- function(g1, g2, alpha, xi1, xi2, dr, pp, drbound = 1/6, TOL = 1e-5) {
  nz <- nzeros(g1 = g1, g2 = g2, alpha = alpha, xi1 = xi1, xi2 = xi2)

  ## Number of parameters under alternative
  df_alt <- 4 - nz ## Number of non-zeros (5 - nz) minuz 1, so 4 - nz

  df_null <- !onbound(
    g1 = g1,
    g2 = g2,
    alpha = alpha,
    xi1 = xi1,
    xi2 = xi2,
    dr = dr,
    pp = pp,
    drbound = drbound,
    TOL = TOL)

  df <- df_alt - df_null

  df <- max(df, 1)

  return(df)
}


#' Likelihood under three parameter model when genotypes are known
#'
#' This is under the three parameter model.
#'
#' @param x A vector of length 5. \code{x[i]} is the count of individuals with
#'     genotype \code{i-1}.
#' @param tau The probability of quadrivalent formation.
#' @param beta The probability of double reduction given quadrivalent formation.
#' @param gamma1 The probability of AA_aa pairing for parent 1.
#' @param gamma2 The probability of AA_aa pairing for parent 2.
#' @param g1 Parent 1's genotype.
#' @param g2 Parent 2's genotype.
#' @param log_p A logical. Should we return the log likelihood or not?
#' @param pen A tiny penalty to help with numerical stability
#'
#' @return The (log) likelihood.
#'
#' @examples
#' x <- c(1, 4, 5, 3, 1)
#' tau <- 0.5
#' beta <- 0.1
#' gamma1 <- 0.5
#' gamma2 <- 0.3
#' g1 <- 1
#' g2 <- 2
#' like_gknown_3(
#'   x = x,
#'   tau = tau,
#'   beta = beta,
#'   gamma1 = gamma1,
#'   gamma2 = gamma2,
#'   g1 = g1,
#'   g2 = g2)
#'
#' @author David Gerard
#'
#' @export
like_gknown_3 <- function(x, tau, beta, gamma1, gamma2, g1, g2, log_p = TRUE, pen = 0) {
  stopifnot(length(x) == 5)
  stopifnot(tau >= 0, tau <= 1,
            beta >= 0, beta <= 1,
            gamma1 >= 0, gamma1 <= 1,
            x >= 0,
            g1 >= 0, g1 <= 4,
            g2 >= 0, g2 <= 4)

  gf <- offspring_gf_3(
    tau = tau,
    beta = beta,
    gamma1 = gamma1,
    gamma2 = gamma2,
    p1 = g1,
    p2 = g2)

  gf <- gf + pen
  gf <- gf / sum(gf)

  return(stats::dmultinom(x = x, prob = gf, log = log_p))
}

#' Likelihood under three parameter model when genotypes are known
#'
#' This is under the two parameter model.
#'
#' @param x A vector of length 5. \code{x[i]} is the count of individuals with
#'     genotype \code{i-1}.
#' @param alpha The double reduction rate.
#' @param xi1 The preferential pairing parameter of parent 1.
#' @param xi2 The preferential pairing parameter of parent 2.
#' @param g1 Parent 1's genotype.
#' @param g2 Parent 2's genotype.
#' @param log_p A logical. Should we return the log likelihood or not?
#' @param pen A tiny penalty to help with numerical stability
#'
#' @return The (log) likelihood.
#'
#' @examples
#' x <- c(1, 4, 5, 3, 1)
#' alpha <- 0.01
#' xi1 <- 0.5
#' xi2 <- 0.3
#' g1 <- 1
#' g2 <- 2
#' like_gknown_2(
#'   x = x,
#'   alpha = alpha,
#'   xi1 = xi1,
#'   xi2 = xi2,
#'   g1 = g1,
#'   g2 = g2)
#'
#' @author David Gerard
#'
#' @export
like_gknown_2 <- function(x, alpha, xi1, xi2, g1, g2, log_p = TRUE, pen = 0) {
  stopifnot(length(x) == 5)
  stopifnot(alpha >= 0, alpha <= 1,
            xi1 >= 0, xi1 <= 1,
            xi2 >= 0, xi2 <= 1,
            x >= 0,
            g1 >= 0, g1 <= 4,
            g2 >= 0, g2 <= 4)
  gf <- offspring_gf_2(
    alpha = alpha,
    xi1 = xi1,
    xi2 = xi2,
    p1 = g1,
    p2 = g2)

  gf <- gf + pen
  gf <- gf / sum(gf)

  return(stats::dmultinom(x = x, prob = gf, log = log_p))
}

#' Likelihood ratio test assuming no double reduction and no preferential pairing
#'
#' This is when all genotypes are known.
#'
#' @inheritParams like_gknown_3
#' @param alpha The known rate of double reduction.
#' @param xi1 The known preferential pairing parameter of parent 1.
#' @param xi2 The known preferential pairing parameter of parent 2.
#'
#' @return A list of length three with the following elements
#' \describe{
#'   \item{\code{statistic}}{The likelihood ratio test statistic.}
#'   \item{\code{df}}{The degrees of freedom.}
#'   \item{\code{p_value}}{The p-value.}
#' }
#'
#' @author David Gerard
#'
#' @noRd
lrt_ndr_npp_g4 <- function(x, g1, g2, alpha = 0, xi1 = 1/3, xi2 = 1/3) {

  ## check impossible genotype ----
  TOL <- sqrt(.Machine$double.eps)
  if (is_impossible(x = x, g1 = g1, g2 = g2, dr = alpha > TOL)) {
    ret <- list(
      statistic = Inf,
      p_value = 0,
      df = NA_real_,
      alpha = NA_real_,
      xi1 = NA_real_,
      xi2 = NA_real_)
    return(ret)
  }


  ## Run test
  l1 <- stats::dmultinom(x = x, prob = x / sum(x), log = TRUE)
  l0 <- like_gknown_2(
    x = x,
    alpha = alpha,
    xi1 = xi1,
    xi2 = xi2,
    g1 = g1,
    g2 = g2,
    log_p = TRUE)

  llr <- -2 * (l0 - l1)

  ## Get degrees of freedom ----
  df <- get_df(g1 = g1, g2 = g2, alpha = alpha, xi1 = xi1, xi2 = xi2, dr = FALSE, pp = FALSE)

  p_value <- stats::pchisq(q = llr, df = df, lower.tail = FALSE, log.p = FALSE)

  ret <- list(
    statistic = llr,
    p_value = p_value,
    df = df,
    alpha = alpha,
    xi1 = xi1,
    xi2 = xi2)

  return(ret)
}

## Methods when dr and pp are not known ---------------------------------------

#' Objective when estimating both double reduction and preferential pairing
#'
#' @param par either just alpha (when no parent genotype is 2)
#'   or (tau, beta, gamma1) or (tau, beta, gamma2) or
#'   (tau, beta, gamma1, gamma2).
#' @param x offspring genotype counts
#' @param g1 first parent genotype
#' @param g2 second parent genotype
#' @param pen A tiny penalty to help with numerical stability
#'
#' @author David Gerard
#'
#' @noRd
obj_dr_pp <- function(par, x, g1, g2, pen = 1e-6) {
  par[par < 0] <- 0 ## to deal with -1e-18 etc
  par[par > 1] <- 1
  if (g1 != 2 && g2 != 2) {
    stopifnot(length(par) == 1)
    obj <- like_gknown_3(
      x = x,
      tau = 1,
      beta = par[[1]],
      gamma1 = 1/3,
      gamma2 = 1/3,
      g1 = g1,
      g2 = g2,
      log_p = TRUE,
      pen = pen)
  } else if (g1 == 2 && g2 != 2){
    stopifnot(length(par) == 3)
    obj <- like_gknown_3(
      x = x,
      tau = par[[1]],
      beta = par[[2]],
      gamma1 = par[[3]],
      gamma2 = 1/3,
      g1 = g1,
      g2 = g2,
      log_p = TRUE,
      pen = pen)
  } else if (g1 != 2 && g2 == 2){
    stopifnot(length(par) == 3)
    obj <- like_gknown_3(
      x = x,
      tau = par[[1]],
      beta = par[[2]],
      gamma1 = 1/3,
      gamma2 = par[[3]],
      g1 = g1,
      g2 = g2,
      log_p = TRUE,
      pen = pen)
  } else {
    stopifnot(length(par) == 4)
    obj <- like_gknown_3(
      x = x,
      tau = par[[1]],
      beta = par[[2]],
      gamma1 = par[[3]],
      gamma2 = par[[4]],
      g1 = g1,
      g2 = g2,
      log_p = TRUE,
      pen = pen)
  }
  return(obj)
}

#' Initial parameter values
#'
#' @param g1 parent 1 genotype
#' @param g2 parent 2 genotype.
#' @param drbound The maximum double reduction rate
#' @param type How should we initialize? "random" or "half"?
#' @param fudge How much to add to the lower bound and subtract from
#'     the upper bound.
#'
#' @return A list of length 3. The first element is the initial values,
#'     the second is the lower bound of parameters, the third element
#'     is the upper bound of parameters.
#'
#' @author David Gerard
#'
#' @noRd
lrt_init <- function(g1, g2, drbound = 1/6, type = c("random", "half"), fudge = 0) {
  type <- match.arg(type)
  if (type == "random") {
    mult <- stats::runif(4)
  } else if (type == "half") {
    mult <- c(0.5, 0.5, 1/3, 1/3)
  }

  if (g1 != 2 && g2 != 2) {
    out <- list(
      par = c(alpha = drbound * mult[[1]]),
      lower = fudge,
      upper = drbound)
  } else if (g1 == 2 & g2 != 2) {
    out <- list(
      par = c(tau = mult[[1]], beta = drbound * mult[[2]], gamma1 = mult[[3]]),
      lower = rep(fudge, length.out = 3),
      upper = c(1 - fudge, drbound, 1 - fudge))
  } else if (g1 != 2 & g2 == 2) {
    out <- list(
      par = c(tau = mult[[1]], beta = drbound * mult[[2]], gamma2 = mult[[3]]),
      lower = rep(fudge, length.out = 3),
      upper = c(1 - fudge, drbound, 1 - fudge))
  } else {
    out <- list(
      par = c(tau = mult[[1]], beta = drbound * mult[[2]], gamma1 = mult[[3]], gamma2 = mult[[4]]),
      lower = rep(fudge, length.out = 4),
      upper = c(1 - fudge, drbound, 1 - fudge, 1 - fudge))
  }
  return(out)
}

#' LRT when both double reduction and preferential pairing are not known.
#'
#' All genotypes are known.
#'
#' @inheritParams like_gknown_3
#' @param drbound the upper bound on the double reduction rate.
#' @param ntry The number of times to run the gradient ascent.
#'
#' @author David Gerard
#'
#' @examples
#' set.seed(1000)
#' alpha <- 1/12
#' xi1 <- 1/3
#' xi2 <- 1/3
#' n <- 1000
#'
#' gf <- offspring_gf_2(alpha = alpha, xi1 = xi1, xi2 = xi2, p1 = 0, p2 = 0)
#' x <- offspring_geno(gf = gf, n = n)
#' lrt_dr_pp_g4(x = x, g1 = 0, g2 = 0)
#'
#' gf <- offspring_gf_2(alpha = alpha, xi1 = xi1, xi2 = xi2, p1 = 1, p2 = 0)
#' x <- offspring_geno(gf = gf, n = n)
#' lrt_dr_pp_g4(x = x, g1 = 1, g2 = 0)
#'
#' gf <- offspring_gf_2(alpha = alpha, xi1 = xi1, xi2 = xi2, p1 = 1, p2 = 1)
#' x <- offspring_geno(gf = gf, n = n)
#' lrt_dr_pp_g4(x = x, g1 = 1, g2 = 1)
#'
#' gf <- offspring_gf_2(alpha = alpha, xi1 = xi1, xi2 = xi2, p1 = 1, p2 = 2)
#' x <- offspring_geno(gf = gf, n = n)
#' lrt_dr_pp_g4(x = x, g1 = 1, g2 = 2)
#'
#' gf <- offspring_gf_2(alpha = alpha, xi1 = xi1, xi2 = xi2, p1 = 1, p2 = 3)
#' x <- offspring_geno(gf = gf, n = n)
#' lrt_dr_pp_g4(x = x, g1 = 1, g2 = 3)
#'
#' gf <- offspring_gf_2(alpha = alpha, xi1 = xi1, xi2 = xi2, p1 = 1, p2 = 4)
#' x <- offspring_geno(gf = gf, n = n)
#' lrt_dr_pp_g4(x = x, g1 = 1, g2 = 4)
#'
#' gf <- offspring_gf_2(alpha = alpha, xi1 = xi1, xi2 = xi2, p1 = 2, p2 = 0)
#' x <- offspring_geno(gf = gf, n = n)
#' lrt_dr_pp_g4(x = x, g1 = 2, g2 = 0)
#'
#' gf <- offspring_gf_2(alpha = alpha, xi1 = xi1, xi2 = xi2, p1 = 2, p2 = 1)
#' x <- offspring_geno(gf = gf, n = n)
#' lrt_dr_pp_g4(x = x, g1 = 2, g2 = 1)
#'
#' gf <- offspring_gf_2(alpha = alpha, xi1 = xi1, xi2 = xi2, p1 = 2, p2 = 2)
#' x <- offspring_geno(gf = gf, n = n)
#' lrt_dr_pp_g4(x = x, g1 = 2, g2 = 2)
#'
#' gf <- offspring_gf_2(alpha = alpha, xi1 = xi1, xi2 = xi2, p1 = 2, p2 = 3)
#' x <- offspring_geno(gf = gf, n = n)
#' lrt_dr_pp_g4(x = x, g1 = 2, g2 = 3)
#'
#' gf <- offspring_gf_2(alpha = alpha, xi1 = xi1, xi2 = xi2, p1 = 2, p2 = 4)
#' x <- offspring_geno(gf = gf, n = n)
#' lrt_dr_pp_g4(x = x, g1 = 2, g2 = 4)
#'
#' gf <- offspring_gf_2(alpha = alpha, xi1 = xi1, xi2 = xi2, p1 = 3, p2 = 0)
#' x <- offspring_geno(gf = gf, n = n)
#' lrt_dr_pp_g4(x = x, g1 = 3, g2 = 0)
#'
#' gf <- offspring_gf_2(alpha = alpha, xi1 = xi1, xi2 = xi2, p1 = 3, p2 = 1)
#' x <- offspring_geno(gf = gf, n = n)
#' lrt_dr_pp_g4(x = x, g1 = 3, g2 = 1)
#'
#' gf <- offspring_gf_2(alpha = alpha, xi1 = xi1, xi2 = xi2, p1 = 3, p2 = 2)
#' x <- offspring_geno(gf = gf, n = n)
#' lrt_dr_pp_g4(x = x, g1 = 3, g2 = 2)
#'
#' gf <- offspring_gf_2(alpha = alpha, xi1 = xi1, xi2 = xi2, p1 = 3, p2 = 3)
#' x <- offspring_geno(gf = gf, n = n)
#' lrt_dr_pp_g4(x = x, g1 = 3, g2 = 3)
#'
#' gf <- offspring_gf_2(alpha = alpha, xi1 = xi1, xi2 = xi2, p1 = 3, p2 = 4)
#' x <- offspring_geno(gf = gf, n = n)
#' lrt_dr_pp_g4(x = x, g1 = 3, g2 = 4)
#'
#' gf <- offspring_gf_2(alpha = alpha, xi1 = xi1, xi2 = xi2, p1 = 4, p2 = 4)
#' x <- offspring_geno(gf = gf, n = n)
#' lrt_dr_pp_g4(x = x, g1 = 4, g2 = 4)
#'
#' @noRd
lrt_dr_pp_g4 <- function(x, g1, g2, drbound = 1/6, ntry = 5) {

  if (g1 %in% c(0, 4) && g2 %in% c(0, 4)) {
    ret <- lrt_ndr_npp_g4(x = x, g1 = g1, g2 = g2)
    return(ret)
  }

  ## Deal with impossible values under null ----
  if (is_impossible(x = x, g1 = g1, g2 = g2, dr = TRUE)) {
    ret <- list(
      statistic = Inf,
      p_value = 0,
      df = NA_real_,
      alpha = NA_real_,
      xi1 = NA_real_,
      xi2 = NA_real_)
    return(ret)
  }

  ## max likelihood under alt
  l1 <- stats::dmultinom(x = x, prob = x / sum(x), log = TRUE)

  ## Find MLE under null
  l0 <- -Inf
  bout <- NULL
  for (i in seq_len(ntry)) {
    params <- lrt_init(g1 = g1, g2 = g2, drbound = drbound, type = "random")
    method <- ifelse(length(params$par) == 1, "Brent", "L-BFGS-B")
    oout <- stats::optim(
      par = params$par,
      fn = obj_dr_pp,
      method = method,
      lower = params$lower,
      upper = params$upper,
      control = list(fnscale = -1),
      x = x,
      g1 = g1,
      g2 = g2)

    if (oout$value > l0) {
      l0 <- oout$value
      bout <- oout
    }
  }

  ## Get df and calculate p-value
  if (g1 != 2 & g2 != 2) {
    alpha <- bout$par[[1]]
    xi1 <- NA_real_
    xi2 <- NA_real_
  } else if (g1 == 2 & g2 != 2) {
    tau <- bout$par[[1]]
    beta <- bout$par[[2]]
    gamma1 <- bout$par[[3]]
    two <- three_to_two(tau = tau, beta = beta, gamma = gamma1)
    alpha <- two[[1]]
    xi1 <- two[[2]]
    xi2 <- NA_real_
  } else if (g1 != 2 & g2 == 2) {
    tau <- bout$par[[1]]
    beta <- bout$par[[2]]
    gamma2 <- bout$par[[3]]
    two <- three_to_two(tau = tau, beta = beta, gamma = gamma2)
    alpha <- two[[1]]
    xi1 <- NA_real_
    xi2 <- two[[2]]
  } else if (g1 == 2 & g2 == 2) {
    tau <- bout$par[[1]]
    beta <- bout$par[[2]]
    gamma1 <- bout$par[[3]]
    gamma2 <- bout$par[[4]]
    two <- three_to_two(tau = tau, beta = beta, gamma = gamma1)
    alpha <- two[[1]]
    xi1 <- two[[2]]
    two <- three_to_two(tau = tau, beta = beta, gamma = gamma2)
    stopifnot(alpha == two[[1]])
    xi2 <- two[[2]]
  }

  ## Get degrees of freedom ----
  df <- get_df(g1 = g1, g2 = g2, alpha = alpha, xi1 = xi1, xi2 = xi2, dr = TRUE, pp = TRUE, drbound = drbound)

  llr <- -2 * (l0 - l1)

  p_value <- stats::pchisq(q = llr, df = df, lower.tail = FALSE)

  ret <-  list(
    statistic = llr,
    p_value = p_value,
    df = df,
    alpha = alpha,
    xi1 = xi1,
    xi2 = xi2)

  return(ret)
}

## No dr and estimate pp ------------------------------------------------------

#' Objective for lrt_ndr_pp_g4
#'
#' @inheritParams obj_dr_pp
#' @param par if g1 != 2 or g2 != 2, then should be xi1 (or xi2). If
#'     both g1 == 2 and g2 == 2, then first element is xi1 and second
#'     element is xi2.
#' @param alpha The known rate of double reduction.
#' @param pen A tiny penalty to help with numerical stability.
#'
#' @author David Gerard
#'
#' @noRd
obj_ndr_pp <- function(par, x, g1, g2, alpha = 0, pen = 1e-6) {
  par[par < 0] <- 0 ## to deal with -1e-18 etc
  par[par > 1] <- 1
  if (g1 != 2 && g2 != 2) {
    stop("obj_ndr_pp: have to have g1 == 2 or g2 == 2")
  } else if (g1 == 2 && g2 != 2) {
    stopifnot(length(par) == 1)
    obj <- like_gknown_2(
      x = x,
      alpha = alpha,
      xi1 = par[[1]],
      xi2 = 1/3,
      g1 = g1,
      g2 = g2,
      log_p = TRUE,
      pen = pen)
  } else if (g1 != 2 && g2 == 2) {
    stopifnot(length(par) == 1)
    obj <- like_gknown_2(
      x = x,
      alpha = alpha,
      xi1 = 1/3,
      xi2 = par[[1]],
      g1 = g1,
      g2 = g2,
      log_p = TRUE,
      pen = pen)
  } else if (g1 == 2 && g2 == 2) {
    stopifnot(length(par) == 2)
    obj <- like_gknown_2(
      x = x,
      alpha = alpha,
      xi1 = par[[1]],
      xi2 = par[[2]],
      g1 = g1,
      g2 = g2,
      log_p = TRUE,
      pen = pen)
  }
  return(obj)
}

#' LRT when double reduction is known, but not preferential pairing.
#'
#' All genotypes are known.
#'
#' @inherit lrt_dr_pp_g4
#' @param alpha The known rate of double reduction.
#' @param fudge How much to add to lower bound or subtract from upper bound.
#'
#' @author David Gerard
#'
#' @examples
#' gf <- offspring_gf_2(alpha = 0, xi1 = 1/3, xi2 = 1/3, p1 = 0, p2 = 2)
#' x <- offspring_geno(gf = gf, n = 100)
#' lrt_ndr_pp_g4(x = x, g1 = 0, g2 = 2)
#'
#' gf <- offspring_gf_2(alpha = 0, xi1 = 1/3, xi2 = 1/3, p1 = 1, p2 = 2)
#' x <- offspring_geno(gf = gf, n = 100)
#' lrt_ndr_pp_g4(x = x, g1 = 1, g2 = 2)
#'
#' gf <- offspring_gf_2(alpha = 0, xi1 = 1/3, xi2 = 1/3, p1 = 2, p2 = 2)
#' x <- offspring_geno(gf = gf, n = 100)
#' lrt_ndr_pp_g4(x = x, g1 = 2, g2 = 2)
#'
#' gf <- offspring_gf_2(alpha = 0, xi1 = 1/3, xi2 = 1/3, p1 = 3, p2 = 2)
#' x <- offspring_geno(gf = gf, n = 100)
#' lrt_ndr_pp_g4(x = x, g1 = 3, g2 = 2)
#'
#' gf <- offspring_gf_2(alpha = 0, xi1 = 1/3, xi2 = 1/3, p1 = 4, p2 = 2)
#' x <- offspring_geno(gf = gf, n = 100)
#' lrt_ndr_pp_g4(x = x, g1 = 4, g2 = 2)
#'
#' gf <- offspring_gf_2(alpha = 0, xi1 = 1/3, xi2 = 1/3, p1 = 2, p2 = 0)
#' x <- offspring_geno(gf = gf, n = 100)
#' lrt_ndr_pp_g4(x = x, g1 = 2, g2 = 0)
#'
#' gf <- offspring_gf_2(alpha = 0, xi1 = 1/3, xi2 = 1/3, p1 = 2, p2 = 1)
#' x <- offspring_geno(gf = gf, n = 100)
#' lrt_ndr_pp_g4(x = x, g1 = 2, g2 = 1)
#'
#' gf <- offspring_gf_2(alpha = 0, xi1 = 1/3, xi2 = 1/3, p1 = 2, p2 = 3)
#' x <- offspring_geno(gf = gf, n = 100)
#' lrt_ndr_pp_g4(x = x, g1 = 2, g2 = 3)
#'
#' gf <- offspring_gf_2(alpha = 0, xi1 = 1/3, xi2 = 1/3, p1 = 2, p2 = 4)
#' x <- offspring_geno(gf = gf, n = 100)
#' lrt_ndr_pp_g4(x = x, g1 = 2, g2 = 4)
#'
#' @noRd
lrt_ndr_pp_g4 <- function(x, g1, g2, alpha = 0, ntry = 5, fudge = 0) {
  if (g1 != 2 && g2 != 2) {
    return(lrt_ndr_npp_g4(x = x, g1 = g1, g2 = g2, alpha = alpha))
  }

  ## look at impossible genotypes under null ----
  TOL <- sqrt(.Machine$double.eps)
  if (is_impossible(x = x, g1 = g1, g2 = g2, dr = alpha > TOL)) {
    ret <- list(
      statistic = Inf,
      p_value = 0,
      df = NA_real_,
      alpha = NA_real_,
      xi1 = NA_real_,
      xi2 = NA_real_)
    return(ret)
  }

  ## run test ----

  ## max likelihood under alt
  l1 <- stats::dmultinom(x = x, prob = x / sum(x), log = TRUE)

  ## max likelihood under null
  if (g1 == 2 && g2 != 2) {
    oout <- stats::optim(
      par = 1/3,
      fn = obj_ndr_pp,
      lower = fudge,
      upper = 1 - fudge,
      method = "Brent",
      control = list(fnscale = -1),
      x = x,
      alpha = alpha,
      g1 = g1,
      g2 = g2)
    xi1 <- oout$par[[1]]
    xi2 <- NA_real_
  } else if (g1 != 2 && g2 == 2) {
    oout <- stats::optim(
      par = 1/3,
      fn = obj_ndr_pp,
      lower = fudge,
      upper = 1 - fudge,
      method = "Brent",
      control = list(fnscale = -1),
      x = x,
      alpha = alpha,
      g1 = g1,
      g2 = g2)
    xi1 <- NA_real_
    xi2 <- oout$par[[1]]
  } else if (g1 == 2 && g2 == 2) {
    oout <- list()
    oout$value <- -Inf
    for (i in seq_len(ntry)) {
      par_init <- stats::runif(n = 2, min = 0, max = 1)
      bout <- stats::optim(
        par = par_init,
        fn = obj_ndr_pp,
        lower = c(fudge, fudge),
        upper = c(1 - fudge, 1 - fudge),
        method = "L-BFGS-B",
        control = list(fnscale = -1),
        x = x,
        alpha = alpha,
        g1 = g1,
        g2 = g2)
      if (bout$value > oout$value) {
        oout <- bout
      }
    }
    xi1 <- oout$par[[1]]
    xi2 <- oout$par[[2]]
  }
  l0 <- oout$value

  ## Get degrees of freedom ----
  df <- get_df(g1 = g1, g2 = g2, alpha = alpha, xi1 = xi1, xi2 = xi2, dr = FALSE, pp = TRUE)

  llr <- -2 * (l0 - l1)

  p_value <- stats::pchisq(q = llr, df = df, lower.tail = FALSE)

  ret <-  list(
    statistic = llr,
    p_value = p_value,
    df = df,
    alpha = alpha,
    xi1 = xi1,
    xi2 = xi2)

  return(ret)
}

## Estimate DR, fix pp -------------------------------------------------------


#' Wrapper for like_gknnown_2 with rounding at 0 and 1.
#'
#' @inheritParams like_gknown_2
#' @param par Alpha, the double reduction rate
#'
#' @author David Gerard
#'
#' @noRd
obj_dr_npp <- function(par, x, xi1, xi2, g1, g2, pen) {
  par[par < 0] <- 0 ## deal with -1e-18 etc
  par[par > 1] <- 1
  ll <- like_gknown_2(x = x, alpha = par, xi1 = xi1, xi2 = xi2, g1 = g1, g2 = g2, log_p = TRUE, pen = pen)
  return(ll)
}

#' LRT when double reduction is not known, but no preferential pairing.
#'
#' All genotypes are known.
#'
#' @inherit lrt_dr_pp_g4
#' @param xi1 The known preferential pairingn parameter of parent 1.
#' @param xi2 The known preferential pairingn parameter of parent 2.
#' @param fudge How much to add to lower bound or subtract from upper bound.
#'
#' @author David Gerard
#'
#' @examples
#'
#' gf <- offspring_gf_2(alpha = 1/12, xi1 = 1/3, xi2 = 1/3, p1 = 0, p2 = 2)
#' x <- offspring_geno(gf = gf, n = 100)
#' lrt_dr_npp_g4(x = x, g1 = 0, g2 = 2)
#'
#' gf <- offspring_gf_2(alpha = 1/12, xi1 = 1/3, xi2 = 1/3, p1 = 1, p2 = 2)
#' x <- offspring_geno(gf = gf, n = 100)
#' lrt_dr_npp_g4(x = x, g1 = 1, g2 = 2)
#'
#' gf <- offspring_gf_2(alpha = 1/12, xi1 = 1/3, xi2 = 1/3, p1 = 2, p2 = 2)
#' x <- offspring_geno(gf = gf, n = 100)
#' lrt_dr_npp_g4(x = x, g1 = 2, g2 = 2)
#'
#' gf <- offspring_gf_2(alpha = 1/12, xi1 = 1/3, xi2 = 1/3, p1 = 3, p2 = 2)
#' x <- offspring_geno(gf = gf, n = 100)
#' lrt_dr_npp_g4(x = x, g1 = 3, g2 = 2)
#'
#' gf <- offspring_gf_2(alpha = 1/12, xi1 = 1/3, xi2 = 1/3, p1 = 4, p2 = 2)
#' x <- offspring_geno(gf = gf, n = 100)
#' lrt_dr_npp_g4(x = x, g1 = 4, g2 = 2)
#'
#' @noRd
lrt_dr_npp_g4 <- function(x, g1, g2, drbound = 1/6, xi1 = 1/3, xi2 = 1/3, fudge = 0) {
  ## Same as in lrt_dr_pp_g4 when no 2's
  if (g1 != 2 & g2 != 2) {
    return(lrt_dr_pp_g4(x = x, g1 = g1, g2 = g2, drbound = drbound))
  }

  ## Deal with impossible genotypes under null
  if (is_impossible(x = x, g1 = g1, g2 = g2, dr = TRUE)) {
    ret <- list(
      statistic = Inf,
      p_value = 0,
      df = NA_real_,
      alpha = NA_real_,
      xi1 = NA_real_,
      xi2 = NA_real_)
    return(ret)
  }

  ## max likelihood under alt
  l1 <- stats::dmultinom(x = x, prob = x / sum(x), log = TRUE)

  ## max likelihood under null
  oout <- stats::optim(par = drbound / 2,
                       fn = obj_dr_npp,
                       method = "Brent",
                       lower = fudge,
                       upper = drbound,
                       control = list(fnscale = -1),
                       x = x,
                       xi1 = xi1,
                       xi2 = xi2,
                       g1 = g1,
                       g2 = g2,
                       pen = 1e-6)
  l0 <- oout$value
  alpha <- oout$par[[1]]

  ## Get degrees of freedom ----
  df <- get_df(g1 = g1, g2 = g2, alpha = alpha, xi1 = xi1, xi2 = xi2, dr = TRUE, pp = FALSE, drbound = drbound)

  llr <- -2 * (l0 - l1)
  p_value <- stats::pchisq(q = llr, df = df, lower.tail = FALSE)

  ret <- list(
    statistic = llr,
    p_value = p_value,
    df = df,
    alpha = alpha,
    xi1 = xi1,
    xi2 = xi2)

  return(ret)
}

Try the segtest package in your browser

Any scripts or data that you put into this service are public.

segtest documentation built on July 1, 2025, 1:07 a.m.