R/bmdd_wrapper.R
In MicrobiomeStat: Statistical Methods for Microbiome Compositional Data

Documented in bmdd

#' Bimodal Dirichlet Distribution Estimation with Intelligent Method Selection
#'
#' Estimates parameters of the bimodal Dirichlet distribution using a variational
#' EM algorithm. Automatically selects the optimal implementation: high-performance
#' C++ (NLopt) when possible, or R when covariates are needed.
#'
#' @param W A matrix of observed data. For count data, an m-by-n matrix where
#'   m is the number of taxa and n is the number of samples. For proportion data,
#'   the rows should sum to 1.
#' @param type Character string specifying data type: "count" or "proportion".
#'   Default is "count".
#' @param Z Optional matrix of covariates for alpha parameters (taxa effects).
#'   If provided, forces use of R implementation.
#' @param formula.Z Optional formula for Z covariates.
#' @param U Optional matrix of covariates for pi parameters (sample effects).
#'   If provided, forces use of R implementation.
#' @param formula.U Optional formula for U covariates.
#' @param Z.standardizing Logical, whether to standardize Z covariates. Default TRUE.
#' @param U.standardizing Logical, whether to standardize U covariates. Default TRUE.
#' @param alp.eta Logical, whether to model alpha0 as a function of Z. Default FALSE.
#' @param alp.kap Logical, whether to model alpha1 as a function of Z. Default FALSE.
#' @param pi.xi Logical, whether to model pi as a function of U. Default FALSE.
#' @param pi.zeta Logical, whether to model pi variance as a function of U. Default FALSE.
#' @param para.alp.init Optional initial values for alpha parameters.
#' @param para.pi.init Optional initial values for pi parameters.
#' @param gam.init Optional initial values for gamma.
#' @param iterlim Maximum number of iterations. Default 500.
#' @param tol Convergence tolerance. Default 1e-6.
#' @param trace Logical, whether to print progress. Default FALSE.
#' @param method Character string to force specific implementation: "auto" (default),
#'   "nlopt" (force C++), or "R" (force R).
#' @param inner.loop Logical for NLopt: use inner loop optimization. Default TRUE.
#' @param inner.iterlim Maximum inner loop iterations for NLopt. Default 20.
#' @param inner.tol Inner loop convergence tolerance for NLopt. Default 1e-6.
#' @param alp.iterlim Maximum iterations for alpha optimization in NLopt. Default 100.
#' @param alp.tol Convergence tolerance for alpha in NLopt. Default 1e-6.
#' @param alp.min Minimum alpha value for NLopt. Default 1e-3.
#' @param alp.max Maximum alpha value for NLopt. Default 1e3.
#' @param ... Additional arguments (ignored, for compatibility).
#'
#' @return A list containing:
#'   \item{gamma}{Estimated gamma parameters (bimodality indicators).}
#'   \item{pi}{Estimated pi parameters (mixing proportions).}
#'   \item{beta}{Estimated posterior Dirichlet parameters.}
#'   \item{alpha0}{Estimated alpha0 parameters (mode 0).}
#'   \item{alpha1}{Estimated alpha1 parameters (mode 1).}
#'   \item{converge}{Logical indicating convergence.}
#'   \item{iter}{Number of iterations used.}
#'   \item{method}{Character string indicating which method was used: "nlopt" or "R".}
#'
#' @details
#' This function automatically chooses the best implementation:
#' \itemize{
#'   \item \strong{NLopt C++} (default): When no covariates (Z, U) are specified.
#'     Provides 50-90x speedup using optimized L-BFGS-B algorithm with analytical
#'     gradients. Scientifically equivalent to R version (correlation > 0.999).
#'   \item \strong{R}: When covariates are needed or explicitly requested via
#'     \code{method = "R"}. Supports full covariate modeling for alpha and pi.
#' }
#'
#' The NLopt implementation requires the NLopt library to be installed on your
#' system. If NLopt is not available, the function automatically falls back to
#' the R implementation with a warning.
#'
#' @examples
#' \dontrun{
#' # Simulate data
#' m <- 100  # taxa
#' n <- 50   # samples
#' W <- matrix(rpois(m*n, 100), m, n)
#'
#' # Standard usage (auto-selects NLopt for speed)
#' result <- bmdd(W, type = "count")
#'
#' # Force specific method
#' result_fast <- bmdd(W, type = "count", method = "nlopt")  # Force NLopt
#' result_full <- bmdd(W, type = "count", method = "R")      # Force R
#'
#' # With covariates (automatically uses R)
#' Z <- matrix(rnorm(m * 2), m, 2)
#' result_cov <- bmdd(W, type = "count", Z = Z, alp.eta = TRUE)
#'
#' # Access results
#' head(result$beta)    # Posterior parameters
#' head(result$gamma)   # Bimodality indicators
#' }
#'
#' @references
#' Zhou, H., Yang, L., Chen, J., & Zhang, X. (2023). Bimodal Dirichlet
#' Distributions and Its Application to Microbiome Data Analysis.
#'
#' @export
bmdd <- function(W,
                 type = c('count', 'proportion'),
                 Z = NULL,
                 formula.Z = NULL,
                 U = NULL,
                 formula.U = NULL,
                 Z.standardizing = TRUE,
                 U.standardizing = TRUE,
                 alp.eta = FALSE,
                 alp.kap = FALSE,
                 pi.xi = FALSE,
                 pi.zeta = FALSE,
                 para.alp.init = NULL,
                 para.pi.init = NULL,
                 gam.init = NULL,
                 iterlim = 500,
                 tol = 1e-6,
                 trace = FALSE,
                 method = c("auto", "nlopt", "R"),
                 inner.loop = TRUE,
                 inner.iterlim = 20,
                 inner.tol = 1e-6,
                 alp.iterlim = 100,
                 alp.tol = 1e-6,
                 alp.min = 1e-3,
                 alp.max = 1e3,
                 ...) {

  type <- match.arg(type)
  method <- match.arg(method)

  # Determine if covariates are being used
  has_covariates <- !is.null(Z) || !is.null(formula.Z) ||
                    !is.null(U) || !is.null(formula.U) ||
                    alp.eta || alp.kap || pi.xi || pi.zeta

  # Auto-select method
  if (method == "auto") {
    if (has_covariates) {
      method <- "R"
      if (trace) {
        message("Using R implementation (covariates detected)")
      }
    } else {
      # Try NLopt, fall back to R if not available
      nlopt_available <- tryCatch({
        # Test if NLopt compiled successfully
        test <- bmdd_nlopt_check_available()
        TRUE
      }, error = function(e) {
        FALSE
      })

      if (nlopt_available) {
        method <- "nlopt"
        if (trace) {
          message("Using NLopt C++ implementation (50-90x faster)")
        }
      } else {
        method <- "R"
        warning("NLopt library not available. Using R implementation. ",
                "Install NLopt for 50-90x speedup: ",
                "macOS: brew install nlopt, ",
                "Linux: sudo apt-get install libnlopt-dev",
                call. = FALSE)
      }
    }
  }

  # Force method if explicitly requested
  if (method == "nlopt" && has_covariates) {
    warning("Covariates detected but method='nlopt' requested. ",
            "NLopt does not support covariates. Switching to R implementation.",
            call. = FALSE)
    method <- "R"
  }

  # Call appropriate implementation
  if (method == "nlopt") {
    result <- bmdd.nlopt(
      W = W,
      type = type,
      para.alp.init = para.alp.init,
      para.pi.init = para.pi.init,
      gam.init = gam.init,
      iterlim = iterlim,
      tol = tol,
      trace = trace,
      inner.loop = inner.loop,
      inner.iterlim = inner.iterlim,
      inner.tol = inner.tol,
      alp.iterlim = alp.iterlim,
      alp.tol = alp.tol,
      alp.min = alp.min,
      alp.max = alp.max
    )
    result$method <- "nlopt"
  } else {
    result <- bmdd.R(
      W = W,
      type = type,
      Z = Z,
      formula.Z = formula.Z,
      U = U,
      formula.U = formula.U,
      Z.standardizing = Z.standardizing,
      U.standardizing = U.standardizing,
      alp.eta = alp.eta,
      alp.kap = alp.kap,
      pi.xi = pi.xi,
      pi.zeta = pi.zeta,
      para.alp.init = para.alp.init,
      para.pi.init = para.pi.init,
      gam.init = gam.init,
      iterlim = iterlim,
      tol = tol,
      trace = trace
    )
    result$method <- "R"
  }

  return(result)
}

#' Check if NLopt Implementation is Available
#'
#' @return Logical indicating if NLopt C++ implementation is available
#' @keywords internal
bmdd_nlopt_check_available <- function() {
  tryCatch({
    # Try to call a simple NLopt function
    .Call("test_nlopt_available", PACKAGE = "MicrobiomeStat")
    TRUE
  }, error = function(e) {
    FALSE
  })
}