Nothing
#' Bimodal Dirichlet Distribution Estimation with Intelligent Method Selection
#'
#' Estimates parameters of the bimodal Dirichlet distribution using a variational
#' EM algorithm. Automatically selects the optimal implementation: high-performance
#' C++ (NLopt) when possible, or R when covariates are needed.
#'
#' @param W A matrix of observed data. For count data, an m-by-n matrix where
#' m is the number of taxa and n is the number of samples. For proportion data,
#' the rows should sum to 1.
#' @param type Character string specifying data type: "count" or "proportion".
#' Default is "count".
#' @param Z Optional matrix of covariates for alpha parameters (taxa effects).
#' If provided, forces use of R implementation.
#' @param formula.Z Optional formula for Z covariates.
#' @param U Optional matrix of covariates for pi parameters (sample effects).
#' If provided, forces use of R implementation.
#' @param formula.U Optional formula for U covariates.
#' @param Z.standardizing Logical, whether to standardize Z covariates. Default TRUE.
#' @param U.standardizing Logical, whether to standardize U covariates. Default TRUE.
#' @param alp.eta Logical, whether to model alpha0 as a function of Z. Default FALSE.
#' @param alp.kap Logical, whether to model alpha1 as a function of Z. Default FALSE.
#' @param pi.xi Logical, whether to model pi as a function of U. Default FALSE.
#' @param pi.zeta Logical, whether to model pi variance as a function of U. Default FALSE.
#' @param para.alp.init Optional initial values for alpha parameters.
#' @param para.pi.init Optional initial values for pi parameters.
#' @param gam.init Optional initial values for gamma.
#' @param iterlim Maximum number of iterations. Default 500.
#' @param tol Convergence tolerance. Default 1e-6.
#' @param trace Logical, whether to print progress. Default FALSE.
#' @param method Character string to force specific implementation: "auto" (default),
#' "nlopt" (force C++), or "R" (force R).
#' @param inner.loop Logical for NLopt: use inner loop optimization. Default TRUE.
#' @param inner.iterlim Maximum inner loop iterations for NLopt. Default 20.
#' @param inner.tol Inner loop convergence tolerance for NLopt. Default 1e-6.
#' @param alp.iterlim Maximum iterations for alpha optimization in NLopt. Default 100.
#' @param alp.tol Convergence tolerance for alpha in NLopt. Default 1e-6.
#' @param alp.min Minimum alpha value for NLopt. Default 1e-3.
#' @param alp.max Maximum alpha value for NLopt. Default 1e3.
#' @param ... Additional arguments (ignored, for compatibility).
#'
#' @return A list containing:
#' \item{gamma}{Estimated gamma parameters (bimodality indicators).}
#' \item{pi}{Estimated pi parameters (mixing proportions).}
#' \item{beta}{Estimated posterior Dirichlet parameters.}
#' \item{alpha0}{Estimated alpha0 parameters (mode 0).}
#' \item{alpha1}{Estimated alpha1 parameters (mode 1).}
#' \item{converge}{Logical indicating convergence.}
#' \item{iter}{Number of iterations used.}
#' \item{method}{Character string indicating which method was used: "nlopt" or "R".}
#'
#' @details
#' This function automatically chooses the best implementation:
#' \itemize{
#' \item \strong{NLopt C++} (default): When no covariates (Z, U) are specified.
#' Provides 50-90x speedup using optimized L-BFGS-B algorithm with analytical
#' gradients. Scientifically equivalent to R version (correlation > 0.999).
#' \item \strong{R}: When covariates are needed or explicitly requested via
#' \code{method = "R"}. Supports full covariate modeling for alpha and pi.
#' }
#'
#' The NLopt implementation requires the NLopt library to be installed on your
#' system. If NLopt is not available, the function automatically falls back to
#' the R implementation with a warning.
#'
#' @examples
#' \dontrun{
#' # Simulate data
#' m <- 100 # taxa
#' n <- 50 # samples
#' W <- matrix(rpois(m*n, 100), m, n)
#'
#' # Standard usage (auto-selects NLopt for speed)
#' result <- bmdd(W, type = "count")
#'
#' # Force specific method
#' result_fast <- bmdd(W, type = "count", method = "nlopt") # Force NLopt
#' result_full <- bmdd(W, type = "count", method = "R") # Force R
#'
#' # With covariates (automatically uses R)
#' Z <- matrix(rnorm(m * 2), m, 2)
#' result_cov <- bmdd(W, type = "count", Z = Z, alp.eta = TRUE)
#'
#' # Access results
#' head(result$beta) # Posterior parameters
#' head(result$gamma) # Bimodality indicators
#' }
#'
#' @references
#' Zhou, H., Yang, L., Chen, J., & Zhang, X. (2023). Bimodal Dirichlet
#' Distributions and Its Application to Microbiome Data Analysis.
#'
#' @export
bmdd <- function(W,
type = c('count', 'proportion'),
Z = NULL,
formula.Z = NULL,
U = NULL,
formula.U = NULL,
Z.standardizing = TRUE,
U.standardizing = TRUE,
alp.eta = FALSE,
alp.kap = FALSE,
pi.xi = FALSE,
pi.zeta = FALSE,
para.alp.init = NULL,
para.pi.init = NULL,
gam.init = NULL,
iterlim = 500,
tol = 1e-6,
trace = FALSE,
method = c("auto", "nlopt", "R"),
inner.loop = TRUE,
inner.iterlim = 20,
inner.tol = 1e-6,
alp.iterlim = 100,
alp.tol = 1e-6,
alp.min = 1e-3,
alp.max = 1e3,
...) {
type <- match.arg(type)
method <- match.arg(method)
# Determine if covariates are being used
has_covariates <- !is.null(Z) || !is.null(formula.Z) ||
!is.null(U) || !is.null(formula.U) ||
alp.eta || alp.kap || pi.xi || pi.zeta
# Auto-select method
if (method == "auto") {
if (has_covariates) {
method <- "R"
if (trace) {
message("Using R implementation (covariates detected)")
}
} else {
# Try NLopt, fall back to R if not available
nlopt_available <- tryCatch({
# Test if NLopt compiled successfully
test <- bmdd_nlopt_check_available()
TRUE
}, error = function(e) {
FALSE
})
if (nlopt_available) {
method <- "nlopt"
if (trace) {
message("Using NLopt C++ implementation (50-90x faster)")
}
} else {
method <- "R"
warning("NLopt library not available. Using R implementation. ",
"Install NLopt for 50-90x speedup: ",
"macOS: brew install nlopt, ",
"Linux: sudo apt-get install libnlopt-dev",
call. = FALSE)
}
}
}
# Force method if explicitly requested
if (method == "nlopt" && has_covariates) {
warning("Covariates detected but method='nlopt' requested. ",
"NLopt does not support covariates. Switching to R implementation.",
call. = FALSE)
method <- "R"
}
# Call appropriate implementation
if (method == "nlopt") {
result <- bmdd.nlopt(
W = W,
type = type,
para.alp.init = para.alp.init,
para.pi.init = para.pi.init,
gam.init = gam.init,
iterlim = iterlim,
tol = tol,
trace = trace,
inner.loop = inner.loop,
inner.iterlim = inner.iterlim,
inner.tol = inner.tol,
alp.iterlim = alp.iterlim,
alp.tol = alp.tol,
alp.min = alp.min,
alp.max = alp.max
)
result$method <- "nlopt"
} else {
result <- bmdd.R(
W = W,
type = type,
Z = Z,
formula.Z = formula.Z,
U = U,
formula.U = formula.U,
Z.standardizing = Z.standardizing,
U.standardizing = U.standardizing,
alp.eta = alp.eta,
alp.kap = alp.kap,
pi.xi = pi.xi,
pi.zeta = pi.zeta,
para.alp.init = para.alp.init,
para.pi.init = para.pi.init,
gam.init = gam.init,
iterlim = iterlim,
tol = tol,
trace = trace
)
result$method <- "R"
}
return(result)
}
#' Check if NLopt Implementation is Available
#'
#' @return Logical indicating if NLopt C++ implementation is available
#' @keywords internal
bmdd_nlopt_check_available <- function() {
tryCatch({
# Try to call a simple NLopt function
.Call("test_nlopt_available", PACKAGE = "MicrobiomeStat")
TRUE
}, error = function(e) {
FALSE
})
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.