Nothing
#' Limited-memory BFGS with Box Constraints (L-BFGS-B)
#'
#' @description
#' Performs bound-constrained minimization using the L-BFGS-B algorithm.
#' This implementation handles box constraints via Generalized Cauchy Point (GCP)
#' estimation and subspace minimization, featuring a limited-memory (two-loop recursion)
#' inverse Hessian approximation.
#'
#' @section Comparison with Existing Functions:
#' This function adds three features for rigorous convergence control. First, it
#' applies an \bold{AND rule}: all selected convergence criteria must be satisfied
#' simultaneously. Second, users can choose among \bold{eight distinct criteria}
#' (e.g., changes in \eqn{f}, \eqn{x}, gradient, or predicted decrease) instead of relying
#' on fixed defaults. Third, it provides an \bold{optional verification} using the
#' Hessian computed from derivatives (analytically when provided, or via numerical
#' differentiation). Checking the positive definiteness of this Hessian at the final
#' solution reduces the risk of declaring convergence at non-minimizing stationary
#' points, such as saddle points.
#'
#' @references
#' Byrd, R. H., Lu, P., Nocedal, J., & Zhu, C. (1995). A limited memory algorithm
#' for bound constrained optimization. \emph{SIAM Journal on Scientific Computing},
#' 16(5), 1190-1208.
#'
#' Morales, J. L., & Nocedal, J. (2011). L-BFGS-B: Remark on algorithm 778:
#' L-BFGS-B: Fortran subroutines for large-scale bound-constrained optimization.
#' \emph{ACM Transactions on Mathematical Software}, 38(1), 1-4.
#'
#' @param start Numeric vector. Initial values for the parameters.
#' @param objective Function. The scalar objective function to be minimized.
#' @param gradient Function (optional). Returns the gradient vector. If \code{NULL},
#' numerical derivatives are used.
#' @param hessian Function (optional). Returns the Hessian matrix. Used for final
#' positive definiteness verification if \code{use_posdef = TRUE}.
#' @param lower,upper Numeric vectors. Lower and upper bounds for the parameters.
#' Can be scalars if all parameters share the same bounds.
#'
#' @param control A list of control parameters:
#' \itemize{
#' \item \code{use_abs_f}: Logical. Criterion: \eqn{|f_{new} - f_{old}| <} \code{tol_abs_f}.
#' \item \code{use_rel_f}: Logical. Criterion: \eqn{|(f_{new} - f_{old}) / f_{old}| <} \code{tol_rel_f}.
#' \item \code{use_abs_x}: Logical. Criterion: \eqn{\max |x_{new} - x_{old}| <} \code{tol_abs_x}.
#' \item \code{use_rel_x}: Logical. Criterion: \eqn{\max |(x_{new} - x_{old}) / x_{old}| <} \code{tol_rel_x}.
#' \item \code{use_grad}: Logical. Criterion: \eqn{\|g\|_\infty <} \code{tol_grad}.
#' \item \code{use_posdef}: Logical. Criterion: Positive definiteness of the Hessian.
#' \item \code{max_iter}: Maximum number of iterations (default: 10000).
#' \item \code{m}: Number of L-BFGS memory updates (default: 5).
#' \item \code{tol_abs_f}, \code{tol_rel_f}: Tolerances for function value change.
#' \item \code{tol_abs_x}, \code{tol_rel_x}: Tolerances for parameter change.
#' \item \code{tol_grad}: Tolerance for the projected gradient (default: 1e-4).
#' }
#' @param ... Additional arguments passed to objective, gradient, and Hessian functions.
#'
#' @return A list containing optimization results and metadata.
#' @export
#' @examples
# Simple quadratic function optimization
#' quad <- function(x) (x[1] - 2)^2 + (x[2] + 1)^2
#' res <- l_bfgs_b(start = c(0, 0), objective = quad)
#' print(res$par)
l_bfgs_b <- function(
start,
objective,
gradient = NULL,
hessian = NULL,
lower = -Inf,
upper = Inf,
control = list(),
...
) {
# ---------- 1. Configuration (Synced with Suite) ----------
ctrl0 <- list(
# Convergence flags
use_abs_f = FALSE,
use_rel_f = FALSE,
use_abs_x = FALSE,
use_rel_x = TRUE,
use_grad = TRUE,
use_posdef = TRUE,
use_pred_f = FALSE,
use_pred_f_avg = FALSE,
# Algorithm parameters
max_iter = 10000L,
m = 5L,
tol_abs_f = 1e-6,
tol_rel_f = 1e-6,
tol_abs_x = 1e-6,
tol_rel_x = 1e-6,
tol_grad = 1e-4,
tol_pred_f = 1e-4,
tol_pred_f_avg = 1e-4,
wolfe_c1 = 1e-4,
ls_alpha0 = 1.0,
ls_max_steps = 30L,
ls_shrink = 0.5,
curvature_eps = 1e-10,
damp_phi = 0.2,
bound_eps = 1e-10,
diff_method = "forward"
)
ctrl <- utils::modifyList(ctrl0, control)
# ---------- 2. Internal Helpers ----------
eval_obj <- function(z) as.numeric(objective(z, ...))[1]
grad_func <- if (!is.null(gradient)) {
function(z) as.numeric(gradient(z, ...))
} else {
function(z) fast_grad(objective, z, diff_method = ctrl$diff_method, ...)
}
hess_func <- if (!is.null(hessian)) {
function(z) hessian(z, ...)
} else {
function(z) fast_hess(objective, z, diff_method = ctrl$diff_method, ...)
}
project <- function(x, l, u) pmax(l, pmin(x, u))
get_projected_grad <- function(x, g, l, u) {
pg <- g
pg[x <= l + ctrl$bound_eps & g > 0] <- 0
pg[x >= u - ctrl$bound_eps & g < 0] <- 0
pg
}
two_loop <- function(v, s_list, y_list, rho_list, theta) {
mcur <- length(s_list)
if (mcur == 0L) return((1 / theta) * v)
q <- v
alpha <- numeric(mcur)
for (i in rev(seq_len(mcur))) {
alpha[i] <- rho_list[[i]] * sum(s_list[[i]] * q)
q <- q - alpha[i] * y_list[[i]]
}
r <- (1 / theta) * q
for (i in seq_len(mcur)) {
beta <- rho_list[[i]] * sum(y_list[[i]] * r)
r <- r + s_list[[i]] * (alpha[i] - beta)
}
r
}
# ---------- 3. Initialization ----------
x <- as.numeric(start)
n <- length(x)
# start_clock defined here to ensure it exists for cpu_time calculation
start_clock <- proc.time()
lower <- if(length(lower) == 1) rep(lower, n) else lower
upper <- if(length(upper) == 1) rep(upper, n) else upper
x <- project(x, lower, upper)
f <- eval_obj(x)
g <- grad_func(x)
s_list <- list(); y_list <- list(); rho_list <- list()
it <- 0L; converged <- FALSE; status <- "running"
x_old <- x; f_old <- NA_real_; s_last <- NULL; g_inf <- NA_real_
# ---------- 4. Main Loop ----------
tryCatch({
repeat {
it <- it + 1L
if (it > ctrl$max_iter) { status <- "iteration_limit_reached"; break }
pg <- get_projected_grad(x, g, lower, upper)
g_inf <- max(abs(pg))
# 4.1) Convergence Verification (AND Rule)
res_conv <- TRUE
if (ctrl$use_grad) res_conv <- res_conv && (g_inf <= ctrl$tol_grad)
if (ctrl$use_abs_f && !is.na(f_old)) res_conv <- res_conv && (abs(f - f_old) <= ctrl$tol_abs_f)
if (ctrl$use_rel_f && !is.na(f_old)) res_conv <- res_conv && (abs((f - f_old) / max(1, abs(f_old))) <= ctrl$tol_rel_f)
if (ctrl$use_abs_x && it > 1L) res_conv <- res_conv && (max(abs(x - x_old)) <= ctrl$tol_abs_x)
if (ctrl$use_rel_x && it > 1L) res_conv <- res_conv && (max(abs(x - x_old)) / max(1, max(abs(x_old)))) <= ctrl$tol_rel_x
if (res_conv && it > 1L) {
if (isTRUE(ctrl$use_posdef)) {
H_eval <- tryCatch(hess_func(x), error = function(e) NULL)
if (is_pd_fast(H_eval)) { converged <- TRUE; status <- "converged"; break } else res_conv <- FALSE
} else { converged <- TRUE; status <- "converged"; break }
}
# 4.2) Update theta scaling for B0
theta <- if (length(s_list) > 0) {
sy_tmp <- sum(s_list[[length(s_list)]] * y_list[[length(y_list)]])
yy_tmp <- sum(y_list[[length(y_list)]]^2)
if (sy_tmp > 1e-12) yy_tmp / sy_tmp else 1.0
} else 1.0
# 4.3) Subspace Step via Two-loop Recursion
p <- -two_loop(g, s_list, y_list, rho_list, theta)
# Feasibility enforcement for direction
p[x <= lower + ctrl$bound_eps & p < 0] <- 0
p[x >= upper - ctrl$bound_eps & p > 0] <- 0
if (max(abs(p)) < 1e-14) p <- -pg # Fallback to projected gradient
# 4.4) Backtracking Line Search
alpha <- ctrl$ls_alpha0; step_ok <- FALSE
for (ls_it in seq_len(ctrl$ls_max_steps)) {
x_try <- project(x + alpha * p, lower, upper)
f_try <- eval_obj(x_try)
s_try <- x_try - x
# Armijo condition for projected step
if (is.finite(f_try) && f_try <= f + ctrl$wolfe_c1 * sum(g * s_try)) {
x_new <- x_try; f_new <- f_try; s_last <- s_try; step_ok <- TRUE; break
}
alpha <- alpha * ctrl$ls_shrink
}
if (!step_ok) { status <- "line_search_failed"; break }
# 4.5) L-BFGS Memory Update with Powell Damping
g_new <- grad_func(x_new); s_vec <- x_new - x; y_vec <- g_new - g
sy <- sum(s_vec * y_vec)
# Post-line-search convergence check (handles exact solutions, e.g., quadratics)
g_inf_new <- max(abs(g_new), na.rm = TRUE)
if (ctrl$use_grad && g_inf_new <= ctrl$tol_grad) {
x <- x_new; f <- f_new; g <- g_new; g_inf <- g_inf_new
if (isTRUE(ctrl$use_posdef)) {
H_eval <- tryCatch(hess_func(x), error = function(e) NULL)
if (is_pd_fast(H_eval)) { converged <- TRUE; status <- "converged"; break }
} else { converged <- TRUE; status <- "converged"; break }
}
# Implicit B*s approximation using theta
Bs_approx <- theta * s_vec; sBs <- sum(s_vec * Bs_approx)
update_ok <- FALSE; y_star <- y_vec; sy_star <- sy
if (isTRUE(ctrl$use_damped)) {
if (sy < ctrl$damp_phi * sBs) {
theta_damp <- ((1 - ctrl$damp_phi) * sBs) / (sBs - sy + 1e-16)
y_star <- theta_damp * y_vec + (1 - theta_damp) * Bs_approx
sy_star <- sum(s_vec * y_star)
}
if (sy_star > ctrl$curvature_eps) { y_vec <- y_star; sy <- sy_star; update_ok <- TRUE }
} else {
if (sy > ctrl$curvature_eps) update_ok <- TRUE
}
if (update_ok) {
if (length(s_list) >= ctrl$m) {
s_list[[1]] <- NULL; y_list[[1]] <- NULL; rho_list[[1]] <- NULL
}
s_list[[length(s_list) + 1L]] <- s_vec
y_list[[length(y_list) + 1L]] <- y_vec
rho_list[[length(rho_list) + 1L]] <- 1 / (sy + 1e-16)
}
x_old <- x; f_old <- f; x <- x_new; f <- f_new; g <- g_new
}
}, error = function(e) { status <<- paste0("runtime_error: ", conditionMessage(e)) })
# ---------- 5. Final Status & Output Construction ----------
final_clock <- proc.time() - start_clock
list(
par = x,
objective = f,
converged = converged,
status = status,
iter = it,
cpu_time = as.numeric(final_clock[1] + final_clock[2])[1],
elapsed_time = as.numeric(final_clock[3])[1],
max_grad = as.numeric(g_inf)[1]
)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.