# NOTE(review): context() is reported as deprecated under testthat 3rd
# edition -- confirm which edition this package uses before removing it.
context("API tests")
# Repeat some of the basic tests, using the consumer API
# (every test below goes through the top-level mize() call).
test_that("steepest descent with constant step size", {
  # Three SD iterations with a fixed step of 1e-4 and check_conv_every = NULL.
  # nf / ng are the function / gradient evaluation counts reported by mize.
  res <- mize(rb0, rosenbrock_fg,
    method = "SD", max_iter = 3,
    line_search = "const", step0 = 0.0001, grad_tol = 1e-5,
    check_conv_every = NULL
  )
  expect_equal(res$nf, 1)
  expect_equal(res$ng, 4)
  # `tolerance` is spelled out in full rather than relying on partial
  # matching of `tol`.
  expect_equal(res$f, 12.81, tolerance = 1e-3)
  expect_equal(res$g2n, 147.11, tolerance = 1e-3)
  expect_equal(res$par, c(-1.144, 1.023), tolerance = 1e-3)
})
test_that("grad norm not returned (or calculated) if grad tol is NULL", {
  # Constant-step SD with grad_tol = NULL: the result is expected to carry no
  # g2n entry, and only three gradient evaluations are counted.
  res <- mize(rb0, rosenbrock_fg,
    method = "SD", max_iter = 3,
    line_search = "const", step0 = 0.0001, grad_tol = NULL,
    check_conv_every = NULL
  )
  expect_equal(res$nf, 1)
  expect_equal(res$ng, 3)
  expect_equal(res$f, 12.81, tolerance = 1e-3)
  expect_null(res$g2n)
  expect_equal(res$par, c(-1.144, 1.023), tolerance = 1e-3)
})
test_that("L-BFGS with More-Thuente LS", {
  # can abbreviate line search name and initializer
  # ("mo" for the line search, "q" for step_next_init)
  res <- mize(rb0, rosen_no_hess,
    method = "L-BFGS", max_iter = 3,
    line_search = "mo", c1 = 5e-10, c2 = 1e-9, step0 = "scipy",
    step_next_init = "q", scale_hess = FALSE, grad_tol = 1e-5
  )
  expect_equal(res$nf, 17)
  expect_equal(res$ng, 17)
  expect_equal(res$f, 3.53, tolerance = 1e-3)
  expect_equal(res$g2n, 24.98, tolerance = 1e-3)
  expect_equal(res$par, c(-0.785, 0.558), tolerance = 1e-3)
})
test_that("L-BFGS with More-Thuente LS and inv Hess initial guess", {
  # Uses the rosenbrock_fg fixture rather than rosen_no_hess.
  # NOTE(review): presumably rosenbrock_fg supplies the Hessian information
  # used for the initial guess -- confirm against the fixture definition.
  res <- mize(rb0, rosenbrock_fg,
    method = "L-BFGS", max_iter = 3,
    line_search = "mo", c1 = 5e-10, c2 = 1e-9, step0 = "scipy",
    step_next_init = "q", scale_hess = FALSE, grad_tol = 1e-5
  )
  expect_equal(res$nf, 19)
  expect_equal(res$ng, 19)
  expect_equal(res$f, 3.71, tolerance = 1e-3)
  expect_equal(res$g2n, 27.40, tolerance = 1e-3)
  expect_equal(res$par, c(-0.820, 0.610), tolerance = 1e-3)
})
test_that("L-BFGS with More-Thuente LS and max alpha guess increase", {
  # can abbreviate line search name and initializer
  # This run additionally sets ls_max_alpha_mult = 2.
  res <- mize(rb0, rosen_no_hess,
    method = "L-BFGS", max_iter = 3,
    line_search = "mo", c1 = 5e-10, c2 = 1e-9, step0 = "scipy",
    step_next_init = "q", scale_hess = FALSE, grad_tol = 1e-5,
    ls_max_alpha_mult = 2
  )
  # Get to the same result as without ls_max_alpha_mult but more evaluations
  expect_equal(res$nf, 21)
  expect_equal(res$ng, 21)
  expect_equal(res$f, 3.53, tolerance = 1e-3)
  expect_equal(res$g2n, 24.98, tolerance = 1e-3)
  expect_equal(res$par, c(-0.785, 0.558), tolerance = 1e-3)
})
test_that("BFGS with Hessian inverse diagonal", {
  # Three BFGS iterations on the rosenbrock_fg fixture with More-Thuente line
  # search and ls_max_alpha = 0.5.
  res <- mize(rb0, rosenbrock_fg,
    method = "BFGS", max_iter = 3,
    line_search = "more-thuente", c1 = 5e-10, c2 = 1e-9,
    step0 = "sci", ls_max_alpha = 0.5,
    step_next_init = "quad", scale_hess = FALSE, grad_tol = 1e-5
  )
  expect_equal(res$nf, 4)
  expect_equal(res$ng, 4)
  expect_equal(res$f, 4.461, tolerance = 1e-3)
  expect_equal(res$g2n, 6.524, tolerance = 1e-3)
  expect_equal(res$par, c(-1.112, 1.231), tolerance = 1e-3)
})
test_that("BFGS with Hessian inverse matrix", {
  # Copy the fixture and add an `hi` entry that returns the matrix inverse
  # (via solve()) of whatever rosenbrock_fg$hs() produces at `par`.
  rb_hi <- rosenbrock_fg
  rb_hi$hi <- function(par) {
    solve(rosenbrock_fg$hs(par))
  }
  res <- mize(rb0, rb_hi,
    method = "BFGS", max_iter = 3,
    line_search = "more-thuente", c1 = 5e-10, c2 = 1e-9,
    step0 = "sci", ls_max_alpha = 0.5,
    step_next_init = "quad", scale_hess = FALSE, grad_tol = 1e-5
  )
  expect_equal(res$nf, 4)
  expect_equal(res$ng, 4)
  expect_equal(res$f, 4.968, tolerance = 1e-3)
  expect_equal(res$g2n, 31.81, tolerance = 1e-3)
  expect_equal(res$par, c(-1.160, 1.290), tolerance = 1e-3)
})
test_that("BFGS with More-Thuente LS and max alpha", {
  # Three BFGS iterations on the rosen_no_hess fixture with
  # ls_max_alpha = 0.5.
  res <- mize(rb0, rosen_no_hess,
    method = "BFGS", max_iter = 3,
    line_search = "more-thuente", c1 = 5e-10, c2 = 1e-9,
    step0 = "sci", ls_max_alpha = 0.5,
    step_next_init = "quad", scale_hess = FALSE, grad_tol = 1e-5
  )
  expect_equal(res$nf, 14)
  expect_equal(res$ng, 14)
  expect_equal(res$f, 3.726, tolerance = 1e-3)
  expect_equal(res$g2n, 18.83, tolerance = 1e-3)
  expect_equal(res$par, c(-0.893, 0.760), tolerance = 1e-3)
})
test_that("BFGS with More-Thuente LS and fixed initial alpha guess", {
  # step_next_init is given as the number 0.1 instead of a method name.
  res <- mize(rb0, rosen_no_hess,
    method = "BFGS", max_iter = 3,
    line_search = "more-thuente", c1 = 5e-10, c2 = 1e-9,
    step0 = "sci",
    step_next_init = 0.1, scale_hess = FALSE, grad_tol = 1e-5
  )
  # Get to the same result as without step_next_init but more evaluations
  expect_equal(res$nf, 21)
  expect_equal(res$ng, 21)
  expect_equal(res$f, 3.53, tolerance = 1e-3)
  expect_equal(res$g2n, 24.98, tolerance = 1e-3)
  expect_equal(res$par, c(-0.785, 0.558), tolerance = 1e-3)
})
test_that("SR1 with More-Thuente LS", {
  # Three SR1 iterations on the rosen_no_hess fixture with c1 = 1e-4 and
  # c2 = 0.9.
  res <- mize(rb0, rosen_no_hess,
    method = "SR1", max_iter = 3,
    line_search = "more-thuente", c1 = 1e-4, c2 = 0.9,
    step0 = "sci",
    step_next_init = "quad", scale_hess = FALSE, grad_tol = 1e-5
  )
  expect_equal(res$nf, 6)
  expect_equal(res$ng, 6)
  expect_equal(res$f, 3.47, tolerance = 1e-3)
  expect_equal(res$g2n, 17.87, tolerance = 1e-3)
  expect_equal(res$par, c(-0.824, 0.641), tolerance = 1e-3)
})
test_that("SR1 with approx Hessian init", {
  # SR1 on the rosenbrock_fg fixture rather than rosen_no_hess.
  # NOTE(review): presumably the fixture's Hessian information seeds the
  # approximation -- confirm against the fixture definition.
  res <- mize(rb0, rosenbrock_fg,
    method = "SR1", max_iter = 3,
    line_search = "more-thuente", c1 = 1e-4, c2 = 0.9,
    step0 = "sci",
    step_next_init = "quad", scale_hess = FALSE, grad_tol = 1e-5
  )
  expect_equal(res$nf, 4)
  expect_equal(res$ng, 4)
  expect_equal(res$f, 4.432, tolerance = 1e-3)
  expect_equal(res$g2n, 5.289, tolerance = 1e-3)
  expect_equal(res$par, c(-1.105, 1.219), tolerance = 1e-3)
})
test_that("CG with Schmidt LS", {
  # lower case names should be ok for method, cg_update, step0 etc.
  res <- mize(rb0, rosenbrock_fg,
    method = "cg",
    cg_update = "pr+",
    max_iter = 3,
    line_search = "schmidt", c1 = 1e-4, c2 = 0.1, step0 = "schmidt",
    step_next_init = "slope", ls_max_alpha_mult = 10,
    grad_tol = 1e-5
  )
  expect_equal(res$nf, 10)
  expect_equal(res$ng, 10)
  expect_equal(res$f, 2.859, tolerance = 1e-3)
  expect_equal(res$g2n, 3.650, tolerance = 1e-3)
  expect_equal(res$par, c(-0.682, 0.483), tolerance = 1e-3)
})
# Tests error referenced in https://github.com/jlmelville/mize/pull/1
test_that("ls_max_fn", {
  # CG with the Schmidt line search and ls_max_fn = 2.
  res <- mize(rb0, rosenbrock_fg,
    method = "cg",
    cg_update = "pr+",
    max_iter = 3,
    line_search = "schmidt", c1 = 1e-4, c2 = 0.1, step0 = "schmidt",
    step_next_init = "slope", ls_max_alpha_mult = 10, ls_max_fn = 2,
    grad_tol = 1e-5
  )
  expect_equal(res$nf, 7)
  expect_equal(res$ng, 7)
  expect_equal(res$f, 3.947, tolerance = 1e-3)
  expect_equal(res$g2n, 1.872, tolerance = 1e-3)
  expect_equal(res$par, c(-0.986, 0.978), tolerance = 1e-3)
})
test_that("CG with Rasmussen LS", {
  # lower case names should be ok for method, cg_update, step0 etc.
  # "ras" is passed for both line_search and step0.
  res <- mize(rb0, rosenbrock_fg,
    method = "cg",
    cg_update = "pr+",
    max_iter = 3,
    line_search = "ras", c1 = 5e-10, c2 = 1e-9, step0 = "ras",
    step_next_init = "slope", ls_max_alpha_mult = 10,
    grad_tol = 1e-5
  )
  expect_equal(res$nf, 27)
  expect_equal(res$ng, 27)
  expect_equal(res$f, 3.53, tolerance = 1e-3)
  expect_equal(res$g2n, 24.98, tolerance = 1e-3)
  expect_equal(res$par, c(-0.785, 0.558), tolerance = 1e-3)
})
test_that("HZ CG with HZ LS", {
  # Also use HZ suggestions for initial step guess and next step guess
  # (the latter of which costs an extra fn evaluation per iteration)
  res <- mize(rb0, rosenbrock_fg,
    method = "cg",
    cg_update = "hz+",
    max_iter = 3,
    line_search = "hz", c1 = 5e-10, c2 = 1e-9, step0 = "hz",
    step_next_init = "hz", grad_tol = 1e-5
  )
  # Function and gradient counts are expected to differ for this setup.
  expect_equal(res$nf, 10)
  expect_equal(res$ng, 8)
  expect_equal(res$f, 4.09, tolerance = 1e-3)
  expect_equal(res$g2n, 1.789, tolerance = 1e-3)
  expect_equal(res$par, c(-1.020, 1.050), tolerance = 1e-3)
})
test_that("CG with Rasmussen LS and max_fn termination", {
  # max_fn = 20 is set alongside max_iter = 3; the expected function count
  # below equals that budget.
  res <- mize(rb0, rosenbrock_fg,
    method = "cg",
    cg_update = "pr+",
    max_iter = 3, max_fn = 20, ls_max_alpha_mult = 10,
    line_search = "ras", c1 = 5e-10, c2 = 1e-9, step0 = "ras",
    step_next_init = "slope", grad_tol = 1e-5
  )
  expect_equal(res$nf, 20)
  expect_equal(res$ng, 20)
  expect_equal(res$f, 3.54, tolerance = 1e-3)
  expect_equal(res$g2n, 23.23, tolerance = 1e-3)
  expect_equal(res$par, c(-0.806, 0.596), tolerance = 1e-3)
})
test_that("NAG with Rasmussen LS", {
  # Three NAG iterations with nest_convex_approx = FALSE, nest_q = 0 and
  # nest_burn_in = 0, using the Rasmussen line search and initial step.
  res <- mize(rb0, rosenbrock_fg,
    method = "NAG",
    nest_convex_approx = FALSE, nest_q = 0, nest_burn_in = 0,
    max_iter = 3,
    line_search = "rasmussen", c1 = 5e-10, c2 = 1e-9,
    step0 = "rasmussen",
    step_next_init = "slope", ls_max_alpha_mult = 10,
    grad_tol = 1e-5
  )
  expect_equal(res$nf, 29)
  expect_equal(res$ng, 29)
  expect_equal(res$f, 3.56, tolerance = 1e-3)
  expect_equal(res$g2n, 7.2, tolerance = 1e-3)
  expect_equal(res$par, c(-0.869, 0.781), tolerance = 1e-3)
})
test_that("bold driver SD and classical momentum", {
  # SD with a normalized direction, the "bold" line search and classical
  # momentum on a "ramp" schedule from 0.1 to 0.3.
  res <- mize(rb0, rosenbrock_fg,
    method = "SD", norm_direction = TRUE,
    line_search = "bold",
    mom_type = "classical",
    mom_schedule = "ramp", mom_init = 0.1, mom_final = 0.3,
    max_iter = 3, grad_tol = 1e-5
  )
  expect_equal(res$nf, 12)
  expect_equal(res$ng, 5) # extra grad eval needed to get data for best found
  expect_equal(res$f, 4.38, tolerance = 1e-3)
  expect_equal(res$g2n, 23.21, tolerance = 1e-3)
  expect_equal(res$par, c(-1.006, 1.071), tolerance = 1e-3)
})
test_that("bold driver SD and nesterov momentum", {
  # SD with a normalized direction, the "bold" line search and nesterov
  # momentum on a "ramp" schedule from 0.1 to 0.3.
  res <- mize(rb0, rosenbrock_fg,
    method = "SD", norm_direction = TRUE,
    line_search = "bold",
    mom_type = "nesterov",
    mom_schedule = "ramp", mom_init = 0.1, mom_final = 0.3,
    max_iter = 3, grad_tol = 1e-5
  )
  expect_equal(res$nf, 12)
  expect_equal(res$ng, 5)
  expect_equal(res$f, 4.38, tolerance = 1e-3)
  expect_equal(res$g2n, 23.21, tolerance = 1e-3)
  expect_equal(res$par, c(-1.006, 1.071), tolerance = 1e-3)
})
test_that("Delta bar delta adaptive learning rate and nesterov momentum", {
  # DBD with a constant momentum value of 0.2; rel_tol and abs_tol are both
  # set to NULL.
  res <- mize(rb0, rosenbrock_fg,
    method = "DBD", norm_direction = TRUE,
    step0 = 0.1,
    mom_type = "nesterov",
    mom_schedule = 0.2,
    max_iter = 3, grad_tol = 1e-5, rel_tol = NULL, abs_tol = NULL
  )
  expect_equal(res$nf, 1)
  expect_equal(res$ng, 4)
  expect_equal(res$f, 4.84, tolerance = 1e-3)
  expect_equal(res$g2n, 37.85, tolerance = 1e-3)
  expect_equal(res$par, c(-0.993, 1.079), tolerance = 1e-3)
})
test_that("Terminates semi-gracefully if function value is non-finite", {
  # Convergence is checked on every iteration; the run is expected to stop
  # with the "fn_inf" reason at iteration 4.
  opt <- mize(
    rb0, rosenbrock_fg, "DBD",
    step0 = 1,
    check_conv_every = 1
  )
  expect_equal(opt$terminate$what, "fn_inf")
  expect_equal(opt$iter, 4)
})
test_that("Terminates semi-gracefully if gradient is non-finite", {
  # With sparse convergence checks the solution can diverge between two
  # checks. A NaN seen during a gradient calculation should end the run
  # early, even on an iteration that is not a convergence check.
  opt <- mize(
    rb0, rosenbrock_fg, "DBD",
    step0 = 1,
    check_conv_every = 10
  )
  expect_equal(opt$terminate$what, "gr_inf")
  expect_equal(opt$iter, 6)
})
test_that("Step tolerance is triggered when progress stalls", {
  # NULL abs_tol to stop it from triggering before step_tol
  res <- mize(rb0, rosen_no_hess, "L-BFGS",
    memory = 5, abs_tol = NULL,
    step_tol = 1e-7, step_next_init = "quad",
    step0 = 1
  )
  expect_equal(res$nf, 57)
  expect_equal(res$ng, 57)
  expect_equal(res$f, 0, tolerance = 1e-3)
  # No explicit tolerance here: the default expect_equal tolerance applies.
  expect_equal(res$par, c(1, 1))
  expect_equal(res$terminate$what, "step_tol")
})
test_that("Step tolerance is not triggered when restarting", {
  # The stored progress row "52" is expected to record a step of 0, yet the
  # run should still continue to max_iter instead of stopping there.
  opt <- mize(
    rb0, rosenbrock_fg,
    method = "NAG",
    max_iter = 55,
    restart = "fn",
    store_progress = TRUE
  )
  expect_equal(opt$iter, 55)
  expect_equal(opt$progress["52", "step"], 0)
  expect_equal(opt$terminate$what, "max_iter")
})
test_that("max_fn errs on the side of caution", {
  # In this test we ask for 15 function evaluations, but only get 14
  # this is because we need one function evaluation spare to calculate
  # f for the return value and mize has determined it isn't available
  # for "free" by being calculated during the iteration
  res <- mize(rb0, rosenbrock_fg,
    method = "NAG", max_fn = 15,
    step_next_init = "slope",
    ls_max_alpha_mult = 10
  )
  expect_equal(res$terminate$what, "max_fn")
  expect_equal(res$terminate$val, 14)
  expect_equal(res$nf, 14)
  expect_equal(res$f, 4.08, tolerance = 1e-3)
})
test_that("max_fg also errs on the side of caution", {
  # A max_fg budget of 30 is requested; the reported terminating value is 29,
  # which is the sum of the expected nf (15) and ng (14) below.
  res <- mize(rb0, rosenbrock_fg,
    method = "NAG", max_fg = 30,
    step_next_init = "slope", ls_max_alpha_mult = 10
  )
  expect_equal(res$terminate$what, "max_fg")
  expect_equal(res$terminate$val, 29)
  expect_equal(res$nf, 15)
  expect_equal(res$ng, 14)
  expect_equal(res$f, 4.08, tolerance = 1e-3)
})
test_that("max_fn with DBD", {
  # Don't leave one function evaluation spare with DBD because it
  # doesn't use them during its iteration
  res <- mize(rb0, rosenbrock_fg, method = "DBD", max_fn = 30)
  expect_equal(res$terminate$what, "max_fn")
  expect_equal(res$terminate$val, 30)
  expect_equal(res$nf, 30)
  expect_equal(res$ng, 30)
  expect_equal(res$f, 4.133, tolerance = 1e-3)
})
test_that("max_fg with DBD", {
  # With DBD the full max_fg budget of 30 is expected to be used, split
  # evenly between function and gradient evaluations.
  res <- mize(rb0, rosenbrock_fg, method = "DBD", max_fg = 30)
  expect_equal(res$terminate$what, "max_fg")
  expect_equal(res$terminate$val, 30)
  expect_equal(res$nf, 15)
  expect_equal(res$ng, 15)
  expect_equal(res$f, 7.914, tolerance = 1e-3)
})
test_that("max functions per line search", {
  # This starts at the minimum and probably due to a lack of smoothness
  # at the minimum due to floating point issues rather than the function itself,
  # doesn't make a lot of progress - but at least stops after 20 steps
  # without a max on the line search, this can take 100s of steps to give up
  # (or worse)
  res <- mize(c(3, 0.5), tricky_fg(), method = "SD", ls_max_fn = 20)
  expect_equal(res$terminate$what, "step_tol")
  expect_equal(res$terminate$val, 0)
  expect_equal(res$nf, 21)
  expect_equal(res$ng, 21)
  expect_equal(res$f, 0, tolerance = 1e-3)
})
test_that("backtracking line search", {
  # The line search name is passed in upper case ("BACK").
  res <- mize(rb0, rosenbrock_fg,
    method = "NAG", line_search = "BACK",
    max_iter = 3, step_next_init = "slope"
  )
  expect_equal(res$nf, 7)
  expect_equal(res$ng, 6)
  expect_equal(res$f, 20.44, tolerance = 1e-3)
  expect_equal(res$par, c(-1.184, 1.006), tolerance = 1e-3)
})
test_that("MT safeguard cubic", {
  # Chosen only because I couldn't find a simpler example that yielded a
  # difference
  res <- mize(rb0, rosenbrock_fg,
    method = "CG", c2 = 0.9, grad_tol = 0.1,
    max_iter = 11, ls_safe_cubic = TRUE
  )
  # These three should be different from ls_safe_cubic = FALSE
  expect_equal(res$nf, 25) # 24 otherwise
  expect_equal(res$ng, 25)
  expect_equal(res$g2n, 4.604, tolerance = 1e-3) # approx 4.627 otherwise
  # These are the same within tolerance
  expect_equal(res$f, 2.8, tolerance = 1e-3)
  expect_equal(res$par, c(-0.6605, 0.4568), tolerance = 1e-3)
})
test_that("Truncated Newton with constant step size", {
  # Three TN iterations with a constant step of 1 and
  # check_conv_every = NULL.
  res <- mize(rb0, rosenbrock_fg,
    method = "TN", max_iter = 3,
    line_search = "const", step0 = 1, grad_tol = 1e-5,
    check_conv_every = NULL
  )
  expect_equal(res$nf, 1)
  expect_equal(res$ng, 8)
  expect_equal(res$f, 4.118, tolerance = 1e-3)
  expect_equal(res$g2n, 4.219, tolerance = 1e-3)
  expect_equal(res$par, c(-1.023, 1.062), tolerance = 1e-3)
})
# Ensure TN direction can't exceed gr budget
test_that("Truncated Newton with max_gr", {
  # Constant-step TN with max_gr = 6 and no grad_tol argument supplied.
  res <- mize(rb0, rosenbrock_fg,
    method = "TN", max_iter = 3,
    check_conv_every = NULL, line_search = "const", step0 = 1,
    max_gr = 6
  )
  # Should give the same f/par results as without max_gr, as we would quit
  # with -ve curvature anyway
  expect_equal(res$nf, 1)
  # If grad_tol or ginf_tol was calculated we would get max_gr + 1
  expect_equal(res$ng, 6)
  expect_equal(res$f, 4.118, tolerance = 1e-3)
  expect_equal(res$par, c(-1.023, 1.062), tolerance = 1e-3)
})
test_that("Report ng correctly with simple backtracking line search", {
  # First run: backtracking with an explicit step_down of 0.5.
  with_step_down <- mize(
    rb0, rosenbrock_fg,
    method = "L-BFGS",
    line_search = "backtracking",
    step_next_init = 1,
    max_iter = 2,
    step_down = 0.5
  )
  expect_equal(with_step_down$nf, 3)
  expect_equal(with_step_down$ng, 2)

  # Second run: same settings but step_down is left unset; one more gradient
  # evaluation is expected.
  without_step_down <- mize(
    rb0, rosenbrock_fg,
    method = "L-BFGS",
    line_search = "backtracking",
    step_next_init = 1,
    max_iter = 2
  )
  expect_equal(without_step_down$nf, 3)
  expect_equal(without_step_down$ng, 3)
})
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.