Nothing
## ----setup, include = FALSE, echo = FALSE, message = FALSE--------------------
knitr::opts_chunk$set(echo = TRUE, collapse = TRUE, comment = "#>")
library(mize)
## ----Defining a function and gradient to optimize-----------------------------
# Rosenbrock test function together with its analytical gradient, packaged as
# a named list of functions that each take the parameter vector `x`.
rb_fg <- list(
  # Objective: 100 * (x2 - x1^2)^2 + (1 - x1)^2
  fn = function(x) {
    x1 <- x[1]
    x2 <- x[2]
    100 * (x2 - x1 * x1) ^ 2 + (1 - x1) ^ 2
  },
  # Gradient: partial derivatives with respect to x1 and x2, in that order
  gr = function(x) {
    x1 <- x[1]
    x2 <- x[2]
    c(
      -400 * x1 * (x2 - x1 * x1) - 2 * (1 - x1),
      200 * (x2 - x1 * x1)
    )
  }
)
## ----A function list with an optional fg item---------------------------------
# Rosenbrock function list with an additional `fg` entry that evaluates the
# objective and the gradient in one call, sharing the common sub-expressions.
rb_fg <- list(
  # Objective: 100 * (x2 - x1^2)^2 + (1 - x1)^2
  fn = function(x) {
    x1 <- x[1]
    x2 <- x[2]
    100 * (x2 - x1 * x1) ^ 2 + (1 - x1) ^ 2
  },
  # Gradient: partial derivatives with respect to x1 and x2, in that order
  gr = function(x) {
    x1 <- x[1]
    x2 <- x[2]
    c(
      -400 * x1 * (x2 - x1 * x1) - 2 * (1 - x1),
      200 * (x2 - x1 * x1)
    )
  },
  # Returns list(fn = <objective value>, gr = <gradient vector>)
  fg = function(x) {
    x1 <- x[1]
    curve_term <- x[2] - x1 * x1 # appears in both the value and the gradient
    offset_term <- 1 - x1
    value <- 100 * curve_term ^ 2 + offset_term ^ 2
    grad <- c(
      -400 * x1 * curve_term - 2 * offset_term,
      200 * curve_term
    )
    list(fn = value, gr = grad)
  }
)
## ----Defining a starting point------------------------------------------------
# Two-element starting point passed as the initial parameters to mize().
rb0 <- c(-1.2, 1.0)
## ----Defaults-----------------------------------------------------------------
# Optimize from rb0 with every mize() option left at its default.
res <- mize(rb0, rb_fg)

# Final parameter values (should be close to c(1, 1))
res[["par"]]

# Function value at those parameters (should be close to 0)
res[["f"]]

# Number of iterations taken
res[["iter"]]

# Number of function evaluations
res[["nf"]]

# Number of gradient evaluations
res[["ng"]]

# Reason the optimization terminated
res[["terminate"]]
## ----Verbose mode-------------------------------------------------------------
# Verbose run capped at 10 iterations, with both gradient tolerances at 1e-3.
res <- mize(
  rb0, rb_fg,
  verbose = TRUE,
  max_iter = 10,
  grad_tol = 1e-3,
  ginf_tol = 1e-3
)
## ----Log every 10 iterations--------------------------------------------------
# Verbose run with log_every = 10 and no explicit iteration cap.
res <- mize(
  rb0, rb_fg,
  verbose = TRUE,
  log_every = 10,
  grad_tol = 1e-3
)
## ----Returning stored progress------------------------------------------------
# Ask for progress to be stored on the result, then display it.
res <- mize(
  rb0, rb_fg,
  log_every = 10,
  store_progress = TRUE
)
res$progress
## ----Steepest descent---------------------------------------------------------
# Each run below is capped at 10 iterations and differs only in the method
# (and method-specific options) requested.
res <- mize(rb0, rb_fg, method = "SD", max_iter = 10)
## ----BFGS---------------------------------------------------------------------
res <- mize(rb0, rb_fg, method = "BFGS", max_iter = 10)
## ----BFGS without scaled Hessian----------------------------------------------
res <- mize(
  rb0, rb_fg,
  method = "BFGS",
  scale_hess = FALSE,
  max_iter = 10
)
## ----LBFGS--------------------------------------------------------------------
# L-BFGS with the memory option set to 7
res <- mize(
  rb0, rb_fg,
  method = "L-BFGS",
  memory = 7,
  max_iter = 10
)
## ----LBFGS without scaled Hessian---------------------------------------------
res <- mize(
  rb0, rb_fg,
  method = "L-BFGS",
  scale_hess = FALSE,
  max_iter = 10
)
## ----CG with PR+--------------------------------------------------------------
# No cg_update given here; the chunk label indicates PR+ is what gets used.
res <- mize(rb0, rb_fg, method = "CG", max_iter = 10)
## ----CG with HZ+--------------------------------------------------------------
res <- mize(
  rb0, rb_fg,
  method = "CG",
  cg_update = "HZ+",
  max_iter = 10
)
## ----NAG----------------------------------------------------------------------
# NAG method, capped at 10 iterations.
res <- mize(rb0, rb_fg, method = "NAG", max_iter = 10)
## ----NAG with 100 steps-------------------------------------------------------
# Longer NAG run with progress stored so it can be plotted.
res <- mize(
  rb0, rb_fg,
  method = "NAG",
  max_iter = 100,
  store_progress = TRUE
)
# Log of the stored function value against the stored nf column.
plot(res$progress$nf, log(res$progress$f), type = "l")
res$f
## ----NAG with 100 steps and less aggressive momentum--------------------------
# Same run with nest_q = 0.001, kept in a separate result for comparison.
resq <- mize(
  rb0, rb_fg,
  method = "NAG",
  nest_q = 0.001,
  max_iter = 100,
  store_progress = TRUE
)
# Redraw the first run with a y-range wide enough for both, then overlay the
# nest_q run in red.
plot(
  res$progress$nf, log(res$progress$f),
  type = "l",
  ylim = range(log(res$progress$f), log(resq$progress$f))
)
lines(resq$progress$nf, log(resq$progress$f), col = "red")
resq$f
## ----Momentum-----------------------------------------------------------------
# Momentum method with a constant momentum value of 0.9, 10 iterations.
res <- mize(
  rb0, rb_fg,
  method = "MOM",
  mom_schedule = 0.9,
  max_iter = 10
)
## ----Momentum plot------------------------------------------------------------
# Same settings over 100 iterations, storing progress for the plot below.
res <- mize(
  rb0, rb_fg,
  method = "MOM",
  mom_schedule = 0.9,
  max_iter = 100,
  store_progress = TRUE
)
plot(res$progress$nf, log(res$progress$f), type = "l")
res$f
## ----momentum with a switch function------------------------------------------
# Step change in momentum: 0.4 initially, 0.8 from iteration 5
res <- mize(
  rb0, rb_fg,
  method = "MOM",
  mom_schedule = "switch",
  mom_init = 0.4,
  mom_final = 0.8,
  mom_switch_iter = 5,
  max_iter = 10
)
## ----momentum with a ramp function--------------------------------------------
# "ramp" schedule between the same initial and final momentum values
res <- mize(
  rb0, rb_fg,
  method = "MOM",
  mom_schedule = "ramp",
  mom_init = 0.4,
  mom_final = 0.8,
  max_iter = 10
)
## ----momentum with nesterov schedule------------------------------------------
res <- mize(
  rb0, rb_fg,
  method = "MOM",
  mom_schedule = "nsconvex",
  max_iter = 10
)
## ----momentum with nesterov schedule and non-zero q---------------------------
res <- mize(
  rb0, rb_fg,
  method = "MOM",
  mom_schedule = "nsconvex",
  nest_q = 0.001,
  max_iter = 10
)
## ----momentum with random momentum--------------------------------------------
# Custom momentum schedule: ignores both arguments and returns a single
# uniform random draw between 0 and 1.
mom_fn <- function(iter, max_iter) {
  stats::runif(1L, min = 0, max = 1)
}
# Pass the mom_fn function itself as the momentum schedule.
res <- mize(
  rb0, rb_fg,
  method = "MOM",
  mom_schedule = mom_fn,
  max_iter = 10
)
## ----Simplified Nesterov momentum---------------------------------------------
# Constant momentum of 0.9 with the momentum type set to "nesterov".
res <- mize(
  rb0, rb_fg,
  method = "MOM",
  mom_type = "nesterov",
  mom_schedule = 0.9,
  max_iter = 10
)
## ----Nesterov versus classical momentum---------------------------------------
# Two 100-iteration momentum runs that differ only in mom_type, each storing
# its progress so the two can be compared.
resc <- mize(
  rb0, rb_fg,
  method = "MOM",
  mom_schedule = 0.9,
  max_iter = 100,
  store_progress = TRUE
)
resn <- mize(
  rb0, rb_fg,
  method = "MOM",
  mom_type = "nesterov",
  mom_schedule = 0.9,
  max_iter = 100,
  store_progress = TRUE
)
# Best f found for Nesterov momentum
resn$f
# Best f found for classical momentum
resc$f
# Classical run as the base line, Nesterov run overlaid in red; ylim covers
# the log function values of both.
plot(
  resc$progress$nf, log(resc$progress$f),
  type = "l",
  ylim = range(log(resc$progress$f), log(resn$progress$f))
)
lines(resn$progress$nf, log(resn$progress$f), col = "red")
## ----Nesterov momentum with convex approximation------------------------------
res <- mize(
  rb0, rb_fg,
  method = "MOM",
  mom_type = "nesterov",
  mom_schedule = "nsconvex",
  nest_convex_approx = TRUE,
  max_iter = 10
)
## ----other Wolfe line search--------------------------------------------------
# CG runs of 10 iterations each, varying only the line search that is named.
# Rasmussen line search
res <- mize(
  rb0, rb_fg,
  method = "CG",
  line_search = "Rasmussen",
  max_iter = 10
)
# Mark Schmidt's minFunc line search
res <- mize(
  rb0, rb_fg,
  method = "CG",
  line_search = "Schmidt",
  max_iter = 10
)
# Hager-Zhang line search, by full name
res <- mize(
  rb0, rb_fg,
  method = "CG",
  line_search = "Hager-Zhang",
  max_iter = 10
)
# ... and by its abbreviation "HZ"
res <- mize(
  rb0, rb_fg,
  method = "CG",
  line_search = "HZ",
  max_iter = 10
)
# More-Thuente can be requested explicitly too
res <- mize(
  rb0, rb_fg,
  method = "CG",
  line_search = "More-Thuente",
  max_iter = 10
)
# ... and by its abbreviation "MT"
res <- mize(
  rb0, rb_fg,
  method = "CG",
  line_search = "MT",
  max_iter = 10
)
## ----Line search parameters---------------------------------------------------
# CG with the HZ+ update and explicit c1 / c2 line search constants.
res <- mize(
  rb0, rb_fg,
  method = "CG",
  cg_update = "HZ+",
  c1 = 0.1,
  c2 = 0.5,
  max_iter = 10
)
## ----Line search with slope ratio---------------------------------------------
res <- mize(
  rb0, rb_fg,
  step_next_init = "slope",
  max_iter = 10
)
## ----Line search with Hager-Zhang QuadStep------------------------------------
res <- mize(
  rb0, rb_fg,
  line_search = "mt",
  step_next_init = "hz",
  max_iter = 10
)
## ----Line search with scipy initialization------------------------------------
res <- mize(
  rb0, rb_fg,
  step0 = "scipy",
  max_iter = 10
)
## ----Line search with initial step length of 1--------------------------------
# A first step-length guess of 1 is not a bad choice for L-BFGS
res <- mize(
  rb0, rb_fg,
  method = "L-BFGS",
  step0 = 1,
  max_iter = 10
)
## ----BFGS with no Newton step-------------------------------------------------
res <- mize(
  rb0, rb_fg,
  method = "BFGS",
  try_newton_step = FALSE,
  max_iter = 10
)
## ----alternative Wolfe conditions---------------------------------------------
# Rasmussen line search using the standard (not strong) Wolfe conditions
res <- mize(
  rb0, rb_fg,
  method = "CG",
  line_search = "Rasmussen",
  strong_curvature = FALSE,
  max_iter = 10
)
# Hager-Zhang line search using the strong Wolfe conditions
res <- mize(
  rb0, rb_fg,
  method = "CG",
  line_search = "HZ",
  strong_curvature = TRUE,
  approx_armijo = FALSE,
  max_iter = 10
)
# More-Thuente line search using the approximate Armijo conditions
res <- mize(
  rb0, rb_fg,
  method = "CG",
  line_search = "MT",
  approx_armijo = TRUE,
  max_iter = 10
)
## ----constant step size-------------------------------------------------------
# Steepest descent with a "constant" line search, step0 = 0.01 and
# norm_direction switched on.
res <- mize(
  rb0, rb_fg,
  method = "SD",
  line_search = "constant",
  step0 = 0.01,
  norm_direction = TRUE,
  max_iter = 10
)
## ----backtracking with cubic interpolation------------------------------------
# Backtracking line search with no step_down value supplied.
res <- mize(
  rb0, rb_fg,
  line_search = "backtracking",
  step0 = 1,
  c1 = 0.1,
  max_iter = 10
)
## ----backtracking with halved step size---------------------------------------
# Same, but with step_down = 0.5.
res <- mize(
  rb0, rb_fg,
  line_search = "backtracking",
  step0 = 1,
  c1 = 0.1,
  step_down = 0.5,
  max_iter = 10
)
## ----bold driver--------------------------------------------------------------
# Step size grows by 10% (step_up = 1.1) and shrinks by 50% (step_down = 0.5)
res <- mize(
  rb0, rb_fg,
  line_search = "bold",
  step0 = 1,
  step_up = 1.1,
  step_down = 0.5,
  max_iter = 10
)
## ----max line search functions------------------------------------------------
# Allow at most 10 gradient evaluations per line search
res <- mize(
  rb0, rb_fg,
  ls_max_gr = 10,
  max_iter = 10
)
## -----------------------------------------------------------------------------
# DBD method with a Rasmussen-style initial step, explicit step_up / step_down
# values and dbd_weight = 0.5.
res <- mize(
  rb0, rb_fg,
  method = "DBD",
  step0 = "rasmussen",
  step_up = 1.1,
  step_down = 0.5,
  dbd_weight = 0.5,
  max_iter = 10
)
## ----t-SNE style DBD parameters-----------------------------------------------
# step_up_fun = "+" is passed together with step_up = 0.2.
res <- mize(
  rb0, rb_fg,
  method = "DBD",
  step0 = "rasmussen",
  step_up = 0.2,
  step_up_fun = "+",
  step_down = 0.8,
  max_iter = 10
)
## -----------------------------------------------------------------------------
# DBD with rel_tol and abs_tol explicitly set
res <- mize(
  rb0, rb_fg,
  method = "DBD",
  step0 = "rasmussen",
  step_up = 0.2,
  step_up_fun = "+",
  step_down = 0.8,
  rel_tol = 1e-8,
  abs_tol = 1e-8,
  max_iter = 10
)
# 10 gradient calculations as expected
res$ng
# But 10 function calculations too, only used in the tolerance check
res$nf
# Turn off rel_tol and abs_tol and let max_iter handle termination
res <- mize(
  rb0, rb_fg,
  method = "DBD",
  step0 = "rasmussen",
  step_up = 0.2,
  step_up_fun = "+",
  step_down = 0.8,
  rel_tol = NULL,
  abs_tol = NULL,
  grad_tol = 1e-5,
  max_iter = 10
)
# 11 gradient calculations
res$ng
# Only one function evaluation needed (to calculate res$f)
res$nf
## ----momentum with restart----------------------------------------------------
# Three 100-iteration momentum runs (constant momentum 0.9, progress stored):
# no restart, restart = "fn", and restart = "gr".
resc <- mize(
  rb0, rb_fg,
  method = "MOM",
  mom_schedule = 0.9,
  max_iter = 100,
  store_progress = TRUE
)
resf <- mize(
  rb0, rb_fg,
  method = "MOM",
  mom_schedule = 0.9,
  restart = "fn",
  max_iter = 100,
  store_progress = TRUE
)
resg <- mize(
  rb0, rb_fg,
  method = "MOM",
  mom_schedule = 0.9,
  restart = "gr",
  max_iter = 100,
  store_progress = TRUE
)
# No-restart run as the base line; "fn" restart in red, "gr" restart in blue.
# ylim covers the log function values of all three runs.
plot(
  resc$progress$nf, log(resc$progress$f),
  type = "l",
  ylim = range(
    log(resc$progress$f),
    log(resf$progress$f),
    log(resg$progress$f)
  )
)
lines(resf$progress$nf, log(resf$progress$f), col = "red")
lines(resg$progress$nf, log(resg$progress$f), col = "blue")
## ----momentum with restart and wait time--------------------------------------
# Repeat the "fn" and "gr" restart runs with restart_wait = 1.
resfw <- mize(
  rb0, rb_fg,
  method = "MOM",
  mom_schedule = 0.9,
  restart = "fn",
  restart_wait = 1,
  max_iter = 100,
  store_progress = TRUE
)
resgw <- mize(
  rb0, rb_fg,
  method = "MOM",
  mom_schedule = 0.9,
  restart = "gr",
  restart_wait = 1,
  max_iter = 100,
  store_progress = TRUE
)
# Relies on resc, resf and resg created by the earlier restart comparison.
# Base line: no restart. Red: "fn" restart. Blue: "fn" restart with wait.
# Orange: "gr" restart with wait.
# NOTE(review): resg widens ylim below but is never drawn in this figure --
# confirm whether a lines() call for it was intended.
plot(
  resc$progress$nf, log(resc$progress$f),
  type = "l",
  ylim = range(
    log(resc$progress$f),
    log(resf$progress$f),
    log(resg$progress$f),
    log(resfw$progress$f),
    log(resgw$progress$f)
  )
)
lines(resf$progress$nf, log(resf$progress$f), col = "red")
lines(resfw$progress$nf, log(resfw$progress$f), col = "blue")
lines(resgw$progress$nf, log(resgw$progress$f), col = "orange")
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.