Nothing
# Momentum ----------------------------------------------------------------
# Create a direction sub stage for momentum
momentum_direction <- function(normalize = FALSE) {
  # The direction is the previous update stored in the optimizer cache,
  # passed through normalize() when the sub stage's normalize flag is set.
  calc_direction <- function(opt, stage, sub_stage, par, fg, iter) {
    prev_update <- opt$cache$update_old
    sub_stage$value <- if (sub_stage$normalize) {
      normalize(prev_update)
    } else {
      prev_update
    }
    list(sub_stage = sub_stage)
  }

  make_direction(list(
    name = "classical_momentum",
    calculate = calc_direction,
    # Needs the "update_old" dependency so the cache entry is available.
    depends = "update_old",
    normalize = normalize
  ))
}
# Creates a step size sub stage for momentum
# mu_fn a function that takes an iteration number and returns the momentum.
# Adaptive restart can restart the momentum in which case the function will
# be passed an "effective" iteration number which may be smaller than the
# actual iteration value.
# use_init_mom If TRUE, then always use the momentum coefficient specified by
# mu_fn even when the effective iteration is 1 (first iteration or restart).
# In some cases using non-standard momentum (e.g. Nesterov or linear-weighted),
# this could result in the resulting step length being shorter or longer
# than would be otherwise expected. If FALSE, then the momentum coefficient
# is always zero when the effective iteration is 1 or less (first iteration
# or restart).
# Sub stage fields set here:
#   value     - the momentum coefficient computed for the current iteration.
#   t         - the effective iteration counter: set to 1 by init() and
#               incremented by after_step().
#   mu_fn     - the momentum schedule, called as mu_fn(t, max_iter).
#   min_value - min_momentum, passed to sclamp() as 'min'.
#   max_value - max_momentum, passed to sclamp() as 'max'.
# The 'verbose' argument is accepted but not used by this function.
make_momentum_step <- function(mu_fn,
                               min_momentum = 0,
                               max_momentum = 1,
                               use_init_mom = FALSE,
                               verbose = FALSE) {
  # use_init_mom is only read inside calculate(), i.e. after this factory has
  # returned. Force the promise now so the closure captures the value the
  # caller passed rather than whatever the caller's variable holds later
  # (e.g. when steps are created inside a for loop).
  force(use_init_mom)

  make_step_size(list(
    name = "momentum_step",
    init = function(opt, stage, sub_stage, par, fg, iter) {
      sub_stage$value <- 0
      sub_stage$t <- 1
      list(sub_stage = sub_stage)
    },
    calculate = function(opt, stage, sub_stage, par, fg, iter) {
      if (!use_init_mom && sub_stage$t <= 1) {
        # First (effective) iteration: no momentum unless explicitly requested.
        sub_stage$value <- 0
      } else {
        sub_stage$value <-
          sclamp(sub_stage$mu_fn(sub_stage$t, opt$convergence$max_iter),
            min = sub_stage$min_value,
            max = sub_stage$max_value
          )
      }
      list(sub_stage = sub_stage)
    },
    after_step = function(opt, stage, sub_stage, par, fg, iter, par0,
                          update) {
      sub_stage$t <- sub_stage$t + 1
      list(sub_stage = sub_stage)
    },
    mu_fn = mu_fn,
    min_value = min_momentum,
    max_value = max_momentum,
    t = 0
  ))
}
# Function Factories ------------------------------------------------------
# A function that switches from one momentum value to another at the
# specified iteration.
make_switch <- function(init_value = 0.5, final_value = 0.8,
                        switch_iter = 250) {
  # Arguments are lazy promises: without forcing them here, the returned
  # closure would evaluate them on its first call, picking up any later change
  # to the caller's variables (e.g. a loop variable).
  force(init_value)
  force(final_value)
  force(switch_iter)

  # iter: the (effective) iteration number. max_iter is accepted so that all
  # momentum functions share the same signature, but is not used here.
  # Returns final_value once iter has reached switch_iter, else init_value.
  function(iter, max_iter) {
    if (iter >= switch_iter) {
      final_value
    } else {
      init_value
    }
  }
}
# A function that increases linearly from init_value to final_value, reaching
# final_value at iteration max_iter. For any iter up to and including
# 1 + wait, init_value is returned.
#
# wait - if set to a non-zero value, recalculates the values so that
# the init_value is used for 'wait' extra iterations, but with final_value
# still reached after max_iter iterations. Set to 1 for momentum calculations
# where in most cases the momentum on the first iteration would be either
# ignored or the value overridden and set to zero anyway. Stops a larger than
# expected jump on iteration 2.
make_ramp <- function(init_value = 0,
                      final_value = 0.9,
                      wait = 0) {
  # Arguments are lazy promises: force them now so the returned closure keeps
  # the values supplied at creation time, even if the caller's variables
  # change before the closure is first called (e.g. inside a for loop).
  force(init_value)
  force(final_value)
  force(wait)

  # iter: the (effective) iteration number; max_iter: the iteration at which
  # final_value should be reached.
  function(iter, max_iter) {
    # number of iterations over which the value actually changes
    iters <- max_iter - 1 - wait
    # denominator of linear scaling; at least 1 to avoid dividing by zero
    n <- max(iters, 1)
    m <- (final_value - init_value) / n
    # position along the ramp; negative while still in the waiting period
    t <- iter - 1 - wait
    if (t < 0) {
      return(init_value)
    }
    (m * t) + init_value
  }
}
# A function that returns a constant momentum value
make_constant <- function(value) {
  # 'value' is a lazy promise: force it now so the returned closure keeps the
  # value supplied at creation time rather than evaluating the caller's
  # expression on first use (which goes wrong inside a for loop).
  force(value)

  # iter and max_iter are accepted so that all momentum functions share the
  # same signature; both are ignored.
  function(iter, max_iter) {
    value
  }
}
# Momentum Correction -----------------------------------------------------
# Normally, momentum schemes are given as eps*grad + mu*old_update, but
# some momentum schemes define the update as: (1-mu)*eps*grad + mu*old_update
# which can easily be expanded as: eps*grad + mu*old_update - mu*eps*grad
# i.e. add an extra stage to subtract a fraction (mu worth) of the gradient
# descent
# The momentum correction direction: the opposite direction to the gradient
# descent.
momentum_correction_direction <- function() {
  # Points the opposite way to the direction stored on the
  # "gradient_descent" stage.
  calc_direction <- function(opt, stage, sub_stage, par, fg, iter) {
    gd_direction <- opt$stages[["gradient_descent"]]$direction$value
    sub_stage$value <- -gd_direction
    list(sub_stage = sub_stage)
  }

  make_direction(list(
    name = "momentum_correction_direction",
    calculate = calc_direction
  ))
}
# The momentum correction step size: mu times the gradient descent step size.
momentum_correction_step <- function() {
  # Step size is the product of the step sizes stored on the
  # "gradient_descent" and "momentum" stages.
  calc_step <- function(opt, stage, sub_stage, par, fg, iter) {
    stages <- opt$stages
    gd_step_size <- stages[["gradient_descent"]]$step_size$value
    mu <- stages[["momentum"]]$step_size$value
    sub_stage$value <- gd_step_size * mu
    list(sub_stage = sub_stage)
  }

  make_step_size(list(
    name = "momentum_correction_step",
    calculate = calc_step
  ))
}
# Momentum Dependencies ------------------------------------------------------------
# Save this update for use in the next step
# Stores the update just taken in the optimizer cache and returns the modified
# optimizer. The attributes identify this handler: the event it is tagged
# with, its name, and the name of the handler it depends on.
require_update_old <- structure(
  function(opt, par, fg, iter, par0, update) {
    opt$cache$update_old <- update
    opt
  },
  event = "after step",
  name = "update_old",
  depends = "update_old_init"
)
# Initialize the old update vector
# Seeds the cached old update with a zero vector the same length as par, and
# returns the modified optimizer wrapped in a list. The attributes give the
# event this handler is tagged with and its name.
require_update_old_init <- structure(
  function(opt, stage, sub_stage, par, fg, iter) {
    n_par <- length(par)
    opt$cache$update_old <- numeric(n_par)
    list(opt = opt)
  },
  event = "init momentum direction",
  name = "update_old_init"
)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.