# Functions used only for testing
# Step Size Expectation ---------------------------------------------------
expect_step <- function(actual, x, f, df, alpha = x, nfev, tolerance = 1e-4, check_grad = TRUE) {
expect_equal(actual$step$par, x, tolerance = tolerance)
expect_equal(actual$step$f, f, tolerance = tolerance)
if (check_grad) {
expect_equal(actual$step$df, df, tolerance = tolerance)
}
expect_equal(actual$step$alpha, alpha, tolerance = tolerance)
expect_equal(actual$nfn, nfev)
}
# Finite Difference -------------------------------------------------------
gfd <- function(par, fn, rel_eps = sqrt(.Machine$double.eps)) {
g <- rep(0, length(par))
for (i in 1:length(par)) {
oldx <- par[i]
if (oldx != 0) {
eps <- oldx * rel_eps
}
else {
eps <- 1e-3
}
par[i] <- oldx + eps
fplus <- fn(par)
par[i] <- oldx - eps
fminus <- fn(par)
par[i] <- oldx
g[i] <- (fplus - fminus) / (2 * eps)
}
g
}
make_gfd <- function(fn, eps = 1.e-3) {
function(par) {
gfd(par, fn, eps)
}
}
hfd <- function(par, fn, rel_eps = sqrt(.Machine$double.eps)) {
hs <- matrix(0, nrow = length(par), ncol = length(par))
for (i in 1:length(par)) {
for (j in i:length(par)) {
oldxi <- par[i]
oldxj <- par[j]
if (oldxi != 0 && oldxj != 0) {
eps <- min(oldxi, oldxj) * rel_eps
}
else {
eps <- 1e-3
}
if (i != j) {
par[i] <- par[i] + eps
par[j] <- par[j] + eps
fpp <- fn(par)
par[j] <- oldxj - eps
fpm <- fn(par)
par[i] <- oldxi - eps
par[j] <- oldxj + eps
fmp <- fn(par)
par[j] <- oldxj - eps
fmm <- fn(par)
par[i] <- oldxi
par[j] <- oldxj
val <- (fpp - fpm - fmp + fmm) / (4 * eps * eps)
hs[i, j] <- val
hs[j, i] <- val
}
else {
f <- fn(par)
oldxi <- par[i]
par[i] <- oldxi + 2 * eps
fpp <- fn(par)
par[i] <- oldxi + eps
fp <- fn(par)
par[i] <- oldxi - 2 * eps
fmm <- fn(par)
par[i] <- oldxi - eps
fm <- fn(par)
par[i] <- oldxi
hs[i, i] <- (-fpp + 16 * fp - 30 * f + 16 * fm - fmm) / (12 * eps * eps)
}
}
}
hs
}
make_hfd <- function(fn, eps = 1.e-3) {
function(par) {
hfd(par, fn, eps)
}
}
make_fg <- function(fn, gr = NULL, hs = NULL) {
if (is.null(gr)) {
gr <- make_gfd(fn)
}
if (is.null(hs)) {
hs <- make_hfd(fn)
}
list(
fn = fn,
gr = gr,
hs = hs
)
}
# Rosenbrock ---------------------------------------------------------------
rb0 <- c(-1.2, 1)
# taken from the optim man page
rosenbrock_fg <- list(
fn = function(x) {
x1 <- x[1]
x2 <- x[2]
100 * (x2 - x1 * x1)^2 + (1 - x1)^2
},
gr = function(x) {
x1 <- x[1]
x2 <- x[2]
c(
-400 * x1 * (x2 - x1 * x1) - 2 * (1 - x1),
200 * (x2 - x1 * x1)
)
},
hs = function(x) {
xx <- 1200 * x[1] * x[1] - 400 * x[2] + 2
xy <- x[1] * -400
yy <- 200
matrix(c(xx, xy, xy, yy), nrow = 2)
},
hi = function(x) {
1 / c(1200 * x[1] * x[1] - 400 * x[2] + 2, 200)
},
fg = function(x) {
x1 <- x[1]
x2 <- x[2]
a <- (x2 - x1 * x1)
b <- 1 - x1
list(
fn = 100 * a * a + b * b,
gr = c(
-400 * x1 * a - 2 * b,
200 * a
)
)
},
n = 2
)
rosen_no_hess <- rosenbrock_fg
rosen_no_hess$hs <- NULL
rosen_no_hess$hi <- NULL
# log-sum-exp -------------------------------------------------------------
# http://papers.nips.cc/paper/5322-a-differential-equation-for-modeling-nesterovs-accelerated-gradient-method-theory-and-insights.pdf
log_sum_exp_fg <- function(n = 50, m = 200, rho = 20, bvar = 2) {
A <- matrix(stats::rnorm(m * n), nrow = m)
b <- stats::rnorm(n = m, mean = 0, sd = sqrt(bvar))
lse_fg(A = A, b = b, rho = rho)
}
# Smooth and convex, but not strongly convex
lse_fg <- function(A, b, rho) {
fn <- function(x) {
rho * log(sum(exp((A %*% x - b) / rho)))
}
gr <- function(x) {
rAxb <- exp((A %*% x - b) / rho)
mult <- sweep(A, 1, rAxb, "*")
num <- colSums(mult)
as.vector(num / sum(rAxb))
}
res <- list(
fn = fn,
gr = gr,
A = A,
b = b,
rho = rho
)
res$grr <- make_gfd(fn = res$fn)
res
}
# Function with unhelpful Hessian -----------------------------------------
tricky_fg <- function() {
res <- list(
fn = function(par) {
x1 <- par[1]
x2 <- par[2]
(1.5 - x1 + x1 * x2)^2 +
(2.25 - x1 + x1 * x2 * x2)^2 +
(2.625 - x1 + x1 * x2 * x2 * x2)^2
}
)
res$gr <- make_gfd(res$fn)
res$hs <- make_hfd(res$fn)
res
}
# Line Search Util --------------------------------------------------------
# Create Initial Step Value
#
# Given a set of start parameters and a search direction, initializes the
# step data. Utility function for testing.
make_step0 <- function(fg, x, pv, f = fg$fn(x), df = fg$gr(x)) {
list(
x = x,
alpha = 0,
f = f,
df = df,
d = dot(pv, df)
)
}
# More'-Thuente test functions --------------------------------------------
# Test Function 1 ---------------------------------------------------------
# 1 phi(a) = -(a) / (a^2 + b) phi'(a) = (a^2 - b) / (a^2 + b)^2 b = 2
fn1 <- function(alpha, beta = 2) {
-alpha / (alpha^2 + beta)
}
gr1 <- function(alpha, beta = 2) {
(alpha^2 - beta) / ((alpha^2 + beta)^2)
}
fcn1 <- function(x) {
list(f = fn1(x), g = gr1(x))
}
# Test Function 2 ---------------------------------------------------------
# 2 phi(a) = (a + b)^5 - 2(a + b)^4 phi'(a) = (a+b)^3*(5a+5b-8) b = 0.004
fn2 <- function(alpha, beta = 0.004) {
(alpha + beta)^5 - 2 * (alpha + beta)^4
}
gr2 <- function(alpha, beta = 0.004) {
(alpha + beta)^3 * (5 * alpha + 5 * beta - 8)
}
fcn2 <- function(x) {
list(f = fn2(x), g = gr2(x))
}
# Test Function 3 ---------------------------------------------------------
# 3 phi(a) = phi_0(a) + [2(1-b)/(l*pi)]sin(l*pi*alpha*0.5)
# phi'(a) = phi_0'(a) + (1-b)cos(l*pi*alpha*0.5)
# phi_0(a) = 1 - a if a <= 1 - b phi_0'(a) = -1
# = a - 1 if a >= 1 + b phi_0'(a) = 1
# = (a-1)^2/2b + b/2 if a in [1-b,1+b] phi_0'(a) = a - 1 /b
# b = 0.01 l = 39
fn3 <- function(alpha, beta = 0.01, l = 39) {
if (alpha <= 1 - beta) {
phi_0 <- 1 - alpha
} else if (alpha >= 1 + beta) {
phi_0 <- alpha - 1
} else {
phi_0 <- (alpha - 1)^2 / (2 * beta) + (beta / 2)
}
phi_0 + (2 * (1 - beta) * sin(l * pi * alpha * 0.5)) / (l * pi)
}
# phi'(a) = phi_0'(a) + (1-b)cos(l*pi*alpha*0.5)
gr3 <- function(alpha, beta = 0.01, l = 39) {
if (alpha <= 1 - beta) {
dphi_0 <- -1
} else if (alpha >= 1 + beta) {
dphi_0 <- 1
} else {
dphi_0 <- (alpha - 1) / beta
}
dphi_0 + (1 - beta) * cos(l * pi * alpha * 0.5)
}
fcn3 <- function(x) {
list(f = fn3(x), g = gr3(x))
}
# Utils for Test Functions 4-6 --------------------------------------------
# gamma(b) = (1+b^2)^1/2 - b
yanaig <- function(beta) {
sqrt(1 + beta^2) - beta
}
yanai1 <- function(alpha, beta) {
sqrt((1 - alpha)^2 + beta^2)
}
gryanai1 <- function(alpha, beta) {
(alpha - 1) / yanai1(alpha, beta)
}
yanai2 <- function(alpha, beta) {
sqrt(alpha^2 + beta^2)
}
gryanai2 <- function(alpha, beta) {
alpha / yanai2(alpha, beta)
}
# phi(a) = gamma(b_1)[(1-a)^2 + b_2^2]^1/2 + gamma(b_2)[a^2 + b_1^2]^1/2
yanai <- function(alpha, beta1, beta2) {
yanaig(beta1) * yanai1(alpha, beta2) +
yanaig(beta2) * yanai2(alpha, beta1)
}
# phi'(a) = -[gamma(b_1)(1-a)]/sqrt[(1-a)^2 + b_2^2] + gamma(b_2)a/sqrt([a^2 + b_1^2])
gryanai <- function(alpha, beta1, beta2) {
(yanaig(beta1) * gryanai1(alpha, beta2)) + (yanaig(beta2) * gryanai2(alpha, beta1))
}
# Test Function 4 ---------------------------------------------------------
fn4 <- function(alpha) {
yanai(alpha, beta1 = 0.001, beta2 = 0.001)
}
gr4 <- function(alpha) {
gryanai(alpha, beta1 = 0.001, beta2 = 0.001)
}
fcn4 <- function(x) {
list(f = fn4(x), g = gr4(x))
}
# Test Function 5 ---------------------------------------------------------
fn5 <- function(alpha) {
yanai(alpha, beta1 = 0.01, beta2 = 0.001)
}
gr5 <- function(alpha) {
gryanai(alpha, beta1 = 0.01, beta2 = 0.001)
}
fcn5 <- function(x) {
list(f = fn5(x), g = gr5(x))
}
# Test Function 6 ---------------------------------------------------------
fn6 <- function(alpha) {
yanai(alpha, beta1 = 0.001, beta2 = 0.01)
}
gr6 <- function(alpha) {
gryanai(alpha, beta1 = 0.001, beta2 = 0.01)
}
fcn6 <- function(x) {
list(f = fn6(x), g = gr6(x))
}
f1 <- list(fn = fn1, gr = gr1)
f2 <- list(fn = fn2, gr = gr2)
f3 <- list(fn = fn3, gr = gr3)
f4 <- list(fn = fn4, gr = gr4)
f5 <- list(fn = fn5, gr = gr5)
f6 <- list(fn = fn6, gr = gr6)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.