Nothing
# Tests for integration of gradient() and hessian() with R's optimizers.
# ============================================================================
# gradient() with optim()
# ============================================================================
test_that("gradient() works as optim() gr argument on a quadratic", {
# Minimize f(x) = (x - 3)^2: minimum at x = 3
f_ll <- function(theta) {
x <- theta[1]
-(x - 3)^2 # negative because we'll negate for minimization
}
nll <- function(theta) -f_ll(theta)
ngr <- function(theta) -gradient(f_ll, theta)
fit <- optim(0, fn = nll, gr = ngr, method = "BFGS")
expect_equal(fit$par, 3, tolerance = 1e-6)
expect_equal(fit$convergence, 0)
})
test_that("gradient() with optim() on 2D quadratic", {
# Minimize f(x,y) = (x-1)^2 + (y-2)^2
f_ll <- function(theta) {
-(theta[1] - 1)^2 - (theta[2] - 2)^2
}
nll <- function(theta) -f_ll(theta)
ngr <- function(theta) -gradient(f_ll, theta)
fit <- optim(c(0, 0), fn = nll, gr = ngr, method = "BFGS")
expect_equal(fit$par, c(1, 2), tolerance = 1e-6)
})
test_that("negated gradient gives correct minimization direction", {
# gradient() gives the gradient of the log-likelihood (ascent direction)
# -gradient() gives the descent direction for minimization
f_ll <- function(theta) {
mu <- theta[1]
-0.5 * (5 - mu)^2
}
s <- gradient(f_ll, 3) # d/dmu = (5 - 3) = 2 (positive: ascend toward 5)
expect_equal(s[1], 2)
neg_s <- -gradient(f_ll, 3)
expect_equal(neg_s[1], -2) # negative: descend toward 5 from below
})
# ============================================================================
# gradient() + hessian() with nlminb()
# ============================================================================
test_that("gradient() and hessian() work with nlminb()", {
f_ll <- function(theta) {
-(theta[1] - 3)^2 - 2 * (theta[2] + 1)^2
}
nll <- function(theta) -f_ll(theta)
ngr <- function(theta) -gradient(f_ll, theta)
nhess <- function(theta) -hessian(f_ll, theta)
fit <- nlminb(c(0, 0), objective = nll, gradient = ngr, hessian = nhess)
expect_equal(fit$par, c(3, -1), tolerance = 1e-6)
expect_equal(fit$convergence, 0)
})
# ============================================================================
# AD gradient matches optim's numerical gradient (same solution)
# ============================================================================
test_that("optim with AD gradient matches optim without gradient", {
set.seed(42)
data_norm <- rnorm(50, mean = 5, sd = 2)
n <- length(data_norm)
sum_x <- sum(data_norm)
sum_x2 <- sum(data_norm^2)
ll <- function(theta) {
mu <- theta[1]; sigma <- theta[2]
-n * log(sigma) - (1 / (2 * sigma^2)) * (sum_x2 - 2 * mu * sum_x + n * mu^2)
}
nll <- function(theta) -ll(theta)
ngr <- function(theta) -gradient(ll, theta)
# Use L-BFGS-B with lower bound on sigma to prevent log(negative) NaN warnings
start <- c(3, 1.5)
fit_no_gr <- optim(start, fn = nll, method = "L-BFGS-B",
lower = c(-Inf, 0.01))
fit_ad_gr <- optim(start, fn = nll, gr = ngr, method = "L-BFGS-B",
lower = c(-Inf, 0.01))
# Both should converge to the same point
expect_equal(fit_ad_gr$par, fit_no_gr$par, tolerance = 1e-5)
expect_equal(fit_ad_gr$convergence, 0)
})
# ============================================================================
# Standard errors from -hessian() match analytical values
# ============================================================================
test_that("SEs from -hessian() match analytical for Normal(mu, sigma)", {
fix <- make_normal_fixture()
n <- fix$n; sum_x <- fix$sum_x; sum_x2 <- fix$sum_x2
ll <- function(theta) {
mu <- theta[1]; sigma <- theta[2]
-n * log(sigma) - (1 / (2 * sigma^2)) * (sum_x2 - 2 * mu * sum_x + n * mu^2)
}
# Find MLE
mle <- c(fix$mle_mu, fix$mle_sigma)
# Observed information at MLE
obs_info <- -hessian(ll, mle)
vcov <- solve(obs_info)
se_ad <- sqrt(diag(vcov))
# Analytical SEs at the MLE:
# Var(mu_hat) = sigma^2/n, Var(sigma_hat) = sigma^2/(2n)
se_analytical <- c(
fix$mle_sigma / sqrt(n),
fix$mle_sigma / sqrt(2 * n)
)
expect_equal(se_ad, se_analytical, tolerance = 1e-6)
})
# ============================================================================
# Poisson MLE via optim() matches sum(x)/n
# ============================================================================
test_that("Poisson MLE via optim + AD matches analytical", {
set.seed(123)
data_pois <- rpois(100, lambda = 3.5)
n_pois <- length(data_pois)
sum_x_pois <- sum(data_pois)
sum_lfact <- sum(lfactorial(data_pois))
ll <- function(theta) {
lambda <- theta[1]
sum_x_pois * log(lambda) - n_pois * lambda - sum_lfact
}
nll <- function(theta) -ll(theta)
ngr <- function(theta) -gradient(ll, theta)
fit <- optim(1, fn = nll, gr = ngr, method = "BFGS")
analytical_mle <- sum_x_pois / n_pois
expect_equal(fit$par, analytical_mle, tolerance = 1e-6)
})
# ============================================================================
# Logistic regression: optim + AD matches glm()
# ============================================================================
test_that("logistic regression via optim + AD matches glm()", {
set.seed(7)
n_lr <- 200
x1 <- rnorm(n_lr)
x2 <- rnorm(n_lr)
X <- cbind(1, x1, x2)
beta_true <- c(-0.5, 1.2, -0.8)
eta_true <- X %*% beta_true
prob_true <- 1 / (1 + exp(-eta_true))
y <- rbinom(n_lr, 1, prob_true)
ll_dual <- function(theta) {
result <- dual_constant(0)
for (i in seq_len(n_lr)) {
eta_i <- theta[1] * X[i, 1] + theta[2] * X[i, 2] + theta[3] * X[i, 3]
result <- result + y[i] * eta_i - log(1 + exp(eta_i))
}
result
}
ll_num <- function(beta) {
eta <- X %*% beta
sum(y * eta - log(1 + exp(eta)))
}
nll <- function(theta) -ll_num(theta)
ngr <- function(theta) -gradient(ll_dual, theta)
fit_optim <- optim(c(0, 0, 0), fn = nll, gr = ngr, method = "BFGS")
fit_glm <- glm(y ~ x1 + x2, family = binomial)
expect_equal(fit_optim$par, unname(coef(fit_glm)), tolerance = 1e-4)
})
test_that("logistic regression SEs from AD match glm()", {
set.seed(7)
n_lr <- 200
x1 <- rnorm(n_lr)
x2 <- rnorm(n_lr)
X <- cbind(1, x1, x2)
beta_true <- c(-0.5, 1.2, -0.8)
eta_true <- X %*% beta_true
prob_true <- 1 / (1 + exp(-eta_true))
y <- rbinom(n_lr, 1, prob_true)
ll_dual <- function(theta) {
result <- dual_constant(0)
for (i in seq_len(n_lr)) {
eta_i <- theta[1] * X[i, 1] + theta[2] * X[i, 2] + theta[3] * X[i, 3]
result <- result + y[i] * eta_i - log(1 + exp(eta_i))
}
result
}
ll_num <- function(beta) {
eta <- X %*% beta
sum(y * eta - log(1 + exp(eta)))
}
nll <- function(theta) -ll_num(theta)
ngr <- function(theta) -gradient(ll_dual, theta)
fit_optim <- optim(c(0, 0, 0), fn = nll, gr = ngr, method = "BFGS")
fit_glm <- glm(y ~ x1 + x2, family = binomial)
# SEs from AD Hessian
obs_info <- -hessian(ll_dual, fit_optim$par)
vcov_ad <- solve(obs_info)
se_ad <- sqrt(diag(vcov_ad))
se_glm <- summary(fit_glm)$coefficients[, "Std. Error"]
expect_equal(se_ad, unname(se_glm), tolerance = 0.01)
})
# ============================================================================
# nlminb converges for multi-parameter model
# ============================================================================
test_that("nlminb with full AD converges for Normal(mu, sigma)", {
fix <- make_normal_fixture()
n <- fix$n; sum_x <- fix$sum_x; sum_x2 <- fix$sum_x2
ll <- function(theta) {
mu <- theta[1]; sigma <- theta[2]
-n * log(sigma) - (1 / (2 * sigma^2)) * (sum_x2 - 2 * mu * sum_x + n * mu^2)
}
nll <- function(theta) -ll(theta)
ngr <- function(theta) -gradient(ll, theta)
nhess <- function(theta) -hessian(ll, theta)
fit <- nlminb(c(0, 1), objective = nll, gradient = ngr, hessian = nhess)
expect_equal(fit$par[1], fix$mle_mu, tolerance = 1e-6)
expect_equal(fit$par[2], fix$mle_sigma, tolerance = 1e-4)
expect_equal(fit$convergence, 0)
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.