ADtools implements forward-mode automatic differentiation for multivariate functions using the matrix-calculus notation of Magnus and Neudecker (1988). Two key features of the package are: (i) it incorporates various optimisation strategies to improve performance, including memoisation to cut down object-construction time, sparse matrix representations to save derivative computation, and specialised matrix operations written in Rcpp to reduce computation time; (ii) it supports differentiating random variables with respect to their parameters, targeting MCMC (and, more generally, simulation-based) applications.
The package can be installed from GitHub:

devtools::install_github("kcf-jackson/ADtools")
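As a quick illustration of feature (ii), here is a minimal sketch of differentiating a single random draw with respect to a distribution parameter. It reuses `rgamma0`, the derivative-aware Gamma sampler that also appears in the Gibbs-sampler example below; the function `g` and the parameter value are hypothetical.

library(ADtools)
set.seed(42)
# Differentiate one Gamma(shape = 2, scale = theta) draw w.r.t. theta
g <- function(theta) rgamma0(1, 2, scale = theta)
g_AD <- auto_diff(g, at = list(theta = 1))
g_AD@dx  # d(draw) / d(theta)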
Given a function $f: x \mapsto y = f(x)$, where $x \in \mathbb{R}^m$ and $y \in \mathbb{R}^n$, the Jacobian matrix of $f$ w.r.t. $x$ is given by

$$\left(\frac{\partial y}{\partial x}\right)_{ij} = \frac{\partial y_i}{\partial x_j}, \quad i = 1, \dots, n, \; j = 1, \dots, m.$$

Consider $f(X, y) = Xy$, where $X$ is a $2 \times 2$ matrix and $y$ is a $2 \times 1$ vector.
library(ADtools)
f <- function(X, y) X %*% y
X <- randn(2, 2)
y <- matrix(c(1, 1))
print(list(X = X, y = y, f = f(X, y)))
## $X
## [,1] [,2]
## [1,] 0.41331040 0.7085659
## [2,] 0.01066195 -1.2300747
##
## $y
## [,1]
## [1,] 1
## [2,] 1
##
## $f
## [,1]
## [1,] 1.121876
## [2,] -1.219413
Since $X$ has dimension $(2, 2)$ and $y$ has dimension $(2, 1)$, the input space has dimension $2 \times 2 + 2 \times 1 = 6$ and the output has dimension $2$, i.e. $f$ maps $\mathbb{R}^6$ to $\mathbb{R}^2$, so the Jacobian of $f$ should be a $2 \times 6$ matrix.
# Full Jacobian matrix
f_AD <- auto_diff(f, at = list(X = X, y = y))
f_AD@dx # returns a Jacobian matrix
## d_X1 d_X2 d_X3 d_X4 d_y1 d_y2
## d_output_1 1 0 1 0 0.41331040 0.7085659
## d_output_2 0 1 0 1 0.01066195 -1.2300747
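This result can be verified by hand with the vectorisation identity $\operatorname{vec}(AXB) = (B^\top \otimes A)\,\operatorname{vec}(X)$ from Magnus and Neudecker:

$$\frac{\partial (Xy)}{\partial \operatorname{vec}(X)^\top} = y^\top \otimes I_2 = \begin{pmatrix} 1 & 0 & 1 & 0 \\ 0 & 1 & 0 & 1 \end{pmatrix}, \qquad \frac{\partial (Xy)}{\partial y^\top} = X,$$

which matches the `d_X` and `d_y` blocks of `f_AD@dx` above.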
`auto_diff` also supports computing a partial Jacobian matrix. For instance, if we are only interested in the derivative w.r.t. `y`, we can run:
f_AD <- auto_diff(f, at = list(X = X, y = y), wrt = "y")
f_AD@dx # returns a partial Jacobian matrix
## d_y1 d_y2
## d_output_1 0.41331040 0.7085659
## d_output_2 0.01066195 -1.2300747
It is good practice to always check the result against finite differencing. This can be done by calling `finite_diff`, which has the same interface as `auto_diff`:
f_FD <- finite_diff(f, at = list(X = X, y = y))
f_FD
## d_X1 d_X2 d_X3 d_X4 d_y1 d_y2
## d_output_1 1 0 1 0 0.41331039 0.7085659
## d_output_2 0 1 0 1 0.01066194 -1.2300747
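The two results can also be compared programmatically. A small sketch, assuming `finite_diff` returns an ordinary numeric matrix as the printout above suggests:

# Maximum absolute discrepancy between AD and finite differencing;
# expect a tiny number driven by the finite-difference step size
max(abs(f_AD@dx - f_FD))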
As a second example, we estimate a linear regression model by gradient descent, using `auto_diff` to supply the gradient of the loss. First, simulate some data:

set.seed(123)
n <- 1000
p <- 3
X <- randn(n, p)
beta <- randn(p, 1)
y <- X %*% beta + rnorm(n)
gradient_descent <- function(f, vary, fix, learning_rate = 0.01, tol = 1e-6, show = FALSE) {
  repeat {
    # Differentiate f w.r.t. the varying parameters only
    df <- auto_diff(f, at = append(vary, fix), wrt = names(vary))
    if (show) print(df@x)  # current loss value
    delta <- learning_rate * as.numeric(df@dx)
    # Take a gradient step, preserving the list structure of `vary`
    vary <- relist(unlist(vary) - delta, vary)
    if (max(abs(delta)) < tol) break
  }
  vary
}
lm_loss <- function(y, X, beta) sum((y - X %*% beta)^2)
# Estimate
gradient_descent(
  f = lm_loss, vary = list(beta = rnorm(p, 1)), fix = list(y = y, X = X), learning_rate = 1e-4
)
## $beta
## [1] -0.1417494 -0.3345771 -1.4484226
# Truth
t(beta)
## [,1] [,2] [,3]
## [1,] -0.1503075 -0.3277571 -1.448165
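For reference, the gradient-descent estimate can also be checked against the closed-form least-squares solution (output omitted here):

# Closed-form OLS estimate, transposed to match the layout of t(beta)
t(solve(crossprod(X), crossprod(X, y)))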
The final example differentiates a Gibbs sampler for the Gaussian linear model w.r.t. the prior hyperparameters; `rgamma0` and `rmvnorm0` below are the package's derivative-aware samplers. Again, start by simulating some data:

set.seed(123)
n <- 30  # small data
p <- 10
X <- randn(n, p)
beta <- randn(p, 1)
y <- X %*% beta + rnorm(n)
gibbs_gaussian <- function(X, y, b_0, B_0, alpha_0, delta_0, num_steps = 1e4) {
  # Initialisation
  init_sigma <- 1 / sqrt(rgamma0(1, alpha_0 / 2, scale = 2 / delta_0))
  n <- length(y)
  alpha_1 <- alpha_0 + n
  sigma_g <- init_sigma
  inv_B_0 <- solve(B_0)
  inv_B_0_times_b_0 <- inv_B_0 %*% b_0
  XTX <- crossprod(X)
  XTy <- crossprod(X, y)
  beta_res <- vector("list", num_steps)
  sigma_res <- vector("list", num_steps)
  pb <- txtProgressBar(1, num_steps, style = 3)
  for (i in 1:num_steps) {
    # Update beta
    B_g <- solve(sigma_g^(-2) * XTX + inv_B_0)
    b_g <- B_g %*% (sigma_g^(-2) * XTy + inv_B_0_times_b_0)
    beta_g <- t(rmvnorm0(1, b_g, B_g))
    # Update sigma
    delta_g <- delta_0 + sum((y - X %*% beta_g)^2)
    sigma_g <- 1 / sqrt(rgamma0(1, alpha_1 / 2, scale = 2 / delta_g))
    # Keep track
    beta_res[[i]] <- beta_g
    sigma_res[[i]] <- sigma_g
    setTxtProgressBar(pb, i)
  }
  list(sigma = sigma_res, beta = beta_res)
}
Differentiating the sampler w.r.t. all four prior hyperparameters:

gibbs_deriv <- auto_diff(
  gibbs_gaussian,
  at = list(
    b_0 = numeric(p), B_0 = diag(p), alpha_0 = 4, delta_0 = 4,
    X = X, y = y, num_steps = 5000
  ),
  wrt = c("b_0", "B_0", "alpha_0", "delta_0")
)
Each posterior draw now carries its derivatives w.r.t. the hyperparameters, which we can extract and summarise. Here we average the Jacobian of the `beta` draws over the MCMC iterations:

library(magrittr)
library(knitr)
library(kableExtra)

matrix_ls_to_array <- function(x) {
  # Stack a list of equal-sized matrices into a 3-d array
  structure(unlist(x), dim = c(dim(x[[1]]), length(x)), dimnames = dimnames(x[[1]]))
}

tidy_mcmc <- function(mcmc_res, var0) {
  # Extract the derivative component from every draw of `var0`
  mcmc_res[[var0]] %>%
    purrr::map(~.x@dx) %>%
    matrix_ls_to_array()
}

tidy_table <- function(x) {
  x %>% kable() %>% kable_styling() %>% scroll_box(width = "100%")
}

posterior_Jacobian <- apply(tidy_mcmc(gibbs_deriv, "beta"), c(1, 2), mean)
tidy_table(posterior_Jacobian)
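The same summary can be produced for the `sigma` draws; a sketch, assuming the `sigma` draws carry their derivatives in `@dx` in the same way as the `beta` draws:

# Hypothetical mirror of the beta summary for the sigma draws
sigma_Jacobian <- apply(tidy_mcmc(gibbs_deriv, "sigma"), c(1, 2), mean)
tidy_table(sigma_Jacobian)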