# inference_laplace_full
# Inference using the Laplace approximation and a full (non-sparse) GP
# y is the response data
# data is a dataframe containing columns on which the kernel acts,
# giving observations on which to train the model
# kernel is a gpe kernel object
# likelihood is a gpe likelihood object providing d0, d1 and d2, the
# log-likelihood and its first two derivatives with respect to f
# mean_function is a gpe mean function (for now just an R function)
# weights is a vector of observation weights passed to the likelihood
inference_laplace_full <- function(y,
                                   data,
                                   kernel,
                                   likelihood,
                                   mean_function,
                                   inducing_data,
                                   weights,
                                   verbose = FALSE) {
  # NB inducing_data is ignored
  # apply mean function to get prior mean at observation locations
  mn_prior <- mean_function(data)
  # control parameters
  tol <- 10 ^ -12
  itmax <- 50
  # self kernel (with observation noise)
  Kxx <- kernel(data, data)
  # number of observations
  n <- nrow(data)
  # identity matrix
  eye <- diag(n)
  # initialise a
  a <- rep(0, n)
  # set f to the prior
  f <- mn_prior
  # initialise loop
  obj.old <- Inf
  obj <- -sum(likelihood$d0(y, f, weights))
  it <- 0
  # start Newton iterations
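  # each iteration follows the Newton update for the Laplace approximation
  # (cf. Rasmussen & Williams 2006, Algorithm 3.1), written in terms of
  # a = Kxx^-1 (f - mn_prior): with W = -d2 and B = I + W^0.5 Kxx W^0.5,
  #   b     = W (f - mn_prior) + d1
  #   a_new = b - W^0.5 B^-1 W^0.5 Kxx b
  # a line search along (a_new - a) then keeps the step stable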
  while ((obj.old - obj) > tol & it < itmax) {
    # increment iterator and update objective
    it <- it + 1
    obj.old <- obj
    # get the negative Hessian of the log-likelihood and its square root
    W <- -(likelihood$d2(y, f, weights))
    rW <- sqrt(W)
    # difference between the current estimate of the mode and the prior mean
    cf <- f - mn_prior
    # get Cholesky factorisation of B = I + W^0.5 Kxx W^0.5
    L <- jitchol(rW %*% t(rW) * Kxx + eye)
    # get the Newton direction towards the posterior mode
    b <- W * cf + likelihood$d1(y, f, weights)
    mat2 <- rW * (Kxx %*% b)
    adiff <- b - rW * backsolve(L, forwardsolve(t(L), mat2)) - a
    # make sure it's a vector, not a matrix
    dim(adiff) <- NULL
    # find optimum step size toward the mode using Brent's method
    res <- optimise(laplace_psiline_full,
                    interval = c(0, 2),
                    adiff = adiff,
                    a = a,
                    K = Kxx,
                    y = y,
                    d0 = likelihood$d0,
                    mn = mn_prior,
                    weights)
    # move to the new posterior mode
    a <- a + res$minimum * adiff
    f <- Kxx %*% a + mn_prior
    obj <- laplace_psi(a,
                       f,
                       mn_prior,
                       y,
                       likelihood$d0,
                       weights)
  }
  # recompute the negative Hessian at the mode
  W <- -(likelihood$d2(y, f, weights))
  # approximate log marginal likelihood at the mode
  lZ <- -(a %*% (f - mn_prior))[1, 1] / 2 +
    sum(likelihood$d0(y, f, weights)) -
    sum(log(diag(L)))
  # build the posterior object
  posterior <- createPosterior(inference_name = 'inference_laplace_full',
                               lZ = lZ,
                               data = data,
                               kernel = kernel,
                               likelihood = likelihood,
                               mean_function = mean_function,
                               inducing_data = inducing_data,
                               weights = weights,
                               mn_prior = mn_prior,
                               L = L,
                               a = a,
                               W = W)
  # return the posterior object
  return (posterior)
}
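# illustrative sketch (not part of gpe): the same Newton scheme written out
# for a toy 1-d binary classification problem with a squared-exponential
# kernel and a Bernoulli-logit likelihood, using only base R. The names
# toy_laplace and se_kernel are hypothetical, the prior mean is fixed at
# zero and the line search is omitted (a plain Newton step is taken), so
# this only shows the linear algebra rather than reproducing gpe exactly
toy_laplace <- function(x, y, lengthscale = 1, sigma2 = 1,
                        tol = 1e-8, itmax = 50) {
  # squared-exponential kernel on vectors of 1-d inputs
  se_kernel <- function(a, b)
    sigma2 * exp(-outer(a, b, '-') ^ 2 / (2 * lengthscale ^ 2))
  n <- length(x)
  K <- se_kernel(x, x) + diag(1e-8, n)
  # logit log-likelihood and its first two derivatives w.r.t. f (y in {0, 1})
  d0 <- function(f) y * f - log1p(exp(f))
  d1 <- function(f) y - plogis(f)
  d2 <- function(f) -plogis(f) * (1 - plogis(f))
  a <- rep(0, n)
  f <- rep(0, n)
  obj.old <- Inf
  obj <- -sum(d0(f))
  it <- 0
  while ((obj.old - obj) > tol & it < itmax) {
    it <- it + 1
    obj.old <- obj
    W <- -d2(f)
    rW <- sqrt(W)
    L <- chol(rW %*% t(rW) * K + diag(n))
    b <- W * f + d1(f)
    # full Newton step: a <- b - W^0.5 B^-1 W^0.5 K b
    a <- as.vector(b - rW * backsolve(L, forwardsolve(t(L), rW * (K %*% b))))
    f <- as.vector(K %*% a)
    obj <- sum(a * f) / 2 - sum(d0(f))
  }
  # recompute W and its Cholesky factor at the converged mode
  W <- -d2(f)
  rW <- sqrt(W)
  L <- chol(rW %*% t(rW) * K + diag(n))
  # approximate log marginal likelihood, as in inference_laplace_full above
  lZ <- -sum(a * f) / 2 + sum(d0(f)) - sum(log(diag(L)))
  list(f = f, a = a, L = L, W = W, lZ = lZ, iterations = it)
}
# example use of the sketch:
# set.seed(1)
# x <- sort(runif(30, -3, 3))
# y <- rbinom(30, 1, plogis(2 * sin(x)))
# fit <- toy_laplace(x, y)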
# projection for full inference
project_laplace_full <- function(posterior,
                                 new_data,
                                 variance = c('none', 'diag', 'matrix')) {
  # get the required variance argument
  variance <- match.arg(variance)
  # prior mean over the test locations
  mn_prior_xp <- posterior$mean_function(new_data)
  # projection matrix
  Kxxp <- posterior$kernel(posterior$data,
                           new_data)
  # its transpose
  Kxpx <- t(Kxxp)
  # get posterior mean
  mu <- Kxpx %*% posterior$components$a + mn_prior_xp
  # NB can easily modify this to return only the diagonal elements
  # (variances) with kernel(..., diag = TRUE)
  # calculation of the diagonal of t(v) %*% v is also easy:
  # (colSums(v ^ 2))
  if (variance == 'none') {
    # if mean only
    var <- NULL
  } else {
    # compute common variance components
    rW <- sqrt(as.vector(posterior$components$W))
    # intermediate term for the posterior covariance
    v <- backsolve(posterior$components$L,
                   rW * Kxxp,
                   transpose = TRUE)
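    # since t(L) %*% L = I + W^0.5 Kxx W^0.5, crossprod(v) equals
    # Kxpx %*% solve(Kxx + diag(1 / W)) %*% Kxxp, the reduction in the
    # prior covariance due to conditioning on the observations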
    if (variance == 'diag') {
      # if diagonal (elementwise) variance only
      # diagonal matrix of the prior covariance on xp
      Kxpxp_diag <- posterior$kernel(new_data, diag = TRUE)
      # diagonal elements of t(v) %*% v
      vtv_diag <- colSums(v ^ 2)
      # diagonal elements of the posterior
      K_diag <- diag(Kxpxp_diag) - vtv_diag
      var <- K_diag
    } else {
      # if full variance
      # prior covariance on xp
      Kxpxp <- posterior$kernel(new_data)
      # posterior covariance on xp
      K <- Kxpxp - crossprod(v)
      var <- K
    }
  }
  # return both
  ans <- list(mu = mu,
              var = var)
  return (ans)
}
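# illustrative sketch (not part of gpe): projecting the toy_laplace fit above
# onto new inputs, mirroring the linear algebra in project_laplace_full. The
# name toy_laplace_project is hypothetical and the kernel settings are
# assumed to match those used when fitting
toy_laplace_project <- function(fit, x, x_new, lengthscale = 1, sigma2 = 1) {
  se_kernel <- function(a, b)
    sigma2 * exp(-outer(a, b, '-') ^ 2 / (2 * lengthscale ^ 2))
  # cross-covariance between training and new inputs
  Kxxp <- se_kernel(x, x_new)
  # posterior mean of the latent function at x_new (zero prior mean)
  mu <- as.vector(t(Kxxp) %*% fit$a)
  # v = L^-T W^0.5 Kxxp, so colSums(v ^ 2) gives diag(Kxpx (K + W^-1)^-1 Kxxp)
  rW <- sqrt(fit$W)
  v <- backsolve(fit$L, rW * Kxxp, transpose = TRUE)
  # marginal (diagonal) posterior variances at x_new
  var <- sigma2 - colSums(v ^ 2)
  list(mu = mu, var = var)
}
# example use, following on from the toy_laplace example:
# pred <- toy_laplace_project(fit, x, x_new = seq(-3, 3, length.out = 100))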