Here we generate some binomial data and visualize it along with the true success probabilities.

# libraries
library(ggplot2)
library(mvtnorm)
library(fields)
theme_set(theme_classic())

# generate binomial data via a logistic model with a cubic latent function
set.seed(90210)
N <- 1000
beta <- 2.5

x <- seq(-1, 1, len=N)
z <- 4 * x + beta * x ^ 2 + 7.6 * x ^ 3                       # cubic polynomial (linear in the coefficients)
p.true <- 1 / (1 + exp(-z))
y <- rbinom(N, 1, p.true)

data <- data.frame(x=x, p=p.true, y=y)

ggplot(data, aes(x, y)) +
    geom_point() +
    geom_line(aes(x=x, y=p))
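
As a quick sanity check (a sketch added here; the bin width of 0.2 is an arbitrary choice), the empirical event rate within bins of x should roughly track the average of p.true:

# bin x and compare the empirical event rate in each bin to the
# average true probability; the two columns should roughly agree
data$bin <- cut(data$x, breaks=seq(-1, 1, by=0.2), include.lowest=TRUE)
aggregate(cbind(y, p) ~ bin, data=data, FUN=mean)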

Now we naively use the Laplace approximation to fit a binary GP classifier.
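
For reference (following the standard formulation, e.g. Rasmussen and Williams, Gaussian Processes for Machine Learning, Chapter 3), the Laplace approximation replaces the non-Gaussian posterior over the latent function values with a Gaussian centered at the posterior mode:

q(f | X, y) = Normal(f | \hat{f}, (K^{-1} + W)^{-1}), where W = -\nabla\nabla \log p(y | \hat{f})

Newton's method below locates the mode \hat{f}.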

# GP classification
# squared exponential (RBF) covariance function with unit variance
# and unit length-scale; rdist() from fields computes pairwise distances
K.func <- function(X.p, X.q=NULL) {
    sigma <- exp(-0.5 * rdist(X.p, X.q) ^ 2)
    return(sigma)
}
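
As a small illustrative check (not part of the original analysis), nearby inputs have covariance near one and distant inputs near zero:

# covariance between 0 and a few other inputs
K.func(0, c(0, 0.1, 1, 3))
# approximately: 1.000 0.995 0.607 0.011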

f <- rep(0, length(x))   # initialize to 0 
K <- K.func(x)           # calculate K
I <- diag(1, length(x))

# track the change in the objective between Newton iterations
obj.new <- 0
obj.diff <- Inf
eps <- 1e-6

# mode finding for f via Newton's method
while (abs(obj.diff) > eps) {
    # calculate probability using logistic function
    pi <- as.vector(1 / (1 + exp(-f)))

    # construct W (- Hessian of logistic log likelihood)
    W <- diag(pi * (1 - pi))
    W.sqrt <- sqrt(W)

    # calculate gradient
    grad <- y - pi

    # Newton update in terms of a = K^{-1} f
    # (cf. Rasmussen & Williams, GPML, Algorithm 3.1)
    B <- I + W.sqrt %*% K %*% W.sqrt
    L <- chol(B)
    b <- W %*% f + grad
    a <- b - W.sqrt %*% backsolve(L, forwardsolve(t(L), W.sqrt %*% K %*% b))
    f <- K %*% a

    obj.old <- obj.new
    # objective: -0.5 * a'f + Bernoulli log likelihood log p(y | f)
    obj.new <- -0.5 * t(a) %*% f + sum(dbinom(y, 1, plogis(as.vector(f)), log=TRUE))
    obj.diff <- obj.new - obj.old
}
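
As a quick check of the converged mode (a sketch added here, not in the original code), the fitted probabilities \sigma(\hat{f}) at the training inputs should track the true curve:

# overlay fitted training-point probabilities on the true curve
data$p.hat <- as.vector(1 / (1 + exp(-f)))
ggplot(data, aes(x)) +
    geom_line(aes(y=p)) +
    geom_line(aes(y=p.hat), linetype="dashed")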

x.star <- 1.05    # single test input, just outside the training range

# Laplace approximation predictive distribution at x.star
pi <- as.vector(1 / (1 + exp(-f)))

# construct W (- Hessian of logistic log likelihood)
W <- diag(pi * (1 - pi))
W.sqrt <- sqrt(W)

# calculate gradient
grad <- y - pi

# covariance of x and x.star
k.star <- K.func(x, x.star)

# Cholesky decomposition (chol() returns the upper triangular factor)
B <- I + W.sqrt %*% K %*% W.sqrt
L <- chol(B)

# mean of the predictive posterior over f(x.star)
f.bar.star <- t(k.star) %*% grad

# variance of the predictive posterior over f(x.star)
v <- forwardsolve(t(L), W.sqrt %*% k.star)
V.f.star <- K.func(x.star, x.star) - t(v) %*% v
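
To see the intermediate result (purely illustrative; drop() just strips the 1x1 matrix dimensions), the Laplace predictive distribution over the latent f(x.star) is:

# predictive mean and variance of the latent function at x.star
c(mean=drop(f.bar.star), var=drop(V.f.star))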

# calculate the predictive class probability by Monte Carlo
# details:
# let q(f*) = Normal(f* | f.bar.star, V.f.star)
# then \bar{\pi}_* = \int \sigma(f*) q(f*) df* = E_q[\sigma(f*)]
# approximate by sampling f*_1, ..., f*_n ~ q and taking
# \bar{\pi}_* \approx \frac{1}{n} \sum_i \sigma(f*_i)
# note that 1,000 monte carlo samples is usually a decent approximation
pi.bar.star <- mean(1 / (1 + exp(-rnorm(1e3, f.bar.star, sqrt(V.f.star)))))
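
The same two steps extend directly to a grid of test inputs (a sketch under the same setup; x.grid, the grid range, and the Monte Carlo sample size are all choices made here for illustration):

# predictive class probabilities over a grid of test inputs
x.grid <- seq(-1.5, 1.5, len=50)
k.grid <- K.func(x, x.grid)                     # N x 50 cross-covariance
f.bar.grid <- as.vector(t(k.grid) %*% grad)     # predictive means
v.grid <- forwardsolve(t(L), W.sqrt %*% k.grid)
V.grid <- 1 - colSums(v.grid ^ 2)               # predictive variances (k(x*, x*) = 1)
pi.bar.grid <- sapply(seq_along(x.grid), function(i)
    mean(1 / (1 + exp(-rnorm(1e3, f.bar.grid[i], sqrt(V.grid[i]))))))

ggplot(data.frame(x=x.grid, p=pi.bar.grid), aes(x, p)) +
    geom_line()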

