In jlin-vt/SML: Statistical Machine Learning

knitr::opts_chunk$set(echo = TRUE)
library(SML)
library(ggplot2)
library(MASS)

1. Example: Predictions using noise-free observations

Define the points at which we want to define the functions.

x.star <- seq(-5, 5, len = 50)

Calculate the covariance matrix and make postive-definite kernels.

sigma <- CalcSigma(x.star, x.star)

all(eigen(sigma)$values > 0);
diag(sigma) <- diag(sigma)  + 0.001
all(eigen(sigma)$values > 0);

Generate a number of functions from the process.

n.samples <- 3
values <- matrix(NA, nrow = length(x.star), ncol = n.samples)
for (i in 1:n.samples) {
  # Each column represents a sample from a multivariate normal distribution
  # with zero mean and covariance sigma
  values[, i] <- mvrnorm(1, rep(0, length(x.star)), sigma)
}

plot(x.star, values[, 1], type = 'l', ylim = c(-2, 2))
for (i in 2:n.samples){
  lines(x.star, values[, i])
}

Now let's assume that we have some known data points; this is the case of Figure 2.2(b). In the book, the notation 'f' is used for f$y below. I've done this to make the ggplot code easier later on.

f <- data.frame(x = c(-4, -3, -1, 0, 2),
                y = c(-2, 0, 1, 2, -1))

Calculate the covariance matrices using the same x.star values as above.

x <- f$x
k.xx <- CalcSigma(x, x)
k.xxs <- CalcSigma(x, x.star)
k.xsx <- CalcSigma(x.star, x)
k.xsxs <- CalcSigma(x.star, x.star)

These matrix calculations correspond to equation (2.19) in the book.

f.star.bar <- k.xsx %*% solve(k.xx) %*% f$y
cov.f.star <- k.xsxs - k.xsx %*% solve(k.xx) %*% k.xxs

This time we'll plot more samples. We could of course simply plot a +/- 2 standard deviation confidence interval as in the book but I want to show the samples explicitly here.

n.samples <- 50
values <- matrix(rep(0, length(x.star) * n.samples), ncol = n.samples)
for (i in 1:n.samples) {
  values[, i] <- mvrnorm(1, f.star.bar, cov.f.star)
}

plot(x.star, values[, 1], type = 'l', ylim = c(-3, 3), col = 'gray')
for (i in 2:n.samples){
  lines(x.star, values[, i], col = 'gray')
}
points(f$x, f$y, pch=21, col = 'blue')

2. Example: Predictions using noisy observations

Now assume that each of the observed data points have normally-distributed noise.

The standard deviation of the noise

sigma.n <- 0.1

Recalculate the mean and covariance functions

f.bar.star <- k.xsx %*% solve(k.xx + sigma.n ** 2 * diag(1, ncol(k.xx))) %*% f$y
cov.f.star <- k.xsxs - k.xsx %*% solve(k.xx + sigma.n ** 2 * diag(1, ncol(k.xx))) %*% k.xxs

Recalulate the sample functions

values <- matrix(rep(0, length(x.star) * n.samples), ncol = n.samples)
for (i in 1:n.samples) {
  values[, i] <- mvrnorm(1, f.bar.star, cov.f.star)
}

plot(x.star, values[, 1], type = 'l', ylim = c(-3, 3), col = 'gray')
for (i in 2:n.samples){
  lines(x.star, values[, i], col = 'gray')
}
points(f$x, f$y, pch = 21, col = 'blue')
for (i in 1:length(f$x)){
  lines(c(f$x[i], f$x[i]), c(f$y[i] - 2 * sigma.n, f$y[i] + 2 * sigma.n), col = 'red', lwd = 2)
}