knitr::opts_chunk$set(echo = TRUE) library(SML) library(ggplot2) library(MASS)
Define the points at which we want to define the functions.
x.star <- seq(-5, 5, len = 50)
Calculate the covariance matrix and make postive-definite kernels.
sigma <- CalcSigma(x.star, x.star) all(eigen(sigma)$values > 0); diag(sigma) <- diag(sigma) + 0.001 all(eigen(sigma)$values > 0);
Generate a number of functions from the process.
n.samples <- 3 values <- matrix(NA, nrow = length(x.star), ncol = n.samples) for (i in 1:n.samples) { # Each column represents a sample from a multivariate normal distribution # with zero mean and covariance sigma values[, i] <- mvrnorm(1, rep(0, length(x.star)), sigma) } plot(x.star, values[, 1], type = 'l', ylim = c(-2, 2)) for (i in 2:n.samples){ lines(x.star, values[, i]) }
Now let's assume that we have some known data points; this is the case of Figure 2.2(b). In the book, the notation 'f' is used for f$y below. I've done this to make the ggplot code easier later on.
f <- data.frame(x = c(-4, -3, -1, 0, 2), y = c(-2, 0, 1, 2, -1))
Calculate the covariance matrices using the same x.star values as above.
x <- f$x k.xx <- CalcSigma(x, x) k.xxs <- CalcSigma(x, x.star) k.xsx <- CalcSigma(x.star, x) k.xsxs <- CalcSigma(x.star, x.star)
These matrix calculations correspond to equation (2.19) in the book.
f.star.bar <- k.xsx %*% solve(k.xx) %*% f$y cov.f.star <- k.xsxs - k.xsx %*% solve(k.xx) %*% k.xxs
This time we'll plot more samples. We could of course simply plot a +/- 2 standard deviation confidence interval as in the book but I want to show the samples explicitly here.
n.samples <- 50 values <- matrix(rep(0, length(x.star) * n.samples), ncol = n.samples) for (i in 1:n.samples) { values[, i] <- mvrnorm(1, f.star.bar, cov.f.star) } plot(x.star, values[, 1], type = 'l', ylim = c(-3, 3), col = 'gray') for (i in 2:n.samples){ lines(x.star, values[, i], col = 'gray') } points(f$x, f$y, pch=21, col = 'blue')
Now assume that each of the observed data points have normally-distributed noise.
The standard deviation of the noise
sigma.n <- 0.1
Recalculate the mean and covariance functions
f.bar.star <- k.xsx %*% solve(k.xx + sigma.n ** 2 * diag(1, ncol(k.xx))) %*% f$y cov.f.star <- k.xsxs - k.xsx %*% solve(k.xx + sigma.n ** 2 * diag(1, ncol(k.xx))) %*% k.xxs
Recalulate the sample functions
values <- matrix(rep(0, length(x.star) * n.samples), ncol = n.samples) for (i in 1:n.samples) { values[, i] <- mvrnorm(1, f.bar.star, cov.f.star) } plot(x.star, values[, 1], type = 'l', ylim = c(-3, 3), col = 'gray') for (i in 2:n.samples){ lines(x.star, values[, i], col = 'gray') } points(f$x, f$y, pch = 21, col = 'blue') for (i in 1:length(f$x)){ lines(c(f$x[i], f$x[i]), c(f$y[i] - 2 * sigma.n, f$y[i] + 2 * sigma.n), col = 'red', lwd = 2) }
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.