opts_chunk$set(tidy=FALSE, warning=FALSE, message=FALSE, cache=FALSE, comment=NA, dev="CairoPDF", fig.width=6, fig.height=4) library(ggplot2) # plotting opts_knit$set(upload.fun = socialR::flickr.url) theme_set(theme_bw(base_size=10)) theme_update(panel.background = element_rect(fill = "transparent", colour = NA), plot.background = element_rect(fill = "transparent", colour = NA)) cbPalette <- c("#000000", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")
set.seed(12345)
#' @import MASS #' @import reshape2 samples <- function(obs, l=1, sigma.n=0.3, x_predict = seq(-5,5,len=50), n=3){ require(MASS); require(reshape2) SE <- function(Xi,Xj, l) exp(-0.5 * (Xi - Xj) ^ 2 / l ^ 2) cov <- function(X, Y) outer(X, Y, SE, l) cov_xx_inv <- solve(cov(obs$x, obs$x) + sigma.n^2 * diag(1, length(obs$x))) Ef <- cov(x_predict, obs$x) %*% cov_xx_inv %*% obs[[2]] Cf <- cov(x_predict, x_predict) - cov(x_predict, obs$x) %*% cov_xx_inv %*% cov(obs$x, x_predict) values <- mvrnorm(n, Ef, Cf) dat <- data.frame(x=x_predict, t(values)) dat <- melt(dat, id="x") Ey <- data.frame(x=x_predict, y=(Ef), ymin=(Ef-2*sqrt(diag(Cf))), ymax=(Ef+2*sqrt(diag(Cf)))) list(dat=dat, Ey=Ey, obs=obs, x_predict=x_predict, l=l, sigma.n=sigma.n, n=n) merge(obs, merge(dat, Ey, "x", all=TRUE), "x", all=TRUE) } x_predict <- seq(-5,5,len=50) obs <- data.frame(x = c(x_predict[c(5,10,15,20,25)]), observed = x_predict[c(10, 15, 20, 25, 10)]) S <- samples(obs, l=1, sigma.n=.5, x_predict, n=3) ggplot(S) + geom_ribbon(aes(x,y,ymin=ymin, ymax=ymax), fill="grey80") + geom_point(aes(x,observed)) + geom_line(aes(x,value, color=variable))
ggplot(S$dat,aes(x=x,y=value)) + geom_ribbon(data=S$Ey, aes(x=x,y=y, ymin=ymin, ymax=ymax), fill="grey80") + # Var geom_line(aes(color=variable)) + #REPLICATES geom_line(data=S$Ey,aes(x=x,y=y), size=1) + #MEAN geom_point(data=S$obs,aes(x=x,y=y)) + #OBSERVED DATA scale_y_continuous(lim=c(-4,4), name="output, f(x)") + xlab("input, x")
Note that unlike the previous case, the posterior no longer collapses completely around the neighborhood of the test points.
We can also compute the likelihood (and marginal likelihood over the prior) of the data directly from the inferred multivariate normal distribution, which can allow us to tune the hyperparameters such as the characteristic length scale $\ell$ and the observation noise $\sigma_n$. The most obvious approach would be to do so by maximum likelihood, giving point estimates of the hyper-parameters, though presumably we could be Bayesian about these as well.
The likelihood is given by $$\log(p(y | X)) = -\tfrac{1}{2} \mathbf{y}^T (K + \sigma_n^2 \mathbf{I})^{-1} y - \tfrac{1}{2} \log\left| K + \sigma_n^2 \mathbf{I} \right| - \tfrac{n}{2}\log 2 \pi$$
Which is:
#' observation pairs, obs$x, obs$x, #' SE covariance between two observations minusloglik <- function(pars){ l <- pars[1] sigma.n <- pars[2] cov <- function(X, Y) outer(X, Y, SE, l) I <- diag(1, length(obs$x)) K <- cov(obs$x, obs$x) 0.5 * t(obs$y) %*% solve(K + sigma.n^2 * I) %*% obs$y + log(det(K + sigma.n^2*I)) + length(obs$y) * log(2 * pi) / 2 }
pars <- c(l=1, sigma.n=1) o <- optim(pars, minusloglik) o$par
lpriors <- function(pars){ d.p <- c(5, 5) s2.p <- c(5, 5) prior <- unname( dgamma(exp(pars[1]), d.p[1], scale = d.p[2]) * dgamma(exp(pars[2]), s2.p[1], s2.p[2]) # dunif(exp(pars[1]), 0, 100) * # dunif(exp(pars[2]), 0, 100) ) log(prior) } posterior <- function(pars, x, y){ l <- exp(pars[1]) sigma.n <- exp(pars[2]) cov <- function(X, Y) outer(X, Y, SE, l) I <- diag(1, length(x)) K <- cov(x, x) loglik <- - 0.5 * t(y) %*% solve(K + sigma.n^2 * I) %*% y - log(det(K + sigma.n^2*I)) - length(y) * log(2 * pi) / 2 loglik + lpriors(pars) }
posterior(log(pars), obs$x, obs$y) posterior(log(o$par), obs$x, obs$y)
require(mcmc) n <- 1e4 out <- metrop(posterior, log(pars), n, x = obs$x, y = obs$y) out$accept
postdist <- cbind(index=1:n, as.data.frame(exp(out$batch))) names(postdist) <- c("index", names(pars)) df <- melt(postdist, id="index") # TRACES ggplot(df) + geom_line(aes(index, value)) + facet_wrap(~ variable, scale="free", ncol=1) ggplot(df) + geom_line(aes(index, log(value))) + facet_wrap(~ variable, scale="free", ncol=1)
d.p <- c(5, 5) s2.p <- c(5, 5) s2_prior <- function(x) dgamma(x, s2.p[1], s2.p[2]) d_prior <- function(x) dgamma(x, d.p[1], scale = d.p[2]) prior_fns <- list(l = d_prior, sigma.n = s2_prior) require(plyr) prior_curves <- ddply(df, "variable", function(dd){ grid <- seq(min(dd$value), max(dd$value), length = 100) data.frame(value = grid, density = prior_fns[[dd$variable[1]]](grid)) }) # Posteriors (easier to read than histograms) ggplot(df, aes(value)) + stat_density(geom="path", position="identity", alpha=0.7) + geom_line(data=prior_curves, aes(x=value, y=density), col="red") + facet_wrap(~ variable, scale="free", ncol=2)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.