opts_chunk$set(tidy=FALSE, warning=FALSE, message=FALSE, cache=FALSE, comment=NA, dev="CairoPDF", fig.width=6, fig.height=4)
library(ggplot2) # plotting 
opts_knit$set(upload.fun = socialR::flickr.url)
theme_set(theme_bw(base_size=10))
theme_update(panel.background = element_rect(fill = "transparent", colour = NA),
             plot.background = element_rect(fill = "transparent", colour = NA))
cbPalette <- c("#000000", "#E69F00", "#56B4E9", "#009E73", 
               "#F0E442", "#0072B2", "#D55E00", "#CC79A7")
set.seed(12345)
#' @import MASS
#' @import reshape2
samples <- function(obs, l=1, sigma.n=0.3, x_predict = seq(-5,5,len=50), n=3){

  require(MASS); require(reshape2)
  SE <- function(Xi,Xj, l) exp(-0.5 * (Xi - Xj) ^ 2 / l ^ 2)
  cov <- function(X, Y) outer(X, Y, SE, l)
  cov_xx_inv <- solve(cov(obs$x, obs$x) + sigma.n^2 * diag(1, length(obs$x)))
  Ef <- cov(x_predict, obs$x) %*% cov_xx_inv %*% obs[[2]]
  Cf <- cov(x_predict, x_predict) - 
        cov(x_predict, obs$x)  %*% cov_xx_inv %*% cov(obs$x, x_predict)

  values <- mvrnorm(n, Ef, Cf)
  dat <- data.frame(x=x_predict, t(values))
  dat <- melt(dat, id="x")
  Ey <- data.frame(x=x_predict, 
                   y=(Ef), 
                   ymin=(Ef-2*sqrt(diag(Cf))), 
                   ymax=(Ef+2*sqrt(diag(Cf))))
  list(dat=dat, Ey=Ey, obs=obs, x_predict=x_predict, l=l, sigma.n=sigma.n, n=n)

  merge(obs, merge(dat, Ey, "x", all=TRUE), "x", all=TRUE)
}

x_predict <- seq(-5,5,len=50)
obs <- data.frame(x = c(x_predict[c(5,10,15,20,25)]),
                  observed = x_predict[c(10,  15,  20,  25, 10)])

S <- samples(obs, l=1, sigma.n=.5, x_predict, n=3)

ggplot(S) + 
  geom_ribbon(aes(x,y,ymin=ymin, ymax=ymax), fill="grey80") + 
  geom_point(aes(x,observed)) + 
  geom_line(aes(x,value, color=variable))
ggplot(S$dat,aes(x=x,y=value)) +
  geom_ribbon(data=S$Ey, aes(x=x,y=y, ymin=ymin, ymax=ymax), fill="grey80") + # Var
  geom_line(aes(color=variable)) + #REPLICATES
  geom_line(data=S$Ey,aes(x=x,y=y), size=1) + #MEAN
  geom_point(data=S$obs,aes(x=x,y=y)) +  #OBSERVED DATA
  scale_y_continuous(lim=c(-4,4), name="output, f(x)") +
  xlab("input, x") 

Note that unlike the previous case, the posterior no longer collapses completely around the neighborhood of the test points.

We can also compute the likelihood (and marginal likelihood over the prior) of the data directly from the inferred multivariate normal distribution, which can allow us to tune the hyperparameters such as the characteristic length scale $\ell$ and the observation noise $\sigma_n$. The most obvious approach would be to do so by maximum likelihood, giving point estimates of the hyper-parameters, though presumably we could be Bayesian about these as well.

The likelihood is given by $$\log(p(y | X)) = -\tfrac{1}{2} \mathbf{y}^T (K + \sigma_n^2 \mathbf{I})^{-1} y - \tfrac{1}{2} \log\left| K + \sigma_n^2 \mathbf{I} \right| - \tfrac{n}{2}\log 2 \pi$$

Which is:

#' observation pairs, obs$x, obs$x, 
#' SE covariance between two observations 
minusloglik <- function(pars){ 

  l <- pars[1]
  sigma.n <- pars[2]

  cov <- function(X, Y) outer(X, Y, SE, l)
  I <- diag(1, length(obs$x))
  K <- cov(obs$x, obs$x) 

  0.5 * t(obs$y) %*% solve(K + sigma.n^2 * I) %*% obs$y +
    log(det(K + sigma.n^2*I)) +
    length(obs$y) * log(2 * pi) / 2
  }
pars <- c(l=1, sigma.n=1)
o <- optim(pars, minusloglik)
o$par
lpriors <- function(pars){
   d.p <- c(5, 5)
  s2.p <- c(5, 5)  

  prior <- unname(
    dgamma(exp(pars[1]), d.p[1], scale = d.p[2]) *
    dgamma(exp(pars[2]), s2.p[1], s2.p[2]) 
#    dunif(exp(pars[1]), 0, 100) * 
#    dunif(exp(pars[2]), 0, 100)
  )

  log(prior)
}

posterior <- function(pars, x, y){

  l <- exp(pars[1])
  sigma.n <- exp(pars[2])

  cov <- function(X, Y) outer(X, Y, SE, l)
  I <- diag(1, length(x))
  K <- cov(x, x) 

  loglik <- - 0.5 * t(y) %*% solve(K + sigma.n^2 * I) %*% y -
    log(det(K + sigma.n^2*I)) -
    length(y) * log(2 * pi) / 2

  loglik + lpriors(pars)
}
posterior(log(pars), obs$x, obs$y)
posterior(log(o$par), obs$x, obs$y)
require(mcmc)
n <- 1e4
out <- metrop(posterior, log(pars), n, x = obs$x, y = obs$y)
out$accept
postdist <- cbind(index=1:n, as.data.frame(exp(out$batch)))
names(postdist) <- c("index", names(pars))
df <- melt(postdist, id="index")
# TRACES
ggplot(df) + geom_line(aes(index, value)) + facet_wrap(~ variable, scale="free", ncol=1)

ggplot(df) + geom_line(aes(index, log(value))) + facet_wrap(~ variable, scale="free", ncol=1)
d.p <- c(5, 5)
s2.p <- c(5, 5)  

s2_prior <- function(x) dgamma(x, s2.p[1], s2.p[2])
d_prior <- function(x) dgamma(x, d.p[1], scale = d.p[2])
prior_fns <- list(l = d_prior, sigma.n = s2_prior)


require(plyr)
prior_curves <- ddply(df, "variable", function(dd){
    grid <- seq(min(dd$value), max(dd$value), length = 100)
    data.frame(value = grid, density = prior_fns[[dd$variable[1]]](grid))
})

# Posteriors (easier to read than histograms)
ggplot(df, aes(value)) + 
  stat_density(geom="path", position="identity", alpha=0.7) +
  geom_line(data=prior_curves, aes(x=value, y=density), col="red") + 
  facet_wrap(~ variable, scale="free", ncol=2)


cboettig/nonparametric-bayes documentation built on May 13, 2019, 2:09 p.m.