In cboettig/nonparametric-bayes: Nonparametric Bayes inference for ecological models

opts_chunk$set(tidy=FALSE, warning=FALSE, message=FALSE, cache=FALSE, comment=NA, dev="CairoPDF", fig.width=6, fig.height=4)
library(ggplot2) # plotting 
opts_knit$set(upload.fun = socialR::flickr.url)
theme_set(theme_bw(base_size=10))
theme_update(panel.background = element_rect(fill = "transparent", colour = NA),
             plot.background = element_rect(fill = "transparent", colour = NA))
cbPalette <- c("#000000", "#E69F00", "#56B4E9", "#009E73", 
               "#F0E442", "#0072B2", "#D55E00", "#CC79A7")

set.seed(12345)

#' @import MASS
#' @import reshape2
samples <- function(obs, l=1, sigma.n=0.3, x_predict = seq(-5,5,len=50), n=3){

  require(MASS); require(reshape2)
  SE <- function(Xi,Xj, l) exp(-0.5 * (Xi - Xj) ^ 2 / l ^ 2)
  cov <- function(X, Y) outer(X, Y, SE, l)
  cov_xx_inv <- solve(cov(obs$x, obs$x) + sigma.n^2 * diag(1, length(obs$x)))
  Ef <- cov(x_predict, obs$x) %*% cov_xx_inv %*% obs[[2]]
  Cf <- cov(x_predict, x_predict) - 
        cov(x_predict, obs$x)  %*% cov_xx_inv %*% cov(obs$x, x_predict)

  values <- mvrnorm(n, Ef, Cf)
  dat <- data.frame(x=x_predict, t(values))
  dat <- melt(dat, id="x")
  Ey <- data.frame(x=x_predict, 
                   y=(Ef), 
                   ymin=(Ef-2*sqrt(diag(Cf))), 
                   ymax=(Ef+2*sqrt(diag(Cf))))
  list(dat=dat, Ey=Ey, obs=obs, x_predict=x_predict, l=l, sigma.n=sigma.n, n=n)

  merge(obs, merge(dat, Ey, "x", all=TRUE), "x", all=TRUE)
}

x_predict <- seq(-5,5,len=50)
obs <- data.frame(x = c(x_predict[c(5,10,15,20,25)]),
                  observed = x_predict[c(10,  15,  20,  25, 10)])

S <- samples(obs, l=1, sigma.n=.5, x_predict, n=3)

ggplot(S) + 
  geom_ribbon(aes(x,y,ymin=ymin, ymax=ymax), fill="grey80") + 
  geom_point(aes(x,observed)) + 
  geom_line(aes(x,value, color=variable))

ggplot(S$dat,aes(x=x,y=value)) +
  geom_ribbon(data=S$Ey, aes(x=x,y=y, ymin=ymin, ymax=ymax), fill="grey80") + # Var
  geom_line(aes(color=variable)) + #REPLICATES
  geom_line(data=S$Ey,aes(x=x,y=y), size=1) + #MEAN
  geom_point(data=S$obs,aes(x=x,y=y)) +  #OBSERVED DATA
  scale_y_continuous(lim=c(-4,4), name="output, f(x)") +
  xlab("input, x")

Note that unlike the previous case, the posterior no longer collapses completely around the neighborhood of the test points.

We can also compute the likelihood (and marginal likelihood over the prior) of the data directly from the inferred multivariate normal distribution, which can allow us to tune the hyperparameters such as the characteristic length scale $\ell$ and the observation noise $\sigma_n$. The most obvious approach would be to do so by maximum likelihood, giving point estimates of the hyper-parameters, though presumably we could be Bayesian about these as well.

The likelihood is given by $$\log(p(y | X)) = -\tfrac{1}{2} \mathbf{y}^T (K + \sigma_n^2 \mathbf{I})^{-1} y - \tfrac{1}{2} \log\left| K + \sigma_n^2 \mathbf{I} \right| - \tfrac{n}{2}\log 2 \pi$$

Which is:

#' observation pairs, obs$x, obs$x, 
#' SE covariance between two observations 
minusloglik <- function(pars){ 

  l <- pars[1]
  sigma.n <- pars[2]

  cov <- function(X, Y) outer(X, Y, SE, l)
  I <- diag(1, length(obs$x))
  K <- cov(obs$x, obs$x) 

  0.5 * t(obs$y) %*% solve(K + sigma.n^2 * I) %*% obs$y +
    log(det(K + sigma.n^2*I)) +
    length(obs$y) * log(2 * pi) / 2
  }

pars <- c(l=1, sigma.n=1)
o <- optim(pars, minusloglik)
o$par

lpriors <- function(pars){
   d.p <- c(5, 5)
  s2.p <- c(5, 5)  

  prior <- unname(
    dgamma(exp(pars[1]), d.p[1], scale = d.p[2]) *
    dgamma(exp(pars[2]), s2.p[1], s2.p[2]) 
#    dunif(exp(pars[1]), 0, 100) * 
#    dunif(exp(pars[2]), 0, 100)
  )

  log(prior)
}

posterior <- function(pars, x, y){

  l <- exp(pars[1])
  sigma.n <- exp(pars[2])

  cov <- function(X, Y) outer(X, Y, SE, l)
  I <- diag(1, length(x))
  K <- cov(x, x) 

  loglik <- - 0.5 * t(y) %*% solve(K + sigma.n^2 * I) %*% y -
    log(det(K + sigma.n^2*I)) -
    length(y) * log(2 * pi) / 2

  loglik + lpriors(pars)
}

posterior(log(pars), obs$x, obs$y)
posterior(log(o$par), obs$x, obs$y)

require(mcmc)
n <- 1e4
out <- metrop(posterior, log(pars), n, x = obs$x, y = obs$y)
out$accept

postdist <- cbind(index=1:n, as.data.frame(exp(out$batch)))
names(postdist) <- c("index", names(pars))
df <- melt(postdist, id="index")
# TRACES
ggplot(df) + geom_line(aes(index, value)) + facet_wrap(~ variable, scale="free", ncol=1)

ggplot(df) + geom_line(aes(index, log(value))) + facet_wrap(~ variable, scale="free", ncol=1)

d.p <- c(5, 5)
s2.p <- c(5, 5)  

s2_prior <- function(x) dgamma(x, s2.p[1], s2.p[2])
d_prior <- function(x) dgamma(x, d.p[1], scale = d.p[2])
prior_fns <- list(l = d_prior, sigma.n = s2_prior)


require(plyr)
prior_curves <- ddply(df, "variable", function(dd){
    grid <- seq(min(dd$value), max(dd$value), length = 100)
    data.frame(value = grid, density = prior_fns[[dd$variable[1]]](grid))
})

# Posteriors (easier to read than histograms)
ggplot(df, aes(value)) + 
  stat_density(geom="path", position="identity", alpha=0.7) +
  geom_line(data=prior_curves, aes(x=value, y=density), col="red") + 
  facet_wrap(~ variable, scale="free", ncol=2)

cboettig/nonparametric-bayes documentation built on May 13, 2019, 2:09 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

cboettig/nonparametric-bayes
Nonparametric Bayes inference for ecological models

In cboettig/nonparametric-bayes: Nonparametric Bayes inference for ecological models

R Package Documentation

Browse R Packages

We want your feedback!

cboettig/nonparametric-bayes Nonparametric Bayes inference for ecological models

In cboettig/nonparametric-bayes: Nonparametric Bayes inference for ecological models

R Package Documentation

Browse R Packages

We want your feedback!

cboettig/nonparametric-bayes
Nonparametric Bayes inference for ecological models