knitr::opts_chunk$set( collapse = TRUE, comment = "#>", dev = "png", dev.args = list(type = "cairo-png"), fig.width = 7, fig.height = 5 )
library(inlabru) library(ggplot2)
See the method
vignette for details of the general inlabru
method.
Here, we'll take a look at a small toy example to see the difference between
a few posterior approximation methods.
Note: This vignette is not completed yet.
Hierarchical model: $$ \begin{aligned} \lambda &\sim \pExp(\gamma) \ (y_i|\lambda) &\sim \pPo(\lambda), \text{ independent across $i=1,\dots,n$} \end{aligned} $$ With $\ol{y}=\frac{1}{n}\sum_{i=1}^n y_i$, the posterior density is $$ \begin{aligned} p(\lambda | {y_i}) &\propto p(\lambda, y_1,\dots,y_n) \ &\propto \exp(-\gamma\lambda) \exp(-n\lambda) \lambda^{n\ol{y}} \ &= \exp{-(\gamma+n)\lambda} \lambda^{n\ol{y}}, \end{aligned} $$ which is the density of a $\pGa(\alpha = 1+n\ol{y}, \beta = \gamma+n)$ distribution.
Introducing a latent Gaussian variable $u\sim\pN(0,1)$, the model can be reformulated as $$ \begin{aligned} \lambda(u) &=-\ln{1-\Phi(u)}/\gamma \ (y_i|u) &\sim \pPo(\lambda(u)) \end{aligned} $$ We will need some derivatives of $\lambda$ with respect to $u$: $$ \begin{aligned} \frac{\partial\lambda(u)}{\partial u} &= \frac{1}{\gamma}\frac{\phi(u)}{1-\Phi(u)} = \lambda'(u) \ \frac{\partial^2\lambda(u)}{\partial u^2} &= - \frac{1}{\gamma}\frac{\phi(u)}{1-\Phi(u)} \left( u + \frac{\phi(u)}{1-\Phi(u)} \right) = -\lambda'(u)\left{u-\gamma\lambda'(u)\right} \ \frac{\partial\ln\lambda(u)}{\partial u} &= \frac{1}{\lambda(u)} \frac{\partial\lambda(u)}{\partial u} =\frac{1}{-\ln{1-\Phi(u)}} \frac{\phi(u)}{1-\Phi(u)} = \frac{\lambda'(u)}{\lambda(u)} \ \frac{\partial^2\ln\lambda(u)}{\partial u^2} &= \frac{1}{\lambda(u)} \frac{\partial^2\lambda(u)}{\partial u^2} -\frac{1}{\lambda(u)^2} \left(\frac{\partial\lambda(u)}{\partial u}\right)^2 = \frac{-\lambda'(u){u - \gamma\lambda'(u)}}{\lambda(u)} - \frac{\lambda'(u)^2}{\lambda(u)^2}\ &= -\frac{\lambda'(u)}{\lambda(u)}\left{ u - \gamma\lambda'(u) +\frac{\lambda'(u)}{\lambda(u)} \right} \end{aligned} $$
lambda <- function(u, gamma) { -pnorm(u, lower.tail = FALSE, log.p = TRUE) / gamma } lambda_inv <- function(lambda, gamma) { qnorm(-lambda * gamma, lower.tail = FALSE, log.p = TRUE) } D1lambda <- function(u, gamma) { exp(dnorm(u, log = TRUE) - pnorm(u, lower.tail = FALSE, log.p = TRUE)) / gamma } D2lambda <- function(u, gamma) { D1L <- D1lambda(u, gamma) -D1L * (u - gamma * D1L) } D1log_lambda <- function(u, gamma) { D1lambda(u, gamma) / lambda(u, gamma) } D2log_lambda <- function(u, gamma) { D1logL <- D1log_lambda(u, gamma) -D1logL * (u - gamma * D1lambda(u, gamma = gamma) + D1logL) }
A basic approximation of the posterior distribution for $\lambda$ given $y$
can be defined as a deterministic transformation of a Gaussian distribution
obtained from a 2nd order Taylor approximation of $\ln p(u|{y_i})$ at the posterior
mode $u_0$ of $p(u|{y_i})$. The needed derivatives are
$$
\begin{aligned}
\frac{\partial\ln p(u|{y_i})}{\partial u} &= \frac{\partial\ln\phi(u)}{\partial u}
- n\lambda'(u) + n\ol{y}\frac{\lambda'(u)}{\lambda(u)}
= -u + n\frac{\lambda'(u)}{\lambda(u)}\left{
\ol{y} - \lambda(u)
\right}
\
\frac{\partial^2\ln p(u|{y_i})}{\partial u^2}
&=
-1
-
n
\frac{\lambda'(u)}{\lambda(u)}\left{
u - \gamma\lambda'(u) + \frac{\lambda'(u)}{\lambda(u)}
\right}
\left{
\ol{y} - \lambda(u)
\right}
-
n
\frac{\lambda'(u)^2}{\lambda(u)}
\end{aligned}
$$
At the mode $u_0$, the first order derivative is zero, and
$$
\begin{aligned}
\left.\frac{\partial^2\ln p(u|{y_i})}{\partial u^2}\right|{u=u_0}
&=
-1
-
\left{
u_0 - \gamma\lambda'(u_0) + \frac{\lambda'(u_0)}{\lambda(u_0)}
\right}
u_0
-
n
\frac{\lambda'(u_0)^2}{\lambda(u_0)} .
\end{aligned}
$$
The quadratic approximation of the log-posterior density at the mode $u_0$ is then
$$
\ln \breve{p}(u|{y_i}) = \text{const} - \frac{(u-u_0)^2}{2}
\left[
-
\left.\frac{\partial^2\ln p(u|{y_i})}{\partial u^2}\right|{u=u_0}
\right]
$$
In inlabru
, the approximation first linearises $\ln \lambda(u)$ at $u_0$ before applying
the Taylor approximation of $\ln p(u|{y_i})$. The linearised log-predictor is
$$
\ln \ol{\lambda}(u) = \ln \lambda(u_0) + \frac{\lambda'(u_0)}{\lambda(u_0)}(u - u_0)
$$
so that
$$
\ol{\lambda}'(u) = \frac{\lambda'(u_0)}{\lambda(u_0)} \ol{\lambda}(u)
$$
and the second order derivative of the linearised log-posterior density is
$$
\begin{aligned}
\left.\frac{\partial^2\ln \ol{p}(u|{y_i})}{\partial u^2}\right|_{u=u_0}
&=
-1
-
n
\frac{\lambda'(u_0)^2}{\lambda(u_0)} .
\end{aligned}
$$
log_p <- function(u, y, gamma) { L <- lambda(u, gamma) n <- length(y) dnorm(u, log = TRUE) - n * L + n * mean(y) * log(L) - sum(lgamma(y + 1)) } D1log_p <- function(u, y, gamma) { n <- length(y) -u + n * D1log_lambda(u, gamma) * (mean(y) - lambda(u, gamma)) } D2log_p <- function(u, y, gamma) { n <- length(y) -1 + n * D2log_lambda(u, gamma) * (mean(y) - lambda(u, gamma)) - n * D1log_lambda(u, gamma) * D1lambda(u, gamma) }
g <- 1 y <- c(0, 1, 2) y <- c(0, 0, 0, 0, 0) y <- rpois(5, 5) mu_quad <- uniroot( D1log_p, lambda_inv((1 + sum(y)) / (g + length(y)) * c(1 / 10, 10), gamma = g), y = y, gamma = g )$root sd_quad <- (-D2log_p(mu_quad, y = y, gamma = g))^-0.5 sd_lin <- (1 + length(y) * D1log_lambda(mu_quad, gamma = g)^2 * lambda(mu_quad, gamma = g))^-0.5 lambda0 <- lambda(mu_quad, gamma = g)
ggplot() + xlim( lambda(mu_quad - 4 * sd_quad, gamma = g), lambda(mu_quad + 4 * sd_quad, gamma = g) ) + xlab("lambda") + ylab("Density") + geom_function( fun = function(x) { exp(log_p(lambda_inv(x, gamma = g), y = y, gamma = g)) / D1lambda(lambda_inv(x, gamma = g), gamma = g) / ( exp(log_p(lambda_inv(lambda0, gamma = g), y = y, gamma = g)) / D1lambda(lambda_inv(lambda0, gamma = g), gamma = g) ) * dgamma(lambda0, shape = 1 + sum(y), rate = g + length(y)) }, mapping = aes(col = "Theory"), n = 1000 ) + geom_function( fun = dgamma, args = list(shape = 1 + sum(y), rate = g + length(y)), mapping = aes(col = "Theory"), n = 1000 ) + geom_function( fun = function(x) { dnorm(lambda_inv(x, gamma = g), mean = mu_quad, sd = sd_quad) / D1lambda(lambda_inv(x, gamma = g), gamma = g) }, mapping = aes(col = "Quadratic"), n = 1000 ) + geom_function( fun = function(x) { dnorm(lambda_inv(x, gamma = g), mean = mu_quad, sd = sd_lin) / D1lambda(lambda_inv(x, gamma = g), gamma = g) }, mapping = aes(col = "Linearised"), n = 1000 ) + geom_vline(mapping = aes( xintercept = (1 + sum(y)) / (g + length(y)), lty = "Bayes mean" )) + geom_vline(mapping = aes(xintercept = lambda0, lty = "Bayes mode")) + geom_vline(mapping = aes(xintercept = mean(y), lty = "Plain mean"))
ggplot() + xlim( lambda_inv(lambda0, gamma = g) - 4 * sd_quad, lambda_inv(lambda0, gamma = g) + 4 * sd_quad ) + xlab("u") + ylab("Density") + geom_function( fun = function(x) { exp(log_p(x, y = y, gamma = g) - log_p(lambda_inv(lambda0, gamma = g), y = y, gamma = g)) * (dgamma(lambda0, shape = 1 + sum(y), rate = g + length(y)) * D1lambda(lambda_inv(lambda0, gamma = g), gamma = g)) }, mapping = aes(col = "Theory"), n = 1000 ) + geom_function( fun = function(x) { dgamma(lambda(x, gamma = g), shape = 1 + sum(y), rate = g + length(y)) * D1lambda(x, gamma = g) }, mapping = aes(col = "Theory"), n = 1000 ) + geom_function( fun = dnorm, args = list(mean = mu_quad, sd = sd_quad), mapping = aes(col = "Quadratic"), n = 1000 ) + geom_function( fun = dnorm, args = list(mean = mu_quad, sd = sd_lin), mapping = aes(col = "Linearised"), n = 1000 ) + geom_vline(mapping = aes(xintercept = lambda_inv((1 + sum(y)) / (g + length(y)), gamma = g ), lty = "Bayes mean")) + geom_vline(mapping = aes(xintercept = lambda_inv(lambda0, gamma = g), lty = "Bayes mode")) + geom_vline(mapping = aes(xintercept = lambda_inv(mean(y), gamma = g), lty = "Plain mean"))
ggplot() + xlim( lambda(mu_quad - 4 * sd_quad, gamma = g), lambda(mu_quad + 4 * sd_quad, gamma = g) ) + xlab("lambda") + ylab("CDF") + geom_function( fun = pgamma, args = list(shape = 1 + sum(y), rate = g + length(y)), mapping = aes(col = "Theory"), n = 1000 ) + geom_function( fun = function(x) { pnorm(lambda_inv(x, gamma = g), mean = mu_quad, sd = sd_quad) }, mapping = aes(col = "Quadratic"), n = 1000 ) + geom_function( fun = function(x) { pnorm(lambda_inv(x, gamma = g), mean = mu_quad, sd = sd_lin) }, mapping = aes(col = "Linearised"), n = 1000 ) + geom_vline(mapping = aes( xintercept = (1 + sum(y)) / (g + length(y)), lty = "Bayes mean" )) + geom_vline(mapping = aes(xintercept = lambda0, lty = "Bayes mode")) + geom_vline(mapping = aes(xintercept = mean(y), lty = "Plain mean"))
ggplot() + xlim( lambda_inv(lambda0, gamma = g) - 4 * sd_quad, lambda_inv(lambda0, gamma = g) + 4 * sd_quad ) + xlab("u") + ylab("CDF") + geom_function( fun = function(x) { pgamma(lambda(x, gamma = g), shape = 1 + sum(y), rate = g + length(y)) }, mapping = aes(col = "Theory"), n = 1000 ) + geom_function( fun = pnorm, args = list(mean = mu_quad, sd = sd_quad), mapping = aes(col = "Quadratic"), n = 1000 ) + geom_function( fun = pnorm, args = list(mean = mu_quad, sd = sd_lin), mapping = aes(col = "Linearised"), n = 1000 ) + geom_vline(mapping = aes( xintercept = lambda_inv((1 + sum(y)) / (g + length(y)), gamma = g ), lty = "Bayes mean" )) + geom_vline(mapping = aes( xintercept = lambda_inv(lambda0, gamma = g), lty = "Bayes mode" )) + geom_vline(mapping = aes( xintercept = lambda_inv(mean(y), gamma = g), lty = "Plain mean" ))
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.