License: CC BY-SA 4.0
References:
"Optimal statistical decisions", DeGroot (1970), theorem 1 in section 9.6 on page 169
"A first course in Bayesian statistical methods", Hoff (2009), section 5.3 starting on page 73
In this document, the Normal distribution, $\mathcal{N}$, is parametrized in terms of its mean and variance. Nevertheless, equations are easier to work with in terms of precision, which is the inverse of the variance.
The Gamma distribution, $\mathcal{G}$, is parametrized in terms of shape and rate.
[ {y_1, \ldots, y_N } = { \boldsymbol{y} } ]
[ { \mu, \tau } ]
[ \forall n \in {1,\ldots,N}, \; y_n | \mu, \tau \; \sim \; \mathcal{N}(\mu, 1/\tau) ]
$p(\Theta) = p(\mu, \tau) = p(\tau) \times p(\mu | \tau)$
$\tau \sim \mathcal{G}(a_0, b_0)$
$\mu | \tau \sim \mathcal{N}(m_0, 1 / (\tau t_0))$
Hyper-parameters:
shape $a_0 > 0$ and rate $b_0 > 0$
mean $m_0 \in \mathbb{R}$ and precision $t_0 > 0$
$p(\Theta | \mathcal{D}) \propto p(\Theta) \times p(\mathcal{D} | \Theta)$
$\tau | \boldsymbol{y} \sim \mathcal{G}(a_N, b_N)$
$\mu | \boldsymbol{y}, \tau \sim \mathcal{N}(m_N, 1 / t_N)$
with:
$a_N = a_0 + N/2$
$b_N = b_0 + \frac{1}{2} \sum_n (y_n - \bar{y})^2 + \frac{t_0 N (\bar{y} - m_0)^2}{2 (t_0 + N)}$
$m_N = \frac{t_0 m_0 + N \bar{y}}{t_0 + N}$
$t_N = \tau (t_0 + N)$
TODO
Bayesian inference uses posterior distributions: $p(\Theta | \mathcal{D}) = p(\mu, \tau | \boldsymbol{y}) = p(\tau | \boldsymbol{y}) \, p(\mu | \boldsymbol{y}, \tau)$
Let us start with the conditional posterior of $\mu$:
\begin{align} p(\mu | \boldsymbol{y}, \tau) &\propto p(\mu | \tau) \, p(\boldsymbol{y} | \mu, \tau) \ \end{align}
set.seed(7263) truth <- c()
a.0 <- 3 truth <- append(x=truth, values=setNames(object=a.0, nm="a.0")) b.0 <- 2 truth <- append(x=truth, values=setNames(object=b.0, nm="b.0")) truth <- append(x=truth, values=setNames(object=a.0 / b.0, nm="prior.mean.tau")) truth <- append(x=truth, values=setNames(object=a.0 / b.0^2, nm="prior.var.tau")) plot(x=seq(0,11,0.1), y=dgamma(x=seq(0,11,0.1), shape=a.0, rate=b.0), main=paste0("Gamma(shape=", a.0, ", rate=", b.0, ")"), xlab="x", ylab="pdf", type="l") (tau <- rgamma(n=1, shape=a.0, rate=b.0)) truth <- append(x=truth, values=setNames(object=tau, nm="tau")) truth <- append(x=truth, values=setNames(object=1 / tau, nm="sigma2")) truth <- append(x=truth, values=setNames(object=sqrt(1 / tau), nm="sigma"))
m.0 <- 73 truth <- append(x=truth, values=setNames(object=m.0, nm="prior.mean.mu")) t.0 <- 10^(-1) truth <- append(x=truth, values=setNames(object=1 / (tau * t.0), nm="prior.var.mu")) plot(x=seq(60,86,0.1), y=dnorm(x=seq(60,86,0.1), mean=m.0, sd=sqrt(1/(tau*t.0))), main=paste0("Normal(mean=", m.0, ", sd=", format(sqrt(1/(tau*t.0)), digits=3), ")"), xlab="x", ylab="pdf", type="l") (mu <- rnorm(n=1, mean=m.0, sd=sqrt(1/(tau * t.0)))) truth <- append(x=truth, values=setNames(object=mu, nm="mu"))
N <- 5*10^1 y <- rnorm(n=N, mean=mu, sd=sqrt(1/tau)) summary(y) hist(y, breaks="FD", freq=FALSE, col="grey", border="white", main="Simulated data", ylim=c(0, dnorm(x=mu, mean=mu, sd=sqrt(1/tau)))) abline(v=mu, col="red") curve(dnorm(x=x, mean=mu, sd=sqrt(1/tau)), add=TRUE, col="red")
truth <- append(x=truth, values=setNames(object=mean(y), nm="mle.mu")) truth <- append(x=truth, values=setNames(object=1 / var(y), nm="mle.tau")) truth <- append(x=truth, values=setNames(object=var(y), nm="mle.sigma2")) truth <- append(x=truth, values=setNames(object=sd(y), nm="mle.sigma")) truth[grep("mle", names(truth))]
a.N <- a.0 + N / 2 truth <- append(x=truth, values=setNames(object=a.N, nm="a.N")) b.N <- b.0 + (1/2) * sum((y - mean(y))^2) + (t.0 * N * (mean(y) - m.0)^2) / (2 * (t.0 + N)) truth <- append(x=truth, values=setNames(object=b.N, nm="b.N")) truth <- append(x=truth, values=setNames(object=a.N / b.N, nm="post.mean.tau")) truth <- append(x=truth, values=setNames(object=a.N / b.N^2, nm="post.var.tau")) truth <- append(x=truth, values=setNames(object=b.N / (a.N - 1), nm="post.mean.sigma2")) truth <- append(x=truth, values=setNames(object=b.N^2 / ((a.N - 1)^2 * (a.N - 2)), nm="post.var.sigma2")) m.N <- (t.0 * m.0 + N * mean(y)) / (t.0 + N) truth <- append(x=truth, values=setNames(object=m.N, nm="post.mean.mu.cond")) t.N <- (a.N / b.N) * (t.0 + N) truth <- append(x=truth, values=setNames(object=1 / t.N, nm="post.var.mu.cond"))
truth
plot(x=0, y=0, type="n", xlim=c(truth["prior.mean.mu"] - 2 * sqrt(truth["prior.var.mu"]), truth["prior.mean.mu"] + 2 * sqrt(truth["prior.var.mu"])), ylim=c(0,2), xlab="x", ylab="density", main="Bayesian inference of a Normal with unknown mean and variance") abline(v=truth["mu"], lwd=2, lty=2) curve(dnorm(x=x, mean=truth["prior.mean.mu"], sd=sqrt(truth["prior.var.mu"])), add=TRUE, col="green", lwd=2) curve(dnorm(x=x, mean=mu, sd=sqrt(1/tau)), add=TRUE, col="black", lwd=2) curve(dnorm(x=x, mean=truth["post.mean.mu.cond"], sd=sqrt(truth["post.var.mu.cond"])), add=TRUE, col="red", lwd=2, n=10^3+1) legend(x="topright", legend=c("true mu|tau", "prior(mu|tau)", "likelihood(y;mu,tau)", "posterior(mu|y,tau)"), col=c("black", "green","black","red"), lwd=2, lty=c(2,1,1,1), bty="n")
print(sessionInfo(), locale=FALSE)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.