#' @title shrink.estimate
#' @param lambda regularization parameter in [0, 1], vector of length K or scalar
#' @param ybar vector of group means
#' @details Lambda = 1 is no pooling (all groups different). Lambda = 0 is complete pooling (all
#' groups same). Values of lambda should probably be > n/sum(n).
shrink.estimate <- function(lambda, ybar, n) {
grand.mean <- mean(ybar)
lambda * ybar + (1 - lambda) * grand.mean
}
#' @title emperical.bayes
#' "An Introduction to Emperical Bayes Data Analysis", Casella 1985
emperical.bayes <- function(y, g, sigma2) {
ybar.j <- tapply(y, g, mean)
ybar.. <- mean(y)
#emperical.bayes.est(ybar.j, ybar, sigma2 = 2)
emperical.bayes.aov(y, g)
}
emperical.bayes.aov <- function(y, g) {
m <- length(unique(g))
a <- anova(lm(y ~ g))
T <- a$"F value"[1]
ybar.j <- tapply(y, g, mean)
ybar.. <- mean(y)
(m-3)/(m-1) * 1/T * ybar.. + (1- ((m-3)/(m-1)*(1/T))) *ybar.j
}
#' @title emperical.bayes.est
emperical.bayes.est <- function(ybar.j, ybar, sigma2) {
ss <- sum( (ybar.j - ybar..)^2 )
( ((m-3)*sigma2) / ss ) * ybar.. + (1 - ((m-3)*sigma2) / ss) * ybar.j
}
#' @title emperical.oneway
#' Pg. 118 BDA3
#' The problems of this approach are detailed under "Difficulty with a natural non-Bayesian estimate
#' of the hyperparameters". In summary, we are ignoring our uncertainty about mu and tau and so we
#' will be over-confident in our estimates of theta.
emperical.oneway <- function(y, g, size) {
ybar.j <- tapply(y, g, mean)
sigma2.j <- tapply(y, g, var) # assume known sigma2
ybar.. <- sum(ybar.j / sigma2.j) / sum(1/sigma2.j) # pooled estimate
# Use ANOVA point estimates for tau2 and mu
a <- anova(lm(y ~ g))
ms.b <- a[1, "Mean Sq"]
ms.w <- a[2, "Mean Sq"]
tau2 <- (ms.b - ms.w) / length(y) * 100
mu <- ybar..
theta.hat <- (ybar.j/sigma2.j + mu/tau2) / (1/sigma2.j + 1/tau2)
V. <- 1 / (1/sigma2.j + 1/tau2)
theta <- (sigma2.j / (sigma2.j + tau2))*mu +
(tau2 / (tau2 + sigma2.j)) * ybar.j
return(theta)
}
#' @title oneway.uniform
#' Pg. 115 BDA3
#' Assumes that sigma2 is known. In this case, use sample estimate.
#' This isn't a horrible assumption, see BDA3 end of Ch 8.
oneway.uniform <- function(y, g, size) {
ybar.j <- tapply(y, g, mean)
sigma2.j <- tapply(y, g, var) # assume known sigma2
tau2 <- sample.oneway.tau2(size, sigma2.max = 120, sigma2.j, ybar.j)
mu <- sample.oneway.mu(size, tau2, ybar.j, sigma2.j)
theta.j <- sample.oneway.theta(size, mu, tau2, ybar.j, sigma2.j)
colnames(theta.j) <- names(ybar.j)
return(list(tau2 = tau2, mu = mu, theta.j = theta.j))
}
#' @title sample.oneway.theta
sample.oneway.theta <- function(size, mu, tau2, ybar.j, sigma2.j) {
theta.est <- t(sapply(1:size, function(i)
oneway.theta(mu[i], tau2[i], ybar.j, sigma2.j)))
t(sapply(1:size, function(i)
sapply(1:length(ybar.j), function(j)
rnorm(1, unlist(theta.est[i, 1])[j], sqrt(unlist(theta.est[i, 2])[j])))))
}
#' @title sample.oneway.mu
sample.oneway.mu <- function(size, tau2, ybar.j, sigma2.j) {
mu.est <- t(sapply(tau2, oneway.mu, ybar.j, sigma2.j))
sapply(1:size, function(i)
rnorm(1, mu.est[i, 1], sqrt(mu.est[i, 2])))
}
#' @title sample.oneway.tau2
sample.oneway.tau2 <- function(size, sigma2.max, sigma2.j, ybar.j) {
tau2.grid <- seq(from = 1, to = sigma2.max, length.out = 1000)
mu.est <- t(sapply(tau2.grid, oneway.mu, ybar.j, sigma2.j))
p.tau2 <- sapply(1:length(tau2.grid), function(i)
p.tau.given.y(tau2 = tau2.grid[i], mu.est = mu.est[i, 1], V.mu = mu.est[i, 2],
sigma2.j = sigma2.j, ybar.j = ybar.j))
#p.tau2 <- sapply(tau2.grid, p.tau.given.y, mu.est[, 1], mu.est[, 2], sigma2.j, ybar.j)
#p.tau2 <- sapply(tau2.grid, oneway.tau2, sigma2.j, ybar.j)
sample(tau2.grid, size, prob = exp(p.tau2 - max(p.tau2)), replace = TRUE)
}
#' @title p.tau.given.y
#' @note Should be changed to log-scale for numerical stability.
p.tau.given.y <- function(tau2, mu.est, V.mu, ybar.j, sigma2.j, p.tau = 1 ) {
#log(p.tau) + 0.5*log(V.mu) +
# sum( -0.5*log(sigma2.j + tau2) + (-(ybar.j - mu.est)^2 / (2*(sigma2.j +tau2))) )
sum(dnorm(ybar.j, mu.est, sqrt(sigma2.j + tau2), log = TRUE)) -
dnorm(mu.est, mu.est, sqrt(V.mu), log = TRUE)
}
#' @title oneway.tau2
#' Pg. 117 BDA3
#' @note Assumes that tau2 \propto 1
oneway.tau2 <- function(tau2, sigma2.j, ybar.j) {
#est <- oneway.mu(tau2, ybar.j, sigma2.j, list = TRUE)
est <- t(sapply(tau2, oneway.mu, ybar.j, sigma2.j))
#sqrt(est$V.mu) * prod( (sigma2.j + tau2)^(-1/2) *
# exp( -(ybar.j - est$mu.hat)^2 / (2*sigma2.j + 2*tau2) ) )
sqrt(est[, 2]) * prod( (sigma2.j + tau2)^(-1/2) *
exp( -(ybar.j - est[, 1])^2 / (2*sigma2.j + 2*tau2) ) )
}
#' @title oneway.theta
#' Pg. 116 BDA3
oneway.theta <- function(mu, tau2, ybar.j, sigma2.j) {
theta.hat.j <- (ybar.j/sigma2.j + mu/tau2) / (1/sigma2.j + 1/tau2)
V.j <- 1 / (1/sigma2.j + 1/tau2)
#theta.hat.j <- outer(mu/tau2, ybar.j/sigma2.j, "+") / outer(1/tau2, 1/sigma2.j, "+")
#V.j <- t(1 / outer(1/sigma2.j, 1/tau2, "+"))
return(list(theta.hat.j = theta.hat.j, V.j = V.j))
}
#' @title oneway.mu
#' Pg. 117 BDA3
oneway.mu <- function(tau2, ybar.j, sigma2.j) {
mu.hat <- sum(ybar.j / (sigma2.j + tau2)) / sum(1 / (sigma2.j + tau2))
V.mu <- 1 / sum(1 / (sigma2.j + tau2))
return(cbind(mu.hat, V.mu))
## mu.hat <- colSums(matrix(1/ybar.j, ncol = 25) %*% outer(sigma2.j, tau2, "+")) /
## colSums(1 / outer(sigma2.j, tau2, "+"))
## V.mu <- 1 / colSums(1 / outer(sigma2.j, tau2, "+"))
##return(list(mu.hat = mu.hat, V.mu = V.mu))
}
#' @title oneway.normal.gibbs.sampler
#' Gibbs sampler for the "one-way normal random effects model with known
#' variance". This is a simple case of hierarchical linear models.
#' @details
#' The following is the code for the Gibbs sampler. The result is iid samples from the posterior
#' distribution (our beliefs about network lift and its uncertainty after seeing the data).
#'
#' Its actually a pretty simple idea: write down the full conditional distributions of the parameters
#' and iteraively generate draws from each using their most recent estimates. By certain Markov
#' properties, the draws converage to draws of the posterior (stationary) as $n \rightarrow \infty$.
#'
#' The model is similar to the Frequentist one-way random effects model (`nlme` and `lme` in R) and
#' assumes that the within-network variance is equal across all networks (which is likely violated in
#' the case of networks like ESPN).
oneway.gibbs.sampler <- function(y = NULL, g, n.samples, n.burnin, priors,
y.bar = NULL, y.s2 = NULL, y.n = NULL) {
stopifnot(is.vector(priors), is.vector(g), n.samples %% 1 == 0, n.burnin %% 1 ==0)
if (is.null(y) && is.null(y.bar))
stop("you must specify either data or summary statistics")
# Keep an extra iter for initial values
n.iters <- n.samples + n.burnin + 1
groups <- unique(g) # group names
m <- length(groups) # number of groups
# Data or sufficient statistics?
use.data <- (is.null(y.bar) || is.null(y.s2) || is.null(y.n))
n <- if (use.data) length(y) else sum(y.n) # number of obs
# Initalize prior parameters using sample estimates
if (use.data) { # Estimate from data
message("Usinig data to calculate summary statistics.")
y.n <- y.s2 <- y.bar <- rep(NA, m)
for (j in 1:m) {
group.data <- y[g == groups[j]]
y.bar[j] <- mean(group.data)
y.s2[j] <- var(group.data)
y.n[j] <- length(group.data)
}
}
# First (init) row of parameter matrix
phi.0 <- c(y.bar, mean(y.s2), mean(y.bar), var(y.bar))
theta.names <- if (!is.null(names(g))) names(g)
else if (is.character(groups)) groups
else sprintf("theta%i", 1:m)
phi.names <- c(theta.names, "sigma2", "mu", "tau2")
# Full parameter matrix [(m + 3) x n.iters]
# Extra three comes from sigma2, mu, and tau2 which don't vary by group
phi <- construct.phi(phi.0, n.iters, n.burnin, phi.names)
for (i in 2:n.iters) {
# Sample theta's from full conditionals
for (j in 1:m) {
theta.var <- 1 / ( y.n[j]/phi$sigma2[i-1] + 1/phi$tau2[i-1] )
theta.mean <- theta.var * ( y.bar[j] * y.n[j] / phi$sigma2[i-1] + phi$mu[i-1]/phi$tau2[i-1] )
phi[i, j] <- rnorm(1, theta.mean, sqrt(theta.var))
}
# Sample sigma2 from full conditional
nu.n <- priors["nu.0"] + n
ss <- priors["nu.0"] * priors["sigma2.0"]
for (j in 1:m) {
ss <- ss + sum( (y[g == groups[j]] - phi[i, j])^2 )
}
phi$sigma2[i] <- 1 / rgamma(1, nu.n/2, ss/2)
# Sample mu from full conditional
mu.var <- 1 / ( m / phi$tau2[i-1] + 1/priors["gamma2.0"] )
mu.mean <- mu.var * (m * rowMeans(phi[i, 1:m]) / phi$tau2[i-1] + priors["mu.0"] / priors["gamma2.0"] )
phi$mu[i] <- rnorm(1, mu.mean, sqrt(mu.var))
# Sample tau2 from full conditional
eta.m <- priors["eta.0"] + m
ss <- priors["eta.0"] * priors["tau2.0"] + sum( (phi[i, 1:m] - phi$mu[i])^2 )
phi$tau2[i] <- 1 / rgamma(1, eta.m/2, ss/2)
}
return(phi)
}
var.ss <- function(sum.x, sum.x.sq, n) {
if (any(n < 2)) stop("n < 2")
var <- (sum.x.sq - (sum.x)^2/n) / (n-1)
if (any(var < 0)) stop("negative variance")
var
}
mean.ss <- function(sum.x, n) sum.x / n
mowd <- function(x) {
ux <- unique(x)
ux[which.max(tabulate(match(x, ux)))]
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.