Nothing
#' Hierarchical Exponential Family Model
#'
#' Produces random samples from the posterior distribution of the parameters
#' of certain hierarchical exponential family models.
#'
#' @param n An integer scalar. The size of the posterior sample required.
#' @param model A character string. Abbreviated name for the
#' response-population distribution combination.
#' For a hierarchical normal model see \code{\link{hanova1}}
#' (hierarchical one-way analysis of variance (ANOVA)).
#' @param data A numeric matrix. The format depends on \code{model}.
#' See \strong{Details}.
#' @param ... Optional further arguments to be passed to
#' \code{\link[rust]{ru}}.
#' @param prior The log-prior for the parameters of the hyperprior
#' distribution. If the user wishes to specify their own prior then
#' \code{prior} must be an object returned from a call to
#' \code{\link{set_user_prior}}.
#' Otherwise, \code{prior} is a character scalar giving the name of the
#' required in-built prior.
#' If \code{prior} is not supplied then a default prior is used.
#' See \strong{Details}.
#' @param hpars A numeric vector. Used to set parameters (if any) in
#' an in-built prior.
#' @param param A character scalar.
#' If \code{param = "trans"} (the default) then the marginal posterior
#' of hyperparameter vector \eqn{\phi} is reparameterized in a way
#' designed to improve the efficiency of sampling from this posterior.
#' If \code{param = "original"} the original parameterization is used.
#' The former tends to make the optimizations involved in the
#' ratio-of-uniforms algorithm more stable and to increase the probability
#' of acceptance, but at the expense of slower function evaluations.
#' @param init A numeric vector of length 2. Optional initial estimates
#' for the search for the mode of the posterior density of the
#' hyperparameter vector \eqn{\phi}.
#' @param nrep A numeric scalar. If \code{nrep} is not \code{NULL} then
#' \code{nrep} gives the number of replications of the original dataset
#' simulated from the posterior predictive distribution.
#' Each replication is based on one of the samples from the posterior
#' distribution. Therefore, \code{nrep} must not be greater than \code{n}.
#' In that event \code{nrep} is set equal to \code{n}.
#' @details Conditional on population-specific parameter vectors
#' \eqn{\theta}1, ..., \eqn{\theta}\eqn{J}
#' the observed \emph{response} data \eqn{y}1, ..., \eqn{y}J within each
#' population are modelled as random samples from a distribution in an
#' exponential family. The population parameters \eqn{\theta}1, ...,
#' \eqn{\theta}\eqn{J} are modelled as random samples from a common
#' \emph{population distribution}, chosen to be conditionally conjugate
#' to the response distribution, with \emph{hyperparameter} vector
#' \eqn{\phi}. Conditionally on
#' \eqn{\theta}1, ..., \eqn{\theta}\eqn{J}, \eqn{y}1, ..., \eqn{y}\eqn{J}
#' are independent of each other and are independent of \eqn{\phi}.
#' A \emph{hyperprior} is placed on \eqn{\phi}. The user can either
#' choose parameter values of a default hyperprior or specify their own
#' hyperprior using \code{\link{set_user_prior}}.
#'
#' The \code{\link[rust]{ru}} function in the \code{\link[rust]{rust}}
#' package is used to draw a random sample
#' from the marginal posterior of the hyperparameter vector \eqn{\phi}.
#' Then, conditional on these values, population parameters are sampled
#' directly from the conditional posterior density of
#' \eqn{\theta}1, ..., \eqn{\theta}\eqn{J} given \eqn{\phi} and the data.
#'
#' We outline each \code{model}, specify the format of the
#' \code{data}, give the default (log-)priors (up to an additive constant)
#' and detail the choices of ratio-of-uniforms parameterization
#' \code{param}.
#'
#' \strong{Beta-binomial:} For \eqn{j = 1, ..., J},
#' \eqn{Yj | pj} are i.i.d binomial\eqn{(nj, pj)},
#' where \eqn{pj} is the probability of success in group \eqn{j}
#' and \eqn{nj} is the number of trials in group \eqn{j}.
#' \eqn{pj} are i.i.d. beta\eqn{(\alpha, \beta)}, so
#' and \eqn{\phi = (\alpha, \beta)}.
#' \code{data} is a 2-column matrix: the numbers of successes in column 1
#' and the corresponding numbers of trials in column 2.
#'
#' \emph{Priors:}
#'
#' \code{prior = "bda"} (the default):
#' \eqn{log \pi(\alpha, \beta) = - 2.5 log(\alpha + \beta),
#' \alpha > 0, \beta > 0.} [See Section 5.3 of Gelman et al. (2014).]
#'
#' \code{prior = "gamma"}: independent gamma priors on \eqn{\alpha}
#' and \eqn{\beta}, i.e.
#' \eqn{log \pi(\alpha, \beta) =
#' (s1 - 1)log\alpha - r1 \alpha +
#' (s2 - 1)log\beta - r2 \beta, \alpha > 0, \beta > 0.}
#' where the respective shape (\eqn{s1}, \eqn{s2}) and rate
#' (\eqn{r1}, \eqn{r2}) parameters are specified using
#' \code{hpars} = \eqn{(s1, r1, s2, r2)}. The default setting is
#' \code{hpars = c(1, 0.01, 1, 0.01).}
#'
#' \emph{Parameterizations for sampling:}
#'
#' \code{param = "original"} is (\eqn{\alpha, \beta}),
#' \code{param = "trans"} (the default) is
#' \eqn{\phi1 = logit(\alpha/(\alpha+\beta)) = log(\alpha/\beta),
#' \phi2 = log(\alpha+\beta)}.
#' See Section 5.3 of Gelman et al. (2014).
#'
#' \strong{Gamma-Poisson:} For \eqn{j = 1, ..., J},
#' \eqn{Yj | \lambda}j are i.i.d Poisson(\eqn{e}j\eqn{\lambda}j),
#' where
#' \eqn{ej} is the \emph{exposure} in group \eqn{j}, based on the
#' total length of observation time and/or size of the population at
#' risk of the event of interest and \eqn{\lambda}j is the mean number
#' of events per unit of exposure.
#' \eqn{\lambda}j are i.i.d. gamma\eqn{(\alpha, \beta)}, so
#' \eqn{\phi = (\alpha, \beta)}.
#' \code{data} is a 2-column matrix: the counts \eqn{yj} of the numbers of
#' events in column 1 and the corresponding exposures \eqn{ej} in column 2.
#'
#' \emph{Priors:}
#'
#' \code{prior = "gamma"} (the default): independent gamma priors
#' on \eqn{\alpha} and \eqn{\beta}, i.e.
#' \eqn{log \pi(\alpha, \beta) =
#' (s1 - 1)log\alpha - r1 \alpha +
#' (s2 - 1)log\beta - r2 \beta, \alpha > 0, \beta > 0.}
#' where the respective shape (\eqn{s1}, \eqn{s2}) and rate
#' (\eqn{r1}, \eqn{r2}) parameters are specified using
#' \code{hpars} = \eqn{(s1, r1, s2, r2)}. The default setting is
#' \code{hpars = c(1, 0.01, 1, 0.01).}
#'
#' \emph{Parameterizations for sampling:}
#'
#' \code{param = "original"} is (\eqn{\alpha, \beta}),
#' \code{param = "trans"} (the default) is
#' \eqn{\phi1 = log(\alpha/\beta), \phi2 = log(\beta).}
#' @return An object (list) of class \code{"hef"}, which has the same
#' structure as an object of class "ru" returned from \code{\link[rust]{ru}}.
#' In particular, the columns of the \code{n}-row matrix \code{sim_vals}
#' contain the simulated values of \eqn{\phi}.
#' In addition this list contains the arguments \code{model}, \code{data}
#' and \code{prior} detailed above, an \code{n} by \eqn{J} matrix
#' \code{theta_sim_vals}: column \eqn{j} contains the simulated values of
#' \eqn{\theta}\eqn{j} and \code{call}: the matched call to \code{hef}.
#'
#' If \code{nrep} is not \code{NULL} then this list also contains
#' \code{data_rep}, a numerical matrix with \code{nrep} columns.
#' Each column contains a replication of the first column of the original
#' data \code{data[, 1]}, simulated from the posterior predictive
#' distribution.
#' @references Gelman, A., Carlin, J. B., Stern, H. S. Dunson, D. B.,
#' Vehtari, A. and Rubin, D. B. (2014) \emph{Bayesian Data Analysis}.
#' Chapman & Hall / CRC.
#' \url{http://www.stat.columbia.edu/~gelman/book/}
#' @seealso The \code{\link[rust]{ru}} function in the \code{\link[rust]{rust}}
#' package for details of the arguments that can be passed to \code{ru} via
#' \code{hef}.
#' @seealso \code{\link{hanova1}} for hierarchical one-way analysis of
#' variance (ANOVA).
#' @seealso \code{\link{set_user_prior}} to set a user-defined prior.
#' @examples
#' ############################ Beta-binomial #################################
#'
#' # ------------------------- Rat tumor data ------------------------------- #
#'
#' # Default prior, sampling on (rotated) (log(mean), log(alpha + beta)) scale
#' rat_res <- hef(model = "beta_binom", data = rat)
#' \donttest{
#' # Hyperparameters alpha and beta
#' plot(rat_res)
#' # Parameterization used for sampling
#' plot(rat_res, ru_scale = TRUE)
#' }
#' summary(rat_res)
#'
#' # Choose rats with extreme sample probabilities
#' pops <- c(which.min(rat[, 1] / rat[, 2]), which.max(rat[, 1] / rat[, 2]))
#' # Population-specific posterior samples: separate plots
#' plot(rat_res, params = "pop", plot_type = "both", which_pop = pops)
#' # Population-specific posterior samples: one plot
#' plot(rat_res, params = "pop", plot_type = "dens", which_pop = pops,
#' one_plot = TRUE, add_legend = TRUE)
#'
#' # Default prior, sampling on (rotated) (alpha, beta) scale
#' rat_res <- hef(model = "beta_binom", data = rat, param = "original")
#' \donttest{
#' plot(rat_res)
#' plot(rat_res, ru_scale = TRUE)
#' }
#' summary(rat_res)
#'
#' # To produce a plot akin to Figure 5.3 of Gelman et al. (2014) we
#' # (a) Use the same prior for (alpha, beta)
#' # (b) Don't use axis rotation (rotate = FALSE)
#' # (c) Plot on the scale used for ratio-of-uniforms sampling (ru_scale = TRUE)
#' # (d) Note that the mode is relocated to (0, 0) in the plot
#' rat_res <- hef(model = "beta_binom", data = rat, rotate = FALSE)
#' \donttest{
#' plot(rat_res, ru_scale = TRUE)
#' }
#' # This is the estimated location of the posterior mode
#' rat_res$f_mode
#'
#' # User-defined prior, passing parameters
#' # (equivalent to prior = "gamma" with hpars = c(1, 0.01, 1, 0.01))
#' user_prior <- function(x, hpars) {
#' return(dexp(x[1], hpars[1], log = TRUE) + dexp(x[2], hpars[2], log = TRUE))
#' }
#' user_prior_fn <- set_user_prior(user_prior, hpars = c(0.01, 0.01))
#' rat_res <- hef(model = "beta_binom", data = rat, prior = user_prior_fn)
#' \donttest{
#' plot(rat_res)
#' }
#' summary(rat_res)
#'
#' ############################ Gamma-Poisson #################################
#'
#' # ------------------------ Pump failure data ------------------------------ #
#'
#' pump_res <- hef(model = "gamma_pois", data = pump)
#' # Hyperparameters alpha and beta
#' \donttest{
#' plot(pump_res)
#' }
#' # Parameterization used for sampling
#' plot(pump_res, ru_scale = TRUE)
#' summary(pump_res)
#'
#' # Choose pumps with extreme sample rates
#' pops <- c(which.min(pump[, 1] / pump[, 2]), which.max(pump[, 1] / pump[, 2]))
#' plot(pump_res, params = "pop", plot_type = "dens", which_pop = pops)
#' @export
hef <- function(n = 1000, model = c("beta_binom", "gamma_pois"),
data, ..., prior = "default", hpars = NULL,
param = c("trans", "original"), init = NULL, nrep = NULL) {
model <- match.arg(model)
param <- match.arg(param)
the_call <- match.call()
#
# Calculate the data summaries required in the posterior
# and check for posterior propriety, where possible
ds <- switch(model,
beta_binom = binomial_data(data, prior),
gamma_pois = poisson_data(data))
#
# Create a list that defines the prior and any parameters in the prior
prior <- check_prior(prior, model, hpars)
#
# Set the function to calculate the log-likelihood
loglik_fn <- switch(model,
beta_binom = beta_binom_marginal_loglik,
gamma_pois = gamma_pois_marginal_loglik)
#
# Set a model-specific log-posterior
logpost <- function(x, ds) {
loglik <- do.call(loglik_fn, c(list(x = x), ds))
if (is.infinite(loglik)) {
return(loglik)
}
logprior <- do.call(prior$prior, c(list(x), prior[-1]))
return(loglik + logprior)
}
# Extract arguments to be passed to ru() and set default values for
# trans and rotate if they have not been supplied.
ru_args <- list(...)
if (is.null(ru_args$trans)) {
if (param == "original") {
ru_args$trans <- "none"
} else {
ru_args$trans <- "user"
}
}
if (is.null(ru_args$rotate)) {
ru_args$rotate <- TRUE
}
# Calculate initial estimates
if (is.null(init)) {
init <- switch(model,
beta_binom = beta_init_ests(data, param = param),
gamma_pois = gamma_init_ests(data, param = param))
} else {
if (length(init) != 2) {
stop("init must have length 2")
}
}
#
# Create list of objects to send to function ru()
fr_list <- list(param = param)
fr <- switch(model,
beta_binom = do.call(beta_create_ru_list, fr_list),
gamma_pois = do.call(gamma_create_ru_list, fr_list))
#
# Set ru_args$n_grid and ru_args$ep_bc to NULL just in case they have been
# specified in ...
ru_args$n_grid <- NULL
ru_args$ep_bc <- NULL
for_ru <- c(list(logf = logpost, ds = ds), fr, list(init = init, n = n),
ru_args)
#
# If we use the transformed parameterization then set the transformation
# and the log-Jacobian of this transformation.
if (param == "trans") {
phi_to_theta <- switch(model,
beta_binom = beta_phi_to_theta,
gamma_pois = gamma_phi_to_theta)
log_j <- switch(model,
beta_binom = beta_log_j,
gamma_pois = gamma_log_j)
for_ru <- c(for_ru, list(phi_to_theta = phi_to_theta, log_j = log_j))
}
res <- do.call(rust::ru, for_ru)
#
# Sample from the conditional posterior distribution of the population
# parameters given the hyperparameters and the data
temp <- switch(model,
beta_binom = beta_binom_cond_sim(res$sim_vals, data, n),
gamma_pois = gamma_pois_cond_sim(res$sim_vals, data, n))
res <- c(res, temp)
#
# Add information about the model, the data and the prior
res$model <- model
res$data <- data
res$prior <- prior
res$ru <- 1:2
#
# Simulate nrep datasets from the posterior predictive distribution
if (!is.null(nrep)) {
nrep <- min(nrep, n)
res$data_rep <- switch(model,
beta_binom = sim_pred_beta_binom(res$theta_sim_vals,
data, nrep),
gamma_pois = sim_pred_gamma_pois(res$theta_sim_vals,
data, nrep))
# Transpose, so that the replicates are in the rows
res$data_rep <- t(res$data_rep)
}
res$call <- the_call
class(res) <- "hef"
return(res)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.