R/binom_beta.R

Defines functions sim_pred_beta_binom beta_binom_cond_sim check_beta_binom_marginal_loglik beta_binom_marginal_loglik binomial_data

Documented in sim_pred_beta_binom

#------------------------------- Beta-binomial -------------------------------#

# Calculate sufficient statistics

binomial_data <- function(data, prior) {
  if (!is.matrix(data) & !is.data.frame(data)) {
    stop("beta_binom: data must be a matrix or a data frame")
  }
  if (ncol(data) != 2) {
    stop("beta_binom: data must have 2 columns")
  }
  if (any(!is_wholenumber(data))) {
    stop("beta_binom: the data must be whole numbers")
  }
  n <- data[, 2]
  if (any(n <= 0)) {
    stop("beta_binom: the values in data column 2 (n) must be positive")
  }
  y <- data[, 1]
  if (any(y < 0)) {
    stop("beta_binom: the values in data column 1 (y) must be non-negative")
  }
  if (any(y > n)) {
    stop("beta_binom: in data column 1 (y) cannot exceed column 2 (n), rowwise")
  }
  # If a default improper prior is used then check for posterior propriety
  if (is.character(prior)) {
    if (prior == "bda" | prior == "default") {
       if (all(pmin(y, n - y) == 0)) {
         stop("''bda'': improper posterior unless 0 < y < n at least once")
       }
    }
  }
  # Calculate the values needed in beta_binom_marginal_loglik
  ny <- n - y
  tab_y <- table(y[y > 0])
  tab_ny <- table(ny[ny > 0])
  tab_n <- table(n)
  y_vals <- as.numeric(names(tab_y))
  w_y <- as.numeric(tab_y)
  y_mat <- cbind(y_vals, w_y)
  ny_vals <- as.numeric(names(tab_ny))
  w_ny <- as.numeric(tab_ny)
  ny_mat <- cbind(ny_vals, w_ny)
  n_vals <- as.numeric(names(tab_n))
  w_n <- as.numeric(tab_n)
  n_mat <- cbind(n_vals, w_n)
  #
  return(list(y_mat = y_mat, ny_mat = ny_mat, n_mat = n_mat))
}

# Marginal log-likelihood
# posterior for (alpha, beta) not including the prior for (alpha, beta)
# Note: we need to be careful to avoid underflow when either or both
# alpha and beta are very large

beta_binom_marginal_loglik <- function(x, y_mat, ny_mat, n_mat) {
  if (any(x <= 0)) {
    return(-Inf)
  }
  s <- x[1] + x[2]
  mu <- x[1] / s
  f1 <- function(y) {
    return(sum(log(mu + (y - 1:y) / s)))
  }
  t1 <- sum(y_mat[, 2] * vapply(y_mat[, 1], f1, 0))
  f2 <- function(ny) {
    return(sum(log(1 - mu + (ny - 1:ny) / s)))
  }
  t2 <- sum(ny_mat[, 2] * vapply(ny_mat[, 1], f2, 0))
  f3 <- function(n) {
    return(sum(log(1 + (n - 1:n) / s)))
  }
  t3 <- sum(n_mat[, 2] * vapply(n_mat[, 1], f3, 0))
  return(t1 + t2 - t3)
}

# Obvious coding - used only by testthat to test that
# beta_binom_marginal_loglik is correct

check_beta_binom_marginal_loglik <- function(x, y, n) {
  if (any(x <= 0)) {
    return(-Inf)
  }
  return(sum(lbeta(y + x[1], n - y + x[2]) - lbeta(x[1], x[2])))
}

# Sample from the conditional posterior distribution of the population
# parameters given the hyperparameters and the data

beta_binom_cond_sim <- function(x, data, n_sim) {
  alpha <- x[, 1]
  beta <- x[, 2]
  y <- data[, 1]
  n <- data[, 2]
  len_y <- length(y)
  theta_sim_vals <- matrix(NA, ncol = len_y, nrow = n_sim)
  for (i in 1:len_y) {
    theta_sim_vals[, i] <- stats::rbeta(n_sim, alpha + y[i],
                                        beta + n[i] - y[i])
  }
  colnames(theta_sim_vals) <- paste("p[",1:len_y,"]", sep = "")
  return(list(theta_sim_vals = theta_sim_vals))
}

# --------------------------- sim_pred_beta_binom --------------------------- #

#' Simulate from a beta-binomial posterior predictive distribution
#'
#' Simulates \code{nrep} draws from the posterior predictive distribution
#' of the beta-binomial model described in \code{\link{hef}}.
#' This function is called within \code{\link{hef}} when the argument
#' \code{nrep} is supplied.
#' @param theta_sim_vals A numeric matrix with \code{nrow(data)} columns.
#'   Each row of \code{theta_sim_vals} contains binomial success probabilities
#'   simulated from their posterior distribution.
#' @param data A 2-column numeric matrix: the numbers of successes in column 1
#'   and the corresponding numbers of trials in column 2.
#' @param nrep A numeric scalar.  The number of replications of the original
#'   dataset simulated from the posterior predictive distribution.
#'   If \code{nrep} is greater than \code{nrow(theta_sim_vals)} then
#'   \code{nrep} is set equal to \code{nrow(theta_sim_vals)}.
#' @return A numeric matrix with \code{nrep} columns.  Each column contains
#'   a draw from the posterior predictive distribution of the number of
#'   successes.
#' @examples
#' rat_res <- hef(model = "beta_binom", data = rat)
#' rat_sim_pred <- sim_pred_beta_binom(rat_res$theta_sim_vals, rat, 50)
#' @export
sim_pred_beta_binom <- function(theta_sim_vals, data, nrep) {
  nrep <- min(nrep, nrow(theta_sim_vals))
  # Extract the first nrep rows from the posterior sample of thetas
  thetas <- theta_sim_vals[1:nrep, , drop = FALSE]
  # Function to simulate one set of data
  bin_fn <- function(x) {
    return(stats::rbinom(n = length(data[, 2]), size = data[, 2], prob = x))
  }
  return(apply(thetas, 1, bin_fn))
}
paulnorthrop/bang documentation built on Dec. 11, 2023, 11:10 p.m.