#' @title Data Augmentation algorithm for multinomial data
#' @description Implement the Data Augmentation algorithm for multvariate multinomial data given
#' observed counts of complete and missing data (\eqn{Y_obs} and \eqn{Y_mis}). Allows for specification
#' of a Dirichlet conjugate prior.
#' @param x_y A \code{data.frame} of observed counts for complete observations.
#' @param z_Os_y A \code{data.frame} of observed marginal-counts for incomplete observations.
#' @param enum_comp A \code{data.frame} specifying a vector of all possible observed patterns.
#' @param conj_prior A string specifying the conjugate prior. One of
#' \code{c("none", "data.dep", "flat.prior", "non.informative")}.
#' @param alpha The vector of counts \eqn{\alpha} for a \eqn{Dir(\alpha)} prior. Must be specified if
#' \code{conj_prior} is either \code{c("data.dep", "flat.prior")}. If \code{flat.prior}, specify
#' as a scalar. If \code{data.dep}, specify as a vector with key matching \code{enum_comp}.
#' @param burnin A scalar specifying the number of iterations to use as a burnin. Defaults
#' to \code{100}.
#' @param post_draws An integer specifying the number of draws from the posterior distribution.
#' Defaults to \code{1000}.
#' @param verbose Logical. If \code{TRUE}, provide verbose output on each iteration.
#' @return An object of class \code{\link{mod_imputeMulti-class}}.
#' @seealso \code{\link{multinomial_em}}, \code{\link{multinomial_impute}}
#'
#' @examples \dontrun{
#' data(tract2221)
#' x_y <- multinomial_stats(tract2221[,1:4], output= "x_y")
#' z_Os_y <- multinomial_stats(tract2221[,1:4], output= "z_Os_y")
#' x_possible <- multinomial_stats(tract2221[,1:4], output= "possible.obs")
#'
#' imputeDA_mle <- multinomial_data_aug(x_y, z_Os_y, x_possible, n_obs= nrow(tract2221),
#' conj_prior= "none", verbose= TRUE)
#' }
#'
#' @export
multinomial_data_aug <- function(x_y, z_Os_y, enum_comp,
conj_prior= c("none", "data.dep", "flat.prior", "non.informative"),
alpha= NULL, burnin= 100, post_draws= 1000,
verbose= FALSE) {
# check some errors
conj_prior <- match.arg(conj_prior, several.ok= FALSE)
if (conj_prior %in% c("data.dep", "flat.prior") & is.null(alpha) ) {
stop("Please supply argument alpha as prior.")
}
mc <- match.call()
z_p <- ncol(z_Os_y)
# 01. Merge in prior if supplied; calculate if requested
#----------------------------------------------
enum_comp <- check_prior(conj_prior= conj_prior, alpha= alpha, verbose= verbose,
outer= FALSE, enum_comp= enum_comp)
if (verbose) print("Setting up Iteration 1.")
# pattern match marginally missing to complete
no_col_XP <- which(names(enum_comp) %in% c("alpha", "theta_y"))
comp_ind <- search_z_Os_y(z_Os_y, enum_comp[,-no_col_XP, with= FALSE])
# 02. I and P Steps
#----------------------------------------------
iter <- 0
while (iter < burnin) {
# I Step
log_lik <- log_lik0 <- 0
enum_comp$counts <- 0
for (s in 1:nrow(z_Os_y)) {
# random allocation of observed marginal counts to complete pattern y
# (x_y| z_Os_y, theta) = \sum_s (Xsy|Zsy, gamma)
# (Xsy|Zy_theta) ~ M(Zsy, gamma)
b_Os_y <- sum(enum_comp$theta_y[unlist(comp_ind[[s]])])
E_Xsy_Zy_theta <- as.vector(stats::rmultinom(1, size= z_Os_y$counts[s],
prob= enum_comp$theta_y[unlist(comp_ind[[s]])] / b_Os_y)) # normalized probability
# expected count += random draw based on marginally-observed
enum_comp$counts[unlist(comp_ind[[s]])] <- enum_comp$counts[unlist(comp_ind[[s]])] + E_Xsy_Zy_theta
# update log-lik
if (b_Os_y > 0) {
log_lik <- log_lik + z_Os_y$counts[s] * log(b_Os_y)
}
}
# expected count += observed counts
enum_comp$counts[as.integer(rownames(x_y))] <- enum_comp$counts[as.integer(rownames(x_y))] + x_y$counts
# update log-lik
log_lik <- log_lik + sum(ifelse(enum_comp$theta_y[as.integer(rownames(x_y))] == 0, 0,
x_y$counts * log(enum_comp$theta_y[as.integer(rownames(x_y))])))
# P Step
if (conj_prior == "none") {
# in case of random zeros: use non-informative prior
enum_comp$theta_y1 <- as.vector(gtools::rdirichlet(n=1, alpha= enum_comp$counts + 1))
} else {
enum_comp$theta_y1 <- as.vector(gtools::rdirichlet(n=1, alpha= enum_comp$counts + enum_comp$alpha))
}
### update iteration; print likelihood if verbose
iter <- iter + 1
if (verbose) {
cat("Iteration", iter, ": log-likelihood =", sprintf("%.10f", log_lik), "... \n")
}
enum_comp$theta_y <- enum_comp$theta_y1
log_lik0 <- log_lik
}
# 03. if iter >= max_iter, exit
# MLE for theta_y is taken to be the mean of n= post_draws draws from the
# posterior distribution
#----------------------------------------------
# update log-lik for prior
if (conj_prior != "none") {
log_lik <- log_lik + sum(ifelse(enum_comp$alpha == 0 | enum_comp$theta_y == 0, 0,
enum_comp$alpha * log(enum_comp$theta_y)))
}
if (conj_prior == "none") {
# in case of random zeros: use non-informative prior
theta_post <- gtools::rdirichlet(n= post_draws, alpha= enum_comp$counts + 1)
enum_comp$theta_y <- colMeans(theta_post)
} else {
theta_post <- gtools::rdirichlet(n= post_draws, alpha= enum_comp$counts + enum_comp$alpha)
enum_comp$theta_y <- colMeans(theta_post)
}
enum_comp$theta_y1 <- NULL
enum_comp$counts <- NULL
mod <- methods::new("mod_imputeMulti",
method= "DA",
mle_call= mc,
mle_iter= iter,
mle_log_lik= log_lik,
mle_cp= conj_prior,
mle_x_y= enum_comp)
# mod <- list(method= "DA",
# mle_call= mc,
# mle_iter= iter,
# mle_log_lik= log_lik,
# mle_cp= conj_prior,
# mle_x_y= enum_comp)
# class(mod) <- "mod_imputeMulti"
return(mod)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.