Nothing
#' Simulate model coefficients after multiple imputation
#'
#' `misim()` simulates model parameters from multivariate normal or t distributions after multiple imputation that are then used by [sim_apply()] to calculate quantities of interest.
#'
#' @param fitlist a list of model fits, one for each imputed dataset, or a `mira` object (the output of a call to `with()` applied to a `mids` object in `mice`).
#' @param n the number of simulations to run for each imputed dataset; default is 1000. More is always better but resulting calculations will take longer.
#' @param vcov a square covariance matrix of the coefficient covariance estimates, a function to use to extract it from `fit`, or a list thereof with an element for each imputed dataset. By default, uses [stats::vcov()] or [insight::get_varcov()] if that doesn't work.
#' @param coefs a vector of coefficient estimates, a function to use to extract it from `fit`, or a list thereof with an element for each imputed dataset. By default, uses [stats::coef()] or [insight::get_parameters()] if that doesn't work.
#' @param dist a character vector containing the name of the multivariate distribution(s) to use to draw simulated coefficients. Should be one of `"normal"` (multivariate normal distribution) or `"t_{#}"` (multivariate t distribution), where `{#}` corresponds to the desired degrees of freedom (e.g., `"t_100"`). If `NULL`, the right distributions to use will be determined based on heuristics; see [sim()] for details.
#'
#' @returns
#' A `clarify_misim` object, which inherits from `clarify_sim` and has the following components:
#' \item{sim.coefs}{a matrix containing the simulated coefficients with a column for each coefficient and a row for each simulation for each imputation}
#' \item{coefs}{a matrix containing the original coefficients extracted from `fitlist` or supplied to `coefs`, with a row per imputation.}
#' \item{fit}{the list of model fits supplied to `fitlist`}
#' \item{imp}{a identifier of which imputed dataset each set of simulated coefficients corresponds to.}
#' The `"dist"` attribute contains `"normal"` if the coefficients were sampled from a multivariate normal distribution and `"t({df})"` if sampled from a multivariate t distribution. The `"clarify_hash"` attribute contains a unique hash generated by [rlang::hash()].
#'
#' @details
#' `misim()` essentially combines multiple `sim()` calls applied to a list of model fits, each fit in an imputed dataset, into a single combined pool of simulated coefficients. When simulation-based inference is to be used with multiply imputed data, many imputations are required; see Zhou and Reiter (2010).
#'
#' @references
#' Zhou, X., & Reiter, J. P. (2010). A Note on Bayesian Inference After Multiple Imputation. *The American Statistician*, 64(2), 159–163. \doi{10.1198/tast.2010.09109}
#'
#' @seealso
#' * [sim()] for simulating model coefficients for a single dataset
#' * [sim_apply()] for applying a function to each set of simulated coefficients
#' * [sim_ame()] for computing average marginal effects in each simulation draw
#' * [sim_setx()] for computing marginal predictions and first differences at typical values in each simulation draw
#'
#' @examplesIf requireNamespace("Amelia", quietly = TRUE)
#' data("africa", package = "Amelia")
#'
#' # Multiple imputation using Amelia
#' a.out <- Amelia::amelia(x = africa, m = 10,
#' cs = "country",
#' ts = "year", logs = "gdp_pc",
#' p2s = 0)
#'
#' fits <- with(a.out, lm(gdp_pc ~ infl * trade))
#'
#' # Simulate coefficients
#' s <- misim(fits)
#' s
#' @export
misim <- function(fitlist, n = 1e3, vcov = NULL, coefs = NULL, dist = NULL) {
if (missing(fitlist)) {
fitlist <- NULL
}
if (inherits(fitlist, "mira")) {
fitlist <- fitlist$analyses
}
if (is_null(fitlist)) {
if (is_null(coefs) || is_null(vcov)) {
.err("when `fitlist` is not supplied, arguments must be supplied to both `coefs` and `vcov`")
}
if (!is.list(coefs) && !is.list(vcov)) {
.err("when `fitlist` is not supplied, at least one of `coefs` or `vcov` must be a list")
}
nimp <- if (is.list(coefs)) length(coefs) else length(vcov)
}
else {
check_fitlist(fitlist)
nimp <- length(fitlist)
}
chk::chk_count(n)
if (!is.list(coefs)) {
coefs <- rep.int(list(coefs), nimp)
}
else if (length(coefs) != nimp) {
if (is_null(fitlist)) {
.err("when `fitlist` is not supplied and `coefs` is supplied as a list, `coefs` must have as many entries as there are entries in `vcov`")
}
else {
.err("when supplied as a list, `coefs` must have as many entries as there are models in `fitlist`")
}
}
coef_supplied <- {
if (all(lengths(coefs) == 0L)) "null"
else if (all_apply(coefs, is.function)) "fun"
else if (all_apply(coefs, check_valid_coef)) "num"
else {
.err("`coefs` must be a vector of coefficients, a function that extracts one from each model in `fitlist`, or a list thereof")
}
}
if (!is.list(vcov)) {
vcov <- rep.int(list(vcov), nimp)
}
else if (length(vcov) != nimp) {
if (is_null(fitlist)) {
.err("when `fitlist` is not supplied and `vcov` is supplied as a list, `vcov` must have as many entries as there are entries in `coefs`")
}
else {
.err("when supplied as a list, `vcov` must have as many entries as there are models in `fitlist`")
}
}
vcov_supplied <- {
if (all_apply(vcov, is_null)) "null"
else if (all_apply(vcov, is.matrix)) "num"
else "marginaleffects_code"
}
for (i in seq_len(nimp)) {
coefs[[i]] <- process_coefs(coefs[[i]], fitlist[[i]], coef_supplied)
}
for (i in seq_len(nimp)) {
vcov[[i]] <- process_vcov(vcov[[i]], fitlist[[i]], vcov_supplied)
}
check_coefs_vcov_length_mi(vcov, coefs, vcov_supplied, coef_supplied)
chk::chk_count(n)
if (is_not_null(dist)) {
if (length(dist) == 1L) {
dist <- rep.int(list(dist), nimp)
}
else if (length(dist) == nimp) {
dist <- as.list(dist)
}
else {
.err("when supplied as a vector, `dist` must have as many values as there are imputations")
}
}
samplers <- lapply(seq_len(nimp), function(i) {
get_sampling_dist(fitlist[[i]], dist[[i]])
})
sim.coefs <- do.call("rbind", lapply(seq_len(nimp), function(i) {
samplers[[i]](n, coefs[[i]], vcov[[i]])
}))
out <- list(sim.coefs = sim.coefs,
coefs = do.call("rbind", coefs),
fit = fitlist,
imp = rep(seq_len(nimp), each = n))
dists <- lapply(samplers, .attr, "dist") |>
unlist()
if (all_the_same(dists)) {
dists <- dists[1L]
}
attr(out, "dist") <- dists
attr(out, "use_fit") <- is_not_null(fitlist)
attr(out, "sim_hash") <- rlang::hash(out$sim.coefs)
class(out) <- c("clarify_misim", "clarify_sim")
out
}
#' @exportS3Method print clarify_misim
print.clarify_misim <- function(x, ...) {
obj <- deparse1(substitute(x))
cat("A `clarify_misim` object\n")
cat(sprintf(" - %s coefficients, %s imputations with %s simulated values each\n",
ncol(x$sim.coefs), nrow(x$coefs), nrow(x$sim.coefs) / nrow(x$coefs)))
cat(" - sampled distributions: ")
if (length(.attr(x, "dist")) == 1L) {
cat(sprintf("multivariate %s\n", .attr(x, "dist")))
}
else {
cat("multiple different multivariate distributions")
if (exists(obj)) {
cat(sprintf(' (use `attr(%s, "dist") to view them\n', obj))
}
else {
cat("\n")
}
}
invisible(x)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.