Nothing
#' @title Simulate model coefficients after multiple imputation
#'
#' @description `misim()` simulates model parameters from multivariate normal or t distributions after multiple imputation that are then used by [sim_apply()] to calculate quantities of interest.
#'
#' @param fitlist a list of model fits, one for each imputed dataset, or a `mira` object (the output of a call to `with()` applied to a `mids` object in `mice`).
#' @param n the number of simulations to run for each imputed dataset; default is 1000. More is always better but resulting calculations will take longer.
#' @param vcov a square covariance matrix of the coefficient covariance estimates, a function to use to extract it from `fit`, or a list thereof with an element for each imputed dataset. By default, uses [stats::vcov()] or [insight::get_varcov()] if that doesn't work.
#' @param coefs a vector of coefficient estimates, a function to use to extract it from `fit`, or a list thereof with an element for each imputed dataset. By default, uses [stats::coef()] or [insight::get_parameters()] if that doesn't work.
#' @param dist a character vector containing the name of the multivariate distribution(s) to use to draw simulated coefficients. Should be one of `"normal"` (multivariate normal distribution) or `"t_{#}"` (multivariate t distribution), where `{#}` corresponds to the desired degrees of freedom (e.g., `"t_100"`). If `NULL`, the right distributions to use will be determined based on heuristics; see [sim()] for details.
#'
#' @return
#' A `clarify_misim` object, which inherits from `clarify_sim` and has the following components:
#' \item{sim.coefs}{a matrix containing the simulated coefficients with a column for each coefficient and a row for each simulation for each imputation}
#' \item{coefs}{a matrix containing the original coefficients extracted from `fitlist` or supplied to `coefs`, with a row per imputation.}
#' \item{fit}{the list of model fits supplied to `fitlist`}
#' \item{imp}{a identifier of which imputed dataset each set of simulated coefficients corresponds to.}
#' The `"dist"` attribute contains `"normal"` if the coefficients were sampled from a multivariate normal distribution and `"t({df})"` if sampled from a multivariate t distribution. The `"clarify_hash"` attribute contains a unique hash generated by [rlang::hash()].
#'
#' @details
#' `misim()` essentially combines multiple `sim()` calls applied to a list of model fits, each fit in an imputed dataset, into a single combined pool of simulated coefficients. When simulation-based inference is to be used with multiply imputed data, many imputations are required; see Zhou and Reiter (2010).
#'
#' @references
#'
#' Zhou, X., & Reiter, J. P. (2010). A Note on Bayesian Inference After Multiple Imputation. *The American Statistician*, 64(2), 159–163. \doi{10.1198/tast.2010.09109}
#'
#' @examplesIf requireNamespace("Amelia", quietly = TRUE)
#' data("africa", package = "Amelia")
#'
#' # Multiple imputation using Amelia
#' a.out <- Amelia::amelia(x = africa, m = 10,
#' cs = "country",
#' ts = "year", logs = "gdp_pc",
#' p2s = 0)
#'
#' fits <- with(a.out, lm(gdp_pc ~ infl * trade))
#'
#' # Simulate coefficients
#' s <- misim(fits)
#' s
#'
#' @seealso
#' * [sim()] for simulating model coefficients for a single dataset
#' * [sim_apply()] for applying a function to each set of simulated coefficients
#' * [sim_ame()] for computing average marginal effects in each simulation draw
#' * [sim_setx()] for computing marginal predictions and first differences at typical values in each simulation draw
#' @export
#'
misim <- function(fitlist,
n = 1e3,
vcov = NULL,
coefs = NULL,
dist = NULL) {
if (missing(fitlist)) fitlist <- NULL
if (inherits(fitlist, "mira")) {
fitlist <- fitlist$analyses
}
if (is.null(fitlist)) {
if (is.null(coefs) || is.null(vcov)) {
.err("when `fitlist` is not supplied, arguments must be supplied to both `coefs` and `vcov`")
}
if (!is.list(coefs) && !is.list(vcov)) {
.err("when `fitlist` is not supplied, at least one of `coefs` or `vcov` must be a list")
}
nimp <- if (!is.list(coefs)) length(vcov) else length(coefs)
}
else {
check_fitlist(fitlist)
nimp <- length(fitlist)
}
chk::chk_count(n)
if (!is.list(coefs)) {
coefs <- lapply(seq_len(nimp), function(i) coefs)
}
else if (length(coefs) != nimp) {
if (is.null(fitlist)) {
.err("when `fitlist` is not supplied and `coefs` is supplied as a list, `coefs` must have as many entries as there are entries in `vcov`")
}
else {
.err("when supplied as a list, `coefs` must have as many entries as there are models in `fitlist`")
}
}
coef_supplied <- {
if (all(vapply(coefs, is.null, logical(1L)))) "null"
else if (all(vapply(coefs, is.function, logical(1L)))) "fun"
else if (all(vapply(coefs, check_valid_coef, logical(1L)))) "num"
else {
.err("`coefs` must be a vector of coefficients, a function that extracts one from each model in `fitlist`, or a list thereof")
}
}
if (!is.list(vcov)) {
vcov <- lapply(seq_len(nimp), function(i) vcov)
}
else if (length(vcov) != nimp) {
if (is.null(fitlist)) {
.err("when `fitlist` is not supplied and `vcov` is supplied as a list, `vcov` must have as many entries as there are entries in `coefs`")
}
else {
.err("when supplied as a list, `vcov` must have as many entries as there are models in `fitlist`")
}
}
vcov_supplied <- {
if (all(vapply(vcov, is.null, logical(1L)))) "null"
else if (all(vapply(vcov, is.matrix, logical(1L)))) "num"
else "marginaleffects_code"
}
for (i in seq_len(nimp)) {
coefs[[i]] <- process_coefs(coefs[[i]], fitlist[[i]], coef_supplied)
}
for (i in seq_len(nimp)) {
vcov[[i]] <- process_vcov(vcov[[i]], fitlist[[i]], vcov_supplied)
}
check_coefs_vcov_length_mi(vcov, coefs, vcov_supplied, coef_supplied)
chk::chk_count(n)
if (!is.null(dist)) {
if (length(dist) == 1) {
dist <- lapply(seq_len(nimp), function(i) dist)
}
else if (length(dist) != nimp) {
.err("when supplied as a vector, `dist` must have as many values as there are imputations")
}
else {
dist <- as.list(dist)
}
}
samplers <- lapply(seq_len(nimp), function(i) {
get_sampling_dist(fitlist[[i]], dist[[i]])
})
sim.coefs <- do.call("rbind", lapply(seq_len(nimp), function(i) {
samplers[[i]](n, coefs[[i]], vcov[[i]])
}))
out <- list(sim.coefs = sim.coefs,
coefs = do.call("rbind", coefs),
fit = fitlist,
imp = rep(seq_len(nimp), each = n))
dists <- unlist(lapply(samplers, attr, "dist"))
if (all_the_same(dists)) dists <- dists[1]
attr(out, "dist") <- dists
attr(out, "use_fit") <- !is.null(fitlist)
attr(out, "sim_hash") <- rlang::hash(out$sim.coefs)
class(out) <- c("clarify_misim", "clarify_sim")
out
}
#' @export
print.clarify_misim <- function(x, ...) {
obj <- deparse1(substitute(x))
cat("A `clarify_misim` object\n")
cat(sprintf(" - %s coefficients, %s imputations with %s simulated values each\n",
ncol(x$sim.coefs), nrow(x$coefs), nrow(x$sim.coefs) / nrow(x$coefs)))
cat(" - sampled distributions: ")
if (length(attr(x, "dist")) == 1) {
cat(sprintf("multivariate %s\n", attr(x, "dist")))
}
else {
cat("multiple different multivariate distributions")
if (exists(obj)) {
cat(sprintf(" (use `attr(%s, \"dist\") to view them\n"), obj)
}
else {
cat("\n")
}
}
invisible(x)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.