Nothing
#' Apply a function to simulated parameter values
#'
#' `sim_apply()` applies a function that produces quantities of interest to each set of simulated coefficients produced by [sim()]; these calculated quantities form the posterior sampling distribution for the quantities of interest. Capabilities are available for parallelization.
#'
#' @param sim a `clarify_sim` object; the output of a call to [sim()] or [misim()].
#' @param FUN a function to be applied to each set of simulated coefficients. See Details.
#' @param verbose `logical`; whether to display a text progress bar indicating progress and estimated time remaining for the procedure. Default is `TRUE`.
#' @param cl a cluster object created by [parallel::makeCluster()], or an integer to indicate the number of child-processes (integer values are ignored on Windows) for parallel evaluations. See [pbapply::pblapply()] for details. If `NULL`, no parallelization will take place.
#' @param ... for `sim_apply()`, optional arguments passed to `FUN`. For `print()`, ignored.
#' @param x a `clarify_est` object.
#' @param digits the minimum number of significant digits to be used; passed to [print.data.frame()].
#' @param max.ests the maximum number of estimates to display.
#'
#' @details
#' `sim_apply()` applies a function, `FUN`, to each set of simulated coefficients, similar to [apply()]. This function should return a numeric vector containing one or more estimated quantities. This should be a named vector to more easily keep track of the meaning of each estimated quantity. Care should be taken to ensure that the returned vector is the same length each time `FUN` is called. `NA`s are allowed in the output but should be avoided if possible.
#'
#' The arguments to `FUN` can be specified in a few ways. If `FUN` has an
#' argument called `coefs`, a simulated set of coefficients will be passed to
#' this argument, and `FUN` should compute and return a quantity based on the
#' coefficients (e.g., the difference between two coefficients if one wants to
#' test whether two coefficients are equal). If `FUN` has an argument called
#' `fit`, a model fit object of the same type as the one originally supplied
#' to `sim()` (e.g., an `lm` or `glm` object) will be passed to this argument,
#' where the coefficients of the fit object have been replaced by the
#' simulated coefficients generated by `sim()`, and `FUN` should compute and
#' return a quantity based on the model fit (e.g., a computation based on the
#' output of `predict()`). If neither `coefs` nor `fit` are the names of
#' arguments to `FUN`, the model fit object with replaced coefficients will be
#' supplied to the first argument of `FUN`.
#'
#' When custom coefficients are supplied to `sim()`, i.e., when the `coefs`
#' argument to `sim()` is not left at its default value, `FUN` must accept a
#' `coefs` argument and a warning will be thrown if it accepts a `fit`
#' argument. This is because `sim_apply()` does not know how to reconstruct
#' the original fit object with the new coefficients inserted. The quantities
#' computed by `sim_apply()` must therefore be computed directly from the
#' coefficients.
#'
#' If `FUN` is not supplied at all, the simulated values of the coefficients will be returned in the output with a warning. Set `FUN` to `NULL` or `verbose` to `FALSE` to suppress this warning.
#'
#' ## `sim_apply()` with multiply imputed data
#'
#' When using [misim()] and `sim_apply()` with multiply imputed data, the
#' coefficients are supplied to the model fit corresponding to the imputation
#' identifier associated with each set of coefficients, which means if `FUN`
#' uses a dataset extracted from a model (e.g., using [insight::get_data()]), it will do so from the model fit in
#' the corresponding imputation.
#'
#' The original estimates (see Value below) are computed as the mean of the
#' estimates across the imputations using the original coefficients averaged
#' across imputations. That is, first, the coefficients estimated in the
#' models in the imputed datasets are combined to form a single set of pooled
#' coefficients; then, for each imputation, the quantities of interest are
#' computed using the pooled coefficients; finally, the mean of the resulting
#' estimates across the imputations are taken as the "original" estimates.
#' Note this procedure is only valid for quantities with symmetric sampling
#' distributions, which excludes quantities like risk ratios and odds ratios,
#' but includes log risk ratios and log odds ratios. The desired quantities
#' can be transformed from their log versions using
#' [transform()].
#'
#' @returns
#' A `clarify_est` object, which is a matrix with a column for each
#' estimated quantity and a row for each simulation. The original estimates
#' (`FUN` applied to the original coefficients or model fit object) are stored
#' in the attribute `"original"`. The `"sim_hash"` attribute contains the
#' simulation hash produced by `sim()`.
#'
#' @seealso
#' * [sim()] for generating the simulated coefficients
#' * [summary.clarify_est()] for computing p-values and confidence intervals for
#' the estimated quantities
#' * [plot.clarify_est()] for plotting estimated
#' quantities and their simulated posterior sampling distribution.
#'
#' @examplesIf rlang::is_installed("MatchIt")
#' data("lalonde", package = "MatchIt")
#' fit <- lm(re78 ~ treat + age + race + nodegree + re74,
#' data = lalonde)
#' coef(fit)
#'
#' set.seed(123)
#' s <- sim(fit, n = 500)
#'
#' # Function to compare predicted values for two units
#' # using `fit` argument
#' sim_fun <- function(fit) {
#' pred1 <- unname(predict(fit, newdata = lalonde[1,]))
#' pred2 <- unname(predict(fit, newdata = lalonde[2,]))
#' c(pred1 = pred1, pred2 = pred2)
#' }
#'
#' est <- sim_apply(s, sim_fun, verbose = FALSE)
#'
#' # Add difference between predicted values as
#' # additional quantity
#' est <- transform(est, `diff 1-2` = pred1 - pred2)
#'
#' # Examine estimates and confidence intervals
#' summary(est)
#'
#' # Function to compare coefficients using `coefs`
#' # argument
#' sim_fun <- function(coefs) {
#' setNames(coefs["racewhite"] - coefs["racehispan"],
#' "wh - his")
#' }
#'
#' est <- sim_apply(s, sim_fun, verbose = FALSE)
#'
#' # Examine estimates and confidence intervals
#' summary(est)
#'
#' # Another way to do the above:
#' est <- sim_apply(s, FUN = NULL)
#' est <- transform(est,
#' `wh - his` = `racewhite` - `racehispan`)
#'
#' summary(est, parm = "wh - his")
#' @export
sim_apply <- function(sim,
FUN,
verbose = TRUE,
cl = NULL,
...) {
chk::chk_is(sim, "clarify_sim")
chk::chk_flag(verbose)
missing.fun <- missing(FUN)
if (missing.fun || is_null(FUN)) {
if (verbose && missing.fun) {
.wrn("`FUN` not supplied; returning simulated coefficients")
}
ests <- sim$sim.coefs
attr(ests, "original") <- {
if (inherits(sim, "clarify_misim")) colMeans(sim$coefs)
else sim$coefs
}
attr(ests, "sim_hash") <- .attr(sim, "sim_hash")
class(ests) <- c("clarify_est", class(ests))
return(ests)
}
FUN <- process_FUN(FUN)
opb <- pbapply::pboptions(type = if (verbose) "timer" else "none")
on.exit(pbapply::pboptions(opb))
if (inherits(sim, "clarify_misim")) {
nimp <- nrow(sim$coefs)
apply_FUN <- .make_apply_FUN_mi(FUN)
# Test apply_FUN() on averaged coefficients, which will be final ests
test.coefs <- colMeans(sim$coefs)
test <- try(lapply(seq_len(nimp), function(i) {
apply_FUN(fit = sim$fit, coefs = test.coefs, imp = i, ...)
}), silent = TRUE)
if (is_error(test)) {
.err("`FUN` failed to run on an initial check with the following error:\n",
conditionMessage(.attr(test, "condition")))
}
# test <- apply(do.call("rbind", test), 2, median)
test <- colMeans(do.call("rbind", test))
if (is_null(names(test))) {
names(test) <- paste0("est", seq_along(test))
}
ests.list <- pbapply::pblapply(seq_len(nrow(sim$sim.coefs)), function(i) {
apply_FUN(fit = sim$fit, coefs = sim$sim.coefs[i, ], imp = sim$imp[i], ...)
}, cl = cl)
}
else {
apply_FUN <- .make_apply_FUN(FUN)
# Test apply_FUN() on original model coefficients
test <- try(apply_FUN(fit = sim$fit, coefs = sim$coefs, ...), silent = TRUE)
if (is_error(test)) {
.err("`FUN` failed to run on an initial check with the following error:\n",
conditionMessage(.attr(test, "condition")))
}
if (is_null(names(test))) {
names(test) <- paste0("est", seq_along(test))
}
ests.list <- pbapply::pblapply(seq_len(nrow(sim$sim.coefs)), function(i) {
apply_FUN(fit = sim$fit, coefs = sim$sim.coefs[i, ], ...)
}, cl = cl)
}
check_ests.list(ests.list, test)
ests <- do.call("rbind", ests.list)
check_ests(ests)
colnames(ests) <- names(test)
attr(ests, "original") <- test
attr(ests, "sim_hash") <- .attr(sim, "sim_hash")
class(ests) <- c("clarify_est", class(ests))
ests
}
#' @exportS3Method print clarify_est
#' @rdname sim_apply
print.clarify_est <- function(x, digits = 4L, max.ests = 6L, ...) {
chk::chk_whole_number(digits)
chk::chk_count(max.ests)
n.ests <- length(coef(x))
max.ests <- min(max.ests, n.ests)
cat(sprintf("A `clarify_est` object (from %s)\n",
if (inherits(x, "clarify_ame")) "`sim_ame()`"
else if (inherits(x, "clarify_setx")) "`sim_setx()`"
else "`sim_apply()`"))
cat(sprintf(" - %s simulated values\n", nrow(x)))
cat(sprintf(" - %s %s estimated:", n.ests,
ngettext(n.ests, "quantity", "quantities")))
data.frame(names(coef(x)),
coef(x),
fix.empty.names = FALSE) |>
.print_estimate_table(digits = digits,
topn = floor(max.ests / 2))
invisible(x)
}
.make_apply_FUN <- function(FUN) {
if (isTRUE(.attr(FUN, "use_coefs")) && isTRUE(.attr(FUN, "use_fit"))) {
function(fit, coefs, ...) {
fit <- marginaleffects::set_coef(fit, coefs)
FUN(fit = fit, coefs = coefs, ...)
}
}
else if (isTRUE(.attr(FUN, "use_coefs"))) {
function(fit, coefs, ...) {
FUN(coefs = coefs, ...)
}
}
else if (isTRUE(.attr(FUN, "use_fit"))) {
function(fit, coefs, ...) {
fit <- marginaleffects::set_coef(fit, coefs)
FUN(fit = fit, ...)
}
}
else {
function(fit, coefs, ...) {
fit <- marginaleffects::set_coef(fit, coefs)
FUN(fit, ...)
}
}
}
.make_apply_FUN_mi <- function(FUN) {
if (isTRUE(.attr(FUN, "use_coefs")) && isTRUE(.attr(FUN, "use_fit"))) {
function(fit, coefs, imp, ...) {
fit <- marginaleffects::set_coef(fit[[imp]], coefs)
FUN(fit = fit, coefs = coefs, ...)
}
}
else if (isTRUE(.attr(FUN, "use_coefs"))) {
function(fit, coefs, imp, ...) {
FUN(coefs = coefs, ...)
}
}
else if (isTRUE(.attr(FUN, "use_fit"))) {
function(fit, coefs, imp, ...) {
fit <- marginaleffects::set_coef(fit[[imp]], coefs)
FUN(fit = fit, ...)
}
}
else {
function(fit, coefs, imp, ...) {
fit <- marginaleffects::set_coef(fit[[imp]], coefs)
FUN(fit, ...)
}
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.