Nothing
#' Apply a function to simulated parameter values
#'
#' @description `sim_apply()` applies a function that produces quantities of
#' interest to each set of simulated coefficients produced by [sim()]; these
#' calculated quantities form the posterior sampling distribution for the
#' quantities of interest. Capabilities are available for parallelization.
#'
#' @param sim a `clarify_sim` object; the output of a call to [sim()] or
#' [misim()].
#' @param FUN a function to be applied to each set of simulated coefficients.
#' See Details.
#' @param verbose `logical`; whether to display a text progress bar indicating
#' progress and estimated time remaining for the procedure. Default is `TRUE`.
#' @param cl a cluster object created by [parallel::makeCluster()], or an
#' integer to indicate the number of child-processes (integer values are
#' ignored on Windows) for parallel evaluations. See [pbapply::pblapply()] for
#' details. If `NULL`, no parallelization will take place.
#' @param ... optional arguments passed to `FUN`.
#'
#' @details `sim_apply()` applies a function, `FUN`, to each set of simulated
#' coefficients, similar to [apply()]. This function should return a numeric
#' vector containing one or more estimated quantities. This should be a named
#' vector to more easily keep track of the meaning of each estimated quantity.
#' Care should be taken to ensure that the returned vector is the same length
#' each time `FUN` is called. `NA`s are allowed in the output but should be
#' avoided if possible.
#'
#' The arguments to `FUN` can be specified in a few ways. If `FUN` has an
#' argument called `coefs`, a simulated set of coefficients will be passed to
#' this argument, and `FUN` should compute and return a quantity based on the
#' coefficients (e.g., the difference between two coefficients if one wants to
#' test whether two coefficients are equal). If `FUN` has an argument called
#' `fit`, a model fit object of the same type as the one originally supplied
#' to `sim()` (e.g., an `lm` or `glm` object) will be passed to this argument,
#' where the coefficients of the fit object have been replaced by the
#' simulated coefficients generated by `sim()`, and `FUN` should compute and
#' return a quantity based on the model fit (e.g., a computation based on the
#' output of `predict()`). If neither `coefs` nor `fit` are the names of
#' arguments to `FUN`, the model fit object with replaced coefficients will be
#' supplied to the first argument of `FUN`.
#'
#' When custom coefficients are supplied to `sim()`, i.e., when the `coefs`
#' argument to `sim()` is not left at its default value, `FUN` must accept a
#' `coefs` argument and a warning will be thrown if it accepts a `fit`
#' argument. This is because `sim_apply()` does not know how to reconstruct
#' the original fit object with the new coefficients inserted. The quantities
#' computed by `sim_apply()` must therefore be computed directly from the
#' coefficients.
#'
#' If `FUN` is not supplied at all, the simulated values of the coefficients will be returned in the output with a warning. Set `FUN` to `NULL` or `verbose` to `FALSE` to suppress this warning.
#'
#' ## `sim_apply()` with multiply imputed data
#'
#' When using [misim()] and `sim_apply()` with multiply imputed data, the
#' coefficients are supplied to the model fit corresponding to the imputation
#' identifier associated with each set of coefficients, which means if `FUN`
#' uses a dataset extracted from a model (e.g., using [insight::get_data()]), it will do so from the model fit in
#' the corresponding imputation.
#'
#' The original estimates (see Value below) are computed as the mean of the
#' estimates across the imputations using the original coefficients averaged
#' across imputations. That is, first, the coefficients estimated in the
#' models in the imputed datasets are combined to form a single set of pooled
#' coefficients; then, for each imputation, the quantities of interest are
#' computed using the pooled coefficients; finally, the mean of the resulting
#' estimates across the imputations are taken as the "original" estimates.
#' Note this procedure is only valid for quantities with symmetric sampling
#' distributions, which excludes quantities like risk ratios and odds ratios,
#' but includes log risk ratios and log odds ratios. The desired quantities
#' can be transformed from their log versions using
#' [transform()].
#'
#' @return A `clarify_est` object, which is a matrix with a column for each
#' estimated quantity and a row for each simulation. The original estimates
#' (`FUN` applied to the original coefficients or model fit object) are stored
#' in the attribute `"original"`. The `"sim_hash"` attribute contains the
#' simulation hash produced by `sim()`.
#'
#' @seealso
#' * [sim()] for generating the simulated coefficients
#' * [summary.clarify_est()] for computing p-values and confidence intervals for
#' the estimated quantities
#' * [plot.clarify_est()] for plotting estimated
#' quantities and their simulated posterior sampling distribution.
#'
#' @examples
#'
#' data("lalonde", package = "MatchIt")
#' fit <- lm(re78 ~ treat + age + race + nodegree + re74,
#' data = lalonde)
#' coef(fit)
#'
#' set.seed(123)
#' s <- sim(fit, n = 500)
#'
#' # Function to compare predicted values for two units
#' # using `fit` argument
#' sim_fun <- function(fit) {
#' pred1 <- unname(predict(fit, newdata = lalonde[1,]))
#' pred2 <- unname(predict(fit, newdata = lalonde[2,]))
#' c(pred1 = pred1, pred2 = pred2)
#' }
#'
#' est <- sim_apply(s, sim_fun, verbose = FALSE)
#'
#' # Add difference between predicted values as
#' # additional quantity
#' est <- transform(est, `diff 1-2` = pred1 - pred2)
#'
#' # Examine estimates and confidence intervals
#' summary(est)
#'
#' # Function to compare coefficients using `coefs`
#' # argument
#' sim_fun <- function(coefs) {
#' setNames(coefs["racewhite"] - coefs["racehispan"],
#' "wh - his")
#' }
#'
#' est <- sim_apply(s, sim_fun, verbose = FALSE)
#'
#' # Examine estimates and confidence intervals
#' summary(est)
#'
#' # Another way to do the above:
#' est <- sim_apply(s, FUN = NULL)
#' est <- transform(est,
#' `wh - his` = `racewhite` - `racehispan`)
#'
#' summary(est, parm = "wh - his")
#'
#' @export
sim_apply <- function(sim,
FUN,
verbose = TRUE,
cl = NULL,
...) {
chk::chk_is(sim, "clarify_sim")
chk::chk_flag(verbose)
missing.fun <- missing(FUN)
if (missing.fun || is.null(FUN)) {
if (verbose && missing.fun) {
.wrn("`FUN` not supplied; returning simulated coefficients")
}
ests <- sim$sim.coefs
attr(ests, "original") <- {
if (inherits(sim, "clarify_misim"))
colMeans(sim$coefs)
else
sim$coefs
}
attr(ests, "sim_hash") <- attr(sim, "sim_hash")
class(ests) <- c("clarify_est", class(ests))
return(ests)
}
FUN <- process_FUN(FUN)
opb <- pbapply::pboptions(type = if (verbose) "timer" else "none")
on.exit(pbapply::pboptions(opb))
if (inherits(sim, "clarify_misim")) {
nimp <- nrow(sim$coefs)
apply_FUN <- .make_apply_FUN_mi(FUN)
# Test apply_FUN() on averaged coefficients, which will be final ests
test.coefs <- colMeans(sim$coefs)
test <- try(lapply(seq_len(nimp), function(i) {
apply_FUN(fit = sim$fit, coefs = test.coefs, imp = i, ...)
}), silent = TRUE)
if (is_error(test)) {
.err("`FUN` failed to run on an initial check with the following error:\n",
conditionMessage(attr(test, "condition")))
}
# test <- apply(do.call("rbind", test), 2, median)
test <- colMeans(do.call("rbind", test))
if (is.null(names(test))) names(test) <- paste0("est", seq_along(test))
ests.list <- pbapply::pblapply(seq_len(nrow(sim$sim.coefs)), function(i) {
apply_FUN(fit = sim$fit, coefs = sim$sim.coefs[i, ], imp = sim$imp[i], ...)
}, cl = cl)
}
else {
apply_FUN <- .make_apply_FUN(FUN)
# Test apply_FUN() on original model coefficients
test <- try(apply_FUN(fit = sim$fit, coefs = sim$coefs, ...), silent = TRUE)
if (is_error(test)) {
.err("`FUN` failed to run on an initial check with the following error:\n",
conditionMessage(attr(test, "condition")))
}
if (is.null(names(test))) names(test) <- paste0("est", seq_along(test))
ests.list <- pbapply::pblapply(seq_len(nrow(sim$sim.coefs)), function(i) {
apply_FUN(fit = sim$fit, coefs = sim$sim.coefs[i, ], ...)
}, cl = cl)
}
check_ests.list(ests.list, test)
ests <- do.call("rbind", ests.list)
check_ests(ests)
colnames(ests) <- names(test)
attr(ests, "original") <- test
attr(ests, "sim_hash") <- attr(sim, "sim_hash")
class(ests) <- c("clarify_est", class(ests))
ests
}
#' @export
print.clarify_est <- function(x, digits = NULL, max.ests = 6, ...) {
chk::chk_count(max.ests)
max.ests <- min(max.ests, length(attr(x, "original")))
cat(sprintf("A `clarify_est` object (from %s)\n",
if (inherits(x, "clarify_ame")) "`sim_ame()`"
else if (inherits(x, "clarify_setx")) "`sim_setx()`"
else "`sim_apply()`"))
cat(sprintf(" - %s simulated values\n", nrow(x)))
cat(sprintf(" - %s %s estimated:", length(attr(x, "original")),
ngettext(length(attr(x, "original")), "quantity", "quantities")))
print.data.frame(data.frame(names(attr(x, "original"))[seq_len(max.ests)],
attr(x, "original")[seq_len(max.ests)],
fix.empty.names = FALSE),
row.names = FALSE, right = FALSE)
if (max.ests != length(attr(x, "original"))) {
cat(sprintf("# ... and %s more\n", length(attr(x, "original")) - max.ests))
}
invisible(x)
}
.make_apply_FUN <- function(FUN) {
if (isTRUE(attr(FUN, "use_coefs")) && isTRUE(attr(FUN, "use_fit"))) {
function(fit, coefs, ...) {
fit <- marginaleffects::set_coef(fit, coefs)
FUN(fit = fit, coefs = coefs, ...)
}
}
else if (isTRUE(attr(FUN, "use_coefs"))) {
function(fit, coefs, ...) {
FUN(coefs = coefs, ...)
}
}
else if (isTRUE(attr(FUN, "use_fit"))) {
function(fit, coefs, ...) {
fit <- marginaleffects::set_coef(fit, coefs)
FUN(fit = fit, ...)
}
}
else {
function(fit, coefs, ...) {
fit <- marginaleffects::set_coef(fit, coefs)
FUN(fit, ...)
}
}
}
.make_apply_FUN_mi <- function(FUN) {
if (isTRUE(attr(FUN, "use_coefs")) && isTRUE(attr(FUN, "use_fit"))) {
function(fit, coefs, imp, ...) {
fit <- marginaleffects::set_coef(fit[[imp]], coefs)
FUN(fit = fit, coefs = coefs, ...)
}
}
else if (isTRUE(attr(FUN, "use_coefs"))) {
function(fit, coefs, imp, ...) {
FUN(coefs = coefs, ...)
}
}
else if (isTRUE(attr(FUN, "use_fit"))) {
function(fit, coefs, imp, ...) {
fit <- marginaleffects::set_coef(fit[[imp]], coefs)
FUN(fit = fit, ...)
}
}
else {
function(fit, coefs, imp, ...) {
fit <- marginaleffects::set_coef(fit[[imp]], coefs)
FUN(fit, ...)
}
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.