#' Generate simulated data from specification
#'
#' Use specification from
#' \code{\link[=specify.formula]{specify}} or
#' \code{\link{define}} to produce simulated data.
#'
#' This is the third step in the simulation
#' process: after specifying the population model
#' and defining the metaparameters, if any,
#' \code{generate} is the workhorse function that
#' actually generates the simulated datasets, one
#' for each replication and combination of
#' metaparameters. You likely want to use the
#' output of \code{generate} to fit model(s) with
#' \code{\link[=fit.simpr_tibble]{fit}}.
#'
#' Errors you get using this function usually have
#' to do with how you specified the simulation in
#' \code{\link[=specify.formula]{specify}} and
#' \code{\link{define}}.
#'
#' @param x a \code{simpr_spec} object generated
#' by \code{\link{define}} or
#' \code{\link[=specify.formula]{specify}},
#' containing the specifications of the
#' simulation
#' @param .reps number of replications to run (a
#' whole number greater than 0)
#' @param \dots filtering criteria for which rows
#' to simulate, passed to
#' \code{\link[dplyr]{filter}}. This is useful
#' for reproducing just a few selected rows of a
#' simulation without needing to redo the entire
#' simulation, see \code{vignette("Reproducing
#' simulations")},
#' @param .sim_name name of the list-column to be
#' created, containing simulation results.
#' Default is \code{"sim"}
#' @param .quiet Should simulation errors be
#' broadcast to the user as they occur?
#' @param .warn_on_error Should there be a warning
#' when simulation errors occur? See
#' \code{vignette("Managing simulation
#' errors")}.
#' @param .stop_on_error Should the simulation
#' stop immediately when simulation errors
#' occur?
#' @param .debug Run simulation in debug mode,
#' allowing objects, etc. to be explored for
#' each generated variable specification.
#' @param .progress A logical, for whether or not
#' to print a progress bar for multiprocess,
#' multisession, and multicore plans.
#' @param .options The \code{future} specific
#' options to use with the workers when using
#' futures. This must be the result from a call
#' to
#' \code{\link[furrr:furrr_options]{furrr_options(seed
#' = TRUE)}}.
#' @seealso
#' \code{\link[=specify.formula]{specify}} and
#' \code{\link{define}} for examples of how
#' these functions affect the output of
#' \code{generate}. See
#' \code{vignette("Optimization")} and the
#' \code{furrr} website for more information on
#' working with futures:
#' \url{https://furrr.futureverse.org/}
#' @return a \code{\link{simpr_sims}} object,
#' which is a tibble with a row for each
#' repetition (a total of \code{rep}
#' repetitions) for each combination of
#' metaparameters and some extra metadata used
#' by \code{\link[=fit.simpr_tibble]{fit}}. The
#' columns are \code{rep} for the repetition
#' number, the names of the metaparameters, and
#' a list-column (named by the argument
#' \code{sim_name}) containing the dataset for
#' each repetition and metaparameter
#' combination. \code{simpr_sims} objects can be
#' manipulated elementwise by \code{dplyr} and
#' \code{tidyr} verbs: the command is applied to
#' each element of the simulation list-column.
#' @examples
#' meta_list_out = specify(a = ~ MASS::mvrnorm(n, rep(0, 2), Sigma = S)) %>%
#' define(n = c(10, 20, 30),
#' S = list(independent = diag(2), correlated = diag(2) + 2)) %>%
#' generate(1)
#'
#' ## View overall structure of the result and a single simulation output
#' meta_list_out
#'
#' ## Changing .reps will change the number of replications and thus the number of
#' ## rows in the output
#' meta_list_2 = specify(a = ~ MASS::mvrnorm(n, rep(0, 2), Sigma = S)) %>%
#' define(n = c(10, 20, 30),
#' S = list(independent = diag(2), correlated = diag(2) + 2)) %>%
#' generate(2)
#'
#' meta_list_2
#'
#' ## Fitting, tidying functions can be included in this step by running those functions and then
#' ## generate. This can save computation time when doing large
#' ## simulations, especially with parallel processing
#' meta_list_generate_after = specify(a = ~ MASS::mvrnorm(n, rep(0, 2), Sigma = S)) %>%
#' define(n = c(10, 20, 30),
#' S = list(independent = diag(2), correlated = diag(2) + 2)) %>%
#' fit(lm = ~ lm(a_2 ~ a_1, data = .)) %>%
#' tidy_fits %>%
#' generate(1)
#'
#' meta_list_generate_after
#'
#' @export
generate.simpr_spec = function(x, .reps, ..., .sim_name = "sim",
.quiet = TRUE, .warn_on_error = TRUE,
.stop_on_error = FALSE,
.debug = FALSE,
.progress = FALSE,
.options = furrr_options(seed = TRUE)) {
validate_reps(.reps)
## Create specs from conditions, .reps; add unique id as well.
spec_core = data.frame(rep = 1:.reps)
# If conditions exist, cross join them with the number of repetitions
if(!is.null(x$conditions) && nrow(x$conditions) > 0)
spec_core = dplyr::cross_join(data.frame(rep = 1:.reps),
x$conditions)
# Add sim_id and other metadata
specs = spec_core %>%
dplyr::mutate(.sim_id = 1:(dplyr::n())) %>%
dplyr::relocate(".sim_id") %>%
dplyr::relocate(rep, .after = dplyr::everything())
specs_filter = dplyr::filter(specs, ...)
excluded_sim_ids = specs$.sim_id[!(specs$.sim_id %in% specs_filter$.sim_id)]
if(!is.null(x$meta_info$lookup)) {
## If there are list elements, join cells representing those list-columns
## into specs
specs = purrr::reduce2(x$meta_info$lookup,
dplyr::inner_join,
.init = specs,
.y = names(x$meta$lookup)) # the "by" argument to the join
}
create_sim_results(specs = specs,
x = x[c("meta_info",
"specify",
"variable_sep",
".use_names",
"include_calls")],
.sim_name = .sim_name,
.quiet = .quiet,
.debug = .debug,
.warn_on_error = .warn_on_error,
.stop_on_error = .stop_on_error,
.progress = .progress,
.options = .options,
excluded_sim_ids = excluded_sim_ids)
}
generate_sim = function(y, eval_environment, variable_sep, .use_names, .debug, .stop_on_error) {
eval_fun = purrr::as_mapper(y)
environment(eval_fun) <- eval_environment
if(.debug)
debug(eval_fun)
gen = eval_fun()
varnames = attr(y, "varnames")
if(is.null(ncol(gen))) {
gen_df = tibble::as_tibble(gen, .name_repair = "minimal")
names(gen_df) = varnames
assign(varnames, gen, envir = eval_environment)
} else if(length(dim(gen)) > 3) {
stop("More than 2 dimensional output in specify() not supported")
} else if(ncol(gen) == 0) {
stop("variable function returns 0 columns")
} else if(ncol(gen) >= 1) {
if(.use_names && !is.null(colnames(gen))) {
varnames = colnames(gen)
}
if(ncol(gen) == 1) {
names(gen) = attr(y, "varnames")
assign(varnames, gen[[1]], envir = eval_environment)
} else {
gen_df = tibble::as_tibble(gen, .name_repair = "minimal")
#rename gen_df # if multiple varnames given
#via formula lhs or from the dataset itself,
#use those
if(length(varnames) > 1) {
names(gen_df) = varnames
} else {
# Otherwise, use auto-numbering
names(gen_df) = sprintf(paste0("%s%s%0", nchar(trunc(ncol(gen))), ".0f"),
varnames,
variable_sep,
1:ncol(gen))
## assign names to the eval_environmnent
}
purrr::iwalk(gen_df, ~ assign(.y, .x, envir = eval_environment))
}
}
gen_df
}
generate_row = function(variables, ..., variable_sep, .use_names,
include_calls, meta_indices, .sim_name, .quiet,
.debug, .stop_on_error,
excluded_sim_ids) {
meta_values = list(...)
## Exclude and don't calculate if this row is filtered out
if(meta_values$.sim_id %in% excluded_sim_ids)
return(NULL)
eval_environment = rlang::as_environment(meta_values, parent = parent.frame())
if(.stop_on_error)
sim_list = purrr::map_dfc(variables, generate_sim,
eval_environment = eval_environment,
variable_sep = variable_sep,
.debug = .debug,
.use_names = .use_names)
else
sim_list = purrr::safely(purrr::map_dfc, otherwise = NULL, quiet = .quiet)(variables, generate_sim,
eval_environment = eval_environment,
variable_sep = variable_sep,
.debug = .debug,
.use_names = .use_names)
## Create a 1-row tibble with meta values and the simulation cell
df_full = purrr::map(meta_values, ~ if(length(.) == 1 && purrr::is_vector(.)) return(.) else return(list(.))) %>%
tibble::as_tibble(.rows = 1)
df_full[[.sim_name]] = list(sim_list$result)
if(!is.null(sim_list$error))
df_full[[".sim_error"]] = as.character(sim_list$error)
if(is.null(include_calls)) {
return(df_full)
} else {
## Add "simpr_tibble" class
class(df_full) = c("simpr_tibble", class(df_full))
## identify which variables are meta variables
attr(df_full, "meta") = meta_indices
attr(df_full, ".sim_name") = .sim_name
df_eval = purrr::reduce(.x = include_calls, .f = eval_pipe, .init = df_full) %>%
tibble::as_tibble()
return(df_eval)
}
}
eval_pipe = function(lhs, rhs) {
## Set future options here since not evaluated
## by include(), and future options are ignored
## anyway
.options = furrr_options(seed = TRUE)
.progress = FALSE
eval(call("%>%", lhs = lhs, rhs = rhs))
}
create_sim_results <- function(specs, x, .sim_name, .quiet, .warn_on_error, .progress, .options,
.debug, .stop_on_error,
excluded_sim_ids) {
## Create simulation results from specification
## Generate all replications
sim_results = specs %>%
tibble::as_tibble() %>%
furrr::future_pmap(generate_row,
variables = x$specify,
include_calls = x$include_calls,
variable_sep = x$variable_sep,
.use_names = x$.use_names,
meta_indices = names(x$meta_info$indices),
.debug = .debug,
.stop_on_error = .stop_on_error,
.sim_name = .sim_name,
.quiet = .quiet,
.progress = .progress,
.options = .options,
excluded_sim_ids = excluded_sim_ids) %>%
dplyr::bind_rows()
## Give warning if errors occured
if(.warn_on_error && ".sim_error" %in% names(sim_results))
warning("Simulation produced errors. See column '.sim_error'.")
## Add some attributes to the tibble to track meta and variables
attr(sim_results, "meta") = names(x$meta_info$indices)
attr(sim_results, "variables") = purrr::map(x$variables, ~ attr(., "varnames")) %>% unlist
attr(sim_results, ".sim_name") = .sim_name
attr(sim_results, "sim_total") = max(specs$.sim_id)
## Add "simpr_tibble" class if there is still a sim column
if(get_sim_name(sim_results) %in% names(sim_results))
class(sim_results) = c("simpr_tibble", class(sim_results))
sim_results
}
validate_reps = function(.reps) {
## Check .reps argument is a whole number > 0
if(length(.reps) > 1) {
.reps = .reps[1]
warning(".reps has length > 1, using only first element")
}
if(.reps != round(.reps)) {
.reps = round(.reps)
warning(".reps not a whole number, rounding to nearest whole number")
}
if(.reps < 1) {
stop(".reps should be at least 1")
}
}
#' @importFrom generics generate
#' @export
generics::generate
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.