#' Specify data-generating mechanisms
#'
#' Specify the data-generating mechanisms for the
#' simulation using purrr-style lambda functions.
#'
#' This is always the first command in the
#' simulation process, to specify the actual
#' simulated variables, which is then passed to
#' \code{\link{define}} to define metaparameters
#' and then to
#' \code{\link[=generate.simpr_spec]{generate}} to
#' generate the data.
#'
#' The \code{\dots} arguments use an efficient
#' syntax to specify custom functions needed for
#' generating a simulation, based on the
#' \code{purrr} package. When producing one
#' variable, one can provide an expression such as
#' \code{specify(a = ~ 3 + runif(10))}; the
#' expression is preceded by \code{~}, the tilde
#' operator, and can refer to previous arguments
#' in \code{specify} or to metaparameters in
#' \code{\link{define}}. This is called a lambda
#' function.
#'
#' Order matters: arguments are evaluated
#' sequentially, so later argument can refer to an
#' earlier one, e.g. \code{specify(a = ~ rnorm(2),
#' b = ~ a + rnorm(2))}.
#'
#' \code{\link[=generate.simpr_spec]{generate}}
#' combines results together into a single tibble
#' for each simulation, so all lambda functions
#' should produce the same number of rows.
#' However, a lambda function can produce multiple
#' columns.
#'
#' @section Column naming:
#'
#' Because functions can produce different
#' numbers of columns, there are several options
#' for naming columns. If a provided lambda
#' function produces a single column, the name
#' given to the argument becomes the name of the
#' column. If the lambda function already
#' produces column names, then the output will
#' use these names if \code{.use_names = TRUE},
#' the default. Otherwise, simpr uses the
#' argument name as a base and auto-numbers the
#' columns. For instance, if the argument
#' \code{a} generates a two-column matrix and
#' \code{.sep = "_"} (the default) the columns
#' will be named \code{a_1}and \code{a_2}.
#'
#' Custom names can also be directly provided by
#' a double-sided formula. The left-hand side
#' must use \code{\link{c}} or
#' \code{\link{cbind}}, e.g. \code{specify(c(a,
#' b) ~ MASS::mvrnorm(5, c(0, 0), Sigma =
#' diag(2)))}.
#'
#' @section Note:
#'
#' This function is an S3 method for
#' \code{\link[generics]{specify}} from the
#' \code{generics} package. Because \code{x} is
#' a formal argument of
#' \code{\link[generics]{specify}}, if you have
#' a variable in your simulation named \code{x}
#' it will be automatically moved to be the
#' first variable (with a message). It is therefore
#' safest to use any other variable name besides
#' \code{x}.
#'
#' @param x leave this argument blank (NULL); this
#' argument is a placeholder and can be skipped.
#' @param ... named \code{purrr}-style formula
#' functions used for generating simulation
#' variables. \code{x} is not recommended as a
#' name, since it is a formal argument and will
#' be automatically assumed to be the first
#' variable (a message will be displayed if
#' \code{x} is used).
#' @param .sep Specify the separator for
#' auto-generating names. See \emph{Column
#' naming}.
#' @param .use_names Whether to use names
#' generated by the lambda function (TRUE, the
#' default), or to overwrite them with supplied
#' names.
#' @return A \code{simpr_specify} object which
#' contains the functions needed to generate the
#' simulation; to be passed to
#' \code{\link{define}} for defining
#' metaparameters or, if there are no
#' metaparameters, directly to
#' \code{\link[=generate.simpr_spec]{generate}}
#' for generating the simulation.
#'
#' Also useful is the fact that one can refer to
#' variables in subsequent arguments. So, one
#' could define another variable \code{b} that
#' depends on \code{a} very simply, e.g.
#' \code{specify(a = ~ 3 + runif(10), b = ~ 2 *
#' x)}.
#'
#' Finally, one can also refer to metaparameters
#' that are to be systematically varied in the
#' simulation study. See \code{\link{define}}
#' and the examples for more details.
#'
#' @examples
#' ## specify a variable and generate it in the simulation
#' single_var = specify(a = ~ 1 + rnorm(5)) %>%
#' generate(1) # generate a single repetition of the simulation
#' single_var
#'
#' two_var = specify(a = ~ 1 + rnorm(5),
#' b = ~ x + 2) %>%
#' generate(1)
#' two_var
#'
#' ## Generates a_01 through a_10
#' autonumber_var = specify(a = ~ MASS::mvrnorm(5, rep(0, 10), Sigma = diag(10))) %>%
#' generate(1)
#' autonumber_var
#'
#' # alternatively, you could use a two-sided formula for names
#' multi_name = specify(cbind(a, b, c) ~ MASS::mvrnorm(5, rep(0, 3), Sigma = diag(3))) %>%
#' generate(1)
#' multi_name
#'
#' # Simple example of setting a metaparameter
#' simple_meta = specify(a = ~ 1 + rnorm(n)) %>%
#' define(n = c(5, 10)) %>% # without this line you would get an error!
#' generate(1)
#'
#'
#' simple_meta # has two rows now, one for each value of n
#' simple_meta$sim[[1]] # n = 5
#' simple_meta$sim[[2]] # n = 10
#'
#' @export
specify.formula = function(x = NULL, ..., .use_names = TRUE, .sep = "_") {
## Method for creating a new simpr_spec object,
## which means that first argument must be a formula
## Note that this method uses S3 dispatch in a
## tricky way; usually the user will not
## actually specify anything called "x", but
## still this method is dispatched. This is
## written to work more gracefully with the way
## that generics::specify is written
vars = list(...)
## Normally x is ignored, but if the user does
## provide a variable called "x" we need to
## include that as well
if(!is.null(x)) {
message("Formula specification for 'x' detected. ",
"Assuming 'x' is the first formula.\n\n",
"To hide this message, or to avoid moving this formula first, ",
"use a different variable name.")
vars = c(list(x = x), vars)
}
add_specification(new_simpr_spec(),
varlist = vars,
.sep = .sep,
.use_names = .use_names)
}
add_specification = function(spec, varlist, .sep, .use_names) {
if(length(varlist) == 0)
stop("No variables defined")
if(!all(purrr::map_lgl(varlist, purrr::is_formula))) {
stop("All specifications should be purr-style formula functions")
}
## Identify named arguments
if(is.null(names(varlist))) {
named_varlist = rep(FALSE, length(varlist))
names(varlist) = paste0(".unnamed_",1:length(varlist))
} else {
named_varlist = names(varlist) != "" # empty names become "" when there are both named and unnamed args
names(varlist)[!named_varlist] = paste0(".unnamed_", names(varlist)[!named_varlist])
}
# Process formulas to extract and set varnames attribute
spec$specify = purrr::pmap(list(varlist, names(varlist), named_varlist),
function(x, n, named) {
if(!rlang::is_formula(x))
stop("Argument is not formula")
else {
## Double-sided formula
if(length(x) == 3) {
## Get names from left-hand side of formula
attr(x, "varnames") = x[[2]][-1] %>% as.list() %>% purrr::map_chr(deparse)
## delete left-hand side of formula and return right-handed formula
x_out = x
x_out[[2]] = NULL
x_out
} else {
## Single-sided formula
if(length(x) == 2) {
if(!named)
stop("Right-hand formulas must be named.")
x_out = x
attr(x_out, "varnames") = n
x_out
}
}
}
})
# set attributes of ".use_names" and ".sep" for auto-numbering variables with multiple outputs
spec$variable_sep = .sep
spec$.use_names = .use_names
spec
}
#' @importFrom generics specify
#' @export
generics::specify
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.