R/specify.R

Defines functions add_specification specify.formula

Documented in specify.formula

#' Specify data-generating mechanisms
#'
#' Specify the data-generating mechanisms for the
#' simulation using purrr-style lambda functions.
#'
#' This is always the first command in the
#' simulation process, to specify the actual
#' simulated variables, which is then passed to
#' \code{\link{define}} to define metaparameters
#' and then to
#' \code{\link[=generate.simpr_spec]{generate}} to
#' generate the data.
#'
#' The \code{\dots} arguments use an efficient
#' syntax to specify custom functions needed for
#' generating a simulation, based on the
#' \code{purrr} package.  When producing one
#' variable, one can provide an expression such as
#' \code{specify(a = ~ 3 + runif(10))}; the
#' expression is preceded by \code{~}, the tilde
#' operator, and can refer to previous arguments
#' in \code{specify} or to metaparameters in
#' \code{\link{define}}. This is called a lambda
#' function.
#'
#' Order matters: arguments are evaluated
#' sequentially, so a later argument can refer to an
#' earlier one, e.g. \code{specify(a = ~ rnorm(2),
#' b = ~ a + rnorm(2))}.
#'
#' \code{\link[=generate.simpr_spec]{generate}}
#' combines results together into a single tibble
#' for each simulation, so all lambda functions
#' should produce the same number of rows.
#' However, a lambda function can produce multiple
#' columns.
#'
#' @section Column naming:
#'
#'   Because functions can produce different
#'   numbers of columns, there are several options
#'   for naming columns. If a provided lambda
#'   function produces a single column, the name
#'   given to the argument becomes the name of the
#'   column.  If the lambda function already
#'   produces column names, then the output will
#'   use these names if \code{.use_names = TRUE},
#'   the default. Otherwise, simpr uses the
#'   argument name as a base and auto-numbers the
#'   columns. For instance, if the argument
#'   \code{a} generates a two-column matrix and
#'   \code{.sep = "_"} (the default) the columns
#'   will be named \code{a_1} and \code{a_2}.
#'
#'   Custom names can also be directly provided by
#'   a double-sided formula. The left-hand side
#'   must use \code{\link{c}} or
#'   \code{\link{cbind}}, e.g. \code{specify(c(a,
#'   b) ~ MASS::mvrnorm(5, c(0, 0), Sigma =
#'   diag(2)))}.
#'
#' @section Note:
#'
#'   This function is an S3 method for
#'   \code{\link[generics]{specify}} from the
#'   \code{generics} package.  Because \code{x} is
#'   a formal argument of
#'   \code{\link[generics]{specify}}, if you have
#'   a variable in your simulation named \code{x}
#'   it will be automatically moved to be the
#'   first variable (with a message).  It is therefore
#'   safest to use any other variable name besides
#'   \code{x}.
#'
#' @param x leave this argument blank (NULL); this
#'   argument is a placeholder and can be skipped.
#' @param ... named \code{purrr}-style formula
#'   functions used for generating simulation
#'   variables. \code{x} is not recommended as a
#'   name, since it is a formal argument and will
#'   be automatically assumed to be the first
#'   variable (a message will be displayed if
#'   \code{x} is used).
#' @param .sep Specify the separator for
#'   auto-generating names.  See \emph{Column
#'   naming}.
#' @param .use_names Whether to use names
#'   generated by the lambda function (TRUE, the
#'   default), or to overwrite them with supplied
#'   names.
#' @return A \code{simpr_spec} object which
#'   contains the functions needed to generate the
#'   simulation; to be passed to
#'   \code{\link{define}} for defining
#'   metaparameters or, if there are no
#'   metaparameters, directly to
#'   \code{\link[=generate.simpr_spec]{generate}}
#'   for generating the simulation.
#'
#'   Also useful is the fact that one can refer to
#'   variables in subsequent arguments.  So, one
#'   could define another variable \code{b} that
#'   depends on \code{a} very simply, e.g.
#'   \code{specify(a = ~ 3 + runif(10), b = ~ 2 *
#'   a)}.
#'
#'   Finally, one can also refer to metaparameters
#'   that are to be systematically varied in the
#'   simulation study.  See \code{\link{define}}
#'   and the examples for more details.
#'
#' @examples
#' ## specify a variable and generate it in the simulation
#' single_var = specify(a = ~ 1 + rnorm(5)) %>%
#'   generate(1) # generate a single repetition of the simulation
#' single_var
#'
#' two_var = specify(a = ~ 1 + rnorm(5),
#'                     b = ~ a + 2) %>%
#'   generate(1)
#' two_var
#'
#' ## Generates a_01 through a_10
#' autonumber_var = specify(a = ~ MASS::mvrnorm(5, rep(0, 10), Sigma = diag(10))) %>%
#'   generate(1)
#' autonumber_var
#'
#' # alternatively, you could use a two-sided formula for names
#' multi_name = specify(cbind(a, b, c) ~ MASS::mvrnorm(5, rep(0, 3), Sigma = diag(3))) %>%
#'   generate(1)
#' multi_name
#'
#' # Simple example of setting a metaparameter
#' simple_meta = specify(a = ~ 1 + rnorm(n)) %>%
#'   define(n = c(5, 10)) %>% # without this line you would get an error!
#'   generate(1)
#'
#'
#' simple_meta # has two rows now, one for each value of n
#' simple_meta$sim[[1]] # n = 5
#' simple_meta$sim[[2]] # n = 10
#'
#' @export
specify.formula = function(x = NULL, ..., .use_names = TRUE, .sep = "_") {
  ## Entry point that starts a fresh specification from formulas.
  ##
  ## Callers normally supply every formula through `...` and leave `x`
  ## at its NULL default; `x` only exists because it is a formal of the
  ## generic this method is registered for.

  ## Collect the formulas passed through the dots first
  dot_formulas = list(...)

  if(is.null(x)) {
    all_formulas = dot_formulas
  } else {
    ## A formula was supplied under the name "x": tell the user and put
    ## it at the front of the list, under the name "x"
    notice = paste0(
      "Formula specification for 'x' detected. ",
      "Assuming 'x' is the first formula.\n\n",
      "To hide this message, or to avoid moving this formula first, ",
      "use a different variable name."
    )
    message(notice)

    all_formulas = append(list(x = x), dot_formulas)
  }

  ## Hand the collected formulas and naming options to add_specification(),
  ## starting from a new, empty spec object
  add_specification(new_simpr_spec(), all_formulas, .sep, .use_names)

}

## Validate a list of formula specifications and store them on `spec`.
##
## Arguments:
##   spec        object to extend; its `specify`, `variable_sep` and
##               `.use_names` elements are set with `$<-`.
##   varlist     non-empty list of formulas, optionally named.
##   .sep        stored unchanged as `spec$variable_sep`.
##   .use_names  stored unchanged as `spec$.use_names`.
##
## Returns `spec` with `spec$specify` holding one right-hand-side-only
## formula per element of `varlist`; each formula carries a "varnames"
## attribute (character vector) giving the names for its output.
##
## Errors if `varlist` is empty, if any element is not a formula, if a
## one-sided formula has no argument name, or if the left-hand side of a
## two-sided formula is not a call such as c(a, b) or cbind(a, b).
add_specification = function(spec, varlist, .sep, .use_names) {

  if(length(varlist) == 0)
    stop("No variables defined")

  ## A specification is accepted when it is a call whose head is `~`
  is_tilde_call = function(f) is.call(f) && identical(f[[1]], as.name("~"))

  if(!all(vapply(varlist, is_tilde_call, logical(1)))) {
    stop("All specifications should be purr-style formula functions")
  }

  ## Identify named arguments.  names() is NULL when nothing is named and
  ## uses "" for the unnamed entries when only some are named; an NA name
  ## is treated as unnamed too.
  arg_names = names(varlist)
  if(is.null(arg_names))
    arg_names = rep("", length(varlist))
  named_varlist = !is.na(arg_names) & arg_names != ""

  ## Unnamed arguments get a placeholder built from their position, so
  ## placeholders are unique whether or not other arguments are named
  arg_names[!named_varlist] = paste0(".unnamed_", which(!named_varlist))
  names(varlist) = arg_names

  ## Attach the "varnames" attribute to one formula and reduce it to a
  ## right-hand-side-only formula
  label_formula = function(x, n, named) {
    if(length(x) == 3) {
      ## Two-sided formula: names come from the arguments of the
      ## left-hand-side call, e.g. c(a, b) ~ ... gives "a" and "b"
      lhs = x[[2]]
      if(!is.call(lhs))
        stop("The left-hand side of a two-sided formula must use ",
             "c() or cbind(), e.g. c(a, b) ~ ...")

      attr(x, "varnames") = vapply(as.list(lhs[-1]), deparse, character(1))

      ## Drop the left-hand side, leaving a one-sided formula
      x[[2]] = NULL
      x
    } else if(length(x) == 2) {
      ## One-sided formula: the argument name is the only source of a
      ## variable name, so it is required
      if(!named)
        stop("Right-hand formulas must be named.")

      attr(x, "varnames") = n
      x
    }
  }

  ## Process formulas to extract and set varnames attribute; the result
  ## keeps the names of varlist
  spec$specify = Map(label_formula, varlist, names(varlist), named_varlist)

  # set attributes of ".use_names" and ".sep" for auto-numbering variables with multiple outputs
  spec$variable_sep = .sep
  spec$.use_names = .use_names

  spec
}

## Re-export of the `specify` generic: the roxygen tags below import
## `specify` from the generics package and export it from this package.
#' @importFrom generics specify
#' @export
generics::specify
statisfactions/simpr documentation built on July 18, 2024, 6:44 a.m.