R/bundle_sim.R

Defines functions bundle_sim

Documented in bundle_sim

#' @title Bundle functions into a simulation driver function
#'
#' @description Bundle a data-generation function, a data-analysis function, and
#'   (optionally) a performance summary function into a simulation driver.
#'
#' @param f_generate function for data-generation
#' @param f_analyze function for data-analysis. The first argument must be the
#'   data, in the format generated by \code{f_analyze()}.
#' @param f_summarize function for calculating performance summaries across
#'   replications. The first argument must be the replicated data analysis
#'   results. Default is \code{NULL}, so that no summary function is used.
#' @param reps_name character string to set the name of the argument for the
#'   number of replications, with a default value of \code{"reps"}.
#' @param seed_name character string to set the name of the argument for the
#'   seed option, with a default value of \code{"seed"}. Set to \code{NULL} to
#'   remove the argument from the simulation driver.
#' @param summarize_opt_name character string to set the name of the argument
#'   for where to apply \code{f_summarize} to the simulation results, with a
#'   default value of \code{"summarize"}. Ignored if no \code{f_summarize} function is
#'   specified. Set to \code{NULL} to remove the argument from the simulation
#'   driver.
#' @param row_bind_reps logical indicating whether to combine the simulation
#'   results into a data frame using \code{rbind()}, with a default value of
#'   \code{TRUE}. If \code{FALSE}, then the function will return replications in
#'   a list and so \code{f_summarize} must be able to take a list as its first
#'   argument.
#'
#' @return A function to repeatedly run the `f_generate` and `f_analyze`
#'   functions and (optionally) apply `f_summarize` to the resulting
#'   replications.
#'
#' @export
#'
#' @examples
#' f_G <- rnorm
#' f_A <- function(x, trim = 0) data.frame(y_bar = mean(x, trim = trim))
#' f_S <- function(x, calc_sd = FALSE) {
#'   if (calc_sd) {
#'     res_SD <- apply(x, 2, sd)
#'     res <- data.frame(M = colMeans(x), SD = res_SD)
#'   } else {
#'     res <- data.frame(M = colMeans(x))
#'   }
#'   res
#' }
#'
#' # bundle data-generation and data-analysis functions
#' sim1 <- bundle_sim(f_generate = f_G, f_analyze = f_A)
#' args(sim1)
#' res1 <- sim1(4, n = 70, mean = 0.5, sd = 1, trim = 0.2)
#' res1
#'
#' # bundle data-generation, data-analysis, and performance summary functions
#' sim2 <- bundle_sim(f_generate = f_G, f_analyze = f_A, f_summarize = f_S)
#' args(sim2)
#' res2 <- sim2(24, n = 7, mean = 0, sd = 1, trim = 0.2, calc_sd = TRUE)
#' res2
#'
#' # bundle data-generation and data-analysis functions, returning results as a list
#' sim3 <- bundle_sim(f_generate = f_G, f_analyze = f_A, row_bind_reps = FALSE)
#' args(sim3)
#' res3 <- sim3(4, n = 70, mean = 0.5, sd = 3, trim = 0.2)
#' res3
#'

bundle_sim <- function(
    f_generate,                 # data generation function
    f_analyze,                  # data analysis function
    f_summarize = NULL,         # optional performance summary function
    reps_name = "reps",         # argument name for number of replications
    seed_name = "seed",               # name for seed argument
    summarize_opt_name = "summarize", # name for optional summarize argument
    row_bind_reps = TRUE              # whether to combine replications using rbind
) {

  # Get component arguments and argument names
  gen_args <- formals(f_generate)
  gen_arg_names <- names(gen_args)
  ana_args <- formals(f_analyze)
  ana_arg_names <- names(ana_args)
  all_names <- union(gen_arg_names, ana_arg_names[-1])
  arg_list <- c(reps_name = reps_name, seed_name = seed_name)
  function_list_str <- "`f_generate` or `f_analyze`"

  if (!is.null(f_summarize)) {
    sum_args <- formals(f_summarize)
    sum_arg_names <- names(sum_args)
    all_names <- union(all_names, sum_arg_names[-1])
    function_list_str <- "`f_generate`, `f_analyze`, or `f_summarize`"
    arg_list <- c(reps_name = reps_name, seed_name = seed_name, summarize_opt_name = summarize_opt_name)
  }

  # Check that all extra arguments of f_analyze have defaults
  if (any(sapply(ana_args[-1], \(x) identical(x, substitute())))) {
    stop("All arguments of `f_analyze` except for the first must have default values.")
  }

  # Check that all extra arguments of f_summarize have defaults
  if (!is.null(f_summarize)) {
    if (any(sapply(sum_args[-1], \(x) identical(x, substitute())))) {
      stop("All arguments of `f_summarize` except for the first must have default values.")
    }
  }

  # Check for argument name conflicts
  for (arg in names(arg_list)) {
    if (arg_list[arg] %in% all_names) {
      stop(paste0(
        arg_list[arg],
        " cannot be used as an argument name in ",
        function_list_str,
        ". Consider renaming arguments or set a different `",
        arg,
        "`."
      ))
    }
  }

  # Check for conflicting default arguments
  common_args <- intersect(gen_arg_names, ana_arg_names[-1])
  if (length(common_args) > 0L) {
    defaults_match <- identical(gen_args[common_args], ana_args[common_args])
    if (!defaults_match) stop("Default arguments of `f_analyze` do not match default arguments of `f_generate`. Consider renaming arguments to avoid ambiguity.")
  }
  if (!is.null(f_summarize)) {
    common_gen_args <- intersect(gen_arg_names, sum_arg_names)
    if (length(common_gen_args) > 0L) {
      defaults_match <- identical(gen_args[common_gen_args], sum_args[common_gen_args])
      if (!defaults_match) stop("Default arguments of `f_summarize` do not match default arguments of `f_generate`. Consider renaming arguments to avoid ambiguity.")
    }
    common_ana_args <- intersect(ana_arg_names[-1], sum_arg_names)
    if (length(common_ana_args) > 0L) {
      defaults_match <- identical(ana_args[common_ana_args], sum_args[common_ana_args])
      if (!defaults_match) stop("Default arguments of `f_summarize` do not match default arguments of `f_analyze`. Consider renaming arguments to avoid ambiguity.")
    }
  }

  front_arg <- stats::setNames(alist(reps = ), reps_name)
  extra_ana_args <- setdiff(ana_arg_names[-1], gen_arg_names)
  if (is.null(f_summarize)) {
    full_args <- c(front_arg, gen_args, ana_args[extra_ana_args])
  } else {
    extra_sum_args <- setdiff(sum_arg_names[-1], c(names(front_arg), names(gen_args), extra_ana_args))
    full_args <- c(front_arg, gen_args, ana_args[extra_ana_args], sum_args[extra_sum_args])
  }
  if (!is.null(seed_name)) {
    full_args[[seed_name]] <- NA_integer_
  }
  if (!is.null(f_summarize) && !is.null(summarize_opt_name)) {
    full_args[[summarize_opt_name]] <- TRUE
  }
  full_args <- as.pairlist(full_args)

  # Build generate call
  gen_cl <- as.call(c(quote(f_generate), lapply(stats::setNames(gen_arg_names, gen_arg_names), as.symbol)))

  # Build analyze call
  ana_arg_vals <- c("dat", ana_arg_names[-1])
  ana_cl <- as.call(c(quote(f_analyze), lapply(stats::setNames(ana_arg_vals, ana_arg_names), as.symbol)))

  # Build summarize call
  if (!is.null(f_summarize)) {
    sum_arg_vals <- c("res", sum_arg_names[-1])
    sum_cl <- as.call(c(quote(f_summarize), lapply(stats::setNames(sum_arg_vals, sum_arg_names), as.symbol)))
  }

  # Build iteration function

  bundled_sim <- function(reps, seed, summarize) {

    if (!is.na(seed)) {
      set.seed(seed)
    }

    res <- lapply(1:reps, function(x) {
      dat <- eval(gen_cl)
      eval(ana_cl)
    })

    res <- do.call(rbind, res)

    if (summarize) {
      res <- eval(sum_cl)
    }

    return(res)
  }

  formals(bundled_sim) <- full_args

  # adjust reps_name
  body(bundled_sim)[[3]][[3]][[2]][[3]] <- as.symbol(reps_name)

  # adjust summarize_opt_name
  if (is.null(f_summarize)) {
    body(bundled_sim)[[5]] <- NULL
  } else if (is.null(summarize_opt_name)) {
    body(bundled_sim)[[5]] <- body(bundled_sim)[[5]][[3]]
  } else {
    body(bundled_sim)[[5]][[2]] <- as.symbol(summarize_opt_name)
  }

  # adjust row binding
  if (!row_bind_reps) {
    body(bundled_sim)[[4]] <- NULL
  }

  # adjust seed_name
  if (is.null(seed_name)) {
    body(bundled_sim)[[2]] <- NULL
  } else {
    body(bundled_sim)[[2]][[2]][[2]][[2]] <- as.symbol(seed_name)
    body(bundled_sim)[[2]][[3]][[2]][[2]] <- as.symbol(seed_name)
  }

  return(bundled_sim)
}
meghapsimatrix/SimHelpers documentation built on Jan. 14, 2025, 5:16 a.m.