R/sim_fun.R

Defines functions simPower summary.simpow print.simpow

Documented in print.simpow summary.simpow

#' Run a power simulation.
#'
#' This function allows one to simulate the power of a test with the intention
#' of making it easier to run power calculations in general sources of data and
#' applied to general types of tests.
#'
#' @param data_generator A function that returns single instance of data on
#'   which to run test.  See details.
#' @param test_function A function that takes single instance of data and
#'   returns a value between 0 and 1 (a P-value).  First argument must be the
#'   dataset.
#' @param alpha The alpha level of the test.  Defaults to 5\%.
#' @param nsim The number of simulations to run as part of the sample size
#'   calculation.
#' @param seed Used if reproducibility is needed.  See details.
#' @param parallel Logical scalar indicating whether foreach should use parallel
#'   processing. See details.
#' @param ... Arguments passed along to the testing function.
#' @details This function can be enabled to use parallel processing brought to
#'   you by \code{foreach}.  This requires registering a parallel backend using
#'   \code{doParallel} and, optionally, making simulations reproducible using
#'   \code{doRNG}.
#'
#' @section Notes: An error will be issued when any P-value returned by a test
#'   function is missing.  For example, this occurs when conducting a t-test on
#'   data having no variability (e.g., low-mean Poisson counts).  I could
#'   include a test for this condition, but my preference if for keeping the
#'   testing function as light-weight as possible.
#'
#' @return An object of type \code{simpower}.
#' @import foreach doParallel doRNG
#' @importFrom magrittr %>%
#' @importFrom stringr str_remove
#' @importFrom checkmate makeAssertCollection assertNumber assertIntegerish
#'   assertFunction reportAssertions assertNumeric
#' @export
simPower = function(data_generator, test_function, alpha = 0.05, nsim, seed = NULL, parallel = FALSE, ...){

  #check stuff
  chk = makeAssertCollection()
  assertNumber(alpha, lower = 0, upper = 1, null.ok = FALSE, add = chk)
  assertIntegerish(nsim, lower=1, len = 1, any.missing = FALSE, add = chk)
  assertFunction(data_generator, add = chk)
  assertFunction(test_function, args="data", ordered = TRUE, add = chk)
  assertIntegerish(seed, null.ok = TRUE)
  reportAssertions(chk)

  #get names of data and test functions
  sim_call = match.call() %>%
    deparse %>%
    str_remove("^\\s+") %>%
    paste(collapse="")
  data_gen_name = deparse(substitute(data_generator))
  test_name = deparse(substitute(test_function))

  #if a seed is given, set it, but return to original state
  if(!is.null(seed)){
    old_rng_state = if(exists(".Random.seed", envir = globalenv()))
      get(".Random.seed", envir = globalenv()) else
        NULL
    set.seed(seed)
    on.exit(assign(".Random.seed", old_rng_state, envir = globalenv()))
  }

  #turn the data generation function into an iterator function
  ifun = function(){
    i = 0
    nextEl = function(){
      if(i < nsim)
        i <<- i + 1 else
          stop("StopIteration")

      data_generator()
    }

    obj = list(nextElem = nextEl)
    class(obj) = c("ifun", "abstractiter", "iter")
    obj
  }

  data_iterator = ifun()

  if(isTRUE(parallel) && requireNamespace("doParallel") && requireNamespace("doRNG")){
    cl = parallel::makeCluster(parallel::detectCores() - 1)
    doParallel::registerDoParallel(cl)
    if(!is.null(seed)) doRNG::registerDoRNG(seed)
  } else if(isTRUE(parallel)){
    warning(call. = FALSE, "Parallel requested but packages 'doParallel' and/or 'doRNG' not available.  Defaulting to sequential processing.")
    registerDoSEQ()
  }


  #get P-values and make sure they are in proper range
  pvals = foreach(df = data_iterator, .combine = c) %dopar%
    test_function(df, ...)

  if(exists("cl")) parallel::stopCluster(cl)

  #check that P-values are reasonable
  assertNumeric(pvals, lower=0, upper=1, any.missing = FALSE)

  #return a simpow object
  structure(
    list(
      pVal = pvals,
      alpha = alpha,
      simCall = sim_call,
      genCall = data_gen_name,
      testCall = test_name,
      seed = seed
    ),
    class="simpow")
}

#' \code{summary} method for \code{simpow} class.
#'
#' @param obj An object of type \code{simpow}.
#' @param length Length of a line used to print block of P-values.  Defaults to
#'     current linewidth * 0.8.
#'
#' @return Returns an invisible NULL.
#' @importFrom checkmate assertInt
#' @importFrom stringr str_extract_all
#' @importFrom magrittr %>%
#' @export
summary.simpow = function(obj, width){
  #set line width of P-value block, if not supplied
  if(missing(width))
    width = floor(getOption("width") * 0.8)

  assertInt(width, lower = 5)

  #wrap P-value block at desired width
  split_string = paste0("(?<= |^)[\\d\\. <]{1,", width, "}(?= |$)")
  fmt_p_block = paste(format_pval(obj$pVal), collapse=" ") %>%
    str_extract_all(split_string) %>%
    unlist(use.names = FALSE, recursive = FALSE)

  #handle truncation of P-value block
  if(length(fmt_p_block) > 11L)
    fmt_p_block = c(fmt_p_block[1:5], paste0("...<", length(fmt_p_block) - 10, " lines truncated>"), fmt_p_block[(length(fmt_p_block)-4):length(fmt_p_block)])

  #handle the pretty printing to screen
  cat("Call:\n  ", obj$simCall, "\n\n", sep="")
  cat("P-values:\n  ", paste(fmt_p_block, collapse = "\n  "), sep = "")

  #return P-values
  return(invisible())
}

#' Print method for \code{simpow} object.
#'
#' @param obj \code{simpow} object to print
#'
#' @return Invisibly returns P-values stored in \code{simpow} object.
#' @export
print.simpow = function(obj){
  pow = mean(obj$pVal < obj$alpha)
  cat("Call:\n  ", obj$simCall, "\n\n", sep="")
  cat("Power:\n ", pow)
  return(invisible(obj$pVal))
}
colinorourke/simpower documentation built on May 21, 2019, 1:42 a.m.