inst/doc/simTool.R

## ---- echo = FALSE, message = FALSE---------------------------------------------------------------
knitr::opts_chunk$set(comment = "")
options(width = 100, max.print = 100)
set.seed(123456)
library(simTool)
library(dplyr)
library(tidyr)
library(tibble)
library(ggplot2)
library(broom)
library(boot)

## -------------------------------------------------------------------------------------------------
regData <- function(n, SD) {
  x <- seq(0, 1, length = n)
  y <- 10 + 2 * x + rnorm(n, sd = SD)
  tibble(x = x, y = y)
}

eval_tibbles(
  expand_tibble(fun = "regData", n = 5L, SD = 1:2),
  expand_tibble(proc = "lm", formula = c("y~x", "y~I(x^2)")),
  post_analyze = broom::tidy,
  summary_fun = list(mean = mean, sd = sd),
  group_for_summary = "term",
  replications = 3
)

## -------------------------------------------------------------------------------------------------
print(dg <- dplyr::bind_rows(
  expand_tibble(fun = "rexp", n = c(10L, 20L), rate = 1:2),
  expand_tibble(fun = "rnorm", n = c(10L, 20L), mean = 1:2)
))

## -------------------------------------------------------------------------------------------------
print(pg <- dplyr::bind_rows(
  expand_tibble(proc = "min"),
  expand_tibble(proc = "mean", trim = c(0.1, 0.2))
))

## ---- eval=FALSE----------------------------------------------------------------------------------
#  1.  convert dg to R-functions  {g_1, ..., g_k}
#  2.  convert pg to R-functions  {f_1, ..., f_L}
#  3.  initialize result object
#  4.  append dg and pg to the result object
#  5.  t1 = current.time()
#  6.  for g in  {g_1, ..., g_k}
#  7.      for r in 1:replications (optionally in a parallel manner)
#  8.          data = g()
#  9.          for f in  {f_1, \ldots, f_L}
#  10.             append f(data) to the result object (optionally apply a post-analyze-function)
#  11.         optionally append data to the result object
#  12.      optionally summarize the result object over all
#           replications but separately for f_1, ..., f_L (and optional group variables)
#  13. t2 = current.time()
#  14. Estimate the number of replications per hour from t1 and t2

## -------------------------------------------------------------------------------------------------
dg <- expand_tibble(fun = "rnorm", n = 10, mean = 1:2)
pg <- expand_tibble(proc = "min")
eg <- eval_tibbles(data_grid = dg, proc_grid = pg, replications = 2)
eg

## -------------------------------------------------------------------------------------------------
eg <- eval_tibbles(data_grid = dg, proc_grid = pg, replications = 3)
eg

## -------------------------------------------------------------------------------------------------
eg <- eval_tibbles(data_grid = dg, proc_grid = pg, replications = 1)
eg$simulation
eg$generated_data

## -------------------------------------------------------------------------------------------------
dg <- expand_tibble(fun = "runif", n = c(10, 20, 30))
pg <- expand_tibble(proc = c("min", "max"))
eval_tibbles(
  data_grid = dg, proc_grid = pg, replications = 1000,
  summary_fun = list(mean = mean)
)
eval_tibbles(
  data_grid = dg, proc_grid = pg, replications = 1000,
  summary_fun = list(mean = mean, sd = sd)
)

## -------------------------------------------------------------------------------------------------
eval_tibbles(
  expand_tibble(fun = "regData", n = 5L, SD = 1:2),
  expand_tibble(proc = "lm", formula = c("y~x", "y~I(x^2)")),
  replications = 2
)

## -------------------------------------------------------------------------------------------------
eval_tibbles(
  expand_tibble(fun = "regData", n = 5L, SD = 1:2),
  expand_tibble(proc = "lm", formula = c("y~x", "y~I(x^2)")),
  post_analyze = purrr::compose(function(mat) mat["(Intercept)", "Estimate"], coef, summary.lm),
  replications = 2
)

## -------------------------------------------------------------------------------------------------
presever_rownames <- function(mat) {
  rn <- rownames(mat)
  ret <- tibble::as_tibble(mat)
  ret$term <- rn
  ret
}

eval_tibbles(
  expand_tibble(fun = "regData", n = 5L, SD = 1:2),
  expand_tibble(proc = "lm", formula = c("y~x", "y~I(x^2)")),
  post_analyze = purrr::compose(presever_rownames, coef, summary),
  replications = 3
)

## -------------------------------------------------------------------------------------------------
eval_tibbles(
  expand_tibble(fun = "regData", n = 5L, SD = 1:2),
  expand_tibble(proc = "lm", formula = c("y~x", "y~I(x^2)")),
  post_analyze = purrr::compose(presever_rownames, coef, summary),
  summary_fun = list(mean = mean, sd = sd),
  group_for_summary = "term",
  replications = 3
)

## -------------------------------------------------------------------------------------------------
eval_tibbles(
  data_grid = dg, proc_grid = pg, replications = 10,
  ncpus = 2, summary_fun = list(mean = mean)
)

## -------------------------------------------------------------------------------------------------
library(parallel)
cl <- makeCluster(rep("localhost", 2), type = "PSOCK")
eval_tibbles(
  data_grid = dg, proc_grid = pg, replications = 10,
  cluster = cl, summary_fun = list(mean = mean)
)
stopCluster(cl)

## -------------------------------------------------------------------------------------------------
library(boot)
ratio <- function(d, w) sum(d$x * w) / sum(d$u * w)
city.boot <- boot(city, ratio,
  R = 999, stype = "w",
  sim = "ordinary"
)
boot.ci(city.boot,
  conf = c(0.90, 0.95),
  type = c("norm", "basic", "perc", "bca")
)

## -------------------------------------------------------------------------------------------------
returnCity <- function() {
  city
}
bootConfInt <- function(data) {
  city.boot <- boot(data, ratio,
    R = 999, stype = "w",
    sim = "ordinary"
  )
  boot.ci(city.boot,
    conf = c(0.90, 0.95),
    type = c("norm", "basic", "perc", "bca")
  )
}

## -------------------------------------------------------------------------------------------------
dg <- expand_tibble(fun = "returnCity")
pg <- expand_tibble(proc = "bootConfInt")
eval_tibbles(dg, pg,
  replications = 10, ncpus = 2,
  cluster_libraries = c("boot"),
  cluster_global_objects = c("ratio")
)

## -------------------------------------------------------------------------------------------------
# masking summary from the base package
summary <- function(x) tibble(sd = sd(x))
g <- function(x) tibble(q0.1 = quantile(x, 0.1))
someFunc <- function() {
  summary <- function(x) tibble(sd = sd(x), mean = mean(x))

  dg <- expand_tibble(fun = "runif", n = 100)
  pg <- expand_tibble(proc = c("summary", "g"))

  # the standard is to use the global
  # environment, hence summary defined outside
  # of someFunc() will be used
  print(eval_tibbles(dg, pg))
  cat("--------------------------------------------------\n")
  # will use the local defined summary, but g
  # from the global environment, because
  # g is not locally defined.
  print(eval_tibbles(dg, pg, envir = environment()))
}
someFunc()

## -------------------------------------------------------------------------------------------------
dg <- expand_tibble(fun = c("rnorm"), mean = c(1,1000), sd = c(1,10), n = c(10L, 100L))
pg <- expand_tibble(proc = "quantile", probs = 0.975)
post_ana <- function(q_est, .truth){
  tibble::tibble(bias = q_est - stats::qnorm(0.975, mean = .truth$mean, sd = .truth$sd))
}
eval_tibbles(dg, pg, replications = 10^3, discard_generated_data = TRUE,
                   ncpus = 2,
                   post_analyze = post_ana,
                   summary_fun = list(mean = mean))

## -------------------------------------------------------------------------------------------------
dg <- dplyr::bind_rows(
  expand_tibble(fun = c("rnorm"), mean = 0, n = c(10L, 100L), .truth = qnorm(0.975)),
  expand_tibble(fun = c("rexp"), rate = 1, n = c(10L, 100L), .truth = qexp(0.975, rate = 1)),
  expand_tibble(fun = c("runif"), max = 2, n = c(10L, 100L), .truth = qunif(0.975, max = 2))
)
pg <- expand_tibble(proc = "quantile", probs = 0.975)
post_ana <- function(q_est, .truth){
  ret <- q_est - .truth
  names(ret) <- "bias"
  ret
}
eval_tibbles(dg, pg, replications = 10^3, discard_generated_data = TRUE,
                   ncpus = 2,
                   post_analyze = post_ana,
                   summary_fun = list(mean = mean))

## -------------------------------------------------------------------------------------------------
dg <- dplyr::bind_rows(
  expand_tibble(fun = c("rnorm"), mean = 0, n = c(10L, 1000L), 
                .truth = list(function(prob) qnorm(prob, mean = 0))),
  expand_tibble(fun = c("rexp"), rate = 1, n = c(10L, 1000L),
                .truth = list(function(prob) qexp(prob, rate = 1))),
  expand_tibble(fun = c("runif"), max = 2, n = c(10L, 1000L),
                .truth = list(function(prob) qunif(prob, max = 2)))
)
bias_quantile <- function(x, prob, .truth) {
  est <- quantile(x, probs = prob)
  ret <- est - .truth[[1]](prob)
  names(ret) <- "bias"
  ret
}
pg <- expand_tibble(proc = "bias_quantile", prob = c(0.9, 0.975))
eval_tibbles(dg, pg, replications = 10^3, discard_generated_data = TRUE,
                   ncpus = 1,
                   summary_fun = list(mean = mean))

## ---- eval = FALSE--------------------------------------------------------------------------------
#  6.  for g in  {g_1, ..., g_k}
#  7.      for r in 1:replications (optionally in a parallel manner)
#  8.          data = g()
#  9.          for f in  {f_1, \ldots, f_L}
#  10.             append f(data) to the result object (optionally apply a post-analyze-function)

## -------------------------------------------------------------------------------------------------
EVAL <- FALSE
if (Sys.getenv("NOT_CRAN") == "true") {
  EVAL <- TRUE
}

## ---- eval = EVAL---------------------------------------------------------------------------------
#  dg <- dplyr::bind_rows(
#    expand_tibble(fun = c("rnorm"), mean = 1, n = c(10L, 100L)),
#    expand_tibble(fun = c("rexp"), rate = 1, n = c(10L, 100L))
#  )
#  dg

## ---- eval = EVAL---------------------------------------------------------------------------------
#  pg <- expand_tibble(proc = c("mean", "median"))
#  pg

## ---- eval = EVAL---------------------------------------------------------------------------------
#  et <- eval_tibbles(dg, pg, replications = 10^4, ncpus = 2)
#  
#  et$simulation %>%
#    ggplot(aes(x = results, color = interaction(fun, n), fill = interaction(fun, n))) +
#    geom_density(alpha = 0.3) +
#    facet_wrap(~ proc)

## ---- eval = EVAL---------------------------------------------------------------------------------
#  bootstrap_ci <- function(x, conf.level, R = 999) {
#    b <- boot::boot(x, function(d, i) {
#      n <- length(i)
#      c(
#        mean = mean(d[i]),
#        variance = (n - 1) * var(d[i]) / n^2
#      )
#    }, R = R)
#    boot::boot.ci(b, conf = conf.level, type = "all")
#  }

## ---- eval = EVAL---------------------------------------------------------------------------------
#  post_analyze <- function(o, .truth) {
#    if (class(o) == "htest") {
#    #post-process the object returned by t.test
#      ci <- o$conf.int
#      return(tibble::tibble(
#        type = "t.test",
#        aspect = c("covered", "ci_length"),
#        value = c(ci[1] <= .truth && .truth <= ci[2], ci[2] - ci[1])
#      ))
#    } else if (class(o) == "bootci") {
#    #post-process the object returned by boot.ci
#      method = c("normal", "basic", "student", "percent", "bca")
#      ret = o[method]
#      lower = unlist(purrr::map(ret, ~dplyr::nth(.x, -2)))
#      upper = sapply(ret, dplyr::last)
#      type = paste("boot", method, sep = "_")
#  
#      return(
#        dplyr::bind_rows(
#        tibble::tibble(
#          type = type,
#          aspect = "covered",
#          value = as.integer(lower <= .truth & .truth <= upper)),
#        tibble::tibble(
#          type = type,
#          aspect = "ci_length",
#          value = upper - lower))
#      )
#    }
#  }

## ---- eval = EVAL---------------------------------------------------------------------------------
#  dg <- dplyr::bind_rows(
#    simTool::expand_tibble(fun = "rnorm", n = 10L, mean = 0, sd = sqrt(3), .truth = 0),
#    simTool::expand_tibble(fun = "runif", n = 10L, max = 6, .truth = 3),
#    simTool::expand_tibble(fun = "rexp", n = 10L, rate = 1 / sqrt(3), .truth = sqrt(3))
#  )
#  dg

## ---- eval = EVAL---------------------------------------------------------------------------------
#  pg <- simTool::expand_tibble(
#    proc = c("t.test","bootstrap_ci"),
#    conf.level = c(0.9, 0.95)
#  )
#  pg

## ---- eval = EVAL---------------------------------------------------------------------------------
#  et <- eval_tibbles(dg, pg,
#    replications = 10^3, ncpus = 2,
#    cluster_global_objects = "post_analyze",
#    post_analyze = post_analyze,
#    summary_fun = list(mean = mean),
#    group_for_summary = c("aspect", "type")
#  )
#  et

## ---- eval = EVAL, fig.width=13-------------------------------------------------------------------
#  et$simulation %>%
#    ggplot(aes(x = fun, y = value, group = type, fill = type, label = round(value, 2))) +
#    geom_col(position = "dodge") +
#    geom_label(position = position_dodge(0.9), size = 3) +
#    theme(legend.position = "bottom") +
#    facet_grid(aspect ~ conf.level, scales = "free")

## ---- eval = EVAL---------------------------------------------------------------------------------
#  t_test = function(x, conf.level){
#    tt <- t.test(x, conf.level = conf.level)
#  
#    # unify return
#    tibble::tibble(type = "t.test", lower = tt$conf.int[1], upper = tt$conf.int[2])
#  }
#  
#  bootstrap_ci <- function(x, conf.level, R = 999) {
#    b <- boot::boot(x, function(d, i) {
#      n <- length(i)
#      c(
#        mean = mean(d[i]),
#        variance = (n - 1) * var(d[i]) / n^2
#      )
#    }, R = R)
#    ci <- boot::boot.ci(b, conf = conf.level, type = "all")
#    method = c("normal", "basic", "student", "percent", "bca")
#    ret = ci[method]
#    lower = unlist(purrr::map(ret, ~dplyr::nth(.x, -2)))
#    upper = sapply(ret, dplyr::last)
#    type = paste("boot", method, sep = "_")
#  
#    # unify return
#    tibble::tibble(type = type, lower = lower, upper = upper)
#  }
#  
#  dg <- dplyr::bind_rows(
#    simTool::expand_tibble(fun = "rnorm", n = 10L, mean = 0, sd = sqrt(3), .truth = 0),
#    simTool::expand_tibble(fun = "runif", n = 10L, max = 6, .truth = 3),
#    simTool::expand_tibble(fun = "rexp", n = 10L, rate = 1 / sqrt(3), .truth = sqrt(3))
#  )
#  
#  pg <- simTool::expand_tibble(
#    proc = c("t_test","bootstrap_ci"),
#    conf.level = c(0.9, 0.95)
#  ) %>%
#    mutate(R = ifelse(proc == "bootstrap_ci", 100, NA))
#  
#  et <- eval_tibbles(dg, pg,
#    replications = 10^2, ncpus = 2
#  )
#  et

## ---- eval = EVAL---------------------------------------------------------------------------------
#  grps <- et$simulation %>%
#    select(-replications) %>%
#    select(fun:type) %>%
#    names
#  
#  et$simulation %>%
#    mutate(covered = lower <= .truth & .truth <= upper,
#           ci_length = upper - lower) %>%
#    group_by(.dots = grps) %>%
#    summarise(coverage = mean(covered),
#              ci_length = mean(ci_length))

Try the simTool package in your browser

Any scripts or data that you put into this service are public.

simTool documentation built on Jan. 8, 2021, 2:25 a.m.