inst/doc/tidy_simulation.R

## ----setup, include=FALSE-----------------------------------------------------
library(knitr)
library(dplyr)
library(simglm)
knit_print.data.frame = function(x, ...) {
  res = paste(c('', '', kable(x, output = FALSE)), collapse = '\n')
  asis_output(res)
}

## ----simulate_fixed-----------------------------------------------------------
library(simglm)

set.seed(321) 

sim_arguments <- list(
  formula = y ~ 1 + x1 + x2,
  fixed = list(x1 = list(var_type = 'continuous', 
                         mean = 180, sd = 30),
               x2 = list(var_type = 'continuous', 
                         mean = 40, sd = 5)),
  sample_size = 10
)

simulate_fixed(data = NULL, sim_arguments)

## ----simulate_fixed_real------------------------------------------------------
set.seed(321) 

sim_arguments <- list(
  formula = y ~ 1 + weight + age,
  fixed = list(weight = list(var_type = 'continuous', mean = 180, sd = 30),
               age = list(var_type = 'continuous', mean = 40, sd = 5)),
  sample_size = 10
)

simulate_fixed(data = NULL, sim_arguments)

## ----simulate_fixed_ordinal---------------------------------------------------
set.seed(321) 

sim_arguments <- list(
  formula = y ~ 1 + weight + age,
  fixed = list(weight = list(var_type = 'continuous', mean = 180, sd = 30),
               age = list(var_type = 'ordinal', levels = 30:60)),
  sample_size = 10
)

simulate_fixed(data = NULL, sim_arguments)

## ----simulate_fixed_factor----------------------------------------------------
set.seed(321) 

sim_arguments <- list(
  formula = y ~ 1 + weight + age + sex,
  fixed = list(weight = list(var_type = 'continuous', mean = 180, sd = 30),
               age = list(var_type = 'ordinal', levels = 30:60),
               sex = list(var_type = 'factor', levels = c('male', 'female'))),
  sample_size = 10
)

simulate_fixed(data = NULL, sim_arguments)

## ----simulate_error-----------------------------------------------------------
set.seed(321)

sim_arguments <- list(
  sample_size = 10
)

simulate_error(data = NULL, sim_arguments)

## ----simulate_error_verbose---------------------------------------------------
set.seed(321)

sim_arguments <- list(
  error = list(variance = 1),
  sample_size = 10
)

simulate_error(data = NULL, sim_arguments)

## ----simulate_error_var25-----------------------------------------------------
set.seed(321)

sim_arguments <- list(
  error = list(variance = 25),
  sample_size = 10
)

simulate_error(data = NULL, sim_arguments)

## ----generate_response--------------------------------------------------------
set.seed(321) 

sim_arguments <- list(
  formula = y ~ 1 + weight + age + sex,
  fixed = list(weight = list(var_type = 'continuous', mean = 180, sd = 30),
               age = list(var_type = 'ordinal', levels = 30:60),
               sex = list(var_type = 'factor', levels = c('male', 'female'))),
  error = list(variance = 25),
  sample_size = 10,
  reg_weights = c(2, 0.3, -0.1, 0.5)
)

simulate_fixed(data = NULL, sim_arguments) %>%
  simulate_error(sim_arguments) %>%
  generate_response(sim_arguments)

## ----generate_3_categories----------------------------------------------------
set.seed(321) 

sim_arguments <- list(
  formula = y ~ 1 + weight + age + sex + grade,
  fixed = list(weight = list(var_type = 'continuous', mean = 180, sd = 30),
               age = list(var_type = 'ordinal', levels = 30:60),
               sex = list(var_type = 'factor', 
                          levels = c('male', 'female')),
               grade = list(var_type = 'factor', 
                            levels = c('freshman', 'sophomore',
                                       'junior', 'senior'))),
  error = list(variance = 25),
  sample_size = 100
)

simulate_fixed(data = NULL, sim_arguments) %>%
  simulate_error(sim_arguments) %>%
  count(grade)

## ----generate_3_categories_prob-----------------------------------------------
set.seed(321) 

sim_arguments <- list(
  formula = y ~ 1 + weight + age + sex + grade,
  fixed = list(weight = list(var_type = 'continuous', mean = 180, sd = 30),
               age = list(var_type = 'ordinal', levels = 30:60),
               sex = list(var_type = 'factor', 
                          levels = c('male', 'female')),
               grade = list(var_type = 'factor', 
                            levels = c('freshman', 'sophomore',
                                       'junior', 'senior'),
                            prob = c(.05, .3, .45, .2))),
  error = list(variance = 25),
  sample_size = 100
)

simulate_fixed(data = NULL, sim_arguments) %>%
  simulate_error(sim_arguments) %>%
  count(grade)

## ----indicator----------------------------------------------------------------
data.frame(
  grade = sample(c('freshman', 'sophomore', 'junior', 'senior'),
                 size = 10, replace = TRUE)
) %>% 
  mutate(sophomore = ifelse(grade == 'sophomore', 1, 0),
         junior = ifelse(grade == 'junior', 1, 0), 
         senior = ifelse(grade == 'senior', 1, 0))

## ----generate_3_categories_resp-----------------------------------------------
set.seed(321) 

sim_arguments <- list(
  formula = y ~ 1 + weight + age + sex + grade,
  fixed = list(weight = list(var_type = 'continuous', mean = 180, sd = 30),
               age = list(var_type = 'ordinal', levels = 30:60),
               sex = list(var_type = 'factor', 
                          levels = c('male', 'female')),
               grade = list(var_type = 'factor', 
                            levels = c('freshman', 'sophomore',
                                       'junior', 'senior'))),
  error = list(variance = 25),
  sample_size = 10000,
  reg_weights = c(2, .1, .55, 1.5, .75, 1.8, 2.5)
)

simulate_fixed(data = NULL, sim_arguments) %>%
  simulate_error(sim_arguments) %>%
  generate_response(sim_arguments) %>% 
  head()

## ----binary-------------------------------------------------------------------
set.seed(321) 

sim_arguments <- list(
  formula = y ~ 1 + weight + age + sex,
  fixed = list(weight = list(var_type = 'continuous', mean = 180, sd = 30),
               age = list(var_type = 'ordinal', levels = 30:60),
               sex = list(var_type = 'factor', levels = c('male', 'female'))),
  error = list(variance = 25),
  sample_size = 10,
  reg_weights = c(2, 0.3, -0.1, 0.5),
  outcome_type = 'binary'
)

simulate_fixed(data = NULL, sim_arguments) %>%
  simulate_error(sim_arguments) %>%
  generate_response(sim_arguments)

## ----count--------------------------------------------------------------------
set.seed(321) 

sim_arguments <- list(
  formula = y ~ 1 + weight + sex,
  fixed = list(weight = list(var_type = 'continuous', mean = 0, sd = 30),
               sex = list(var_type = 'factor', levels = c('male', 'female'))),
  error = list(variance = 25),
  sample_size = 10,
  reg_weights = c(2, 0.01, 0.5),
  outcome_type = 'count'
)

simulate_fixed(data = NULL, sim_arguments) %>%
  simulate_error(sim_arguments) %>%
  generate_response(sim_arguments)

## ----model_extract_coefficients-----------------------------------------------
set.seed(321) 

sim_arguments <- list(
  formula = y ~ 1 + weight + age + sex,
  fixed = list(weight = list(var_type = 'continuous', mean = 180, sd = 30),
               age = list(var_type = 'ordinal', levels = 30:60),
               sex = list(var_type = 'factor', levels = c('male', 'female'))),
  error = list(variance = 25),
  sample_size = 10,
  reg_weights = c(2, 0.3, -0.1, 0.5)
)

simulate_fixed(data = NULL, sim_arguments) %>%
  simulate_error(sim_arguments) %>%
  generate_response(sim_arguments) %>% 
  model_fit(sim_arguments) %>%
  extract_coefficients()

## ----model_fit_manual---------------------------------------------------------
set.seed(321) 

sim_arguments <- list(
  formula = y ~ 1 + weight + age + sex,
  fixed = list(weight = list(var_type = 'continuous', mean = 180, sd = 30),
               age = list(var_type = 'ordinal', levels = 30:60),
               sex = list(var_type = 'factor', levels = c('male', 'female'))),
  error = list(variance = 25),
  sample_size = 10,
  reg_weights = c(2, 0.3, -0.1, 0.5),
  model_fit = list(formula = y ~ 1 + age + sex,
                   model_function = 'lm'),
  reg_weights_model = c(2, -0.1, 0.5)
)

simulate_fixed(data = NULL, sim_arguments) %>%
  simulate_error(sim_arguments) %>%
  generate_response(sim_arguments) %>% 
  model_fit(sim_arguments) %>%
  extract_coefficients()

## ----replicate_simulation-----------------------------------------------------
set.seed(321) 

sim_arguments <- list(
  formula = y ~ 1 + weight + age + sex,
  fixed = list(weight = list(var_type = 'continuous', mean = 180, sd = 30),
               age = list(var_type = 'ordinal', levels = 30:60),
               sex = list(var_type = 'factor', levels = c('male', 'female'))),
  error = list(variance = 25),
  sample_size = 10,
  reg_weights = c(2, 0.3, -0.1, 0.5),
  model_fit = list(formula = y ~ 1 + age + sex,
                   model_function = 'lm'),
  reg_weights_model = c(2, -0.1, 0.5),
  replications = 10,
  extract_coefficients = TRUE
)

replicate_simulation(sim_arguments) %>%
  compute_statistics(sim_arguments)

## ----replicate_simulation_power_values----------------------------------------
set.seed(321) 

library(future)
plan(sequential)

sim_arguments <- list(
  formula = y ~ 1 + weight + age + sex,
  fixed = list(weight = list(var_type = 'continuous', mean = 180, sd = 30),
               age = list(var_type = 'ordinal', levels = 30:60),
               sex = list(var_type = 'factor', levels = c('male', 'female'))),
  error = list(variance = 25),
  sample_size = 50,
  reg_weights = c(2, 0.3, -0.1, 0.5),
  model_fit = list(formula = y ~ 1 + age + sex,
                   model_function = 'lm'),
  reg_weights_model = c(2, -0.1, 0.5),
  replications = 1000,
  power = list(
    dist = 'qt',
    alpha = .02,
    opts = list(df = 1)
  ),
  extract_coefficients = TRUE
)

replicate_simulation(sim_arguments) %>%
  compute_statistics(sim_arguments)

## ----nested-------------------------------------------------------------------
set.seed(321) 

sim_arguments <- list(
  formula = y ~ 1 + weight + age + sex + (1 | neighborhood),
  reg_weights = c(4, -0.03, 0.2, 0.33),
  fixed = list(weight = list(var_type = 'continuous', mean = 180, sd = 30),
               age = list(var_type = 'ordinal', levels = 30:60),
               sex = list(var_type = 'factor', levels = c('male', 'female'))),
  randomeffect = list(int_neighborhood = list(variance = 8, var_level = 2)),
  sample_size = list(level1 = 10, level2 = 20)
)

nested_data <- sim_arguments %>%
  simulate_fixed(data = NULL, .) %>%
  simulate_randomeffect(sim_arguments) %>%
  simulate_error(sim_arguments) %>%
  generate_response(sim_arguments)

head(nested_data, n = 10)
nrow(nested_data)

Try the simglm package in your browser

Any scripts or data that you put into this service are public.

simglm documentation built on Feb. 7, 2022, 9:08 a.m.