inst/doc/simulation_arguments.R

## ----setup, include=FALSE-----------------------------------------------------
library(knitr)
library(dplyr)
library(simglm)
knit_print.data.frame = function(x, ...) {
  res = paste(c('', '', kable(x, output = FALSE)), collapse = '\n')
  asis_output(res)
}

## ----fixed_sim----------------------------------------------------------------
library(simglm)

set.seed(321) 

# To-DO: Add knot variable and debug

sim_arguments <- list(
  formula = y ~ 1 + time + weight + age + treat + (1 + time| id),
  fixed = list(time = list(var_type = 'time'),
               weight = list(var_type = 'continuous', mean = 180, sd = 30),
               age = list(var_type = 'ordinal', levels = 30:60, var_level = 2),
               treat = list(var_type = 'factor', 
                            levels = c('Treatment', 'Control'),
                            var_level = 2)),
  sample_size = list(level1 = 10, level2 = 20)
)

fixed_data <- simulate_fixed(data = NULL, sim_arguments)
head(fixed_data, n = 20)

## ----time_var-----------------------------------------------------------------
set.seed(321) 

# To-DO: Add knot variable and debug

sim_arguments <- list(
  formula = y ~ 1 + time + weight + age + treat + (1 + time| id),
  fixed = list(time = list(var_type = 'time',
                           time_levels = c(0, 0.5, 1, 1.5, 2, 2.5, 3, 4, 5, 6)),
               weight = list(var_type = 'continuous', mean = 180, sd = 30),
               age = list(var_type = 'ordinal', levels = 30:60, var_level = 2),
               treat = list(var_type = 'factor', 
                            levels = c('Treatment', 'Control'),
                            var_level = 2)),
  sample_size = list(level1 = 10, level2 = 20)
)

fixed_data <- simulate_fixed(data = NULL, sim_arguments)
head(fixed_data, n = 20)

## ----rbeta--------------------------------------------------------------------
set.seed(321) 

sim_arguments <- list(
  formula = y ~ 1 + time + weight + age + treat + (1 + time| id),
  fixed = list(time = list(var_type = 'time',
                           time_levels = c(0, 0.5, 1, 1.5, 2, 2.5, 3, 4, 5, 6)),
               weight = list(var_type = 'continuous', dist = 'rgamma', 
                             shape = 3),
               age = list(var_type = 'ordinal', levels = 30:60, var_level = 2),
               treat = list(var_type = 'factor', 
                            levels = c('Treatment', 'Control'),
                            var_level = 2)),
  sample_size = list(level1 = 10, level2 = 20)
)

fixed_data <- simulate_fixed(data = NULL, sim_arguments)
head(fixed_data, n = 20)

## ----dist_beta----------------------------------------------------------------
library(ggplot2)

ggplot(fixed_data, aes(x = weight)) + 
  geom_density() + 
  theme_bw()

## ----knot-args----------------------------------------------------------------
sim_args <- list(
    formula = y ~ 1  + age + age_knot,
    fixed = list(age = list(var_type = 'ordinal', levels = 30:60)),
    knot = list(age_knot = list(variable = 'age', 
                                knot_locations = 50)),
    sample_size = 500,
    error = list(variance = 10),
    reg_weights = c(2, .5, 1.5)
  )

## ----knot-sim-----------------------------------------------------------------
simulate_fixed(data = NULL, sim_args = sim_args) %>%
  head()

## ----knot-args-int------------------------------------------------------------
sim_args <- list(
    formula = y ~ 1  + age + age_knot + age:age_knot,
    fixed = list(age = list(var_type = 'ordinal', levels = 30:60)),
    knot = list(age_knot = list(variable = 'age', 
                                knot_locations = 50)),
    sample_size = 500,
    error = list(variance = 1000),
    reg_weights = c(2, .5, 1.5, 10)
  )

simulate_fixed(data = NULL, sim_args = sim_args) %>% 
  head()

## ----random_error-------------------------------------------------------------
set.seed(321) 

sim_arguments <- list(
  formula = y ~ 1 + time + weight + age + treat + (1 + time| id),
  fixed = list(time = list(var_type = 'time'),
               weight = list(var_type = 'continuous', mean = 180, sd = 30),
               age = list(var_type = 'ordinal', levels = 30:60, var_level = 2),
               treat = list(var_type = 'factor', 
                            levels = c('Treatment', 'Control'),
                            var_level = 2)),
  sample_size = list(level1 = 10, level2 = 20)
)

error_data <- simulate_error(data = NULL, sim_arguments)
head(error_data, n = 20)

## ----random_error_t-----------------------------------------------------------
set.seed(321) 

sim_arguments <- list(
  formula = y ~ 1 + time + weight + age + treat + (1 + time| id),
  fixed = list(time = list(var_type = 'time'),
               weight = list(var_type = 'continuous', mean = 180, sd = 30),
               age = list(var_type = 'ordinal', levels = 30:60, var_level = 2),
               treat = list(var_type = 'factor', 
                            levels = c('Treatment', 'Control'),
                            var_level = 2)),
  error = list(dist = 'rt', df = 4),
  sample_size = list(level1 = 10, level2 = 20)
)

error_data <- simulate_error(data = NULL, sim_arguments)
head(error_data, n = 20)

## ----re_10--------------------------------------------------------------------
set.seed(321) 

sim_arguments <- list(
  formula = y ~ 1 + time + weight + age + treat + (1 + time| id),
  fixed = list(time = list(var_type = 'time'),
               weight = list(var_type = 'continuous', mean = 180, sd = 30),
               age = list(var_type = 'ordinal', levels = 30:60, var_level = 2),
               treat = list(var_type = 'factor', 
                            levels = c('Treatment', 'Control'),
                            var_level = 2)),
  error = list(dist = 'rt', df = 4, variance = 10),
  sample_size = list(level1 = 10, level2 = 20)
)

error_data <- simulate_error(data = NULL, sim_arguments)
var(error_data$error)

## ----re_10_ts-----------------------------------------------------------------
set.seed(321) 

sim_arguments <- list(
  formula = y ~ 1 + time + weight + age + treat + (1 + time| id),
  fixed = list(time = list(var_type = 'time'),
               weight = list(var_type = 'continuous', mean = 180, sd = 30),
               age = list(var_type = 'ordinal', levels = 30:60, var_level = 2),
               treat = list(var_type = 'factor', 
                            levels = c('Treatment', 'Control'),
                            var_level = 2)),
  error = list(dist = 'rt', df = 4, variance = 10, ther_sim = TRUE),
  sample_size = list(level1 = 10, level2 = 20)
)

error_data <- simulate_error(data = NULL, sim_arguments)
var(error_data$error)

## ----re_10_specify------------------------------------------------------------
set.seed(321) 

sim_arguments <- list(
  formula = y ~ 1 + time + weight + age + treat + (1 + time| id),
  fixed = list(time = list(var_type = 'time'),
               weight = list(var_type = 'continuous', mean = 180, sd = 30),
               age = list(var_type = 'ordinal', levels = 30:60, var_level = 2),
               treat = list(var_type = 'factor', 
                            levels = c('Treatment', 'Control'),
                            var_level = 2)),
  error = list(dist = 'rt', df = 4, variance = 10, ther_val = c(0, sqrt(2))),
  sample_size = list(level1 = 10, level2 = 20)
)

error_data <- simulate_error(data = NULL, sim_arguments)
var(error_data$error)

## ----heterogeneity------------------------------------------------------------
simulation_arguments <- list(
  formula = y ~ 1 + group,
  fixed = list(group = list(var_type = 'factor', 
                            levels = c('male', 'female'))),
  sample_size = 500,
  error = list(variance = 1),
  heterogeneity = list(variable = 'group',
                       variance = c(1, 8)),
  reg_weights = c(0, .15)
)

hetero_data <- simulate_fixed(data = NULL, simulation_arguments) %>%
  simulate_error(simulation_arguments) %>%
  simulate_heterogeneity(simulation_arguments)

## ----heterogeneity-var--------------------------------------------------------
hetero_data %>% 
  group_by(group) %>% 
  summarise(var_error = var(error), 
            var_o_error = var(orig_error))

## ----random_args--------------------------------------------------------------
set.seed(321) 

sim_arguments <- list(
  formula = y ~ 1 + time + weight + age + treat + (1 + time| id),
  fixed = list(time = list(var_type = 'time'),
               weight = list(var_type = 'continuous', mean = 180, sd = 30),
               age = list(var_type = 'ordinal', levels = 30:60, var_level = 2),
               treat = list(var_type = 'factor', 
                            levels = c('Treatment', 'Control'),
                            var_level = 2)),
  randomeffect = list(int_id = list(variance = 8, var_level = 2),
                time_id = list(variance = 3, var_level = 2)),
  sample_size = list(level1 = 10, level2 = 20)
)

random_data <- simulate_randomeffect(data = NULL, sim_arguments)
head(random_data, n = 20)

## ----cross-class--------------------------------------------------------------
set.seed(321) 

sim_arguments <- list(
  formula = y ~ 1 + time + weight + age + treat + (1 + time| id) +
    (1 | neighborhood_id),
  fixed = list(time = list(var_type = 'time'),
               weight = list(var_type = 'continuous', mean = 180, sd = 30),
               age = list(var_type = 'ordinal', levels = 30:60, var_level = 2),
               treat = list(var_type = 'factor', 
                            levels = c('Treatment', 'Control'),
                            var_level = 2)),
  randomeffect = list(int_id = list(variance = 8, var_level = 2),
                time_id = list(variance = 3, var_level = 2),
                int_nid = list(variance = 5, var_level = 2,
                               cross_class = TRUE,
                               num_ids = 12)),
  sample_size = list(level1 = 10, level2 = 20)
)

random_data <- simulate_randomeffect(data = NULL, sim_arguments)
head(random_data, n = 20)

## ----correlate-fixed----------------------------------------------------------
sim_args <- list(formula = y ~ 1 + act + gpa + commute_time, 
                 fixed = list(act = list(var_type = 'ordinal',
                                         levels = 15:36),
                              gpa = list(var_type = 'continuous',
                                         mean = 2, 
                                         sd = .5),
                              commute_time = list(var_type = 'continuous',
                                         mean = 15, 
                                         sd = 6)),
                 correlate = list(fixed = data.frame(x = c('act', 'act', 'gpa'), 
                                                     y = c('gpa', 'commute_time', 'commute_time'), 
                                                     corr = c(0.5, .6, .2))),
                 sample_size = 1000)

correlate_attribute <- simulate_fixed(data = NULL, sim_args) %>%
  correlate_variables(sim_args)

head(correlate_attribute)

## ----correlate-fixed-values---------------------------------------------------
select(correlate_attribute, -X.Intercept., -level1_id) %>%
  cor(.)

## ----random-correlate---------------------------------------------------------
sim_args <- list(formula = y ~ 1 + act + gpa + sat + (1 + act | id), 
                 fixed = list(act = list(var_type = 'continuous',
                                         mean = 20, 
                                         sd = 4),
                              gpa = list(var_type = 'continuous',
                                         mean = 2, 
                                         sd = .5),
                              sat = list(var_type = 'continuous',
                                         mean = 500, 
                                         sd = 100)),
                 
                 randomeffect = list(int_id = list(variance = 8, var_level = 2),
                                     act_id = list(variance = 3, var_level = 2)),
                 sample_size = list(level1 = 100, level2 = 5000),
                 correlate = list(random = data.frame(x = 'int_id', y = 'act_id',
                                                      corr = .3))
                 )

random_correlate <- simulate_randomeffect(data = NULL, sim_args) %>%
  correlate_variables(sim_args)

head(random_correlate)

## ----random-correlate-values--------------------------------------------------
select(random_correlate, -level1_id, -id) %>%
  cor(.)

## ----missing_random-----------------------------------------------------------
set.seed(321) 

sim_arguments <- list(
  formula = y ~ 1 + time + weight + age + treat + (1 + time| id),
  reg_weights = c(4, 0.5, 0.75, 0, 0.33),
  fixed = list(time = list(var_type = 'time'),
               weight = list(var_type = 'continuous', mean = 180, sd = 30),
               age = list(var_type = 'ordinal', levels = 30:60, var_level = 2),
               treat = list(var_type = 'factor', 
                            levels = c('Treatment', 'Control'),
                            var_level = 2)),
  randomeffect = list(int_id = list(variance = 8, var_level = 2),
                      time_id = list(variance = 3, var_level = 2)),
  missing_data = list(miss_prop = .25, new_outcome = 'y_missing',
                      type = 'random'),
  sample_size = list(level1 = 10, level2 = 20)
)

data_w_missing <- sim_arguments %>%
  simulate_fixed(data = NULL, .) %>%
  simulate_randomeffect(sim_arguments) %>%
  simulate_error(sim_arguments) %>%
  generate_response(sim_arguments) %>%
  generate_missing(sim_arguments)

head(data_w_missing, n = 10)

## ----amount_missing-----------------------------------------------------------
prop.table(table(is.na(data_w_missing$y_missing)))

## ----missing_mar--------------------------------------------------------------
set.seed(321) 

sim_arguments <- list(
  formula = y ~ 1 + time + weight + age + treat + (1 + time| id),
  reg_weights = c(4, 0.5, 0.75, 0, 0.33),
  fixed = list(time = list(var_type = 'time'),
               weight = list(var_type = 'continuous', mean = 180, sd = 30,
                             var_level = 1),
               age = list(var_type = 'ordinal', levels = 30:60, var_level = 2),
               treat = list(var_type = 'factor', 
                            levels = c('Treatment', 'Control'),
                            var_level = 2)),
  randomeffect = list(int_id = list(variance = 8, var_level = 2),
                      time_id = list(variance = 3, var_level = 2)),
  missing_data = list(new_outcome = 'y_missing', miss_cov = 'weight', 
                      mar_prop = seq(from = 0, to = .9, length.out = 200),
                      type = 'mar'),
  sample_size = list(level1 = 10, level2 = 20)
)

data_w_missing <- sim_arguments %>%
  simulate_fixed(data = NULL, .) %>%
  simulate_randomeffect(sim_arguments) %>%
  simulate_error(sim_arguments) %>%
  generate_response(sim_arguments) %>%
  generate_missing(sim_arguments)

head(data_w_missing, n = 10)

## ----amount_missing_mar-------------------------------------------------------
prop.table(table(is.na(data_w_missing$y_missing)))

## ----missing_dropout----------------------------------------------------------
set.seed(321) 

sim_arguments <- list(
  formula = y ~ 1 + time + weight + age + treat + (1 + time| id),
  reg_weights = c(4, 0.5, 0.75, 0, 0.33),
  fixed = list(time = list(var_type = 'time'),
               weight = list(var_type = 'continuous', mean = 180, sd = 30),
               age = list(var_type = 'ordinal', levels = 30:60, var_level = 2),
               treat = list(var_type = 'factor', 
                            levels = c('Treatment', 'Control'),
                            var_level = 2)),
  randomeffect = list(int_id = list(variance = 8, var_level = 2),
                      time_id = list(variance = 3, var_level = 2)),
  missing_data = list(miss_prop = .25, new_outcome = 'y_missing',
                      clust_var = 'id', type = 'dropout'),
  sample_size = list(level1 = 10, level2 = 20)
)

data_w_missing <- sim_arguments %>%
  simulate_fixed(data = NULL, .) %>%
  simulate_randomeffect(sim_arguments) %>%
  simulate_error(sim_arguments) %>%
  generate_response(sim_arguments) %>%
  generate_missing(sim_arguments)

head(data_w_missing, n = 10)

## ----amount_missing_dropout---------------------------------------------------
prop.table(table(is.na(data_w_missing$y_missing)))

## ----missing_by_time----------------------------------------------------------
prop.table(table(is.na(data_w_missing$y_missing), data_w_missing$time))

## ----missing_dropout_spec-----------------------------------------------------
set.seed(321) 

sim_arguments <- list(
  formula = y ~ 1 + time + weight + age + treat + (1 + time| id),
  reg_weights = c(4, 0.5, 0.75, 0, 0.33),
  fixed = list(time = list(var_type = 'time'),
               weight = list(var_type = 'continuous', mean = 180, sd = 30),
               age = list(var_type = 'ordinal', levels = 30:60, var_level = 2),
               treat = list(var_type = 'factor', 
                            levels = c('Treatment', 'Control'),
                            var_level = 2)),
  randomeffect = list(int_id = list(variance = 8, var_level = 2),
                      time_id = list(variance = 3, var_level = 2)),
  missing_data = list(new_outcome = 'y_missing',
      dropout_location = c(3, 9, 1, 6, 7, 8, 6, 9, 2, 4, 6, 5, 8, 9, 4, 5, 
                           6, 7, 2, 9),
                      clust_var = 'id', type = 'dropout'),
  sample_size = list(level1 = 10, level2 = 20)
)

data_w_missing <- sim_arguments %>%
  simulate_fixed(data = NULL, .) %>%
  simulate_randomeffect(sim_arguments) %>%
  simulate_error(sim_arguments) %>%
  generate_response(sim_arguments) %>%
  generate_missing(sim_arguments)

head(data_w_missing, n = 10)

## ----amount_missing_dropout_spec----------------------------------------------
prop.table(table(is.na(data_w_missing$y_missing)))

## ----missing_by_time_dropout_spec---------------------------------------------
prop.table(table(is.na(data_w_missing$y_missing), data_w_missing$time))

## ----binomial_logit-----------------------------------------------------------
set.seed(321) 

sim_arguments <- list(
  formula = y ~ 1 + weight + age + sex,
  fixed = list(weight = list(var_type = 'continuous', mean = 0, sd = 30),
               age = list(var_type = 'ordinal', levels = 0:30),
               sex = list(var_type = 'factor', levels = c('male', 'female'))),
  error = list(variance = 25),
  sample_size = 10,
  reg_weights = c(2, 0.3, -0.1, 0.5),
  outcome_type = 'binary',
  model_fit = list(
    model_function = 'glm',
    family = binomial
  )
)

simulate_fixed(data = NULL, sim_arguments) %>%
  simulate_error(sim_arguments) %>%
  generate_response(sim_arguments) %>%
  model_fit(sim_arguments) %>% .$family

## ----binomial_probit----------------------------------------------------------
set.seed(321) 

sim_arguments <- list(
  formula = y ~ 1 + weight + age + sex,
  fixed = list(weight = list(var_type = 'continuous', mean = 0, sd = 30),
               age = list(var_type = 'ordinal', levels = 0:30),
               sex = list(var_type = 'factor', levels = c('male', 'female'))),
  error = list(variance = 25),
  sample_size = 10,
  reg_weights = c(2, 0.3, -0.1, 0.5),
  outcome_type = 'binary',
  model_fit = list(
    model_function = 'glm',
    family = binomial(link = 'probit')
  )
)

simulate_fixed(data = NULL, sim_arguments) %>%
  simulate_error(sim_arguments) %>%
  generate_response(sim_arguments) %>%
  model_fit(sim_arguments) %>% .$family

## ----gee, eval = FALSE--------------------------------------------------------
#  set.seed(321)
#  
#  # To-DO: Add knot variable and debug
#  
#  sim_arguments <- list(
#    formula = y ~ 1 + time + weight + age + treat + (1 + time| id),
#    fixed = list(time = list(var_type = 'time',
#                             time_levels = c(0, 0.5, 1, 1.5, 2, 2.5, 3, 4, 5, 6)),
#                 weight = list(var_type = 'continuous', mean = 0, sd = 30),
#                 age = list(var_type = 'ordinal', levels = 0:30, var_level = 2),
#                 treat = list(var_type = 'factor',
#                              levels = c('Treatment', 'Control'),
#                              var_level = 2)),
#    reg_weights = c(0.4, 0.2, -0.5, 1, -0.6),
#    randomeffect = list(int_id = list(variance = 8, var_level = 2),
#                  time_id = list(variance = 3, var_level = 2)),
#    error = list(variance = 5),
#    outcome_type = 'binary',
#    sample_size = list(level1 = 10, level2 = 20),
#    model_fit = list(
#      model_function = geepack::geeglm,
#      formula = y ~ 1 + time + weight + age + treat,
#      id = 'level1_id',
#      family = binomial,
#      corstr = 'ar1'
#    )
#  )
#  
#  simulate_fixed(data = NULL, sim_arguments) %>%
#    simulate_error(sim_arguments) %>%
#    simulate_randomeffect(sim_arguments) %>%
#    generate_response(sim_arguments) %>%
#    model_fit(sim_arguments) %>%
#    extract_coefficients()

## ----vary_simulation----------------------------------------------------------
library(future)
plan(sequential)

sim_arguments <- list(
  formula =  resp_var ~ 1 + time + factor(trt) + time:factor(trt) + 
    (1 + time | individual),
  reg_weights = c(4, 0.5, 0.75, 0),
  fixed = list(time = list(var_type = 'time'),
               trt = list(var_type = 'factor', levels = c('Drug', 'Placebo'), 
                          var_level = 2)
               ),
  randomeffect = list(int_clust = list(variance = .2, var_level = 2),
                      time_clust = list(variance = .3, var_level = 2)),
  replications = 3,
  error = list(variance = 1),
  vary_arguments = list(
    sample_size = list(list(level1 = 5, level2 = 50),
                       list(level1 = 5, level2 = 60))
  )
)

replicate_simulation(sim_arguments)

Try the simglm package in your browser

Any scripts or data that you put into this service are public.

simglm documentation built on Feb. 7, 2022, 9:08 a.m.