This code was used by the articles presented in bayestestR to simulate statistical models.
library(tidyverse) library(logspline) library(bayestestR) library(parameters) library(see) library(rstanarm)
generate_data <- function(true_effect = 0, outcome_type = "linear", sample_size = 50, error = 0) { # Generate data data <- data.frame(x = sort(rnorm(sample_size, 0, 1))) if (outcome_type == "linear") { if (true_effect == 1) { data$y <- data$x } else { data$y <- parameters::standardize(cos(data$x * 5)) } } else { if (true_effect == 1) { data$y <- ifelse(data$x > 0, 1, 0) } else { data$y <- ifelse(cos(data$x * 5) > 0, 1, 0) } } # Add error data$x <- parameters::standardize(data$x + rnorm(sample_size, 0, error)) return(data) }
compute_models <- function(data, outcome_type = "linear", chains = 4, iter = 2000, warmup = 1 / 2) { if (outcome_type == "linear") { model_frequentist <- lm(y ~ x, data = data) model_bayesian <- stan_glm(y ~ x, data = data, prior = normal(0, 1), chains = chains, iter = iter, warmup = round(warmup * iter), refresh = 0) } else { model_frequentist <- glm(y ~ x, data = data, family = "binomial") model_bayesian <- stan_glm(y ~ x, data = data, family = "binomial", prior = normal(0, 1), chains = chains, iter = iter, warmup = round(warmup * iter), refresh = 0) } return( list( frequentist = model_frequentist, bayesian = model_bayesian ) ) }
compute_indices <- function(models) { params <- parameters::model_parameters(models$frequentist)[2, ] %>% dplyr::select(Coefficient, SE, CI_low_95_freq = CI_low, CI_high_95_freq = CI_high, p_value = p) posterior <- as.data.frame(models$bayesian)$x algorithm <- insight::find_algorithm(models$bayesian) params$chains <- algorithm$chains params$iterations <- algorithm$iterations params$warmup <- algorithm$warmup / params$iterations params$samples <- nrow(posterior) prior <- bayestestR::describe_prior(models$bayesian)[2, ] params$Prior_Distribution <- prior$Prior_Distribution params$Prior_Location <- prior$Prior_Location params$Prior_Scale <- prior$Prior_Scale params$Median <- median(posterior) params$Mean <- mean(posterior) map <- map_estimate(posterior) params$MAP <- map params$MAP_density <- attributes(map)$MAP_density params$p_direction <- p_direction(posterior, method = "direct") params$p_direction_AUC <- p_direction(posterior, method = "logspline") params$p_MAP <- p_map(posterior) range <- rope_range(models$bayesian) params$ROPE_range <- range[[2]] params$ROPE_89 <- rope(posterior, range = range, ci = 0.89)$ROPE_Percentage params$ROPE_90 <- rope(posterior, range = range, ci = 0.90)$ROPE_Percentage params$ROPE_95 <- rope(posterior, range = range, ci = 0.95)$ROPE_Percentage params$ROPE_full <- rope(posterior, range = range, ci = 1)$ROPE_Percentage params$p_ROPE <- tryCatch( { p_rope(posterior, range = range) }, error = function(error_condition) { NA } ) # Bayesfactor params$BF <- as.numeric(bayesfactor_parameters(posterior, prior = distribution_normal(length(posterior), mean = params$Prior_Location, sd = params$Prior_Scale ), null = 0, verbose = FALSE )) params$BF_log <- log(params$BF) # CIs ci_95 <- hdi(posterior, ci = 0.95) params$CI_low_95_hdi <- ci_95$CI_low params$CI_high_95_hdi <- ci_95$CI_high ci_90 <- hdi(posterior, ci = 0.90) params$CI_low_90_hdi <- ci_90$CI_low params$CI_high_90_hdi <- ci_90$CI_high ci_89 <- hdi(posterior, ci = 0.89) params$CI_low_89_hdi <- ci_89$CI_low params$CI_high_89_hdi <- ci_89$CI_high ci_95 <- ci(posterior, ci = 0.95) params$CI_low_95_quantile <- ci_95$CI_low params$CI_high_95_quantile <- ci_95$CI_high ci_90 <- ci(posterior, ci = 0.90) params$CI_low_90_quantile <- ci_90$CI_low params$CI_high_90_quantile <- ci_90$CI_high ci_89 <- ci(posterior, ci = 0.89) params$CI_low_89_quantile <- ci_89$CI_low params$CI_high_89_quantile <- ci_89$CI_high params }
generate_and_process <- function(true_effect = 0, outcome_type = "linear", sample_size = 50, error = 0, chains = 4, iter = 2000, warmup = 1 / 2, modulate_priors = FALSE, priors_precision = 1) { data <- generate_data(true_effect = true_effect, outcome_type = outcome_type, sample_size = sample_size, error = error) models <- compute_models(data, outcome_type = outcome_type, chains = chains, iter = iter, warmup = warmup) params <- compute_indices(models) params$Prior <- "Uninformative" if (modulate_priors) { # Congruent models$bayesian <- update(models$bayesian, prior = normal( location = params$Prior_Scale, scale = params$Prior_Scale * priors_precision, autoscale = FALSE ) ) params_congruent <- compute_indices(models) params_congruent$Prior <- "Congruent" # Incongruent models$bayesian <- update(models$bayesian, prior = normal( location = -1 * params$Prior_Scale, scale = params$Prior_Scale * priors_precision, autoscale = FALSE ) ) params_incongruent <- compute_indices(models) params_incongruent$Prior <- "Incongruent" params <- rbind(params, params_congruent, params_incongruent) } rownames(params) <- NULL params }
The simulation aimed at modulating the following characteristics:
We generated a dataset for each combination of these characteristics, resulting in a total of 2 * 2 * 9 * 1000 = 36000
Bayesian and frequentist models.
# options(mc.cores = parallel::detectCores()-1) options(mc.cores = 1) set.seed(333) # Parameters ------------------------------------------------------------------- outcome_types <- c("binary", "linear") effects <- c(0, 1) sample_sizes <- seq(20, 100, by = 10) errors <- seq(0.666, 6.66, length.out = 2) # Run -------------------------------------------------------------------------- # Initialize data all_data <- data.frame() n <- 1 # Start loop for (outcome_type in outcome_types) { print(outcome_type) for (true_effect in effects) { for (error in errors) { for (sample_size in sample_sizes) { for (iteration in 1:1) { cat(".") fail <- TRUE while (fail == TRUE) { tryCatch( { params <- generate_and_process(true_effect = true_effect, outcome_type = outcome_type, sample_size = sample_size, error = error) fail <- FALSE }, error = function(e) { }, warning = function(w) { cat("*") } ) } # Save data params <- mutate(params, true_effect = true_effect, sample_size = sample_size, outcome_type = outcome_type, error = error, iteration = iteration, n = n ) all_data <- rbind(all_data, params) n <- n + 1 } } } # write.csv(all_data, "bayesSim_study1.csv", row.names = FALSE) } }
# all_data <- read.csv("https://raw.github.com/easystats/circus/main/data/bayesSim_study1.csv")
effects <- c(0, 1) sample_sizes <- c(240) outcome_types <- c("binary", "linear") errors <- round(seq(0.666, 6.66, length.out = 3), 2) example_data <- data.frame() for (outcome_type in outcome_types) { for (true_effect in effects) { for (error in errors) { for (sample_size in sample_sizes) { data <- generate_data(true_effect, outcome_type, sample_size, error) example_data <- rbind( example_data, mutate(data, true_effect = true_effect, outcome_type = outcome_type, sample_size = sample_size, error = error ) ) } } } } linear_noeffect <- example_data %>% filter( outcome_type == "linear", true_effect == 0 ) %>% mutate( sample_size = as.factor(sample_size), error = as.factor(error) ) %>% ggplot(aes(x = x, y = y, color = error)) + geom_point2(alpha = 0.2, size = 3) + geom_smooth(method = "lm") + theme_modern() + scale_color_material_d() + facet_wrap(~error, scale = "free", nrow = 3) linear_effect <- example_data %>% filter( outcome_type == "linear", true_effect == 1 ) %>% mutate( sample_size = as.factor(sample_size), error = as.factor(error) ) %>% ggplot(aes(x = x, y = y, color = error)) + geom_point2(alpha = 0.2, size = 3) + geom_smooth(method = "lm") + theme_modern() + scale_color_material_d() + facet_wrap(~error, scale = "free", nrow = 3) binary_noeffect <- example_data %>% filter( outcome_type == "binary", true_effect == 0 ) %>% mutate( true_effect = as.factor(true_effect), sample_size = as.factor(sample_size), error = as.factor(error) ) %>% ggplot(aes(x = x, y = y, color = error)) + geom_point2(alpha = 0.2, size = 3) + geom_smooth(method = "glm") + theme_modern() + scale_color_material_d() + facet_wrap(~ true_effect * error, scale = "free", nrow = 3) binary_effect <- example_data %>% filter( outcome_type == "binary", true_effect == 1 ) %>% mutate( true_effect = as.factor(true_effect), sample_size = as.factor(sample_size), error = as.factor(error) ) %>% ggplot(aes(x = x, y = y, color = error)) + geom_point2(alpha = 0.2, size = 3) + geom_smooth(method = "glm") + theme_modern() + scale_color_material_d() + facet_wrap(~ true_effect * error, scale = "free", nrow = 3) plots(linear_effect, linear_noeffect, binary_effect, binary_noeffect, nrow = 2, tags = c( "Linear - Effect", "Linear - No effect", "Binary - Effect", "Binary - No effect" ) )
all_data %>% mutate( sample_size = as.factor(sample_size), beta = as.numeric(beta), true_effect = as.factor(true_effect) ) %>% ggplot(aes(x = true_effect, y = beta, fill = sample_size)) + geom_boxplot() + theme_modern() + scale_fill_material_d("rainbow") + xlab("\nSpecified Coefficient") + ylab("Actual Coefficient\n") + facet_wrap(~outcome_type, scale = "free_y") all_data %>% mutate( sample_size = as.factor(sample_size), beta = as.numeric(beta), error = as.numeric(error), true_effect = as.factor(true_effect) ) %>% ggplot(aes(x = error, y = beta, color = sample_size)) + geom_point2(alpha = 0.01, size = 3) + geom_smooth(se = FALSE) + theme_modern() + scale_color_material_d("rainbow") + xlab("Error") + ylab("Actual Coefficient\n") + facet_wrap(~ outcome_type * true_effect, scale = "free") all_data %>% mutate( sample_size = as.factor(sample_size), true_effect = as.factor(true_effect), beta = as.numeric(beta) ) %>% ggplot(aes(x = beta, fill = sample_size)) + geom_density(alpha = 0.1) + theme_modern() + scale_fill_material_d() + xlab("Actual Coefficient\n") + facet_wrap(~ true_effect * outcome_type, scale = "free")
The simulation aimed at modulating the following characteristics:
We generated 3 datasets for each combination of these characteristics, resulting in a total of 2 * 2 * 1000 * 9 = 34560
Bayesian and frequentist models.
# options(mc.cores = parallel::detectCores()-1) options(mc.cores = 1) set.seed(333) # Parameters ------------------------------------------------------------------- outcome_types <- c("binary", "linear") effects <- c(0, 1) chains <- c(4) # draws <- seq(50, 5000, by=50) # warmups <- seq(1/10, 9/10, by=1/10) # Short version draws <- seq(100, 1000, by = 100) warmups <- seq(3 / 10, 7 / 10, by = 1 / 10) # Run -------------------------------------------------------------------------- # Initialize data all_data <- data.frame() n <- 1 # Start loop for (outcome_type in outcome_types) { print(outcome_type) for (true_effect in effects) { for (n_chains in chains) { for (n_draws in draws) { for (warmup in warmups) { for (iteration in 1:1) { cat(".") fail <- TRUE while (fail == TRUE) { tryCatch( { params <- generate_and_process(true_effect = true_effect, outcome_type = outcome_type, sample_size = 50, error = 1, iter = n_draws, chains = n_chains, warmup = warmup) fail <- FALSE params }, error = function(e) { }, warning = function(w) { cat("*") } ) } # Save data params <- mutate(params, true_effect = true_effect, outcome_type = outcome_type, iteration = iteration, n = n ) all_data <- rbind(all_data, params) n <- n + 1 } } } } } write.csv(all_data, "bayesSim_study2.csv", row.names = FALSE) }
# all_data <- read.csv("https://raw.github.com/easystats/circus/main/data/bayesSim_study2.csv")
The simulation aimed at modulating the following characteristics:
We generated 3 datasets for each combination of these characteristics, resulting
in a total of 2 * 2 * 3 * 1000 = 12000
Bayesian and frequentist models.
# options(mc.cores = parallel::detectCores()) set.seed(333) # Parameters ------------------------------------------------------------------- outcome_types <- c("binary", "linear") effects <- c(0, 1) priors_precisions <- seq(0.5, 2, length.out = 10) # Run -------------------------------------------------------------------------- # Initialize data all_data <- data.frame() n <- 1 # Start loop for (outcome_type in outcome_types) { print(outcome_type) for (true_effect in effects) { for (priors_precision in priors_precisions) { for (iteration in 1:1) { cat(".") fail <- TRUE while (fail == TRUE) { tryCatch( { params <- generate_and_process(true_effect = true_effect, outcome_type = outcome_type, sample_size = 50, error = 1, modulate_priors = TRUE, priors_precision = priors_precision) fail <- FALSE params }, error = function(e) { }, warning = function(w) { cat("*") } ) } # Save data params <- mutate(params, true_effect = true_effect, outcome_type = outcome_type, iteration = iteration, n = n ) all_data <- rbind(all_data, params) n <- n + 1 } } } # write.csv(all_data, "bayesSim_study3.csv", row.names = FALSE) }
# all_data <- read.csv("https://raw.github.com/easystats/circus/main/data/bayesSim_study2.csv")
sessionInfo()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.