Parameters and Packages

library(tidyverse)
library(rstanarm)
library(bayestestR)
library(estimate)
library(see)

set.seed(333)

Convenience Functions

generate_parent_population <- function(noise = 1, n = 10000) {
  y <- rnorm(n, 0, 1)
  x <- y + rnorm(n, 0, noise)
  return(data.frame(x = x, y = y, noise = noise))
}



extract_sample <- function(data, n = 30) {
  return(data[sample(nrow(data), n), ])
}

Study 1: Effect of Magnitude

Parent Population

plots(
  generate_parent_population(noise = 0.5, n = 10000) %>%
    ggplot(aes(x = x, y = y)) +
    geom_point(size = 2, alpha = 0.1, shape = 16) +
    geom_smooth(method = "lm", se = FALSE) +
    theme_classic() +
    ggtitle("Noise = 0.5"),
  generate_parent_population(noise = 1.5, n = 10000) %>%
    ggplot(aes(x = x, y = y)) +
    geom_point(size = 2, alpha = 0.1, shape = 16) +
    geom_smooth(method = "lm", se = FALSE) +
    theme_classic() +
    ggtitle("Noise = 1.5"),
  generate_parent_population(noise = 4.5, n = 10000) %>%
    ggplot(aes(x = x, y = y)) +
    geom_point(size = 2, alpha = 0.1, shape = 16) +
    geom_smooth(method = "lm", se = FALSE) +
    theme_classic() +
    ggtitle("Noise = 4.5")
)

Simulation

data <- data.frame()

for (n in c(15, 30, 45, 60, 120, 240)) {
  cat(paste0("\n", round(n / 240 * 100, 1), "%"))
  for (noise in seq(0.1, 5, length.out = 1000)) {
    cat(".")

    parent <- generate_parent_population(noise = noise, n = 100000)
    parent_effect <- insight::get_parameters(lm(y ~ x, data = parent))[2, 2]

    sample <- extract_sample(parent, n = n)
    model <- stan_glm(y ~ x, data = sample, refresh = 0)
    sensitivity <- bayestestR::sensitivity_to_prior(model, index = c("Median", "Mean", "MAP"), magnitude = 10)

    out <- data.frame(
      Sensitivity_Median = sensitivity$Median,
      Sensitivity_Mean = sensitivity$Mean,
      Sensitivity_MAP = sensitivity$MAP,
      magnitude = 10,
      noise = noise,
      n = n,
      parent_n = 100000,
      parent_effect = parent_effect
    )
    out <- cbind(out, bayestestR::describe_posterior(model)[2, ])


    data <- rbind(
      data,
      out
    )
  }
  write.csv(data, "sensitivity_to_priors.csv", row.names = FALSE)
}

Results

data <- read.csv("https://raw.github.com/easystats/circus/main/data/sensitivity_to_priors.csv")

df <- data %>%
  mutate(
    noise_factor = as.factor(cut(noise, 4, dig.lab = 0)),
    sample_size = fct_rev(as.factor(n))
  ) %>%
  filter(Sensitivity_Median < 2)

Visualisation

df %>%
  ggplot(aes(x = Sensitivity_Median, fill = sample_size)) +
  geom_density(alpha = 0.5) +
  ggtitle("Relationship with Sample Size")
df %>%
  ggplot(aes(x = Sensitivity_Median, fill = noise_factor)) +
  geom_density(alpha = 0.5) +
  ggtitle("Relationship with Noise")
df %>%
  ggplot(aes(x = noise, y = Sensitivity_Median, fill = sample_size, color = sample_size)) +
  geom_point2(size = 3, alpha = 0.3) +
  geom_smooth(alpha = 0.3) +
  ggtitle("Relationship with Noise and Sample Size")
df %>%
  ggplot(aes(x = noise_factor, y = Sensitivity_Median, fill = sample_size)) +
  geom_boxplot() +
  ggtitle("Relationship with Noise and Sample Size")
see::plots(
  df %>%
    ggplot(aes(x = pd, y = Sensitivity_Median, color = sample_size)) +
    geom_point2(size = 3, alpha = 0.3) +
    ggtitle("Relationship with the pd and the sample size"),
  df %>%
    ggplot(aes(x = pd, y = Sensitivity_Median, color = noise_factor)) +
    geom_point2(size = 3, alpha = 0.3) +
    ggtitle("Relationship with the pd and the noise")
)
see::plots(
  df %>%
    ggplot(aes(x = ROPE_Percentage, y = Sensitivity_Median, color = sample_size)) +
    geom_point2(size = 3, alpha = 0.3) +
    ggtitle("Relationship with the ROPE and the sample size"),
  df %>%
    ggplot(aes(x = ROPE_Percentage, y = Sensitivity_Median, color = noise_factor)) +
    geom_point2(size = 3, alpha = 0.3) +
    ggtitle("Relationship with the ROPE and the noise")
)

Statistics

model <- lm(Sensitivity_Median ~ poly(noise, 2, raw = TRUE) * poly(n, 2, raw = TRUE), data = df)
summary(model)

Conclusion

Sensitivity to priors seems to be an interesting integrative index of robustness of parameters, being sensitive to both noise and sample size.



easystats/circus documentation built on March 26, 2024, 9:30 p.m.