inst/doc/example_2.R

## ---- include = FALSE---------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----setup, message = FALSE---------------------------------------------------
library(tidyr)
library(SimEngine)

## -----------------------------------------------------------------------------
sim <- new_sim()

create_regression_data <- function(n) {
  beta <- c(-1, 10)
  x <- rnorm(n)
  sigma2 <- exp(x)
  y <- rnorm(n=n, mean=(beta[1]+beta[2]*x), sd = sqrt(sigma2))
  return(data.frame(x=x, y=y))
}

## ---- message = FALSE, fig.width = 6------------------------------------------
dat <- create_regression_data(n=500)
linear_model <- lm(y~x, data=dat)
dat$residuals <- linear_model$residuals

library(ggplot2)
ggplot(dat, aes(x=x, y=residuals)) +
  geom_point() +
  theme_bw() +
  labs(x="x", y="residual")

## -----------------------------------------------------------------------------
model_vcov <- function(data) {
  mod <- lm(y~x, data=data)
  return(list("coef"=mod$coefficients, "vcov"=diag(vcov(mod))))
}

sandwich_vcov <- function(data) {
  mod <- lm(y~x, data=data)
  return(list("coef"=mod$coefficients, "vcov"=diag(vcovHC(mod))))
}

## -----------------------------------------------------------------------------
sim %<>% set_script(function() {
  data <- create_regression_data(n=L$n)
  estimates <- use_method(L$estimator, list(data))
  return(list(
    "beta0_est" = estimates$coef[1],
    "beta1_est" = estimates$coef[2],
    "beta0_se_est" = sqrt(estimates$vcov[1]),
    "beta1_se_est" = sqrt(estimates$vcov[2])
  ))
})

sim %<>% set_levels(
  estimator = c("model_vcov", "sandwich_vcov"),
  n = c(50, 100, 500, 1000)
)

sim %<>% set_config(
  num_sim = 500,
  seed = 24,
  packages = c("sandwich")
)

sim %<>% run()

## -----------------------------------------------------------------------------
summarized_results <- sim %>% summarize(
  list(stat="mean", name="mean_se_beta0", x="beta0_se_est"),
  list(stat="mean", name="mean_se_beta1", x="beta1_se_est"),
  list(stat="coverage", name="cov_beta0", estimate="beta0_est",
       se="beta0_se_est", truth=-1),
  list(stat="coverage", name="cov_beta1", estimate="beta1_est",
       se="beta1_se_est", truth=10)
)

print(summarized_results)

## -----------------------------------------------------------------------------
plot_results <- function(which_graph, n_est) {
  if (n_est == 3) {
    values <- c("#999999", "#E69F00", "#56B4E9")
    breaks <- c("model_vcov", "sandwich_vcov", "bootstrap_vcov")
    labels <- c("Model-based", "Sandwich", "Bootstrap")
  } else {
    values <- c("#999999", "#E69F00")
    breaks <- c("model_vcov", "sandwich_vcov")
    labels <- c("Model-based", "Sandwich")
  }
  if (which_graph == "width") {
    summarized_results %>%
    pivot_longer(
      cols = c("mean_se_beta0", "mean_se_beta1"),
      names_to = "parameter",
      names_prefix = "mean_se_"
    ) %>%
    dplyr::mutate(value_j = jitter(value, amount = 0.01)) %>%
    ggplot(aes(x=n, y=1.96*value_j, color=estimator)) +
    geom_line(aes(linetype=parameter)) +
    geom_point() +
    theme_bw() +
    ylab("Average CI width") +
    scale_color_manual(
      values = values,
      breaks = breaks,
      name = "SE estimator",
      labels = labels
    ) +
    scale_linetype_discrete(
      breaks = c("beta0", "beta1"),
      name = "Parameter",
      labels = c(expression(beta[0]), expression(beta[1]))
    )
  } else {
    summarized_results %>%
    pivot_longer(
      cols = c("cov_beta0","cov_beta1"),
      names_to = "parameter",
      names_prefix = "cov_"
    ) %>%
    dplyr::mutate(value_j = jitter(value, amount = 0.01)) %>%
    ggplot(aes(x=n, y=value, color=estimator)) +
    geom_line(aes(linetype = parameter)) +
    geom_point() +
    theme_bw() +
    ylab("Coverage") +
    scale_color_manual(
      values = values,
      breaks = breaks,
      name = "SE estimator",
      labels = labels
    ) +
    scale_linetype_discrete(
      breaks = c("beta0", "beta1"),
      name = "Parameter",
      labels = c(expression(beta[0]), expression(beta[1]))
    ) +
    geom_hline(yintercept=0.95)
  }
}

## ---- message = FALSE, fig.width = 6------------------------------------------
plot_results("width", 2)
plot_results("coverage", 2)

## -----------------------------------------------------------------------------
bootstrap_vcov <- function(data) {
  mod <- lm(y~x, data=data)
  boot_ests <- matrix(NA, nrow=100, ncol=2)
  for (j in 1:100) {
    indices <- sample(1:nrow(data), size=nrow(data), replace=TRUE)
    boot_dat <- data[indices,]
    boot_mod <- lm(y~x, data=boot_dat)
    boot_ests[j,] <- boot_mod$coefficients
  }
  boot_v1 <- var(boot_ests[,1])
  boot_v2 <- var(boot_ests[,2])
  return(list("coef"=mod$coefficients, "vcov"=c(boot_v1, boot_v2)))
}

sim %<>% set_levels(
  estimator = c("model_vcov", "sandwich_vcov", "bootstrap_vcov"),
  n = c(50, 100, 500, 1000)
)

sim %<>% set_config(
  num_sim = 500,
  seed = 24,
  parallel = TRUE,
  n_cores = 2,
  packages = c("sandwich")
)

sim %<>% update_sim()

## ---- fig.width = 6-----------------------------------------------------------
summarized_results <- sim %>% summarize(
  list(stat="mean", name="mean_se_beta0", x="beta0_se_est"),
  list(stat="mean", name="mean_se_beta1", x="beta1_se_est"),
  list(stat="coverage", name="cov_beta0", estimate="beta0_est",
       se="beta0_se_est", truth=-1),
  list(stat="coverage", name="cov_beta1", estimate="beta1_est",
       se="beta1_se_est", truth=10)
)

plot_results("width", 3)
plot_results("coverage", 3)

Try the SimEngine package in your browser

Any scripts or data that you put into this service are public.

SimEngine documentation built on Oct. 27, 2023, 1:07 a.m.