Nothing
## ---- include = FALSE---------------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
## ----setup, message = FALSE---------------------------------------------------
library(tidyr)
library(SimEngine)
## -----------------------------------------------------------------------------
sim <- new_sim()
create_regression_data <- function(n) {
beta <- c(-1, 10)
x <- rnorm(n)
sigma2 <- exp(x)
y <- rnorm(n=n, mean=(beta[1]+beta[2]*x), sd = sqrt(sigma2))
return(data.frame(x=x, y=y))
}
## ---- message = FALSE, fig.width = 6------------------------------------------
dat <- create_regression_data(n=500)
linear_model <- lm(y~x, data=dat)
dat$residuals <- linear_model$residuals
library(ggplot2)
ggplot(dat, aes(x=x, y=residuals)) +
geom_point() +
theme_bw() +
labs(x="x", y="residual")
## -----------------------------------------------------------------------------
model_vcov <- function(data) {
mod <- lm(y~x, data=data)
return(list("coef"=mod$coefficients, "vcov"=diag(vcov(mod))))
}
sandwich_vcov <- function(data) {
mod <- lm(y~x, data=data)
return(list("coef"=mod$coefficients, "vcov"=diag(vcovHC(mod))))
}
## -----------------------------------------------------------------------------
sim %<>% set_script(function() {
data <- create_regression_data(n=L$n)
estimates <- use_method(L$estimator, list(data))
return(list(
"beta0_est" = estimates$coef[1],
"beta1_est" = estimates$coef[2],
"beta0_se_est" = sqrt(estimates$vcov[1]),
"beta1_se_est" = sqrt(estimates$vcov[2])
))
})
sim %<>% set_levels(
estimator = c("model_vcov", "sandwich_vcov"),
n = c(50, 100, 500, 1000)
)
sim %<>% set_config(
num_sim = 500,
seed = 24,
packages = c("sandwich")
)
sim %<>% run()
## -----------------------------------------------------------------------------
summarized_results <- sim %>% summarize(
list(stat="mean", name="mean_se_beta0", x="beta0_se_est"),
list(stat="mean", name="mean_se_beta1", x="beta1_se_est"),
list(stat="coverage", name="cov_beta0", estimate="beta0_est",
se="beta0_se_est", truth=-1),
list(stat="coverage", name="cov_beta1", estimate="beta1_est",
se="beta1_se_est", truth=10)
)
print(summarized_results)
## -----------------------------------------------------------------------------
plot_results <- function(which_graph, n_est) {
if (n_est == 3) {
values <- c("#999999", "#E69F00", "#56B4E9")
breaks <- c("model_vcov", "sandwich_vcov", "bootstrap_vcov")
labels <- c("Model-based", "Sandwich", "Bootstrap")
} else {
values <- c("#999999", "#E69F00")
breaks <- c("model_vcov", "sandwich_vcov")
labels <- c("Model-based", "Sandwich")
}
if (which_graph == "width") {
summarized_results %>%
pivot_longer(
cols = c("mean_se_beta0", "mean_se_beta1"),
names_to = "parameter",
names_prefix = "mean_se_"
) %>%
dplyr::mutate(value_j = jitter(value, amount = 0.01)) %>%
ggplot(aes(x=n, y=1.96*value_j, color=estimator)) +
geom_line(aes(linetype=parameter)) +
geom_point() +
theme_bw() +
ylab("Average CI width") +
scale_color_manual(
values = values,
breaks = breaks,
name = "SE estimator",
labels = labels
) +
scale_linetype_discrete(
breaks = c("beta0", "beta1"),
name = "Parameter",
labels = c(expression(beta[0]), expression(beta[1]))
)
} else {
summarized_results %>%
pivot_longer(
cols = c("cov_beta0","cov_beta1"),
names_to = "parameter",
names_prefix = "cov_"
) %>%
dplyr::mutate(value_j = jitter(value, amount = 0.01)) %>%
ggplot(aes(x=n, y=value, color=estimator)) +
geom_line(aes(linetype = parameter)) +
geom_point() +
theme_bw() +
ylab("Coverage") +
scale_color_manual(
values = values,
breaks = breaks,
name = "SE estimator",
labels = labels
) +
scale_linetype_discrete(
breaks = c("beta0", "beta1"),
name = "Parameter",
labels = c(expression(beta[0]), expression(beta[1]))
) +
geom_hline(yintercept=0.95)
}
}
## ---- message = FALSE, fig.width = 6------------------------------------------
plot_results("width", 2)
plot_results("coverage", 2)
## -----------------------------------------------------------------------------
bootstrap_vcov <- function(data) {
mod <- lm(y~x, data=data)
boot_ests <- matrix(NA, nrow=100, ncol=2)
for (j in 1:100) {
indices <- sample(1:nrow(data), size=nrow(data), replace=TRUE)
boot_dat <- data[indices,]
boot_mod <- lm(y~x, data=boot_dat)
boot_ests[j,] <- boot_mod$coefficients
}
boot_v1 <- var(boot_ests[,1])
boot_v2 <- var(boot_ests[,2])
return(list("coef"=mod$coefficients, "vcov"=c(boot_v1, boot_v2)))
}
sim %<>% set_levels(
estimator = c("model_vcov", "sandwich_vcov", "bootstrap_vcov"),
n = c(50, 100, 500, 1000)
)
sim %<>% set_config(
num_sim = 500,
seed = 24,
parallel = TRUE,
n_cores = 2,
packages = c("sandwich")
)
sim %<>% update_sim()
## ---- fig.width = 6-----------------------------------------------------------
summarized_results <- sim %>% summarize(
list(stat="mean", name="mean_se_beta0", x="beta0_se_est"),
list(stat="mean", name="mean_se_beta1", x="beta1_se_est"),
list(stat="coverage", name="cov_beta0", estimate="beta0_est",
se="beta0_se_est", truth=-1),
list(stat="coverage", name="cov_beta1", estimate="beta1_est",
se="beta1_se_est", truth=10)
)
plot_results("width", 3)
plot_results("coverage", 3)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.