rstanarm.R
In rstanarm: Bayesian Applied Regression Modeling via Stan

## ----SETTINGS-knitr, include=FALSE--------------------------------------------
stopifnot(require(knitr))
opts_chunk$set(
  comment=NA, 
  message = FALSE, 
  warning = FALSE, 
  eval = identical(Sys.getenv("NOT_CRAN"), "true"),
  dev = "png",
  dpi = 150,
  fig.asp = 0.618,
  fig.width = 5,
  out.width = "60%",
  fig.align = "center"
)

## ----SETTINGS-gg, include=TRUE------------------------------------------------
library(ggplot2)
library(bayesplot)
theme_set(bayesplot::theme_default())

## ----rstanarm-mle, eval = TRUE------------------------------------------------
data("womensrole", package = "HSAUR3")
womensrole$total <- womensrole$agree + womensrole$disagree
womensrole_glm_1 <- glm(cbind(agree, disagree) ~ education + gender,
                        data = womensrole, family = binomial(link = "logit"))
round(coef(summary(womensrole_glm_1)), 3)

## ----rstanarm-mcmc, results="hide", eval = TRUE-------------------------------
library(rstanarm)
womensrole_bglm_1 <- stan_glm(cbind(agree, disagree) ~ education + gender,
                              data = womensrole,
                              family = binomial(link = "logit"), 
                              prior = student_t(df = 7, 0, 5), 
                              prior_intercept = student_t(df = 7, 0, 5),
                              cores = 2, seed = 12345)
womensrole_bglm_1

## ----echo=FALSE, eval = TRUE--------------------------------------------------
print(womensrole_bglm_1)

## ----rstanarm-ci, eval = TRUE-------------------------------------------------
ci95 <- posterior_interval(womensrole_bglm_1, prob = 0.95, pars = "education")
round(ci95, 2)

## ----rstanarm-methods, eval = TRUE--------------------------------------------
cbind(Median = coef(womensrole_bglm_1), MAD_SD = se(womensrole_bglm_1))
summary(residuals(womensrole_bglm_1)) # not deviance residuals
cov2cor(vcov(womensrole_bglm_1))

## ----rstanarm-shinystan, eval = FALSE-----------------------------------------
# launch_shinystan(womensrole_bglm_1, ppd = FALSE)

## ----rstanarm-posterior_predict, eval = TRUE----------------------------------
y_rep <- posterior_predict(womensrole_bglm_1)
dim(y_rep)

## ----rstanarm-criticism-plot, fig.width=8, out.width="90%", fig.cap="Posterior predictive boxplots vs. observed datapoints", eval = TRUE----
par(mfrow = 1:2, mar = c(5,3.7,1,0) + 0.1, las = 3)
boxplot(sweep(y_rep[,womensrole$gender == "Male"], 2, STATS = 
               womensrole$total[womensrole$gender == "Male"], FUN = "/"), 
        axes = FALSE, main = "Male", pch = NA,
        xlab = "Years of Education", ylab = "Proportion of Agrees")
with(womensrole, axis(1, at = education[gender == "Male"] + 1, 
                      labels = 0:20))
axis(2, las = 1)
with(womensrole[womensrole$gender == "Male",], 
     points(education + 1,  agree / (agree + disagree), 
            pch = 16, col = "red"))
boxplot(sweep(y_rep[,womensrole$gender == "Female"], 2, STATS = 
          womensrole$total[womensrole$gender == "Female"], FUN = "/"), 
          axes = FALSE, main = "Female", pch = NA,
        xlab = "Years of Education", ylab = "")
with(womensrole, axis(1, at = education[gender == "Female"] + 1,
     labels = 0:20))
with(womensrole[womensrole$gender == "Female",], 
     points(education + 1,  agree / (agree + disagree), 
            pch = 16, col = "red"))

## ----rstanarm-update, results="hide", eval = TRUE-----------------------------
(womensrole_bglm_2 <- update(womensrole_bglm_1, formula. = . ~ . + I(education^2)))

## ----echo=FALSE---------------------------------------------------------------
print(womensrole_bglm_2)

## ----rstanarm-loo, eval = TRUE------------------------------------------------
loo_bglm_1 <- loo(womensrole_bglm_1)
loo_bglm_2 <- loo(womensrole_bglm_2)

## ----rstanarm-loo-plot, fig.width=7, out.width="70%", eval = TRUE-------------
par(mfrow = 1:2, mar = c(5,3.8,1,0) + 0.1, las = 3)
plot(loo_bglm_1, label_points = TRUE)
plot(loo_bglm_2, label_points = TRUE)

## ----rstanarm-loo-compare, eval = TRUE----------------------------------------
loo_compare(loo_bglm_1, loo_bglm_2)

## ----rstanarm-loo-print, eval = TRUE------------------------------------------
loo_bglm_1

## ----rstanarm-posterior_predict-manipulate, eval = TRUE-----------------------
# note: in newdata we want agree and disagree to sum to the number of people we
# want to predict for. the values of agree and disagree don't matter so long as
# their sum is the desired number of trials. we need to explicitly imply the
# number of trials like this because our original data are aggregate. if we had
# bernoulli data then it would be a given we wanted to predict for single
# individuals.
newdata <- data.frame(agree = c(0,0), disagree = c(100,100), education = c(12,16),
                      gender = factor("Female", levels = c("Male", "Female")))
y_rep <- posterior_predict(womensrole_bglm_2, newdata)
summary(apply(y_rep, 1, diff))

## ----rstanarm-rhat-fit, results='hide', warning=TRUE, eval = TRUE-------------
bad_rhat <- stan_glm(mpg ~ ., data = mtcars, iter = 20, chains = 2, seed = 12345)
good_rhat <- update(bad_rhat, iter = 1000, chains = 2, seed = 12345)

## ----rstasnarm-rhat-bad, eval = TRUE------------------------------------------
rhat <- summary(bad_rhat)[, "Rhat"]
rhat[rhat > 1.1]

## ----rstasnarm-rhat-good, eval = TRUE-----------------------------------------
any(summary(good_rhat)[, "Rhat"] > 1.1)