Nothing
params <-
list(EVAL = TRUE)
## ----SETTINGS-knitr, include=FALSE--------------------------------------------
stopifnot(require(knitr))
opts_chunk$set(
comment=NA,
eval = if (isTRUE(exists("params"))) params$EVAL else FALSE,
dev = "png",
dpi = 150,
fig.asp = 0.618,
fig.width = 5,
out.width = "60%",
fig.align = "center"
)
## ----stancode-----------------------------------------------------------------
stancode <- "
data {
int<lower=1> K;
int<lower=1> N;
matrix[N,K] x;
int y[N];
vector[N] offset;
real beta_prior_scale;
real alpha_prior_scale;
}
parameters {
vector[K] beta;
real intercept;
}
model {
y ~ poisson(exp(x * beta + intercept + offset));
beta ~ normal(0,beta_prior_scale);
intercept ~ normal(0,alpha_prior_scale);
}
generated quantities {
vector[N] log_lik;
for (n in 1:N)
log_lik[n] = poisson_lpmf(y[n] | exp(x[n] * beta + intercept + offset[n]));
}
"
## ----setup, message=FALSE-----------------------------------------------------
library("rstan")
library("loo")
seed <- 9547
set.seed(seed)
## ----modelfit-holdout, message=FALSE------------------------------------------
# Prepare data
data(roaches, package = "rstanarm")
roaches$roach1 <- sqrt(roaches$roach1)
roaches$offset <- log(roaches[,"exposure2"])
# 20% of the data goes to the test set:
roaches$test <- 0
roaches$test[sample(.2 * seq_len(nrow(roaches)))] <- 1
# data to "train" the model
data_train <- list(y = roaches$y[roaches$test == 0],
x = as.matrix(roaches[roaches$test == 0,
c("roach1", "treatment", "senior")]),
N = nrow(roaches[roaches$test == 0,]),
K = 3,
offset = roaches$offset[roaches$test == 0],
beta_prior_scale = 2.5,
alpha_prior_scale = 5.0
)
# data to "test" the model
data_test <- list(y = roaches$y[roaches$test == 1],
x = as.matrix(roaches[roaches$test == 1,
c("roach1", "treatment", "senior")]),
N = nrow(roaches[roaches$test == 1,]),
K = 3,
offset = roaches$offset[roaches$test == 1],
beta_prior_scale = 2.5,
alpha_prior_scale = 5.0
)
## ----fit-train----------------------------------------------------------------
# Compile
stanmodel <- stan_model(model_code = stancode)
# Fit model
fit <- sampling(stanmodel, data = data_train, seed = seed, refresh = 0)
## ----gen-test-----------------------------------------------------------------
gen_test <- gqs(stanmodel, draws = as.matrix(fit), data= data_test)
log_pd <- extract_log_lik(gen_test)
## ----elpd-holdout-------------------------------------------------------------
(elpd_holdout <- elpd(log_pd))
## ----prepare-folds, message=FALSE---------------------------------------------
# Prepare data
roaches$fold <- kfold_split_random(K = 10, N = nrow(roaches))
## -----------------------------------------------------------------------------
# Prepare a matrix with the number of post-warmup iterations by number of observations:
log_pd_kfold <- matrix(nrow = 4000, ncol = nrow(roaches))
# Loop over the folds
for(k in 1:10){
data_train <- list(y = roaches$y[roaches$fold != k],
x = as.matrix(roaches[roaches$fold != k,
c("roach1", "treatment", "senior")]),
N = nrow(roaches[roaches$fold != k,]),
K = 3,
offset = roaches$offset[roaches$fold != k],
beta_prior_scale = 2.5,
alpha_prior_scale = 5.0
)
data_test <- list(y = roaches$y[roaches$fold == k],
x = as.matrix(roaches[roaches$fold == k,
c("roach1", "treatment", "senior")]),
N = nrow(roaches[roaches$fold == k,]),
K = 3,
offset = roaches$offset[roaches$fold == k],
beta_prior_scale = 2.5,
alpha_prior_scale = 5.0
)
fit <- sampling(stanmodel, data = data_train, seed = seed, refresh = 0)
gen_test <- gqs(stanmodel, draws = as.matrix(fit), data= data_test)
log_pd_kfold[, roaches$fold == k] <- extract_log_lik(gen_test)
}
## ----elpd-kfold---------------------------------------------------------------
(elpd_kfold <- elpd(log_pd_kfold))
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.