knitr::opts_chunk$set( collapse = TRUE, comment = "#>" )
```{stan output.var="elnet_glmm"}
data {
int
parameters {
real beta_int;
vector[K] beta;
vector[ngrp] beta0;
real
model { ## Mean prediction and likelihood vector[N] x_beta_jj; for(n in 1:N) x_beta_jj[n] = X[n] * beta + beta_int + beta0[grp[n]]; y ~ normal(x_beta_jj, sigma);
## Elastic net penalty (pages 381-382 in Stan manual v2.17.0) for (k in 1:K) target += -lambda1 * fabs(beta[k]); # LASSO target += -lambda2 * dot_self(beta); # Ridge
## Priors sigma ~ cauchy(0, 2.5); sigma_int ~ cauchy(0, 2.5); beta_int ~ normal(0,5); beta0 ~ normal(0, sigma_int); beta ~ normal(0,5); }
generated quantities { vector[ngrp] yhat; vector[ngrp] log_lik; vector[K] beta_elastic_net; beta_elastic_net = (1+lambda2) * beta; for(n in 1:ngrp){ yhat[n] = Xoos[n] * beta_elastic_net + beta_int + beta0[n]; log_lik[n] = normal_lpdf(yoos[n] | yhat[n], sigma); } }
## Choose lambda based on LOO-CV Use leave-one-year-out cross-validation (LOO-CV) to find optimal penalty based on log predictive density (lpd). ```r library(modselr) library(tidyverse) library(rstan) library(ggmcmc) butterfly_data <- butterfly %>% filter(year < 2015) %>% select_if(~ !any(is.na(.))) # remove columns with NA lambda_vec <- seq(0, 30, by = 2) years_vec <- unique(butterfly_data$year) all_lpd <- {} for(ilambda in lambda_vec){ for(iyear in years_vec){ # Fetch training data training_data <- butterfly_data %>% filter(year != iyear) covar_matrix <- training_data %>% select(-year, -meada, -Nt, -Rt) %>% as.matrix() scale_data_and_info <- get_scaled_covars(covar_matrix) response_vector <- training_data %>% pull(Rt) subpop_ids <- training_data %>% pull(meada) # Fetch validation data validation_data <- butterfly_data %>% filter(year == iyear) Xoos <- validation_data %>% select(-year, -meada, -Nt, -Rt) %>% as.matrix() Xoos <- scale_oos_covars(Xoos, scale_data_and_info[[2]]) yoos <- validation_data %>% pull(Rt) stan_data <- list( N = length(response_vector), K = ncol(covar_matrix), ngrp = length(unique(subpop_ids)), grp = as.numeric(as.factor(subpop_ids)), y = response_vector, X = scale_data_and_info[[1]], yoos = yoos, Xoos = Xoos, lambda1 = ilambda, lambda2 = ilambda ) rstan_options(auto_write = TRUE) options(mc.cores = parallel::detectCores()) fit <- rstan::sampling( elnet_glmm, data = stan_data, iter = 200, chains = 1, warmup = 100, pars = "log_lik" ) waic_metrics <- waic(fit) lpd <- waic_metrics[["total"]]["lpd"] out_df <- data.frame(out_year = iyear, lambda = ilambda, lpd = lpd) all_lpd <- rbind(all_lpd, out_df) } # next iyear } # next ilambda lpd_meds <- all_lpd %>% group_by(lambda) %>% summarise(lpdsum = sum(lpd)) ggplot(lpd_meds, aes(x = lambda, y = lpdsum))+ geom_line()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.