knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

Write the Stan model

```{stan output.var="elnet_glmm"} data { int N; int K; int ngrp; int grp[N]; vector[N] y; vector[ngrp] yoos; row_vector[K] X[N]; row_vector[K] Xoos[ngrp]; real lambda1; real lambda2; }

parameters { real beta_int; vector[K] beta; vector[ngrp] beta0; real sigma_int; real sigma; }

model { ## Mean prediction and likelihood vector[N] x_beta_jj; for(n in 1:N) x_beta_jj[n] = X[n] * beta + beta_int + beta0[grp[n]]; y ~ normal(x_beta_jj, sigma);

## Elastic net penalty (pages 381-382 in Stan manual v2.17.0) for (k in 1:K) target += -lambda1 * fabs(beta[k]); # LASSO target += -lambda2 * dot_self(beta); # Ridge

## Priors sigma ~ cauchy(0, 2.5); sigma_int ~ cauchy(0, 2.5); beta_int ~ normal(0,5); beta0 ~ normal(0, sigma_int); beta ~ normal(0,5); }

generated quantities { vector[ngrp] yhat; vector[ngrp] log_lik; vector[K] beta_elastic_net; beta_elastic_net = (1+lambda2) * beta; for(n in 1:ngrp){ yhat[n] = Xoos[n] * beta_elastic_net + beta_int + beta0[n]; log_lik[n] = normal_lpdf(yoos[n] | yhat[n], sigma); } }

## Choose lambda based on LOO-CV
Use leave-one-year-out cross-validation (LOO-CV) to find optimal penalty based on log predictive density (lpd).

```r
library(modselr)
library(tidyverse)
library(rstan)
library(ggmcmc)

butterfly_data <- butterfly %>%
  filter(year < 2015) %>%
  select_if(~ !any(is.na(.))) # remove columns with NA

lambda_vec <- seq(0, 30, by = 2)
years_vec <- unique(butterfly_data$year)

all_lpd <- {}

for(ilambda in lambda_vec){
  for(iyear in years_vec){

    # Fetch training data
    training_data <- butterfly_data %>%
      filter(year != iyear)

    covar_matrix <- training_data %>%
      select(-year, -meada, -Nt, -Rt) %>%
      as.matrix()

    scale_data_and_info <- get_scaled_covars(covar_matrix)

    response_vector <- training_data %>%
      pull(Rt)

    subpop_ids <- training_data %>%
      pull(meada)

    # Fetch validation data
    validation_data <- butterfly_data %>%
      filter(year == iyear) 

    Xoos <- validation_data %>%
      select(-year, -meada, -Nt, -Rt) %>%
      as.matrix()

    Xoos <- scale_oos_covars(Xoos, scale_data_and_info[[2]])

    yoos <- validation_data %>%
      pull(Rt)

    stan_data <- list(
      N = length(response_vector),
      K = ncol(covar_matrix),
      ngrp = length(unique(subpop_ids)),
      grp = as.numeric(as.factor(subpop_ids)),
      y = response_vector,
      X = scale_data_and_info[[1]],
      yoos = yoos,
      Xoos = Xoos,
      lambda1 = ilambda,
      lambda2 = ilambda
    )

    rstan_options(auto_write = TRUE)
    options(mc.cores = parallel::detectCores())

    fit <- rstan::sampling(
      elnet_glmm,
      data = stan_data, 
      iter = 200, 
      chains = 1, 
      warmup = 100,
      pars = "log_lik"
      )

    waic_metrics <- waic(fit)
    lpd <- waic_metrics[["total"]]["lpd"]

    out_df <- data.frame(out_year = iyear,
                         lambda = ilambda,
                         lpd = lpd)
    all_lpd <- rbind(all_lpd, out_df)

  } # next iyear
} # next ilambda

lpd_meds <- all_lpd %>%
  group_by(lambda) %>%
  summarise(lpdsum = sum(lpd))

ggplot(lpd_meds, aes(x = lambda, y = lpdsum))+
  geom_line()


atredennick/modselr documentation built on Dec. 11, 2020, 10:09 p.m.