devtools::load_all() library(here) library(rstan) options(mc.cores = parallel::detectCores()) rstan_options(auto_write = TRUE) library(tidyverse) library(cowplot)
model_sum_nb <- stan_model(here("stan","test_sum_nb.stan"))
Generating data
generator <- function(G, N, method = "saddlepoint", prior_mean_mus = 2, prior_sd_mus = 1) { if(method == "saddlepoint") { method_id = 0 } else if (method == "moments") { method_id = 1 } else { stop("Invalid method") } function() { all_mus <- rlnorm(G + 1, prior_mean_mus, prior_sd_mus) all_mus[G + 1] <- rlnorm(1, 5, 3) all_phis <- 1 / sqrt(abs(rnorm(G + 1))) sums <- array(-1, N) repeats <- 0 for(n in 1:N) { repeat { sums[n] <- sum(rnbinom(G + 1, mu = all_mus, size = all_phis)) if(sums[n] > 0) { break; } repeats <- repeats + 1 } } if(repeats > 0) { cat(repeats, " repeats\n") } list( observed = list( N = N, sums = sums, G = G, method = method_id, mus = array(all_mus[1:G], G), phis = array(all_phis[1:G], G) #phis = all_phis ), true = list( extra_mu = all_mus[G+1], extra_phi = all_phis[G+1] ) ) } }
Testing a single version to see it doesn't break terribly.
data <- generator(G = 5, N = 5, "moments")() fit <- sampling(model_sum_nb, data$observed) evaluate_all_params(rstan::extract(fit), data$true)
Here we test a sum of two NBs - one is small and has known parameters, the other has unknown parameters and possibly larger mean (the prior mean is larger).
Calibration and accuracy with saddlepoint approximation
sbc_res_saddlepoint_small <- sbc(model_sum_nb, generator(G = 1, N = 10, "saddlepoint"), N_steps = 100, control = list(adapt_delta = 0.95)) plot_sbc_params(sbc_res_saddlepoint_small$params, x_axis_trans = "log10", y_axis_trans = "log10") summarise_sbc_diagnostics(sbc_res_saddlepoint_small)
Median time - saddlepoint small, individual: 52.7035
Calibration and accuracy with moments approximation
sbc_res_moments_small <- sbc(model_sum_nb, generator(G = 1, N = 10, "moments"), N_steps = 100, control = list(adapt_delta = 0.95)) plot_sbc_params(sbc_res_moments_small$params, x_axis_trans = "log10", y_axis_trans = "log10") summarise_sbc_diagnostics(sbc_res_moments_small)
20 NBs are known (low means) and one NB is unknown (a prior large mean)
Saddlepoint:
sbc_res_saddlepoint_large <- sbc(model_sum_nb, generator(G = 20, N = 20, "saddlepoint"), N_steps = 100, control = list(adapt_delta = 0.95)) plot_sbc_params(sbc_res_saddlepoint_large$params, x_axis_trans = "log10", y_axis_trans = "log10") summarise_sbc_diagnostics(sbc_res_saddlepoint_large)
Median time - saddlepoint large, individual: 259.2945
Moments:
sbc_res_moments_large <- sbc(model_sum_nb, generator(G = 20, N = 20, "moments"), N_steps = 100, control = list(adapt_delta = 0.95)) plot_sbc_params(sbc_res_moments_large$params, x_axis_trans = "log10", y_axis_trans = "log10", plot_stat = "mean") summarise_sbc_diagnostics(sbc_res_moments_large)
9 known NBs, 1 unknown, all large ($log(\mu) \sim N(5,3)$). Here, the estimates start to be dominated by the prior.
Saddlepoint:
sbc_res_saddlepoint_10_large <- sbc(model_sum_nb, generator(G = 9, N = 10, "saddlepoint", prior_mean_mus = 5, prior_sd_mus = 3), N_steps = 100, control = list(adapt_delta = 0.95)) plot_sbc_params(sbc_res_saddlepoint_10_large$params, x_axis_trans = "log10", y_axis_trans = "log10") summarise_sbc_diagnostics(sbc_res_saddlepoint_10_large)
Moments:
sbc_res_moments_10_large <- sbc(model_sum_nb, generator(G = 9, N = 10, "moments", prior_mean_mus = 5, prior_sd_mus = 3), N_steps = 100, control = list(adapt_delta = 0.95)) plot_sbc_params(sbc_res_moments_10_large$params, x_axis_trans = "log10", y_axis_trans = "log10") summarise_sbc_diagnostics(sbc_res_moments_10_large)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.