Nothing
#'
#' Utilities for SBC validation
#'
load_rbest_dev <- function() {
if (rbest_lib_dir != .libPaths()[1]) {
cat("Unloading RBesT and setting libPaths to dev RBesT install.\n")
unloadNamespace("RBesT")
.libPaths(c(rbest_lib_dir, .libPaths()))
}
if (!("RBesT" %in% .packages())) {
require(RBesT, lib.loc = rbest_lib_dir)
}
}
setup_lecuyer_seeds <- function(lecuyer_seed, num) {
## note: seed have the format from L'Ecuyer. Just set
## RNGkind("L'Ecuyer-CMRG")
## and then use .Random.seed
job_seeds <- list()
job_seeds[[1]] <- parallel::nextRNGStream(lecuyer_seed)
i <- 2
while (i < num + 1) {
job_seeds[[i]] <- parallel::nextRNGSubStream(job_seeds[[i - 1]])
i <- i + 1
}
job_seeds
}
#'
#' Simulates a draw from the prior and fake data for it. This will be
#' the data generating step in the simulation. The function recieves
#' the problem data, job specifics and hyper-parameters which
#' determine the scenario (choices for the prior).
#'
simulate_fake <- function(data, family, mean_mu, sd_mu, sd_tau, samp_sd) {
G <- length(unique(data$group))
N <- length(data$group)
rl <- rle(data$group)
Ng <- rl$lengths
mu <- rnorm(1, mean_mu, sd_mu)
tau <- abs(rnorm(1, 0, sd_tau))
theta <- rnorm(G, mu, tau)
family <- get(family, mode = "function", envir = parent.frame())
inv_link <- family()$linkinv
alpha <- inv_link(rep(theta, times = Ng))
likelihood <- family()$family
if (likelihood == "binomial") {
r <- rbinom(N, 1, alpha)
y <- as.numeric(tapply(r, data$group, sum))
fake <- data.frame(r = y, nr = Ng - y, group = 1:G)
}
if (likelihood == "poisson") {
count <- rpois(N, alpha)
y <- as.numeric(tapply(count, data$group, sum))
fake <- data.frame(y = y, n = Ng, group = 1:G)
}
if (likelihood == "gaussian") {
y_i <- rnorm(N, alpha, samp_sd)
y <- as.numeric(tapply(y_i, data$group, mean))
fake <- data.frame(y = y, y_se = samp_sd / sqrt(Ng), group = 1:G)
}
list(
fake = fake,
draw = c(mu = mu, tau = tau),
draw_theta = theta
)
}
#'
#'
#' Procedure to fit each fake data set using our fitting
#' procedure. This method obtains the problem data, job details and an
#' **instance** of the scenario as generated by `simulate_fake`.
#'
fit_rbest <- function(fake, draw, draw_theta, family, prior_mean_mu, prior_sd_mu, prior_sd_tau, samp_sd) {
Ng <- length(draw_theta)
## pars <- job$pars$prob.pars
## prior_mean_mu <- pars$mean_mu
## prior_sd_mu <- pars$sd_mu
## prior_sd_tau <- pars$sd_tau
## samp_sd <- pars$samp_sd
## family <- pars$family
model <- switch(family,
binomial = cbind(r, nr) ~ 1 | group,
poisson = y ~ 1 + offset(log(n)) | group,
gaussian = cbind(y, y_se) ~ 1 | group
)
options(
RBesT.MC.warmup = 2000, RBesT.MC.iter = 4000, RBesT.MC.thin = 1, RBesT.MC.init = 0.1,
RBesT.MC.control = list( ## adapt_delta=0.999, ## 2024-11-21 lowered to 0.95 for the sake of performance
adapt_delta = 0.95,
stepsize = 0.01, max_treedepth = 10,
adapt_init_buffer = 100, adapt_term_buffer = 300
)
)
if (Ng > 5) {
## for the dense case we can be a bit less aggressive with the sampling tuning parameters
options(RBesT.MC.control = list( ## adapt_delta=0.95,
adapt_delta = 0.90,
stepsize = 0.01, max_treedepth = 10,
adapt_init_buffer = 100, adapt_term_buffer = 300
))
}
fit <- gMAP(model,
data = fake, family = family,
tau.dist = "HalfNormal",
tau.prior = cbind(0, prior_sd_tau),
beta.prior = cbind(c(prior_mean_mu), c(prior_sd_mu)),
chains = 2
)
params <- c("beta[1]", "tau[1]")
params_group <- paste0("theta[", 1:Ng, "]")
sampler_params <- rstan::get_sampler_params(fit$fit, inc_warmup = FALSE)
n_divergent <- sum(sapply(sampler_params, function(x) sum(x[, "divergent__"])))
fit_sum <- rstan::summary(fit$fit)$summary
samp_diags <- fit_sum[params, c("n_eff", "Rhat")]
min_Neff <- ceiling(min(samp_diags[, "n_eff"], na.rm = TRUE))
max_Rhat <- max(samp_diags[, "Rhat"], na.rm = TRUE)
lp_ess <- as.numeric(rstan::monitor(as.array(fit$fit, pars = "lp__"), print = FALSE)[1, c("Bulk_ESS", "Tail_ESS")])
post <- as.matrix(fit)[, params]
post_group <- as.matrix(fit)[, params_group]
S <- nrow(post)
## thin down to 1023 draws so that we get 1024 bins
idx <- round(seq(1, S, length = 1024 - 1))
post <- post[idx, ]
post_group <- post_group[idx, ]
colnames(post) <- c("mu", "tau")
unlist(list(
rank = c(colSums(sweep(post, 2, draw) < 0), colSums(sweep(post_group, 2, draw_theta) < 0)),
min_Neff = min_Neff,
n_divergent = n_divergent,
max_Rhat = max_Rhat,
lp_ess_bulk = lp_ess[1],
lp_ess_tail = lp_ess[2]
))
}
scale_ranks <- function(Nbins, scale = 1) {
## scale must evenly divide the total number of bins
assert_that(round(Nbins / scale) == Nbins / scale)
breaks <- (0:(Nbins / scale))
Nbreaks <- length(breaks)
function(scen) {
vars <- grep("^rank.", names(scen), value = TRUE)
res <- lapply(vars, function(v) hist(ceiling((scen[[v]] + 1) / scale), breaks = breaks, plot = FALSE, include.lowest = FALSE)$counts)
names(res) <- gsub("^rank", "count", vars)
res$rank <- breaks[-Nbreaks]
res <- as.data.frame(do.call(cbind, res))
res
}
}
run_sbc_case <- function(job.id, repl, data_scenario, family, mean_mu, sd_mu, sd_tau, samp_sd, base_scenarios, seeds) {
RNGkind("L'Ecuyer-CMRG")
.Random.seed <<- seeds[[job.id]]
runtime <- system.time({
suppressMessages(load_rbest_dev())
data <- base_scenarios[[data_scenario]]
fake <- simulate_fake(data, family, mean_mu, sd_mu, sd_tau, samp_sd)
fit <- fit_rbest(fake$fake, fake$draw, fake$draw_theta, family, mean_mu, sd_mu, sd_tau, samp_sd)
})
c(list(job.id = job.id, time.running = runtime["elapsed"]), fit)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.