#' Convenience function for computing relative efficiencies
#'
#' `relative_eff()` computes the the MCMC effective sample size divided by
#' the total sample size.
#'
#' @export
#' @param x A vector, matrix, 3-D array, or function. See the **Methods (by
#' class)** section below for details on specifying `x`, but where
#' "log-likelihood" is mentioned replace it with one of the following
#' depending on the use case:
#' * For use with the [loo()] function, the values in `x` (or generated by
#' `x`, if a function) should be **likelihood** values
#' (i.e., `exp(log_lik)`), not on the log scale.
#' * For generic use with [psis()], the values in `x` should be the reciprocal
#' of the importance ratios (i.e., `exp(-log_ratios)`).
#' @param chain_id A vector of length `NROW(x)` containing MCMC chain
#' indexes for each each row of `x` (if a matrix) or each value in
#' `x` (if a vector). No `chain_id` is needed if `x` is a 3-D
#' array. If there are `C` chains then valid chain indexes are values
#' in `1:C`.
#' @param cores The number of cores to use for parallelization.
#' @return A vector of relative effective sample sizes.
#'
#' @examples
#' LLarr <- example_loglik_array()
#' LLmat <- example_loglik_matrix()
#' dim(LLarr)
#' dim(LLmat)
#'
#' rel_n_eff_1 <- relative_eff(exp(LLarr))
#' rel_n_eff_2 <- relative_eff(exp(LLmat), chain_id = rep(1:2, each = 500))
#' all.equal(rel_n_eff_1, rel_n_eff_2)
#'
relative_eff <- function(x, ...) {
UseMethod("relative_eff")
}
#' @export
#' @templateVar fn relative_eff
#' @template vector
#'
relative_eff.default <- function(x, chain_id, ...) {
dim(x) <- c(length(x), 1)
class(x) <- "matrix"
relative_eff.matrix(x, chain_id)
}
#' @export
#' @templateVar fn relative_eff
#' @template matrix
#'
relative_eff.matrix <- function(x, chain_id, ..., cores = getOption("mc.cores", 1)) {
x <- llmatrix_to_array(x, chain_id)
relative_eff.array(x, cores = cores)
}
#' @export
#' @templateVar fn relative_eff
#' @template array
#'
relative_eff.array <- function(x, ..., cores = getOption("mc.cores", 1)) {
stopifnot(length(dim(x)) == 3)
S <- prod(dim(x)[1:2]) # posterior sample size = iter * chains
if (cores == 1) {
n_eff_vec <- apply(x, 3, ess_rfun)
} else {
if (!os_is_windows()) {
n_eff_list <-
parallel::mclapply(
mc.cores = cores,
X = seq_len(dim(x)[3]),
FUN = function(i) ess_rfun(x[, , i, drop = TRUE])
)
} else {
cl <- parallel::makePSOCKcluster(cores)
on.exit(parallel::stopCluster(cl))
n_eff_list <-
parallel::parLapply(
cl = cl,
X = seq_len(dim(x)[3]),
fun = function(i) ess_rfun(x[, , i, drop = TRUE])
)
}
n_eff_vec <- unlist(n_eff_list, use.names = FALSE)
}
return(n_eff_vec / S)
}
#' @export
#' @templateVar fn relative_eff
#' @template function
#' @param data,draws,... Same as for the [loo()] function method.
#'
relative_eff.function <-
function(x,
chain_id,
...,
cores = getOption("mc.cores", 1),
data = NULL,
draws = NULL) {
f_i <- validate_llfun(x) # not really an llfun, should return exp(ll) or exp(-ll)
N <- dim(data)[1]
if (cores == 1) {
n_eff_list <-
lapply(
X = seq_len(N),
FUN = function(i) {
val_i <- f_i(data_i = data[i, , drop = FALSE], draws = draws, ...)
relative_eff.default(as.vector(val_i), chain_id = chain_id, cores = 1)
}
)
} else {
if (!os_is_windows()) {
n_eff_list <-
parallel::mclapply(
X = seq_len(N),
FUN = function(i) {
val_i <- f_i(data_i = data[i, , drop = FALSE], draws = draws, ...)
relative_eff.default(as.vector(val_i), chain_id = chain_id, cores = 1)
},
mc.cores = cores
)
} else {
cl <- parallel::makePSOCKcluster(cores)
parallel::clusterExport(cl=cl, varlist=c("draws", "chain_id", "data"), envir=environment())
on.exit(parallel::stopCluster(cl))
n_eff_list <-
parallel::parLapply(
cl = cl,
X = seq_len(N),
fun = function(i) {
val_i <- f_i(data_i = data[i, , drop = FALSE], draws = draws, ...)
relative_eff.default(as.vector(val_i), chain_id = chain_id, cores = 1)
}
)
}
}
n_eff_vec <- unlist(n_eff_list, use.names = FALSE)
return(n_eff_vec)
}
#' @export
#' @describeIn relative_eff
#' If `x` is an object of class `"psis"`, `relative_eff()` simply returns
#' the `r_eff` attribute of `x`.
relative_eff.importance_sampling <- function(x, ...) {
attr(x, "r_eff")
}
# internal ----------------------------------------------------------------
#' Effective sample size for PSIS
#'
#' @noRd
#' @param w A vector or matrix (one column per observation) of normalized Pareto
#' smoothed weights (not log weights).
#' @param r_eff Relative effective sample size of `exp(log_lik)` or
#' `exp(-log_ratios)`. `r_eff` should be a scalar if `w` is a
#' vector and a vector of length `ncol(w)` if `w` is a matrix.
#' @return A scalar if `w` is a vector. A vector of length `ncol(w)`
#' if `w` is matrix.
#'
psis_n_eff <- function(w, ...) {
UseMethod("psis_n_eff")
}
#' @export
psis_n_eff.default <- function(w, r_eff = NULL, ...) {
ss <- sum(w^2)
if (is.null(r_eff)) {
return(1 / ss)
}
stopifnot(length(r_eff) == 1)
1 / ss * r_eff
}
#' @export
psis_n_eff.matrix <- function(w, r_eff = NULL, ...) {
ss <- colSums(w^2)
if (is.null(r_eff)) {
return(1 / ss)
}
if (length(r_eff) != length(ss) && length(r_eff) != 1) {
stop("r_eff must have length 1 or ncol(w).", call. = FALSE)
}
1 / ss * r_eff
}
#' MCMC effective sample size calculation
#'
#' @noRd
#' @param sims An iterations by chains matrix of draws for a single parameter.
#' In the case of the **loo** package, this will be the **exponentiated**
#' log-likelihood values for the ith observation.
#' @return MCMC effective sample size based on RStan's calculation.
#'
ess_rfun <- function(sims) {
if (is.vector(sims)) dim(sims) <- c(length(sims), 1)
chains <- ncol(sims)
n_samples <- nrow(sims)
acov <- lapply(1:chains, FUN = function(i) posterior::autocovariance(sims[,i]))
acov <- do.call(cbind, acov)
chain_mean <- colMeans(sims)
mean_var <- mean(acov[1,]) * n_samples / (n_samples - 1)
var_plus <- mean_var * (n_samples - 1) / n_samples
if (chains > 1)
var_plus <- var_plus + var(chain_mean)
# Geyer's initial positive sequence
rho_hat_t <- rep.int(0, n_samples)
t <- 0
rho_hat_even <- 1
rho_hat_t[t + 1] <- rho_hat_even
rho_hat_odd <- 1 - (mean_var - mean(acov[t + 2, ])) / var_plus
rho_hat_t[t + 2] <- rho_hat_odd
while (t < nrow(acov) - 5 && !is.nan(rho_hat_even + rho_hat_odd) &&
(rho_hat_even + rho_hat_odd > 0)) {
t <- t + 2
rho_hat_even = 1 - (mean_var - mean(acov[t + 1, ])) / var_plus
rho_hat_odd = 1 - (mean_var - mean(acov[t + 2, ])) / var_plus
if ((rho_hat_even + rho_hat_odd) >= 0) {
rho_hat_t[t + 1] <- rho_hat_even
rho_hat_t[t + 2] <- rho_hat_odd
}
}
max_t <- t
# this is used in the improved estimate
if (rho_hat_even>0)
rho_hat_t[max_t + 1] <- rho_hat_even
# Geyer's initial monotone sequence
t <- 0
while (t <= max_t - 4) {
t <- t + 2
if (rho_hat_t[t + 1] + rho_hat_t[t + 2] >
rho_hat_t[t - 1] + rho_hat_t[t]) {
rho_hat_t[t + 1] = (rho_hat_t[t - 1] + rho_hat_t[t]) / 2;
rho_hat_t[t + 2] = rho_hat_t[t + 1];
}
}
ess <- chains * n_samples
# Geyer's truncated estimate
# tau_hat <- -1 + 2 * sum(rho_hat_t[1:max_t])
# Improved estimate reduces variance in antithetic case
tau_hat <- -1 + 2 * sum(rho_hat_t[1:max_t]) + rho_hat_t[max_t+1]
# Safety check for negative values and with max ess equal to ess*log10(ess)
tau_hat <- max(tau_hat, 1/log10(ess))
ess <- ess / tau_hat
ess
}
fft_next_good_size <- function(N) {
# Find the optimal next size for the FFT so that
# a minimum number of zeros are padded.
if (N <= 2)
return(2)
while (TRUE) {
m = N
while ((m %% 2) == 0) m = m / 2
while ((m %% 3) == 0) m = m / 3
while ((m %% 5) == 0) m = m / 5
if (m <= 1)
return(N)
N = N + 1
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.