# R/effective_sample_sizes.R

#' Convenience function for computing relative efficiencies
#'
#' `relative_eff()` computes the MCMC effective sample size divided by
#' the total sample size.
#'
#' @export
#' @param x A vector, matrix, 3-D array, or function. See the **Methods (by
#'   class)** section below for details on specifying `x`, but where
#'   "log-likelihood" is mentioned replace it with one of the following
#'   depending on the use case:
#'   * For use with the [loo()] function, the values in `x` (or generated by
#'     `x`, if a function) should be **likelihood** values
#'     (i.e., `exp(log_lik)`), not on the log scale.
#'   * For generic use with [psis()], the values in `x` should be the reciprocal
#'     of the importance ratios (i.e., `exp(-log_ratios)`).
#' @param chain_id A vector of length `NROW(x)` containing MCMC chain
#'   indexes for each row of `x` (if a matrix) or each value in
#'   `x` (if a vector). No `chain_id` is needed if `x` is a 3-D
#'   array. If there are `C` chains then valid chain indexes are values
#'   in `1:C`.
#' @param cores The number of cores to use for parallelization.
#' @return A vector of relative effective sample sizes (MCMC effective sample
#'   size divided by the total number of posterior draws), one element per
#'   observation.
#'
#' @examples
#' LLarr <- example_loglik_array()
#' LLmat <- example_loglik_matrix()
#' dim(LLarr)
#' dim(LLmat)
#'
#' rel_n_eff_1 <- relative_eff(exp(LLarr))
#' rel_n_eff_2 <- relative_eff(exp(LLmat), chain_id = rep(1:2, each = 500))
#' all.equal(rel_n_eff_1, rel_n_eff_2)
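#'
#' # For generic use with psis(), pass the reciprocal of the importance
#' # ratios, exp(-log_ratios). A sketch treating the negated example
#' # log-likelihoods as log importance ratios:
#' log_ratios <- -LLmat
#' r_eff <- relative_eff(exp(-log_ratios), chain_id = rep(1:2, each = 500))
#' psis_result <- psis(log_ratios, r_eff = r_eff)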
#'
relative_eff <- function(x, ...) {
  UseMethod("relative_eff")
}

#' @export
#' @templateVar fn relative_eff
#' @template vector
#'
relative_eff.default <- function(x, chain_id, ...) {
  dim(x) <- c(length(x), 1)
  class(x) <- "matrix"
  relative_eff.matrix(x, chain_id, ...)
}

#' @export
#' @templateVar fn relative_eff
#' @template matrix
#'
relative_eff.matrix <- function(x, chain_id, ..., cores = getOption("mc.cores", 1)) {
  x <- llmatrix_to_array(x, chain_id)
  relative_eff.array(x, cores = cores)
}

#' @export
#' @templateVar fn relative_eff
#' @template array
#'
relative_eff.array <- function(x, ..., cores = getOption("mc.cores", 1)) {
  stopifnot(length(dim(x)) == 3)
  S <- prod(dim(x)[1:2]) # posterior sample size = iter * chains

  if (cores == 1) {
    n_eff_vec <- apply(x, 3, ess_rfun)
  } else {
    if (!os_is_windows()) {
      n_eff_list <-
        parallel::mclapply(
          mc.cores = cores,
          X = seq_len(dim(x)[3]),
          FUN = function(i) ess_rfun(x[, , i, drop = TRUE])
        )
    } else {
      cl <- parallel::makePSOCKcluster(cores)
      on.exit(parallel::stopCluster(cl))
      n_eff_list <-
        parallel::parLapply(
          cl = cl,
          X = seq_len(dim(x)[3]),
          fun = function(i) ess_rfun(x[, , i, drop = TRUE])
        )
    }
    n_eff_vec <- unlist(n_eff_list, use.names = FALSE)
  }

  return(n_eff_vec / S)
}

#' @export
#' @templateVar fn relative_eff
#' @template function
#' @param data,draws,... Same as for the [loo()] function method.
#'
relative_eff.function <-
  function(x,
           chain_id,
           ...,
           cores = getOption("mc.cores", 1),
           data = NULL,
           draws = NULL) {

    f_i <- validate_llfun(x) # not an llfun in this case: should return exp(log_lik) or exp(-log_ratios)
    N <- dim(data)[1]

    if (cores == 1) {
      n_eff_list <-
        lapply(
          X = seq_len(N),
          FUN = function(i) {
            val_i <- f_i(data_i = data[i, , drop = FALSE], draws = draws, ...)
            relative_eff.default(as.vector(val_i), chain_id = chain_id, cores = 1)
          }
        )
    } else {
      if (!os_is_windows()) {
        n_eff_list <-
          parallel::mclapply(
            X = seq_len(N),
            FUN = function(i) {
              val_i <- f_i(data_i = data[i, , drop = FALSE], draws = draws, ...)
              relative_eff.default(as.vector(val_i), chain_id = chain_id, cores = 1)
            },
            mc.cores = cores
          )
      } else {
        cl <- parallel::makePSOCKcluster(cores)
        parallel::clusterExport(
          cl = cl,
          varlist = c("draws", "chain_id", "data"),
          envir = environment()
        )
        on.exit(parallel::stopCluster(cl))
        n_eff_list <-
          parallel::parLapply(
            cl = cl,
            X = seq_len(N),
            fun = function(i) {
              val_i <- f_i(data_i = data[i, , drop = FALSE], draws = draws, ...)
              relative_eff.default(as.vector(val_i), chain_id = chain_id, cores = 1)
            }
          )
      }
    }

    n_eff_vec <- unlist(n_eff_list, use.names = FALSE)
    return(n_eff_vec)
  }
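
# Hypothetical usage sketch for the function method (likfun, mydata, and
# post_draws are illustrative names, not objects from the package): `x` is
# called once per observation with `data_i` and `draws` and should return the
# likelihood values, i.e. exp(log_lik), for every posterior draw:
#   likfun <- function(data_i, draws) {
#     dnorm(data_i$y, mean = draws$mu, sd = draws$sigma)  # exp(log_lik)
#   }
#   relative_eff(likfun, chain_id = rep(1:4, each = 250),
#                data = mydata, draws = post_draws)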

#' @export
#' @describeIn relative_eff
#'   If `x` is an object of class `"psis"`, `relative_eff()` simply returns
#'   the `r_eff` attribute of `x`.
relative_eff.importance_sampling <- function(x, ...) {
  attr(x, "r_eff")
}


# internal ----------------------------------------------------------------


#' Effective sample size for PSIS
#'
#' @noRd
#' @param w A vector or matrix (one column per observation) of normalized Pareto
#'   smoothed weights (not log weights).
#' @param r_eff Relative effective sample size of `exp(log_lik)` or
#'   `exp(-log_ratios)`. `r_eff` should be a scalar if `w` is a
#'   vector and a vector of length `ncol(w)` if `w` is a matrix.
#' @return A scalar if `w` is a vector. A vector of length `ncol(w)`
#'   if `w` is a matrix.
#'
psis_n_eff <- function(w, ...) {
  UseMethod("psis_n_eff")
}

#' @export
psis_n_eff.default <- function(w, r_eff = NULL, ...) {
  ss <- sum(w^2)
  if (is.null(r_eff)) {
    return(1 / ss)
  }
  stopifnot(length(r_eff) == 1)
  1 / ss * r_eff
}

#' @export
psis_n_eff.matrix <- function(w, r_eff = NULL, ...) {
  ss <- colSums(w^2)
  if (is.null(r_eff)) {
    return(1 / ss)
  }
  if (length(r_eff) != length(ss) && length(r_eff) != 1) {
    stop("r_eff must have length 1 or ncol(w).", call. = FALSE)
  }
  1 / ss * r_eff
}
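
# Illustrative note: for normalized weights w (summing to 1), psis_n_eff() is
# the classic importance-sampling effective sample size, 1 / sum(w^2), scaled
# by the relative MCMC efficiency r_eff. A small sketch (hypothetical weights):
#   w <- runif(1000); w <- w / sum(w)
#   psis_n_eff(w, r_eff = 0.7)  # equals 0.7 / sum(w^2)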

#' MCMC effective sample size calculation
#'
#' @noRd
#' @param sims An iterations by chains matrix of draws for a single parameter.
#'   In the case of the **loo** package, these will be the **exponentiated**
#'   log-likelihood values for the ith observation.
#' @return MCMC effective sample size based on RStan's calculation.
#'
ess_rfun <- function(sims) {
  if (is.vector(sims)) dim(sims) <- c(length(sims), 1)
  chains <- ncol(sims)
  n_samples <- nrow(sims)
  acov <- lapply(1:chains, FUN = function(i) posterior::autocovariance(sims[,i]))
  acov <- do.call(cbind, acov)
  chain_mean <- colMeans(sims)
  mean_var <- mean(acov[1,]) * n_samples / (n_samples - 1)
  var_plus <- mean_var * (n_samples - 1) / n_samples
  if (chains > 1)
    var_plus <- var_plus + var(chain_mean)
  # Geyer's initial positive sequence
  rho_hat_t <- rep.int(0, n_samples)
  t <- 0
  rho_hat_even <- 1
  rho_hat_t[t + 1] <- rho_hat_even
  rho_hat_odd <- 1 - (mean_var - mean(acov[t + 2, ])) / var_plus
  rho_hat_t[t + 2] <- rho_hat_odd
  while (t < nrow(acov) - 5 && !is.nan(rho_hat_even + rho_hat_odd) &&
         (rho_hat_even + rho_hat_odd > 0)) {
    t <- t + 2
    rho_hat_even <- 1 - (mean_var - mean(acov[t + 1, ])) / var_plus
    rho_hat_odd <- 1 - (mean_var - mean(acov[t + 2, ])) / var_plus
    if ((rho_hat_even + rho_hat_odd) >= 0) {
      rho_hat_t[t + 1] <- rho_hat_even
      rho_hat_t[t + 2] <- rho_hat_odd
    }
  }
  max_t <- t
  # this is used in the improved estimate
  if (rho_hat_even > 0)
    rho_hat_t[max_t + 1] <- rho_hat_even

  # Geyer's initial monotone sequence
  t <- 0
  while (t <= max_t - 4) {
    t <- t + 2
    if (rho_hat_t[t + 1] + rho_hat_t[t + 2] >
        rho_hat_t[t - 1] + rho_hat_t[t]) {
      rho_hat_t[t + 1] <- (rho_hat_t[t - 1] + rho_hat_t[t]) / 2
      rho_hat_t[t + 2] <- rho_hat_t[t + 1]
    }
  }
  ess <- chains * n_samples
  # Geyer's truncated estimate
  # tau_hat <- -1 + 2 * sum(rho_hat_t[1:max_t])
  # Improved estimate reduces variance in antithetic case
  tau_hat <- -1 + 2 * sum(rho_hat_t[1:max_t]) + rho_hat_t[max_t+1]
  # Safety check: guard against negative tau_hat; this caps the ESS at ess * log10(ess)
  tau_hat <- max(tau_hat, 1/log10(ess))
  ess <- ess / tau_hat
  ess
}
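
# Informal sanity check (an assumption for illustration, not part of the
# package tests): for roughly independent draws the ESS should be close to the
# total number of draws, so relative_eff() values should be near 1:
#   sims <- matrix(rnorm(2000), nrow = 1000, ncol = 2)  # 1000 iters x 2 chains
#   ess_rfun(sims) / prod(dim(sims))                    # approximately 1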


fft_next_good_size <- function(N) {
  # Find the next good size for the FFT, i.e. the smallest integer >= N whose
  # only prime factors are 2, 3, and 5, so that a minimum number of zeros are
  # padded.
  if (N <= 2)
    return(2)
  while (TRUE) {
    m <- N
    while ((m %% 2) == 0) m <- m / 2
    while ((m %% 3) == 0) m <- m / 3
    while ((m %% 5) == 0) m <- m / 5
    if (m <= 1)
      return(N)
    N <- N + 1
  }
}
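
# For example, fft_next_good_size(137) returns 144 (= 2^4 * 3^2), the smallest
# length >= 137 with no prime factors other than 2, 3, and 5.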