#' Ensemble MCMC algorithm (either Quadratic or Affine Invariant method)
#'
#' @description This function runs the Ensemble Quadratic or Affine Invariant
#' MCMC algorithm for Bayesian inference and parameter estimation. It is based
#' on the papers by Militzer (2023), Goodman and Weare (2010), and
#' Foreman-Mackey, Hogg, Lang, and Goodman (2013). The Ensemble Quadratic
#' Monte Carlo algorithm was developed by Militzer (2023). The Ensemble Affine
#' Invariant algorithm was developed by Goodman and Weare (2010) and was
#' implemented in Python by Foreman-Mackey, Hogg, Lang, and Goodman (2013).
#' The Quadratic Monte Carlo method was shown to perform better than the
#' Affine Invariant method by Militzer (2023), so the Quadratic method is the
#' default in the 'ensemblealg' function.
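#'
#' @details Both methods update one chain (walker) at a time using the current
#' positions of the other chains. As a brief sketch of what is implemented
#' here: the Affine Invariant stretch move updates chain k using one other
#' chain j via \deqn{Y = \theta_j + Z(\theta_k - \theta_j),} where the stretch
#' factor Z is drawn from a density proportional to \eqn{1/\sqrt{z}} on
#' \eqn{[1/a, a]} with a = 'a_par'. The Quadratic move instead selects two
#' other chains j and l, fits a quadratic curve through the three chains'
#' positions, and proposes a new point on that curve. In both cases the
#' Metropolis acceptance step includes the corresponding proposal density
#' correction.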
#'
#' @param theta0 The matrix of initial guesses for the MCMC chains, with
#' dimensions (number of parameters) x (number of chains)
#' @param logfuns A list object containing the log prior function and the log
#' likelihood function, named 'logp' and 'logl', respectively
#' @param T_iter The number of iterations to run for each chain in the Ensemble
#' MCMC algorithm
#' @param Thin_val Every nth iteration is saved, where n is equal to the
#' "Thin_val" parameter
#' @param UseQuad If this bool is TRUE, then the Ensemble Quadratic MCMC
#' algorithm is used. Otherwise, the Ensemble Affine Invariant MCMC algorithm
#' is used. (The default setting is TRUE.)
#' @param a_par The parameter 'a_par' is a tuning parameter that controls the
#' spread of the proposals in the ensemble MCMC algorithm. (The default
#' setting for the Quadratic algorithm is 'a_par' equal to 1.5 and the default
#' setting for the Affine Invariant algorithm is 'a_par' equal to 2.)
#' @param ShowProgress If this bool is TRUE, then the progress of the
#' algorithm is shown. Otherwise, the progress of the algorithm is not shown.
#' (The default setting is FALSE.)
#' @param ReturnCODA If this bool is TRUE, then the 'coda' package 'mcmc.list'
#' object is returned along with the other outputs. (The default setting is
#' FALSE.)
#'
#' @return A list object is returned that contains three arrays: theta_sample,
#' log_like_sample, and log_prior_sample.\cr
#'
#' theta_sample: the array of parameter samples returned by the Ensemble MCMC
#' algorithm; its dimensions are given by
#' (Number of parameters) x (Number of chains) x (Number of saved iterations),
#' where the number of saved iterations is floor(T_iter/Thin_val) \cr
#'
#' log_like_sample: the matrix of log likelihood samples returned by the
#' Ensemble MCMC algorithm; its dimensions are given by
#' (Number of chains) x (Number of saved iterations) \cr
#'
#' log_prior_sample: the matrix of log prior samples returned by the Ensemble
#' MCMC algorithm; its dimensions are given by
#' (Number of chains) x (Number of saved iterations) \cr
#'
#' mcmc_list_coda: (optional) the 'coda' package 'mcmc.list' object, which can
#' be used with the MCMC diagnostic functions in the 'coda' package
#' @export
#'
#' @references Militzer B (2023) Study of Jupiter’s Interior with Quadratic
#' Monte Carlo Simulations. ApJ 953(111):20pp.
#' https://doi.org/10.3847/1538-4357/ace1f1 \cr
#'
#' Goodman J and Weare J (2010) Ensemble samplers with affine invariance. Commun
#' Appl Math Comput Sci 5(1):65-80.
#' https://doi.org/10.2140/camcos.2010.5.65 \cr
#'
#' Foreman-Mackey D, Hogg DW, Lang D, Goodman J (2013) emcee: The MCMC Hammer.
#' PASP 125(925):306. https://doi.org/10.48550/arXiv.1202.3665
#'
#' @examples #Ensemble Quadratic MCMC algorithm example for fitting a Weibull
#' #distribution
#'
#' #Assume the true parameters are
#'
#' a_shape = 20
#' sigma_scale = 900
#'
#' #Random sample from the Weibull distribution with a = 20 and sigma = 900,
#' #Y~WEI(a = 20, sigma = 900)
#'
#' num_ran_samples = 50
#'
#' #Set the seed for this example
#' set.seed(10)
#'
#' data_weibull = rweibull(num_ran_samples, shape = a_shape, scale = sigma_scale)
#'
#' #We want to estimate a_shape and sigma_scale
#'
#' #Log prior function for a_shape and sigma_scale
#' #(assumed priors a_shape ~ U(1e-2, 1e2) and sigma_scale ~ U(1, 1e4))
#' logp <- function(param)
#' {
#'   a_shape_use = param[1]
#'   sigma_scale_use = param[2]
#'
#'   logp_val = dunif(a_shape_use, min = 1e-2, max = 1e2, log = TRUE) +
#'     dunif(sigma_scale_use, min = 1, max = 1e4, log = TRUE)
#'
#'   return(logp_val)
#' }
#'
#' #Log likelihood function for a_shape and sigma_scale
#' logl <- function(param)
#' {
#'   a_shape_use = param[1]
#'   sigma_scale_use = param[2]
#'
#'   logl_val = sum(dweibull(data_weibull, shape = a_shape_use,
#'                           scale = sigma_scale_use, log = TRUE))
#'
#'   return(logl_val)
#' }
#'
#' logfuns = list(logp = logp, logl = logl)
#'
#' num_par = 2
#'
#' #It is recommended to use at least twice as many chains as the number of
#' #parameters to be estimated.
#' num_chains = 2*num_par
#'
#' #Generate initial guesses for the MCMC chains
#' theta0 = matrix(0, nrow = num_par, ncol = num_chains)
#'
#' temp_val = 0
#' j = 0
#'
#' while(j < num_chains)
#' {
#'   initial = c(runif(1, 1e-2, 1e2), runif(1, 1, 1e4))
#'   temp_val = logl(initial) + logp(initial)
#'
#'   while(is.na(temp_val) || is.infinite(temp_val))
#'   {
#'     initial = c(runif(1, 1e-2, 1e2), runif(1, 1, 1e4))
#'     temp_val = logl(initial) + logp(initial)
#'   }
#'
#'   j = j + 1
#'
#'   message(paste('j:', j))
#'
#'   theta0[1,j] = initial[1]
#'   theta0[2,j] = initial[2]
#' }
#'
#' num_chain_iterations = 1e4
#' thin_val_par = 10
#'
#' #The total number of returned samples is given by
#' #(num_chain_iterations/thin_val_par)*num_chains = 4e3
#'
#' #Ensemble Quadratic MCMC algorithm
#' Weibull_Quad_result = ensemblealg(theta0, logfuns,
#'   T_iter = num_chain_iterations, Thin_val = thin_val_par)
#'
#' my_samples = Weibull_Quad_result$theta_sample
#'
#' my_log_prior = Weibull_Quad_result$log_prior_sample
#'
#' my_log_like = Weibull_Quad_result$log_like_sample
#'
#' #Burn-in 25% of each chain
#' my_samples_burn_in = my_samples[,,-c(1:floor((num_chain_iterations/thin_val_par)*0.25))]
#'
#' my_log_prior_burn_in = my_log_prior[,-c(1:floor((num_chain_iterations/thin_val_par)*0.25))]
#'
#' my_log_like_burn_in = my_log_like[,-c(1:floor((num_chain_iterations/thin_val_par)*0.25))]
#'
#' #Calculate potential scale reduction factors
#' diagnostic_result = psrfdiagnostic(my_samples_burn_in, 0.05)
#'
#' diagnostic_result$p_s_r_f_vec
#'
#' #log unnormalized posterior samples
#' log_un_post_vec = as.vector(my_log_prior_burn_in + my_log_like_burn_in)
#'
#' #a_shape posterior samples
#' k1 = as.vector(my_samples_burn_in[1,,])
#'
#' #sigma_scale posterior samples
#' k2 = as.vector(my_samples_burn_in[2,,])
#'
#' #Calculate posterior median, 95% credible intervals, and maximum posterior
#' #for the parameters
#' median(k1)
#' hpdparameter(k1, 0.05)
#'
#' median(k2)
#' hpdparameter(k2, 0.05)
#'
#' k1[which.max(log_un_post_vec)]
#'
#' k2[which.max(log_un_post_vec)]
#'
#' #These plots display the silhouette of the unnormalized posterior surface
#' #from the chosen parameter's perspective
#' plot(k1, exp(log_un_post_vec), xlab = "a_shape",
#'   ylab = "unnormalized posterior density")
#'
#' plot(k2, exp(log_un_post_vec), xlab = "sigma_scale",
#'   ylab = "unnormalized posterior density")
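#'
#' #(Optional) Sketch of the ReturnCODA option, assuming the 'coda' package is
#' #installed: the returned 'mcmc.list' object can be passed to 'coda'
#' #diagnostics such as coda::effectiveSize
#'
#' #Weibull_Quad_coda = ensemblealg(theta0, logfuns,
#' #  T_iter = num_chain_iterations, Thin_val = thin_val_par, ReturnCODA = TRUE)
#'
#' #coda::effectiveSize(Weibull_Quad_coda$mcmc_list_coda)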
ensemblealg <- function(theta0, logfuns, T_iter, Thin_val, UseQuad = TRUE,
                        a_par = NULL, ShowProgress = FALSE, ReturnCODA = FALSE)
{
  if(UseQuad == TRUE & is.null(a_par))
  {
    a_par = 1.5
  }
  if(UseQuad == FALSE & is.null(a_par))
  {
    a_par = 2
  }
  n = dim(theta0)
  n1 = n[1]
  n2 = n[2]
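  #n1 is the number of parameters and n2 is the number of chains (walkers)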
  if(UseQuad == FALSE & n2 < 2)
  {
    stopifnot("The number of chains for the Affine Invariant Ensemble MCMC algorithm needs to be at least two. It is recommended to use at least twice as many chains as the number of parameters to be estimated." = (n2 >= 2))
  }
  if(UseQuad == TRUE & n2 < 3)
  {
    stopifnot("The number of chains for the Quadratic Ensemble MCMC algorithm needs to be at least three. It is recommended to use at least twice as many chains as the number of parameters to be estimated." = (n2 >= 3))
  }
  theta_sample = array(0, dim = c(n1, n2, floor(T_iter/Thin_val)))
  theta_sample_current = matrix(0, nrow = n1, ncol = n2)
  theta_sample_previous = matrix(0, nrow = n1, ncol = n2)
  theta_sample_current[,] = theta0
  log_prior_sample = matrix(0, nrow = n2, ncol = floor(T_iter/Thin_val))
  log_prior_current = matrix(0, nrow = n2, ncol = 1)
  log_prior_previous = matrix(0, nrow = n2, ncol = 1)
  for(i in 1:n2)
  {
    log_prior_current[i,1] = logfuns[["logp"]](theta0[,i])
  }
  log_like_sample = matrix(0, nrow = n2, ncol = floor(T_iter/Thin_val))
  log_like_current = matrix(0, nrow = n2, ncol = 1)
  log_like_previous = matrix(0, nrow = n2, ncol = 1)
  for(i in 1:n2)
  {
    log_like_current[i,1] = logfuns[["logl"]](theta0[,i])
  }
  save_index = 1
  #This 'for loop' will run either the Quadratic or Affine Invariant Ensemble
  #MCMC algorithm for the chains
  for(t in 1:T_iter)
  {
    theta_sample_previous[,] = theta_sample_current
    log_prior_previous[,1] = log_prior_current
    log_like_previous[,1] = log_like_current
    for(k in 1:n2)
    {
      if(UseQuad == TRUE)
      {
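        #Quadratic move (Militzer 2023): pick two other walkers j and l, fit a
        #quadratic (Lagrange) interpolation through walker k at position t_k
        #and walkers j and l at positions -1 and +1, then propose the point on
        #that curve at a new position t_k_guess; w_k, w_j, and w_l are the
        #Lagrange interpolation coefficients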
        j = sample((1:n2)[-k], 1)
        l = sample((1:n2)[-c(k, j)], 1)
        t_k = stats::runif(1, min = -1*a_par, max = a_par)
        t_k_guess = stats::runif(1, min = -1*a_par, max = a_par)
        theta_k_tminus1 = theta_sample_previous[,k]
        theta_j_tminus1 = theta_sample_previous[,j]
        theta_l_tminus1 = theta_sample_previous[,l]
        w_k = ((t_k_guess + 1)/(t_k + 1))*((t_k_guess - 1)/(t_k - 1))
        w_j = ((t_k_guess - 1)/(-1*2))*((t_k_guess - t_k)/(-1 - t_k))
        w_l = ((t_k_guess - t_k)/(1 - t_k))*((t_k_guess + 1)/(2))
        log_prior_k_tminus1 = log_prior_previous[k,1]
        log_like_k_tminus1 = log_like_previous[k,1]
        Y = w_k*theta_k_tminus1 + w_j*theta_j_tminus1 + w_l*theta_l_tminus1
      }
      else
      {
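        #Affine Invariant stretch move (Goodman and Weare 2010): pick another
        #walker j and propose a point on the line through walkers j and k,
        #where the stretch factor Z is drawn from the density g(z)
        #proportional to 1/sqrt(z) on [1/a_par, a_par]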
        j = sample((1:n2)[-k], 1)
        u = stats::runif(1, min = 0, max = 1)
        Z = (((a_par - 1)*u + 1)^2)/a_par
        theta_j_tminus1 = theta_sample_previous[,j]
        theta_k_tminus1 = theta_sample_previous[,k]
        log_prior_k_tminus1 = log_prior_previous[k,1]
        log_like_k_tminus1 = log_like_previous[k,1]
        Y = theta_j_tminus1 + Z*(theta_k_tminus1 - theta_j_tminus1)
      }
      log_prior_theta_g = logfuns[["logp"]](Y)
      if(is.infinite(log_prior_theta_g))
      {
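        #The proposal lies outside the support of the prior, so reject it
        #without evaluating the likelihood and keep the walker's previous
        #state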
        theta_sample_current[,k] = theta_k_tminus1
        log_prior_current[k,1] = log_prior_k_tminus1
        log_like_current[k,1] = log_like_k_tminus1
        #save
        if(t %% Thin_val == 0)
        {
          theta_sample[,k,save_index] = theta_k_tminus1
          log_prior_sample[k,save_index] = log_prior_k_tminus1
          log_like_sample[k,save_index] = log_like_k_tminus1
        }
      }
      else
      {
        log_like_theta_g = logfuns[["logl"]](Y)
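        #Metropolis acceptance step; the proposal density correction factor
        #is |w_k|^n1 for the Quadratic move and Z^(n1 - 1) for the Affine
        #Invariant stretch move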
        if(UseQuad == TRUE)
        {
          log_alpha = min(c(0, (n1)*log(abs(w_k)) +
            (log_prior_theta_g + log_like_theta_g -
             log_prior_k_tminus1 - log_like_k_tminus1)))
        }
        else
        {
          log_alpha = min(c(0, (n1 - 1)*log(Z) +
            (log_prior_theta_g + log_like_theta_g -
             log_prior_k_tminus1 - log_like_k_tminus1)))
        }
        u2 = stats::runif(1, min = 0, max = 1)
        if(log(u2) < log_alpha)
        {
          theta_sample_current[,k] = Y
          log_prior_current[k,1] = log_prior_theta_g
          log_like_current[k,1] = log_like_theta_g
          #save
          if(t %% Thin_val == 0)
          {
            theta_sample[,k,save_index] = Y
            log_prior_sample[k,save_index] = log_prior_theta_g
            log_like_sample[k,save_index] = log_like_theta_g
          }
        }
        else
        {
          theta_sample_current[,k] = theta_k_tminus1
          log_prior_current[k,1] = log_prior_k_tminus1
          log_like_current[k,1] = log_like_k_tminus1
          #save
          if(t %% Thin_val == 0)
          {
            theta_sample[,k,save_index] = theta_k_tminus1
            log_prior_sample[k,save_index] = log_prior_k_tminus1
            log_like_sample[k,save_index] = log_like_k_tminus1
          }
        }
      }
    }
    if(t %% Thin_val == 0)
    {
      save_index = save_index + 1
    }
    if(ShowProgress == TRUE)
    {
      svMisc::progress(t, T_iter, progress.bar = TRUE, init = (t == 1))
    }
  }
  if(ReturnCODA == TRUE)
  {
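    #Convert each chain's (number of parameters) x (number of saved
    #iterations) slice into a 'coda' 'mcmc' object and collect the chains
    #into an 'mcmc.list'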
    mcmc_list_temp = list(coda::mcmc(t(theta_sample[,1,])))
    for(i in 2:n2)
    {
      mcmc_list_temp = append(mcmc_list_temp,
        list(coda::mcmc(t(theta_sample[,i,]))), after = i - 1)
    }
    mcmc_list_coda = coda::mcmc.list(mcmc_list_temp)
    return(list("theta_sample" = theta_sample,
                "log_like_sample" = log_like_sample,
                "log_prior_sample" = log_prior_sample,
                "mcmc_list_coda" = mcmc_list_coda))
  }
  else
  {
    return(list("theta_sample" = theta_sample,
                "log_like_sample" = log_like_sample,
                "log_prior_sample" = log_prior_sample))
  }
}