Nothing
# Title : MJMCMC
# Objective : Mode Jumping MCMC algorithm
# Created by: jonlachmann
# Created on: 2021-04-27
#' Main algorithm for MJMCMC (Genetically Modified MJMCMC)
#'
#' @param x matrix containing the design matrix with data to use in the algorithm,
#' @param y response variable
#' @param N The number of MJMCMC iterations to run for (default 100)
#' @param probs A list of various probability vectors used by GMJMCMC, generated by \code{gen.probs.mjmcmc}.
#' @param params A list of various parameter vectors used by MJMCMC, generated by \code{gen.params.mjmcmc}.
#' @param loglik.pi A function specifying the marginal log-posterior of the model up to a constant, including the logarithm of the model prior:
#' \eqn{\log p(M|Y) = \text{const} + \log p(Y|M) + \log p(M)}.
#' Typically assumes a Gaussian model with Zellner's g prior with \eqn{g = max(n,p^2) by default}.
#' @param mlpost_params All parameters for the estimator function loglik.pi
#' @param intercept Logical. Whether to include an intercept in the design matrix. Default is \code{TRUE}. No variable selection is performed on the intercept.
#' @param fixed Integer specifying the number of leading columns in the design matrix to always include in the model. Default is 0.
#' @param sub Logical. If \code{TRUE}, uses subsampling or a stochastic approximation approach to the likelihood rather than the full likelihood. Default is \code{FALSE}.
#' @param verbose Logical. Whether to print messages during execution. Default is \code{TRUE} for \code{gmjmcmc} and \code{FALSE} for the parallel version.
#'
#' @return A list containing the following elements:
#' \item{models}{All visited models in both mjmcmc and local optimization.}
#' \item{accept}{Average acceptance rate of the chain.}
#' \item{mc.models}{All models visited during mjmcmc iterations.}
#' \item{best.crit}{The highest log marginal probability of the visited models.}
#' \item{marg.probs}{Marginal probabilities of the features.}
#' \item{model.probs}{Marginal probabilities of all of the visited models.}
#' \item{model.probs.idx}{Indices of unique visited models.}
#' \item{populations}{The covariates represented as a list of features.}
#'
#' @examples
#' result <- mjmcmc(
#' y = matrix(rnorm(100), 100),
#' x = matrix(rnorm(600), 100),
#' loglik.pi = gaussian.loglik)
#' summary(result)
#' plot(result)
#'
#' @export mjmcmc
mjmcmc <- function (
x,
y,
N = 1000,
probs = NULL,
params = NULL,
loglik.pi = NULL,
mlpost_params = list(family = "gaussian", beta_prior = list(type = "g-prior")),
intercept = TRUE,
fixed = 0,
sub = FALSE,
verbose = TRUE
) {
# Verify that data is well-formed
if (intercept) {
x <- cbind(1, x)
fixed <- fixed + 1
}
data <- check.data(x, y, fixed, verbose)
if(is.null(loglik.pi))
{
if(is.null(mlpost_params$beta_prior$type) || is.null(mlpost_params$family))
stop("mlpost_params$beta_prior and mlpost_params$family must be specified")
loglik.pi <- select.mlpost.fun(mlpost_params$beta_prior$type, mlpost_params$family)
if(mlpost_params$family == "gaussian")
mlpost_params$beta_prior <- gen.mlpost.params.lm(mlpost_params$beta_prior$type, mlpost_params$beta_prior, ncol(data$x) - 1, nrow(data$x))
else
{
mlpost_params$beta_prior <- gen.mlpost.params.glm(mlpost_params$beta_prior$type, mlpost_params$beta_prior, ncol(data$x) - 1, nrow(data$x))
mlpost_params$beta_prior$type <- mlpost_params$beta_prior$type
}
}
labels <- colnames(x)
if (fixed != 0)
labels <- labels[-seq_len(fixed)]
# Generate default probabilities and parameters if there are none supplied.
if (is.null(probs)) probs <- gen.probs.mjmcmc()
if (is.null(params)) params <- gen.params.mjmcmc(ncol(data$x) - data$fixed)
if (!is.null(mlpost_params)) params$mlpost <- mlpost_params
# Acceptance probability
accept <- 0
# Create a population of just the covariates
S <- gen.covariates(data)
complex <- complex.features(S)
# Initialize first model
model.cur <- as.logical(rbinom(n = length(S), size = 1, prob = 0.5))
model.cur.res <- loglik.pre(loglik.pi, model.cur, complex, data, params$mlpost, visited.models = NULL, sub = sub)
model.cur <- list(prob = 0, model = model.cur, coefs = model.cur.res$coefs, crit = model.cur.res$crit, alpha = 0)
if (verbose) cat("\nMJMCMC begin.\n")
result <- mjmcmc.loop(data, complex, loglik.pi, model.cur, N, probs, params, sub, verbose)
if (verbose) cat("\nMJMCMC done.\n")
# Calculate acceptance rate
result$accept <- result$accept / N
result$populations <- S
# Return formatted results
result$fixed <- data$fixed
result$labels <- labels
result$intercept <- intercept
class(result) <- "mjmcmc"
return(result)
}
#' The main loop for the MJMCMC (Mode Jumping MCMC) algorithm, used in both MJMCMC and GMJMCMC (Genetically Modified MJMCMC)
#'
#' @param data The data to use
#' @param complex The complexity measures of the data
#' @param loglik.pi The (log) density to explore
#' @param model.cur The model to start the loop at
#' @param N The number of iterations to run for
#' @param probs A list of the various probability vectors to use
#' @param params A list of the various parameters for all the parts of the algorithm
#' @param sub An indicator that if the likelihood is inexact and should be improved each model visit (EXPERIMENTAL!)
#' @param verbose A logical denoting if messages should be printed
#'
#' @return A list containing the following elements:
#' \item{models}{All visited models.}
#' \item{accept}{Number of accepted proposals of the chain.}
#' \item{mc.models}{All models visited during mjmcmc.}
#' \item{best.crit}{The highest log marginal probability of the visited models.}
#' \item{marg.probs}{Marginal probabilities of the features.}
#' \item{model.probs}{Marginal probabilities of all of the visited models.}
#' \item{model.probs.idx}{Indices of unique visited models.}
#'
#' @noRd
#'
mjmcmc.loop <- function (data, complex, loglik.pi, model.cur, N, probs, params, sub = FALSE, verbose = TRUE) {
# Acceptance count
accept <- 0
# Number of covariates or features
covar_count <- ncol(data$x)
# A list of models that have been visited
models <- vector("list", N)
# Initialize a vector to contain local opt visited models
lo.models <- vector("list", 0)
# Initialize list for keeping track of unique visited models
visited.models <- hashmap()
visited.models[[model.cur$model]] <- list(crit = model.cur$crit, coefs = model.cur$coefs)
best.crit <- model.cur$crit # Set first best criteria value
progress <- 0
mcmc_total <- as.numeric(model.cur$model)
for (i in seq_len(N)) {
if (verbose && N > 40 && i %% floor(N / 40) == 0) progress <- print_progressbar(progress, 40)
if (i > params$burn_in) pip_estimate <- mcmc_total / i
else pip_estimate <- rep(1 / covar_count, covar_count)
proposal <- mjmcmc.prop(data, loglik.pi, model.cur, complex, pip_estimate, probs, params, visited.models, sub = sub)
if (proposal$crit > best.crit) {
best.crit <- proposal$crit
if (verbose) cat(paste("\rNew best crit in cur pop:", best.crit, "\n"))
}
# If we did a large jump and visited models to save
if (!is.null(proposal$models)) {
lo.models <- c(lo.models, proposal$models)
for (mod in seq_along(proposal$models)) {
visited.models[[proposal$models[[mod]]$model]] <- list(crit = proposal$models[[mod]]$crit, coefs = proposal$models[[mod]]$coefs)
}
proposal$models <- NULL
}
visited.models[[proposal$model]] <- list(crit = proposal$crit, coefs = proposal$coefs)
if (log(runif(1)) <= proposal$alpha) {
model.cur <- proposal
accept <- accept + 1
}
mcmc_total <- mcmc_total + model.cur$model
# Add the current model to the list of visited models
models[[i]] <- model.cur
}
# Calculate and store the marginal inclusion probabilities and the model probabilities
all.models <- c(models, lo.models)
marg.probs <- marginal.probs.renorm(all.models, type = "both")
best.crit <- all.models[[marg.probs$idx[which.max(marg.probs$probs.m)]]]$crit
return(list(
models = c(models, lo.models),
accept = accept,
mc.models = models,
best.crit = best.crit,
marg.probs = marg.probs$probs.f,
model.probs = marg.probs$probs.m,
model.probs.idx = marg.probs$idx
))
}
#' Subalgorithm for generating a proposal and acceptance probability in (G)MJMCMC
#'
#' @param data The data to use in the algorithm
#' @param loglik.pi The the (log) density to explore
#' @param model.cur The current model to make the proposal respective to
#' @param complex The complexity measures used when evaluating the marginal likelihood
#' @param pip_estimate The current posterior inclusion probability estimate, used for proposals
#' @param probs A list of the various probability vectors to use
#' @param params A list of the various parameters for all the parts of the algorithm
#' @param visited.models A list of the previously visited models to use when subsampling and avoiding recalculation
#' @param sub An indicator that if the likelihood is inexact and should be improved each model visit (EXPERIMENTAL!)
#'
#' @noRd
#'
mjmcmc.prop <- function (data, loglik.pi, model.cur, complex, pip_estimate, probs, params, visited.models = NULL, sub = FALSE) {
l <- runif(1)
if (l < probs$large) {
### Large jump
### Select kernels to use for the large jump
q.l <- sample.int(n = 4, size = 1, prob = probs$large.kern) # Select large jump kernel
q.o <- sample.int(n = 2, size = 1, prob = probs$localopt) # Select optimizer function
q.r <- sample.int(n = 2, size = 1, prob = probs$random.kern) # Set randomization kernel
# Generate and do large jump
large.jump <- gen.proposal(model.cur$model, params$large, q.l, NULL, pip_estimate) # Get the large jump
chi.0.star <- xor(model.cur$model, large.jump$swap) # Swap large jump indices
# Optimize to find a mode
localopt <- local.optim(chi.0.star, data, loglik.pi, !large.jump$swap, complex, q.o, params, visited.models = visited.models, sub = sub) # Do local optimization
chi.k.star <- localopt$model
# Randomize around the mode
proposal <- gen.proposal(chi.k.star, list(neigh.size = length(pip_estimate), neigh.min = 1, neigh.max = length(pip_estimate)), q.r, NULL, (pip_estimate * 0 + 1 - params$random$prob), prob=TRUE)
proposal$model <- xor(chi.k.star, proposal$swap)
# Do a backwards large jump and add in the kernel used in local optim to use the same for backwards local optim.
chi.0 <- xor(proposal$model, large.jump$swap)
# Do a backwards local optimization
localopt2 <- local.optim(chi.0, data, loglik.pi, !large.jump$swap, complex, q.o, params, kernel = localopt$kern, visited.models=visited.models, sub = sub)
chi.k <- localopt2$model
### Calculate acceptance probability
# Set up the parameters that were used to generate the proposal
prop.params <- list(neigh.size = length(pip_estimate), neigh.min = 1, neigh.max = length(pip_estimate))#list(neigh.min = params$random$min, neigh.max = params$random$max, neigh.size = proposal$S)
# Calculate current model probability given proposal
model.cur$prob <- prob.proposal(proposal$model, chi.k, q.r, prop.params, (pip_estimate*0 + 1 - params$random$prob)) # Get probability of gamma given chi.k
# Store models visited during local optimization
proposal$models <- c(localopt$models, localopt2$models)
} else {
### Regular MH step
# Select MH kernel
q.g <- sample.int(n = 6, size = 1, prob = probs$mh)
# Generate the proposal
proposal <- gen.proposal(model.cur$model, params$mh, q.g, NULL, pip_estimate, prob = TRUE)
proposal$model <- xor(proposal$swap, model.cur$model)
# Calculate current model probability given proposal
model.cur$prob <- prob.proposal(proposal$model, model.cur$model, q.g, params$mh, pip_estimate)
}
# Calculate log likelihoods for the proposed model
proposal.res <- loglik.pre(loglik.pi, proposal$model, complex, data, params$mlpost, visited.models=visited.models, sub = sub)
proposal$crit <- proposal.res$crit
# Calculate acceptance probability for proposed model
proposal$alpha <- min(0, (proposal$crit + model.cur$prob) - (model.cur$crit + proposal$prob))
### Format results and return them
proposal$swap <- NULL; proposal$S <- NULL
proposal$coefs <- proposal.res$coefs
return(proposal)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.