Nothing
#' MCMC sampler for the SDE posterior.
#'
#' A Metropolis-within-Gibbs sampler for the Euler-Maruyama approximation to the true posterior density.
#'
#' @param model An `sde.model` object constructed with [sde.make.model()].
#' @param init An `sde.init` object constructed with [sde.init()].
#' @param hyper The hyperparameters of the SDE prior. See [sde.prior()].
#' @param nsamples Number of MCMC iterations.
#' @param burn Integer number of burn-in samples, or fraction of `nsamples` to prepend as burn-in.
#' @param mwg.sd Standard deviation jump size for Metropolis-within-Gibbs on parameters and missing components of first SDE observation (see Details).
#' @param adapt Logical or list to specify adaptive Metropolis-within-Gibbs sampling (see Details).
#' @param loglik.out Logical, whether to return the loglikelihood at each step.
#' @param last.miss.out Logical, whether to return the missing sde components of the last observation.
#' @param update.data Logical, whether to update the missing data.
#' @param data.out A scalar, integer vector, or list of three integer vectors determining the subset of data to be returned (see Details).
#' @param update.params Logical, whether to update the model parameters.
#' @param fixed.params Logical vector of length `nparams` indicating which parameters are to be held fixed in the MCMC sampler.
#' @param ncores If `model` is compiled with `OpenMP`, the number of cores to use for parallel processing. Otherwise, uses `ncores = 1` and gives a warning.
#' @param verbose Logical, whether to periodically output MCMC status.
#' @details The Metropolis-within-Gibbs (MWG) jump sizes can be specified as a scalar, a vector or length `nparams + ndims`, or a named vector containing the elements defined by `sde.init$nvar.obs.m[1]` (the missing variables in the first SDE observation) and `fixed.params` (the SDE parameters which are not held fixed). The default jump sizes for each MWG random variable are `.25 * |initial_value|` when `|initial_value| > 0`, and 1 otherwise.
#'
#' `adapt == TRUE` implements an adaptive MCMC proposal by Roberts and Rosenthal (2009). At step \eqn{n} of the MCMC, the jump size of each MWG random variable is increased or decreased by \eqn{\delta(n)}, depending on whether the cumulative acceptance rate is above or below the optimal value of 0.44. If \eqn{\sigma_n} is the size of the jump at step \eqn{n}, then the next jump size is determined by
#' \deqn{
#' \log(\sigma_{n+1}) = \log(\sigma_n) \pm \delta(n), \qquad \delta(n) = \min(.01, 1/n^{1/2}).
#' }{
#' log(sigma_(n+1)) = log(sigma_n) \pm delta(n), where delta(n) = min(.01, 1/n^(1/2)).
#' }
#' When `adapt` is not logical, it is a list with elements `max` and `rate`, such that `delta(n) = min(max, 1/n^rate)`. These elements can be scalars or vectors in the same manner as `mwg.sd`.
#'
#' For SDE models with thousands of latent variables, `data.out` can be used to thin the MCMC missing data output. An integer vector or scalar returns specific or evenly-spaced posterior samples from the `ncomp x ndims` complete data matrix. A list with elements `isamples`, `icomp`, and `idims` determines which samples, time points, and SDE variables to return. The first of these can be a scalar or vector with the same meaning as before.
#' @return A list with elements:
#' \describe{
#' \item{`params`}{An `nsamples x nparams` matrix of posterior parameter draws.}
#' \item{`data`}{A 3-d array of posterior missing data draws, for which the output dimensions are specified by `data.out`.}
#' \item{`init`}{The `sde.init` object which initialized the sampler.}
#' \item{`data.out`}{A list of three integer vectors specifying which timepoints, variables, and MCMC iterations correspond to the values in the `data` output.}
#' \item{`mwg.sd`}{A named vector of Metropolis-within-Gibbs standard devations used at the last posterior iteration.}
#' \item{`hyper`}{The hyperparameter specification.}
#' \item{`loglik`}{If `loglik.out == TRUE`, the vector of `nsamples` complete data loglikelihoods calculated at each posterior sample.}
#' \item{`last.iter`}{A list with elements `data` and `params` giving the last MCMC sample. Useful for resuming the MCMC from that point.}
#' \item{`last.miss`}{If `last.miss.out == TRUE`, an `nsamples x nmissN` matrix of all posterior draws for the missing data in the final observation. Useful for SDE forecasting at future timepoints.}
#' \item{`accept`}{A named list of acceptance rates for the various components of the MCMC sampler.}
#' }
#' @example examples/sde.post.R
#' @references Roberts, G.O. and Rosenthal, J.S. "Examples of adaptive MCMC." *Journal of Computational and Graphical Statistics* 18.2 (2009): 349-367. <http://www.probability.ca/jeff/ftpdir/adaptex.pdf>.
#' @export
sde.post <- function(model, init, hyper,
nsamples, burn, mwg.sd = NULL, adapt = TRUE,
loglik.out = FALSE, last.miss.out = FALSE,
update.data = TRUE, data.out,
update.params = TRUE, fixed.params,
ncores = 1, verbose = TRUE) {
# model constants
if(class(model) != "sde.model") {
stop("model must be an sde.model object.")
}
ndims <- model$ndims
data.names <- model$data.names
nparams <- model$nparams
param.names <- model$param.names
# initial values
if(class(init) != "sde.init") {
# all argument checking done at construction time
stop("init must be an sde.init object.")
}
dt <- init$dt.m
par.index <- init$nvar.obs.m
init.data <- init$data
init.params <- init$params
if(missing(fixed.params)) fixed.params <- rep(FALSE, nparams)
# parse inputs
ncomp <- nrow(init.data)
nmiss0 <- ndims - par.index[1]
nparams2 <- nparams+nmiss0
## if(length(dt) == 1) dt <- rep(dt, ncomp-1) # time
## .check.init(init.data, dt, init.params, param.names, data.names)
if(missing(burn)) burn <- max(.1, 1e3)
if(burn < 1) burn <- nsamples*burn
burn <- floor(burn)
# output sizes
if(all(par.index == ndims)) update.data <- FALSE
data.out <- .set.data.out(data.out, nsamples, ncomp, ndims, update.data)
nsamples.out <- length(data.out$isamples)
ncomp.out <- length(data.out$icomp)
ndims.out <- length(data.out$idims)
ndata.out <- ifelse(update.data, nsamples.out*ndims.out*ncomp.out, 1)
nparams.out <- ifelse(update.params, nsamples*nparams, 1)
nmissN <- ndims - par.index[ncomp] # last missing output
if(nmissN == 0) last.miss.out <- FALSE
nlast.miss.out <- ifelse(last.miss.out, nsamples*nmissN, 1)
# prior specification
prior <- hyper
# format hyperparameters
prior <- model$hyper.check(prior, param.names, data.names)
# C++ format check (is phi a list with vector-double elements)
if(!is.valid.hyper(prior)) {
stop("model$hyper.check must convert hyper to a list with NULL or vector-numeric elements.")
}
# random walk jump size
if(is.null(mwg.sd)) {
mwg.sd <- .25 * abs(c(init.params, init.data[1,]))
mwg.sd[mwg.sd == 0] <- 1
}
tune.par <- .set.jump(mwg.sd, adapt, param.names, data.names)
# multicore functionality
if(ncores < 1) stop("ncores must be a positive integer.")
if(!model$omp && ncores > 1) {
warning("model not compiled with openMP: ncores set to 1.")
ncores <- 1
}
if(verbose) {
message("Output size:")
if(update.params) message("params = ", round(nparams.out, 2))
if(update.data) message("data = ", round(ndata.out, 2))
message("Running posterior sampler...")
}
# if(debug) browser()
tm <- chrono()
# compute
ans <- model$cobj$Post(initParams = as.double(init.params),
initData = as.double(t(init.data)),
dT = as.double(dt),
nDimsPerObs = as.integer(par.index),
fixedParams = as.logical(fixed.params),
nSamples = as.integer(nsamples),
burn = as.integer(burn),
nParamsOut = as.integer(nparams.out),
nDataOut = as.integer(ndata.out),
dataOutSmp = as.integer(data.out$isamples-1),
dataOutComp = as.integer(data.out$icomp-1),
dataOutDims = as.integer(data.out$idims-1),
updateParams = as.double(update.params),
updateData = as.double(update.data),
priorArgs = prior,
tunePar = tune.par,
updateLogLik = as.integer(loglik.out),
nLogLikOut = as.integer(ifelse(loglik.out, nsamples, 1)),
updateLastMiss = as.integer(last.miss.out),
nLastMissOut = as.integer(nlast.miss.out),
nCores = as.integer(ncores),
displayProgress = as.logical(verbose))
tm <- chrono(tm, display = verbose)
names(ans) <- c("paramsOut", "dataOut", "paramAccept", "gibbsAccept",
"logLikOut", "lastMissOut", "lastIter", "mwgSd")
# acceptance rates
# if(debug) browser()
accept <- .set.accept(ans$gibbsAccept, ans$paramAccept,
nsamples+burn, par.index, fixed.params,
param.names, data.names,
update.data, update.params, verbose)
out <- list()
if(update.params) {
theta.out <- matrix(ans$paramsOut,
ncol = nparams, byrow = TRUE,
dimnames = list(NULL, param.names))
out <- c(out, list(params = theta.out))
} else out <- c(out, list(params = init.params))
if(update.data) {
x.out <- array(ans$dataOut,
dim = c(ndims.out, ncomp.out, nsamples.out),
dimnames = list(data.names[data.out$idims], NULL, NULL))
x.out <- aperm(x.out, perm = c(2,1,3))
out <- c(out, list(data = x.out))
} else out <- c(out, list(data = init.data))
if(loglik.out) out <- c(out, list(loglik = ans$logLikOut))
mwg.sd <- ans$mwgSd
mwg.sd[1:nparams][fixed.params] <- 0
mwg.sd[nparams+1:ndims][1:ndims <= par.index[1]] <- 0
names(mwg.sd) <- c(param.names, data.names)
out <- c(out, list(init = init, data.out = data.out,
mwg.sd = mwg.sd, hyper = hyper))
last.iter <- list(params = ans$lastIter[1:nparams],
data = matrix(ans$lastIter[nparams + 1:(ncomp*ndims)],
ncomp, ndims, byrow = TRUE))
names(last.iter$params) <- param.names
colnames(last.iter$data) <- data.names
out <- c(out, list(last.iter = last.iter))
if(last.miss.out) {
last.miss <- matrix(ans$lastMissOut, nsamples, nmissN, byrow = TRUE)
colnames(last.miss) <- data.names[par.index[ncomp]+(1:nmissN)]
out <- c(out, list(last.miss = last.miss))
}
out <- c(out, list(accept = accept))
out
}
#--- helper functions ----------------------------------------------------------
# which MCMC iterations and which time points are returned
# input is a scalar, integer vector, or list of integer vectors named
# with names(data.out) = c("icomp", "idims", "isamples") (in any order)
# output is a list of integer vectors with elements named as above
.set.data.out <- function(data.out, nsamples, ncomp, ndims, update.data) {
if(missing(data.out)) data.out <- 2e3
if(!is.list(data.out)) {
# keep complete data matrix at isamples == data.out
isamples <- data.out
icomp <- 1:ncomp
idims <- 1:ndims
} else {
if(!identical(sort(names(data.out)),
sort(c("icomp", "idims", "isamples")))) {
stop("data.out must be scalar, vector, or list with elements icomp, idims, and isamples.")
}
isamples <- data.out$isamples
icomp <- data.out$icomp
idims <- data.out$idims
}
if(length(isamples) == 1) {
# evenly space returned samples
isamples <- unique(floor(seq(1, nsamples, len = isamples)))
}
if(!update.data) {
# return the data once
isamples <- 1:nsamples
icomp <- 1:ncomp
idims <- 1:ndims
}
# check inputs
if(anyDuplicated(icomp) || !all(icomp %in% 1:ncomp)) {
stop("data.out$icomp must be a vector of integers between 1 and ncomp.")
}
if(anyDuplicated(idims) || !all(idims %in% 1:ndims)) {
stop("data.out$idims must be a vector of integers between 1 and ndims.")
}
if(anyDuplicated(isamples) || !all(isamples %in% 1:nsamples)) {
stop("data.out$isamples must be a vector of integers between 1 and nsamples.")
}
list(icomp = sort(icomp), idims = sort(idims), isamples = sort(isamples))
}
# jump sizes
.set.jump <- function(mwg.sd, adapt, param.names, data.names) {
nparams <- length(param.names)
ndims <- length(data.names)
.format.arg <- function(x) {
if(length(x) == 1) {
x <- rep(x, nparams+ndims)
names(x) <- c(param.names, data.names)
} else if(length(x) == nparams+ndims && is.null(names(x))) {
names(x) <- c(param.names, data.names)
}
x
}
.set.arg <- function(x, id) {
y <- rep(0, nparams+ndims)
names(y) <- c(param.names, data.names)
y[names(x)] <- x
y
}
mwg.sd <- .format.arg(mwg.sd)
if(!is.valid.vars(names(mwg.sd), c(param.names, data.names))) {
stop("names(mwg.sd) must be a unique subset of param.names and data.names.")
}
mwg.sd <- .set.arg(mwg.sd)
# adaptive MCMC
if(is.logical(adapt)) {
if(adapt) {
amax <- .01
arate <- .5
} else {
amax <- 0
arate <- 0
}
} else {
adapt <- TRUE
amax <- adapt$max
arate <- adapt$rate
if(is.null(amax) || is.null(arate)) {
stop("adapt must be logical or a list with elements max and rate.")
}
}
# check args
amax <- .format.arg(amax)
if(!is.valid.vars(names(amax), c(param.names, data.names))) {
stop("names(adapt$max) must be a unique subset of param.names and data.names.")
}
amax <- .set.arg(amax)
arate <- .format.arg(arate)
if(!is.valid.vars(names(arate), c(param.names, data.names))) {
stop("names(adapt$rate) must be a unique subset of param.names and data.names.")
}
arate <- .set.arg(arate)
if(any(arate < 0)) stop("adapt$rate must be non-negative.")
list(sd = as.double(mwg.sd), max = as.double(amax),
rate = as.double(arate), adapt = as.logical(amax > 0))
}
# parse acceptance rates
.set.accept <- function(bb.accept, vnl.accept, nsamples,
par.index, fixed.params,
param.names, data.names,
update.data, update.params, verbose) {
ndims <- length(data.names)
nparams <- length(param.names)
accept <- NULL
if(update.data) {
# eraker proposals
accept <- c(accept, list(data = bb.accept[-1]/nsamples))
bb.acc <- accept$data[par.index[-1] < ndims]*100
bb.acc <- signif(c(min(bb.acc), mean(bb.acc)), 3)
if(verbose) {
message("Bridge accept: min = ", bb.acc[1],
"%, avg = ", bb.acc[2], "%")
}
}
if(update.params) {
# parameter proposals
accept <- c(accept, list(params = vnl.accept[1:nparams]/nsamples))
if(verbose) {
for(ii in which(!fixed.params)) {
message(param.names[ii], " accept: ",
signif(accept$params[ii]*100,3), "%")
}
}
}
nmiss0 <- ndims-par.index[1]
if(update.data && (nmiss0 > 0)) {
# initial missing data proposals
accept <- c(
accept,
list(miss0 = vnl.accept[nparams+par.index[1] + (1:nmiss0)]/nsamples)
)
if(verbose) {
for(ii in 1:nmiss0) {
message(data.names[par.index[1] + ii], "0 accept: ",
signif(accept$miss0[ii]*100,3), "%")
}
}
}
accept
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.