###########################################################
## STAT 243 Final Project
## Adaptive Rejection Sampling Algorithm
##
## Date: 12/12/18
## Authors: Jonathan Morrell, Ziyang Zhou, Vincent Zijlmans
###########################################################
#' Adaptive Rejection Sampling
#'
#' \code{ars} returns a sample of length N from the density function f
#'
#' Adaptive Rejection Sampling algorithm for sampling from any univariate log-concave density function f(x).
#' This algorithm is particularly useful in situations where computing f(x) may be expensive, as
#' the number of times f(x) must be sampled scales approximately 3N^{1/3}, where N
#' is the total number of samples, which is significantly more efficient
#' than conventional rejection sampling. Also, f(x) is not required to be normalized.
#'
#' IMPORTANT: x0 must satisfy the following conditions:
#'
#' At least one of dh(x0)/dx must be positive or the left bound must not be -Inf
#'
#' AND
#'
#' At least one of dh(x0)/dx must be negative or the right bound must not be Inf.
#'
#' @author Jonathan Morrell, Ziyang Zhou, Vincent Zijlmans
#' @usage ars(f, N, x0 = c(-1.0, 1.0), bounds = c(-Inf, Inf), ...)
#'
#' @param f function from which to sample (must be log-concave)
#' @param N number of observations desired
#' @param x0 vector of initial x points used to approximate h=log(f(x))
#' @param bounds vector (length 2) of lower and upper bounds for the distribution function f
#' @param ... Additional arguments to pass to f
#'
#' @return returns (length N) vector of samples from the density function f(x)
#'
#' @seealso Wikipedia entry on \href{https://en.wikipedia.org/wiki/Rejection_sampling}{Rejection Sampling}.
#' @references Gilks, W. R., et al. (1995-01-01). "Adaptive Rejection Metropolis Sampling within Gibbs Sampling".
#' \emph{Journal of the Royal Statistical Society. Series C (Applied Statistics).} \strong{44} (4): 455–472.
#' @examples
#' N <- 10000
#' x <- ars(dnorm, N)
#' x <- ars(dexp, N, x0=c(1.0), bounds=c(0.0, Inf))
#' x <- ars(dgamma, N, x0=c(2.5), bounds=c(0.0, Inf), shape=3.0, rate=2.0)
#' ## Could use hist to visualize distribution, e.g.
#' ## hist(x, breaks = 50, freq=FALSE)
#'
#' ## The following will produce an error
#' \dontrun{
#' x <- ars(dgamma, N, x0=c(0.1), bounds=c(0.0, Inf), shape=3.0, rate=2.0)
#' }
ars <- function (f, N, x0 = c(-1.0, 1.0), bounds = c(-Inf, Inf), ...) {
  ## ---- Input sanity checks ----
  if (!is.function(f)) {
    stop('f is not a function.')
  }
  if (!(is.numeric(N) && length(N) == 1 && N >= 0)) {
    stop('N must be a single non-negative number.')
  }
  if (length(bounds) != 2) {
    stop('Length of bounds must be 2! (upper and lower bounds required)')
  }
  if (bounds[1] == bounds[2]) {
    warning('Same upper and lower bounds, replacing them by default: bounds=c(-Inf, Inf)')
    bounds <- c(-Inf, Inf)
  }
  ## Ensure bounds are c(lower, upper)
  if (bounds[1] > bounds[2]) {
    warning('Bounds should be in order c(lower, upper)')
    bounds <- sort(bounds)
  }
  if (!all((x0 > bounds[1]) & (x0 < bounds[2]))) {
    stop('Bad inputs: x0 must be inside bounds.')
  }

  ## h(x) = log(f(x)); ARS works on the log-density, so f need not be normalized.
  h <- function(x) {
    log(f(x, ...))
  }

  ## ---- Helper constants ----
  dx <- 1E-8          # mesh size for finite difference approximation to h'(x)
  eps <- 1E-7         # tolerance for non-log-concavity test
  max_iters <- 10000  # prevent infinite loop
  current_iter <- 0
  bds_warn <- FALSE   # flag so the out-of-bounds warning fires only once

  ## ---- Initialize state vectors ----
  x0 <- sort(x0)      # ensure x0 is in ascending order
  x_j <- c()          # abscissae at which h has been evaluated
  h_j <- c()          # h(x_j)
  dh_j <- c()         # h'(x_j)
  x <- numeric(0)     # accepted sample vector

  ## Evaluate h(x0) and a forward finite-difference approximation to h'(x0)
  h0 <- h(x0)
  dh0 <- (h(x0 + dx) - h0)/dx
  ## Drop starting points where h or h' is NaN/infinite (e.g. f(x0) == 0)
  isnum <- is.finite(h0) & is.finite(dh0)
  x0 <- x0[isnum]
  h0 <- h0[isnum]
  dh0 <- dh0[isnum]
  ## Error if no usable x0 values remain
  if (length(h0) == 0) {
    stop('h(x0) either infinite or NaN.')
  }
  ## The exponential envelope must be integrable: ARS requires either finite
  ## bounds or at least one positive and one negative derivative.
  if (!(dh0[1] > 0 || is.finite(bounds[1]))) {
    stop('dh(x0)/dx must have at least one positive value, or left bound must be greater than -infinity.')
  }
  if (!(dh0[length(dh0)] < 0 || is.finite(bounds[2]))) {
    stop('dh(x0)/dx must have at least one negative value, or right bound must be less than infinity.')
  }

  ##### HELPER FUNCTIONS #####

  calc_z_j <- function (x, h, dh, bounds) {
    ## Calculate intercepts of the piecewise upper hull u_j(x): z_j is where
    ## the tangent at x_j meets the tangent at x_{j+1}.
    ##
    ## Arguments
    ##   x, h, dh: evaluated x, h(x) and h'(x)
    ##   bounds: bounds of distribution h(x)
    ##
    ## Value
    ##   intercepts (segment boundaries) of piecewise u_j(x), length L+1
    L <- length(h)
    z <- (h[2:L] - h[1:(L-1)] - x[2:L]*dh[2:L] + x[1:(L-1)]*dh[1:(L-1)]) /
         (dh[1:(L-1)] - dh[2:L])
    return (append(bounds[1], append(z, bounds[2])))
  }

  calc_u <- function (x, h, dh) {
    ## Calculate slope/intercept of each tangent line in the upper hull u_j(x).
    ##
    ## Arguments
    ##   x, h, dh: evaluated x, h(x), and h'(x)
    ##
    ## Value (list)
    ##   m, b: slope/intercept of u(x)
    return (list(m = dh, b = h - x*dh))
  }

  calc_l <- function (x, h) {
    ## Calculate slope/intercept of each chord in the lower hull l_j(x)
    ## (secants between consecutive evaluated points).
    ##
    ## Arguments
    ##   x, h: evaluated x, and h(x)
    ##
    ## Value (list)
    ##   m, b: slope/intercept of l(x)
    L <- length(h)
    m <- (h[2:L] - h[1:(L-1)])/(x[2:L] - x[1:(L-1)])
    b <- (x[2:L]*h[1:(L-1)] - x[1:(L-1)]*h[2:L])/(x[2:L] - x[1:(L-1)])
    return (list(m = m, b = b))
  }

  calc_s_j <- function (m, b, z) {
    ## Calculate beta values and weights needed to sample from exp(u(x)).
    ##
    ## Arguments
    ##   m, b: slope/offset for piecewise u_j(x)
    ##   z: boundaries z_j of the piecewise function u_j(x)
    ##
    ## Value (list)
    ##   beta: amplitude of each piecewise segment in s(x)=beta*exp(m*x)
    ##   w: area (probability weight) of each segment
    L <- length(z)
    eb <- exp(b)                             # un-normalized betas
    eb <- ifelse(is.finite(eb), eb, 0.0)     # Inf/NaN -> 0
    ## Segments with m != 0 integrate to (eb/m)*(e^{m*z_r} - e^{m*z_l});
    ## flat segments (m == 0) integrate to eb*(z_r - z_l).
    nz <- (m != 0.0)
    nz_sum <- sum((eb[nz]/m[nz])*(exp(m[nz]*z[2:L][nz]) - exp(m[nz]*z[1:(L-1)][nz])))
    z_sum <- sum(eb[!nz]*(z[2:L][!nz] - z[1:(L-1)][!nz]))
    ## Ensure integral of s(x) > 0
    if (nz_sum + z_sum <= 0.0) {
      stop('Area of s(x)=exp(u(x)) <= 0')
    }
    ## Normalize beta so the segment areas sum to 1
    beta <- eb/(nz_sum + z_sum)
    ## Weights for both the m != 0 and m == 0 cases
    w <- ifelse(nz,
                (beta/m)*(exp(m*z[2:L]) - exp(m*z[1:(L-1)])),
                beta*(z[2:L] - z[1:(L-1)]))
    return (list(beta = beta, w = ifelse((w > 0) & is.finite(w), w, 0.0)))
  }

  draw_x <- function (N, beta, m, w, z) {
    ## Sample from the envelope density s(x) = exp(u(x)) by first picking a
    ## segment j with probability w_j, then drawing within the segment via
    ## inverse-CDF sampling of the (truncated) exponential.
    ##
    ## Arguments
    ##   N: number of samples
    ##   beta, m, w: parameters of u_j(x)
    ##   z: boundaries of u_j(x)
    ##
    ## Value (list)
    ##   x: samples from s(x)
    ##   J: segment index j of each sample
    J <- sample(length(w), N, replace = TRUE, prob = w)
    u <- runif(N)
    ## Inverse CDF: exponential segment when m != 0, uniform when m == 0
    x <- ifelse(m[J] != 0,
                (1.0/m[J])*log((m[J]*w[J]/beta[J])*u + exp(m[J]*z[J])),
                z[J] + w[J]*u/beta[J])
    return (list(x = x, J = J))
  }

  ##### MAIN LOOP #####
  ## Loop until we have N samples (or until error)
  while (length(x) < N) {
    ## Track iterations; max_iters is an arbitrary upper bound
    current_iter <- current_iter + 1
    if (current_iter > max_iters) {
      stop('Maximum number of iterations reached.')
    }
    ## Vectorized sampling in chunks. Chunk size grows as the square of the
    ## iteration, up to the full sample size N: small chunks while u(x) is a
    ## poor approximation of h(x), large chunks once it improves.
    chunk_size <- min(N, current_iter^2)

    ##### INITIALIZATION AND UPDATING STEP #####
    ## Only re-build the hulls if there are new evaluated points
    if (length(x0) > 0) {
      ## Merge new values into the evaluated-point vectors
      x_j <- append(x_j, x0)
      h_j <- append(h_j, h0)
      dh_j <- append(dh_j, dh0)
      x0 <- c()
      h0 <- c()
      dh0 <- c()
      ## Sort so that x_j's are in ascending order
      srtd <- sort(x_j, index.return = TRUE)
      x_j <- srtd$x
      h_j <- h_j[srtd$ix]
      dh_j <- dh_j[srtd$ix]
      L <- length(dh_j)
      ## Remove near-duplicate neighbors in x and h'(x); duplicates would
      ## produce discontinuities (0/0) when computing the z_j's
      if (L > 1) {
        while (!all((abs(dh_j[1:(L-1)] - dh_j[2:L]) > eps) & ((x_j[2:L] - x_j[1:(L-1)]) > dx))) {
          ## Only keep values with dissimilar neighbors;
          ## always keep the first index (one is always unique)
          dup <- append(TRUE, ((abs(dh_j[1:(L-1)] - dh_j[2:L]) > eps) & ((x_j[2:L] - x_j[1:(L-1)]) > dx)))
          x_j <- x_j[dup]
          h_j <- h_j[dup]
          dh_j <- dh_j[dup]
          L <- length(dh_j)
          if (L == 1) {
            break
          }
        }
        if (L > 1) {
          ## h'(x) must be strictly decreasing for a log-concave f
          if (!all((dh_j[1:(L-1)] - dh_j[2:L]) >= eps)) {
            stop('Input function f not log-concave.')
          }
        }
      }
      ## Pre-compute z_j, u_j(x), l_j(x), s_j(x)
      z_j <- calc_z_j(x_j, h_j, dh_j, bounds)
      u_j <- calc_u(x_j, h_j, dh_j)
      m_u <- u_j$m
      b_u <- u_j$b
      l_j <- calc_l(x_j, h_j)
      m_l <- l_j$m
      b_l <- l_j$b
      s_j <- calc_s_j(m_u, b_u, z_j)
      beta_j <- s_j$beta
      w_j <- s_j$w
    }

    ##### SAMPLING STEP #####
    ## Draw candidate x* from the envelope s(x) = exp(u(x))
    draws <- draw_x(chunk_size, beta_j, m_u, w_j, z_j)
    x_s <- draws$x
    J <- draws$J
    ## Uniform variates for the squeeze/rejection tests
    w <- runif(chunk_size)
    ## Warn if samples fell outside of bounds; this happens if the bounds
    ## given do not reflect the actual support of the distribution
    if (!all((x_s > bounds[1]) & (x_s < bounds[2]))) {
      ## Flag so warning only happens once
      if (!bds_warn) {
        warning('Sampled x* not inside bounds...please check bounds.')
        bds_warn <- TRUE
      }
      ## Only keep x values within bounds
      ibd <- ((x_s > bounds[1]) & (x_s < bounds[2]))
      J <- J[ibd]
      x_s <- x_s[ibd]
      w <- w[ibd]
    }
    ## Index shift mapping the u-segment index to the l-chord index
    J_l <- J - ifelse(x_s < x_j[J], 1, 0)
    ## Only use x-values inside the lower hull's support (l(x) > -Inf elsewhere)
    ibd <- (J_l >= 1) & (J_l < length(x_j))
    ## Squeeze test: accept immediately when w <= exp(l(x*) - u(x*)),
    ## avoiding an (expensive) evaluation of h
    y <- exp(x_s[ibd]*(m_l[J_l[ibd]] - m_u[J[ibd]]) + b_l[J_l[ibd]] - b_u[J[ibd]])
    acc <- (w[ibd] <= y)
    ## Append accepted values to x
    x <- append(x, x_s[ibd][acc])
    ## Points failing (or skipping) the squeeze test need h evaluated
    x0 <- append(x_s[!ibd], x_s[ibd][!acc])
    if (length(x0) > 0) {
      ## Evaluate h(x0) and finite-difference h'(x0); these points also become
      ## new hull knots on the next iteration
      h0 <- h(x0)
      dh0 <- (h(x0 + dx) - h0)/dx
      ## Check for NaNs and infinities
      isnum <- is.finite(h0) & is.finite(dh0)
      x0 <- x0[isnum]
      h0 <- h0[isnum]
      dh0 <- dh0[isnum]
      w <- append(w[!ibd], w[ibd][!acc])[isnum]
      J <- append(J[!ibd], J[ibd][!acc])[isnum]
      ## Full rejection test: accept when w <= exp(h(x*) - u(x*))
      acc <- (w <= exp(h0 - x0*m_u[J] - b_u[J]))
      ## Append accepted values to x
      x <- append(x, x0[acc])
    }
  }
  ## Only return N samples (vectorized chunks can overshoot);
  ## seq_len(N) (not 1:N) so N = 0 returns an empty vector
  return (x[seq_len(N)])
}
## Add the following code to your website.
## For more information on customizing the embed code, read Embedding Snippets.