R/univariate.R

#' Create a univariate distribution sampler
#'
#' Generate a function which generates samples from the distribution given the inverse and
#' collocation points.
#'
#' The univariate_sampler function is used for generating unconditional distributions using SCMC
#' with 1-D interpolation, whereas the conditional_sampler enables generating samples from random
#' varaibles which are conditioned on other variables. The inverse_cdf function in the conditional
#' case must take k+1 arguments, where k is the number of conditions, and return the inverse cdf for
#' the variable Y|X_1, ..., X_k, taking the arguments in order: (X_1, ..., X_k, p), where p is from
#' (0,1). For example, if we sample Y|X, we have a function \eqn{invcdf(x, p) = F^{-1}_{Y|X=x}(p).}
#'
#' For univariate_sampler, col_pts should be a vector of nodes. For the conditional_sampler,
#' col_pts should be a list of vectors containing the node points for each dimension.
#'
#' @param inverse_cdf The inverse cdf (i.e. quantile) function of the distribution to be sampled.
#' @param col_pts The collocation points to use for interpolation (usually generated by
#'   scmc::optimal_points()).
#' @param xdist A string indicating the distribution of the cheap variable X ni the method.
#' Should be a <name> for which functions "p<name>" and "r<name>" exist (like "norm" or "unif")
#' @param gss The sigma value of the approximating normal variable if using grid stretching.
#' @param transform A function to apply to the inverse_cdf before interpolation.
#' @param inv_transform A function to apply to the sample after interpolating. Should be the inverse function of the transform argument
#' @return \itemize{ \item For univariate_sampler, a function which takes a single argument n, and
#'   generates a sample of size n from the wanted distribution. \item For conditional_sampler, a
#'   function which takes two arguments: \itemize{ \item n - the size of the sample to generate
#'   \item conditional_samples - a list of samples of size n which are generated from the
#'   distributions on which the wanted distribution is conditioned. The list should follow the order
#'   as in inverse_cdf function} }
#' @export
univariate_sampler <- function(inverse_cdf, col_pts, xdist = "norm", gss = 1,
                               transform = identity, inv_transform = identity) {
  N <- length(col_pts)
  interpol <- if (N <= 15) monomial else if (N <= 35) newton else lagrange

  if (xdist == "norm") {
    poly <- interpol(function(x) transform(inverse_cdf(pnorm(x, sd = gss))),
                     col_pts)

    if (require("RcppZiggurat"))
      rnorm <- function(n, sd) sd*zrnorm(n)

    function(n) {
      inv_transform(poly(rnorm(n, sd = gss)))
    }
  } else {
    xcdf <- match.fun(paste0("p", xdist))
    poly <- interpol(function(x) transform(inverse_cdf(xcdf(x))), col_pts)

    xrand <- match.fun(paste0("r", xdist))
    function(n) {
      inv_transform(poly(xrand(n)))
    }
  }
}


#' @rdname univariate_sampler
#' @export
conditional_sampler <- function(inverse_cdf, col_pts, gss = 1) {
  poly <- lagrange(col_pts = col_pts, FUN = function(...) {
    args <- list(...)
    n <- length(args)
    do.call(inverse_cdf, c(args[-n], list(pnorm(args[[n]], sd = gss))))
  })

  function(n, condition_samples = list()) {
    do.call(poly, c(condition_samples, list(rnorm(n, sd = gss))))
  }
}
Blaza/scmc documentation built on May 29, 2019, 6:41 a.m.