R/gaussint.R

Defines functions gaussint

Documented in gaussint

## excursions.int.R
##
##   Copyright (C) 2012, 2013, 2014 David Bolin, Finn Lindgren
##
##   This program is free software: you can redistribute it and/or modify
##   it under the terms of the GNU General Public License as published by
##   the Free Software Foundation, either version 3 of the License, or
##   (at your option) any later version.
##
##   This program is distributed in the hope that it will be useful,
##   but WITHOUT ANY WARRANTY; without even the implied warranty of
##   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
##   GNU General Public License for more details.
##
##   You should have received a copy of the GNU General Public License
##   along with this program.  If not, see <http://www.gnu.org/licenses/>.

#' Sequential estimation of Gaussian integrals
#'
#' \code{gaussint} is used for calculating \eqn{n}-dimensional Gaussian integrals
#' \deqn{\int_a^b \frac{|Q|^{1/2}}{(2\pi)^{n/2}}
#' \exp(-\frac1{2}(x-\mu)^{T}Q(x-\mu)) dx}{|Q|^(1/2)*(2\pi)^(-n/2) \int_a^b exp(-0.5*(x-\mu)^T Q (x-\mu)) dx}
#' A limit value \eqn{lim} can be used to stop the integration if the sequential
#' estimate goes below the limit, which can result in substantial computational
#' savings in cases when one only is interested in testing if the integral is above
#' the limit value. The integral is calculated sequentially, and estimates for
#' all subintegrals are also returned.
#'
#' @param mu Expectation vector for the Gaussian distribution.
#' @param Q.chol The Cholesky factor of the precision matrix (optional).
#' @param Q Precision matrix for the Gaussian distribution. If Q is supplied but not Q.chol,
#' the cholesky factor is computed before integrating.
#' @param a Lower limit in integral.
#' @param b Upper limit in integral.
#' @param lim If this argument is used, the integration is stopped and 0 is returned
#' if the estimated value goes below \eqn{lim}.
#' @param n.iter Number or iterations in the MC sampler that is used for approximating
#' probabilities. The default value is 10000.
#' @param ind Indices of the nodes that should be analyzed (optional).
#' @param use.reordering Determines what reordering to use:
#' \itemize{
#'   \item{"natural" }{No reordering is performed.}
#'   \item{"sparsity" }{Reorder for sparsity in the cholesky factor (MMD reordering
#'   is used).}
#'   \item{"limits" }{Reorder by moving all nodes with a=-Inf and b=Inf first and
#'   then reordering for sparsity (CAMD reordering is used).}}
#' @param max.size The largest number of sub-integrals to compute. Default is the total
#' dimension of the distribution.
#' @param max.threads Decides the number of threads the program can use. Set to 0 for
#' using the maximum number of threads allowed by the system (default).
#' @param seed The random seed to use (optional).
#'
#' @return A list with elements
#' \item{P }{Value of the integral.}
#' \item{E }{Estimated error of the P estimate.}
#' \item{Pv }{A vector with the estimates of all sub-integrals.}
#' \item{Ev }{A vector with the estimated errors of the Pv estimates.}
#' @export
#' @details The function uses sequential importance sampling to estimate the 
#' Gaussian integral, and returns all computed sub-integrals. This means that if, for 
#' example, the function is used to compute \eqn{P(x>0)} for an n-dimensional Gaussian
#' variable \eqn{x}, then all integrals \eqn{P(x_1>0,...,x_i>0)} for \eqn{i=1,...,n} are
#' computed. 
#' 
#' If one is only interested in whether \eqn{P(x>0)>alpha} or not, then one can 
#' stop the integration as soon as \eqn{P(x_1>0,...,x_i>0)<alpha}. This can save a lot of 
#' computation time if \eqn{P(x_1>0,...,x_i>0)<alpha} for \eqn{i} much smaller than 
#' \eqn{n}. This limit value is specified by the \code{lim} argument. 
#' 
#' Which reordering to use depends on what the purpose of the calculation is and what
#' the integration limits are. However, in general the \code{limits} reordering is typically
#' most appropriate since this combines sparisty (which improves accuracy and reduces
#' computational cost) with automatic handling of dimensions with limits \eqn{a=-Inf} and
#' \eqn{b=Inf}, which do not affect the probability but affect the computation time
#' if they are not handled separately. 
#' @author David Bolin \email{davidbolin@@gmail.com}
#' @references Bolin, D. and Lindgren, F. (2015) \emph{Excursion and contour uncertainty regions for latent Gaussian models}, JRSS-series B, vol 77, no 1, pp 85-106.
#' 
#' Bolin, D. and Lindgren, F. (2018), \emph{Calculating Probabilistic Excursion Sets and Related Quantities Using excursions}, Journal of Statistical Software, vol 86, no 1, pp 1-20.
#' @examples
#' ## Create mean and a tridiagonal precision matrix
#' n = 11
#' mu.x = seq(-5, 5, length=n)
#' Q.x = Matrix(toeplitz(c(1, -0.1, rep(0, n-2))))
#' ## Calculate the probability that the variable is between mu-3 and mu+3
#' prob = gaussint(mu=mu.x, Q=Q.x, a= mu.x-3, b=mu.x+3, max.threads=2)
#' prob$P

gaussint <- function(mu,
                     Q.chol,
                     Q,
                     a,
                     b,
                     lim = 0,
                     n.iter = 10000,
                     ind,
                     use.reordering = c("natural","sparsity","limits"),
                     max.size,
                     max.threads=0,
                     seed)
{

  if( missing(Q) && missing(Q.chol))
	  stop('Must specify a precision matrix or its Cholesky factor')

  if(missing(a))
    stop('Must specify lower integration limit')

  if(missing(b))
    stop('Must specify upper integration limit')

  if(!missing(mu))
    mu <- private.as.vector(mu)

  if(!missing(ind))
    ind <- private.as.vector(ind)

  a <- private.as.vector(a)
  b <- private.as.vector(b)

  if(!missing(Q))
    Q <- private.as.dgCMatrix(Q)

  if(!missing(Q.chol))
    Q.chol <- private.as.dtCMatrixU(Q.chol)


  n = length(a)
  if(length(b) != n)
    stop('Vectors with integration limits are of different length.')

  use.reordering <- match.arg(use.reordering)

  if(!missing(ind) && !is.null(ind)){
    a[!ind] = -Inf
    b[!ind] = Inf
  }

  if(missing(max.size))
    max.size = n

  reordered = FALSE
  if(!missing(Q.chol) && !is.null(Q.chol)){
    ## Cholesky factor is provided, use that and do not reorder
      L = Q.chol
      if(dim(L)[1] != dim(L)[2]){
        stop("Q.chol is not square")
      } else if(dim(L)[1] != n) {
        stop("Dimensions of Q.chol is different from the length of the integration limits.")
      }
  } else if(!missing(Q) && !is.null(Q)){
    if(dim(Q)[1] != dim(Q)[2]){
      stop("Q is not square")
      } else if(dim(Q)[1] != n) {
        stop("Dimensions of Q is different from the length of the integration limits.")
      }
    ## Cholesky factor is not provided and we are told to reorder
    if(use.reordering == "limits")
    {
      #Reorder by moving nodes with limits -inf ... inf first
      inf.ind = (a==-Inf) & (b==Inf)
      if(sum(inf.ind)>0){
        max.size = min(max.size,n-sum(inf.ind))
        n = length(a)
        cind = rep(1,n)
        cind[inf.ind] = 0
        reo = rep(0,n)
        out <- .C("reordering",nin = as.integer(n), Mp = as.integer(Q@p),
                  Mi = as.integer(Q@i), reo = as.integer(reo),
                  cind = as.integer(cind))
        reo = out$reo+1
        Q = Q[reo,reo]
        reordered = TRUE
      }

      L <- suppressWarnings(private.Cholesky(Q,perm=FALSE)$R)

    } else if(use.reordering == "sparsity"){
      ## Reorder for sparsity calculated by Matrix
      L <- suppressWarnings(private.Cholesky(Q,perm=TRUE))
      reo <- L$reo
      ireo <- L$ireo
      reordered = TRUE
      L <- L$R
    } else {
      ## Do not reorder
      L <- suppressWarnings(private.Cholesky(Q,perm=FALSE)$R)
    }
  }

  # Note: If lim > 0 and reorder == TRUE, we should calculate marginal
  # probabilities, see if bound is above lim, and then reorder

  if(!missing(mu) && !is.null(mu)){
    if(length(mu) != n){
      stop("The length of mu is different from the length of the integration limits.")
    }
    a = a - mu
    b = b - mu
  }

  a[a==Inf]  = .Machine$double.xmax
  b[b==Inf]  = .Machine$double.xmax
  a[a==-Inf] = -.Machine$double.xmax
  b[b==-Inf] = -.Machine$double.xmax

	if(reordered == TRUE){
		a = a[reo]
		b = b[reo]
  }

  if (is(L, "Matrix")) {
     L <- private.as.dtCMatrixU(L)
  } else {
    stop("Unsuported matrix type.")
  }

  if(!missing(seed) && !is.null(seed)){
    seed_provided = 1
    seed <- as.integer(seed)
    if(length(seed)==1){
      seed <- rep(seed,6)
    }
    seed.in = seed
  } else {
    seed_provided = 0
    seed.in = as.integer(rep(0,6))
  }

  Pv = Ev = rep(0,dim(L)[1])

  opts = c(n,n.iter,max.size,max.threads,seed_provided)

  out <- .C("shapeInt", Mp = as.integer(L@p), Mi = as.integer(L@i),
              Mv = as.double(L@x), a = as.double(a), b = as.double(b),
              opts = as.integer(opts), lim = as.double(lim),
              Pv = as.double(Pv), Ev = as.double(Ev),seed_in=seed.in)

  P = out$Pv[1]
  E = out$Ev[1]
  if(use.reordering == "limits") {
    out$Pv[1:(dim(L)[1]-max.size)] = out$Pv[dim(L)[1]-max.size+1]
    out$Ev[1:(dim(L)[1]-max.size)] = out$Ev[dim(L)[1]-max.size+1]
    P = out$Pv[1]
    E = out$Ev[1]
  } else if(use.reordering == "sparsity") {
    out$Pv = out$Pv[ireo]
    out$Ev = out$Ev[ireo]
  }
  return(list(Pv = out$Pv, Ev = out$Ev, P = P, E = E))
}
finnlindgren/excursions documentation built on Jan. 17, 2021, 12:20 a.m.