R/score-normal.R

# Part of the "structmcmc" package, https://github.com/rjbgoudie/structmcmc
#
# This software is distributed under the GPL-3 license.  It is free,
# open source, and has the attribution requirements (GPL Section 7) in
#   https://github.com/rjbgoudie/structmcmc
#
# Note that it is required that attributions are retained with each function.
#
# Copyright 2008 Robert J. B. Goudie, University of Warwick

#' Number of unique values.
#'
#' Counts the number of unique values in a vector.
#'
#' @param x A vector.
#' @export
nunique <- function(x){
  length(unique(x))
}
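
# Illustration (not part of the package): since nunique() simply combines
# unique() and length(),
#   nunique(c(1, 2, 2, 3))  # returns 3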

#' Local Normal-inverse-gamma (with g-prior) Log marginal likelihood.
#'
#' Compute the local log marginal likelihood of the supplied
#' Bayesian Network, i.e. the contribution to the log marginal likelihood
#' from one individual node.
#'
#' Let \eqn{X}{X} be a data matrix with a number of predictors (in columns),
#' and \eqn{y}{y} be a response variable, with \eqn{n}{n} observations
#' available for each. For a graph \eqn{G}{G} (which, since this is a local
#' score, is equivalent to an indicator vector over the predictors), the
#' model takes the form
#' \eqn{y = \phi_{G} \beta + \epsilon}{y = phi_G * beta + epsilon}
#' with \eqn{\epsilon \sim N(0, \sigma^{2} I)}{epsilon ~ N(0, sigma^{2} I)}.
#' Note that the data need to be standardised (zero-meaned).
#'
#' The design matrix \eqn{\phi_{G}}{phi_{G}} is a column of 1s, and then
#' columns corresponding to each of the parents of the node. No cross-terms
#' are included.
#'
#' The prior used factorises as
#' \eqn{p(\beta, \sigma) = p(\beta \mid \sigma)p(\sigma)}{
#'     p(beta, sigma) = p(beta | sigma)p(sigma)}.
#' The variance has an uninformative, scale-invariant Jeffreys prior
#' \eqn{p(\sigma) = 1/\sigma^{2}}{p(sigma) = 1/sigma^2}, and the coefficients
#' have a zero-mean Normal prior (a Zellner g-prior), with \eqn{g = n}, so
#' that
#' \eqn{\beta \mid \sigma
#'      \sim
#'      N(0, g \sigma^{2}\left(\phi_{G}^{\prime}\phi_{G}\right)^{-1})}{
#'      beta | sigma ~ N(0, g * sigma^2 * (phi'_G phi_G)^-1)}
#'
#' The above specification gives the following marginal likelihood.
#'
#' \deqn{p(y \mid G)
#'       \propto
#'       (1 + n)^{-(\eta + 1)/2}
#'       \left(y^{T} y - \frac{n}{n + 1} y^{T}
#'        \phi_{G}(\phi^{T}_{G}\phi_{G})^{-1}\phi^{T}_{G}
#'        y\right)^{-\frac{n}{2}}
#'      }{
#'       p(y | G)
#'       propto
#'       (1 + n)^(-(eta + 1)/2) *
#'       (y' * y - (n/(n + 1)) * y' *
#'        phi_G * (phi'_G * phi_G)^(-1) * phi'_G *
#'        y)^(-n/2)
#'      }
#'
#' @param node A numeric vector of length 1. The node to compute the local
#'   log score for.
#' @param parents A numeric vector. The parents of node.
#' @param logScoreParameters A list with the following components:
#'   \describe{
#'     \item{data}{A matrix with columns giving the values of each random
#'                 variable.}
#'     \item{nl}{A numeric vector of length nNodes(currentBN), specifying the
#'               number of levels that each random variable takes.}
#'   }
#' @param cache Optionally, provide an environment with cached local scores
#'   for this data.
#' @param checkInput A logical of length 1, specifying whether to check the
#'   inputs to the function.
#' @return A numeric vector of length 1, giving the log marginal likelihood.
#'   The environment \code{cache} will also be updated, because environments
#'   are passed by reference.
#' @references
#'   Nott, D. J., & Green, P. J. (2004). \emph{Bayesian Variable Selection
#'   and the Swendsen-Wang Algorithm}. Journal of Computational and Graphical
#'   Statistics, 13, 141-157.
#'   \url{http://dx.doi.org/10.1198/1061860042958}
#'
#'   Smith, M., & Kohn, R. (1996). \emph{Nonparametric Regression using
#'   Bayesian Variable Selection}. Journal of Econometrics, 75, 317-343.
#'   \url{http://dx.doi.org/10.1016/0304-4076(95)01763-1}.
#'
#'   Geiger, D., & Heckerman, D. (1994). \emph{Learning Gaussian Networks}.
#'   Proceedings of the 10th Conference Annual Conference on Uncertainty in
#'   Artificial Intelligence (UAI-94), 235-240.
#'   \url{http://uai.sis.pitt.edu/displayArticleDetails.jsp?mmnu=1&smnu=2&article_id=509&proceeding_id=10}
#' @export
#' @seealso \code{\link{logScoreNormal}},
#'   \code{\link{logScoreNormalOffline}},
#'   \code{\link{logScoreNormalIncremental}}.
localLogScoreNormal <- function(node,
                                parents,
                                logScoreParameters,
                                cache,
                                checkInput = TRUE){
  if (checkInput){
    stopifnot(class(node)                    %in% c("numeric", "integer"),
              length(node)                   ==   1,
              class(parents)                 %in% c("numeric", "integer"),
              is.list(logScoreParameters),
              inherits(logScoreParameters$data, "matrix"),
              class(logScoreParameters$nl)   %in% c("numeric", "integer"),
              class(cache)                   ==   "environment")
  }
  id <- fastid(c(node, parents))
  cacheRecord <- cache[[id]]
  if (!is.null(cacheRecord)){
    cacheRecord
  }
  else {
    eta <- length(parents)
    n <- dim(logScoreParameters[["data"]])[1L]
    Xi <- logScoreParameters[["data"]][, node]
    # Design matrix: a column of 1s followed by the parent columns.
    # (The leading 0 is the 'deparse.level' argument of the internal cbind.)
    phi <- .Internal(cbind(0, 1, logScoreParameters$data[, parents]))
    crprodphi <- .Internal(crossprod(phi, NULL))
    # Identity matrix of dimension eta + 1, filled along the diagonal
    d <- array(0, c(eta + 1L, eta + 1L))
    d[1L + 0L:eta * (eta + 2L)] <- 1L
    # Inverse of phi' phi
    solvephi <- solve.default(crprodphi, d, tol = 1e-07)
    crossprodphixi <- .Internal(crossprod(Xi, phi))
    # Xi' Xi - n/(n + 1) * Xi' phi (phi' phi)^{-1} phi' Xi
    mx <- .Internal(crossprod(Xi, NULL)) -
          n/(n + 1) * crossprodphixi %*%
          solvephi %*% .Internal(t.default(crossprodphixi))
    # Log marginal likelihood, up to an additive constant
    out <- -(eta + 1)/2 * log(1 + n) - n/2 * log(mx)
    # Store the result in the cache, then return it
    (cache[[id]] <- out)
  }
}
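
# A minimal, non-optimised sketch (not part of the package) of the same local
# score, written with plain base R calls instead of .Internal, to make the
# closed-form expression above easier to follow. The function name and the
# arguments 'y' (the standardised node column) and 'Z' (a matrix of the
# parent columns, or NULL for no parents) are illustrative assumptions.
localLogScoreNormalSketch <- function(y, Z = NULL){
  n <- length(y)
  phi <- cbind(rep(1, n), Z)            # intercept column, then the parents
  eta <- ncol(phi) - 1L                 # number of parents
  # y' phi (phi' phi)^{-1} phi' y
  proj <- crossprod(y, phi) %*% solve(crossprod(phi)) %*% crossprod(phi, y)
  mx <- crossprod(y) - n / (n + 1) * proj
  as.numeric(-(eta + 1) / 2 * log(1 + n) - n / 2 * log(mx))
}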

#' Normal-inverse-gamma (with g-prior) Log marginal likelihood.
#'
#' Generic function for computing the log marginal likelihood of a Bayesian
#' Network under the Normal-inverse-gamma (with g-prior) model. Dispatches on
#' the class of \code{x}.
#'
#' @param x An object of class "bn" or "bn.list".
#' @param ... Further arguments passed to the method.
#' @export
#' @seealso \code{\link{logScoreNormal.bn}}
#' @examples
#' data <- cbind(c(-10, -2), c(-11, -4))
#' net <- bn(integer(0), 1)
#' logScoreNormal(net, data)
logScoreNormal <- function(x, ...){
  UseMethod("logScoreNormal")
}

#' Normal-inverse-gamma (with g-prior) Log marginal likelihood.
#'
#' Compute the log marginal likelihood of the supplied Bayesian Network.
#'
#' The data is scored as continuous, using a form of the Normal Prior.
#'
#' @param x An object of class "bn". The Bayesian Network by for which the
#'   marginal likelihood is computed.
#' @param data A matrix with columns giving the value of each random variable.
#' @param cache Optionally, provide an environment with cached local scores
#'   for this data.
#' @param checkInput A logical of length 1, specifying whether to check the
#'   inputs to the function.
#' @param ... Further arguments, currently unused
#' @return A numeric vector of length 1, giving the log marginal likelihood.
#'   The environment \code{cache} will also be updated, because environments
#'   are passed by reference.
#' @S3method logScoreNormal bn
#' @method logScoreNormal bn
#' @seealso \code{\link{logScoreNormal}},
#'   \code{\link{logScoreNormal.bn.list}}
#' @examples
#' data <- cbind(c(-10, -2), c(-11, -4))
#' net <- bn(integer(0), 1)
#' logScoreNormal(net, data)
logScoreNormal.bn <- function(x,
                              data,
                              cache      = new.env(hash = TRUE),
                              checkInput = TRUE,
                              ...){
  if (isTRUE(checkInput)){
    stopifnot("bn"                            %in% class(x),
              is.valid(x),
              inherits(data, "matrix"),
              all(unlist(lapply(data, class)) %in% c("numeric", "integer")),
              class(cache)                    ==   "environment")
  }
  p <- nNodes(x)
  nodeSeq <- seq_len(p)
  logScoreParameters <- logScoreNormalPrepare(data               = data,
                                              logScoreParameters = list(),
                                              checkInput = FALSE)
  sum(.Internal(unlist(lapply(nodeSeq, function(i){
    localLogScoreNormal(node               = i,
                        parents            = x[[i]],
                        logScoreParameters = logScoreParameters,
                        cache              = cache)
  }), F, F)))
}
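
# Usage sketch (illustrative only; neither the function name nor the data are
# part of the package): scoring two networks on the same data while sharing a
# cache environment, so that local scores computed for the first network are
# reused for the second wherever a node has the same parent set.
demoSharedCache <- function(){
  data <- cbind(c(-10, -2, 4), c(-11, -4, 2))
  cache <- new.env(hash = TRUE)
  net1 <- bn(integer(0), 1)           # node 2 has node 1 as its parent
  net2 <- bn(integer(0), integer(0))  # the empty graph on two nodes
  c(logScoreNormal(net1, data, cache = cache),
    logScoreNormal(net2, data, cache = cache))
}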

#' Normal-inverse-gamma (with g-prior) Log marginal likelihood (offline).
#'
#' Compute the log marginal likelihood of the supplied Bayesian Network.
#'
#' This function is an alternative interface to logScoreNormal.
#' This interface is required by the MCMC sampler.
#'
#' @param x An object of class "bn". The Bayesian Network by for which the
#'   marginal likelihood is computed.
#' @param logScoreParameters A list with the following components:
#'   \describe{
#'     \item{data}{A matrix with columns giving the values of each random
#'                 variable.}
#'     \item{nl}{A numeric vector of length nNodes(currentBN), specifying the
#'               number of levels that each random variable takes.}
#'   }
#' @param cache Optionally, provide an environment with cached local scores
#'   for this data.
#' @param checkInput A logical of length 1, specifying whether to check the
#'   inputs to the function.
#' @return A numeric vector of length 1, giving the log marginal likelihood.
#'   The environment \code{cache} will also be updated, because environments
#'   are passed by reference.
#' @export
#' @seealso \code{\link{logScoreNormal}},
#'   \code{\link{logScoreNormalIncremental}}
logScoreNormalOffline <- function(x,
                                  logScoreParameters,
                                  cache      = new.env(hash = TRUE),
                                  checkInput = TRUE){
  if (isTRUE(checkInput)){
    stopifnot("bn"                           %in% class(x),
              is.valid(x),
              is.list(logScoreParameters),
              inherits(logScoreParameters$data, "matrix"),
              class(logScoreParameters$nl)   %in% c("numeric", "integer"),
              length(logScoreParameters$nl)  ==   nNodes(x),
              class(cache)                   ==   "environment")
  }
  sum(.Internal(unlist(lapply(seq_len(nNodes(x)), function(head){
    localLogScoreNormal(node               = head,
                        parents            = x[[head]],
                        logScoreParameters = logScoreParameters,
                        cache              = cache,
                        checkInput         = FALSE)
  }), F, F)))
}
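
# Usage sketch for the offline interface (illustrative only; the function name
# is not part of the package): build the logScoreParameters list once with
# logScoreNormalPrepare(), then score a network against it. 'net' is assumed
# to be an object of class "bn" and 'data' a numeric matrix with one column
# per node.
demoOfflineScore <- function(net, data){
  logScoreParameters <- logScoreNormalPrepare(data               = data,
                                              logScoreParameters = list(),
                                              checkInput         = FALSE)
  logScoreNormalOffline(net, logScoreParameters = logScoreParameters)
}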

#' Normal-inverse-gamma (with g-prior) Log marginal likelihood.
#'
#' Compute the log marginal likelihood of the supplied Bayesian Networks.
#'
#' The data is scored as continuous, using a form of the Normal Prior.
#'
#' @param x An object of class "bn.list", the Bayesian Networks for which
#'   the marginal likelihood are computed.
#' @param data A matrix, with columns giving the values of each random
#'   variable.
#' @param cache Optionally, provide an environment with cached local scores
#'   for this data.
#' @param ... Further arguments, currently unused.
#'
#' @return A numeric vector of the same length as \code{x}, giving the log
#'   marginal likelihood of each network. The environment \code{cache} will
#'   also be updated, because environments are passed by reference.
#' @S3method logScoreNormal bn.list
#' @method logScoreNormal bn.list
#' @seealso \code{\link{logScoreNormal.bn}}, \code{\link{logScoreNormal}}
logScoreNormal.bn.list <- function(x,
                                   data,
                                   cache = new.env(hash = TRUE),
                                   ...){
  stopifnot(inherits(data, "matrix"),
            all(unlist(lapply(data, class)) %in% c("numeric", "integer")),
            "bn.list"                       %in% class(x))
  unlist(lapply(x, function(bn){
    logScoreNormal(bn, data, cache)
  }))
}

#' Internal functions.
#'
#' Convert a data frame to the appropriate format for the fast/incremental
#' logScoreNormal functions, and return as part of the logScoreParameters
#' list.
#'
#' In particular, the data is converted to a matrix, and the factor levels
#' take integer values from 0, 1, ..., i.e. not 1, 2, 3, ....
#'
#' @param data A data.frame, with columns being factors giving the values of
#'   each random variable.
#' @param logScoreParameters A list with the following components:
#'   \describe{
#'     \item{data}{A matrix with columns giving the values of each random
#'                 variable.}
#'     \item{nl}{A numeric vector of length nNodes(currentBN), specifying the
#'               number of levels that each random variable takes.}
#'   }
#' @param checkInput A logical of length 1, specifying whether to check the
#'   inputs to the function.
#' @return A list with the contents of logScoreParameters, with the following
#'   components added or altered:
#'   \describe{
#'     \item{data}{A matrix with columns giving the value of each random
#'                 variable.}
#'     \item{nl}{A numeric vector of length ncol(data), specifying the number
#'               of levels that each random variable takes.}
#'   }
#' @export
#' @seealso \code{\link{logScoreNormal}}
logScoreNormalPrepare <- function(data,
                                  logScoreParameters,
                                  checkInput = TRUE){
  if (isTRUE(checkInput)){
    stopifnot(class(data)                     == "data.frame",
              all(unlist(lapply(data, class)) == "factor"))
  }
  nl <- apply(data, 2, nunique)
  modifyList(logScoreParameters, val = list(data = data, nl = nl))
}

#' Normal-inverse-gamma (with g-prior) Log marginal likelihood (online).
#'
#' Compute the difference in log marginal likelihood of the supplied
#' Bayesian Networks, quickly.
#'
#' This is a fast, incremental version of logScoreNormal.
#'
#' The data is scored as continuous, using a form of the Zellner Prior.
#'
#' @param currentBN An object of class "bn".
#' @param proposalBN An object of class "bn".
#' @param heads A numeric vector, specifying which nodes have different
#'   parents in currentBN and proposalBN.
#' @param logScoreParameters A list with the following components:
#'   \describe{
#'     \item{data}{A matrix with columns giving the values of each random
#'                 variable.}
#'     \item{nl}{A numeric vector of length nNodes(currentBN), specifying the
#'               number of levels that each random variable takes.}
#'   }
#' @param cache Optionally, provide an environment with cached local scores
#'   for this data.
#' @param checkInput A logical of length 1, specifying whether to check the
#'   inputs to the function.
#' @return logscore(proposalBN) - logscore(currentBN)
#' @export
#' @seealso \code{\link{logScoreNormal}},
#'   \code{\link{logScoreNormalOffline}}
logScoreNormalIncremental <- function(currentBN,
                                      proposalBN,
                                      heads,
                                      logScoreParameters,
                                      cache,
                                      checkInput = TRUE){
  if (isTRUE(checkInput)){
    stopifnot("bn"                           %in% class(currentBN),
              is.valid(currentBN),
              "bn"                           %in% class(proposalBN),
              is.valid(proposalBN),
              nNodes(currentBN)              ==   nNodes(proposalBN),
              class(heads)                   %in% c("numeric", "integer"),
              is.list(logScoreParameters),
              inherits(logScoreParameters$data, "matrix"),
              class(logScoreParameters$nl)   %in% c("numeric", "integer"),
              length(logScoreParameters$nl)  ==   nNodes(currentBN),
              class(cache)                   ==   "environment")
  }
  sum(.Internal(unlist(lapply(heads, function(head){
    localLogScoreNormal(node               = head,
                        parents            = proposalBN[[head]],
                        logScoreParameters = logScoreParameters,
                        cache              = cache,
                        checkInput         = F) -
    cache[[fastid(c(head, currentBN[[head]]))]]
  }), F, F)))
}
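
# Consistency sketch (illustrative only; the function name is not part of the
# package): provided 'heads' covers every node whose parent set differs
# between the two networks, the incremental score should equal the difference
# of the two full offline scores computed with a shared cache.
demoIncrementalCheck <- function(currentBN, proposalBN, heads,
                                 logScoreParameters){
  cache <- new.env(hash = TRUE)
  current  <- logScoreNormalOffline(currentBN,  logScoreParameters,
                                    cache = cache)
  proposal <- logScoreNormalOffline(proposalBN, logScoreParameters,
                                    cache = cache)
  delta <- logScoreNormalIncremental(currentBN, proposalBN, heads,
                                     logScoreParameters, cache = cache)
  all.equal(delta, proposal - current)
}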