# Copyright (C) 2013 Philipp Benner
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#' @name gp.regression
#' @docType package
#' @title Gaussian Process Regression
#' @author Philipp Benner \email{Philipp.Benner@@mis.mpg.de}
#' @import ggplot2
#' @import scales
#' @importFrom Matrix chol
#' @importFrom mvtnorm pmvnorm
#' @importFrom gridExtra grid.arrange
#' @useDynLib gp.regression
NULL
library("Matrix")
#' Generate a new Gaussian process
#'
#' @param mean prior mean
#' @param kernelf kernel function
#' @param dim dimension of the x-values
#' @param likelihood a likelihood model of not Gaussian
#' @param link a link function used to transform the target domain
#' @export
new.gp <- function(mean, kernelf, dim=1, likelihood=new.likelihood("normal", 1.0), link=NULL)
{
stopifnot(length(mean) == 1)
if (any(class(likelihood) == "likelihood.normal") && !is.null(link)) {
stop("The Gaussian likelihood should not be combined with a link function!")
}
if (any(class(link) == "link.probit") && !is.null(likelihood)) {
stop("The probit link function should not be compined with a particular likelihood model.")
}
if (any(class(likelihood) == "likelihood.gamma") && is.null(link)) {
stop("A link function is required for the gamma likelihood model!")
}
if (any(class(likelihood) == "likelihood.student_t") && is.null(link)) {
link = new.link("null")
}
gp <- list(xp = NULL, # data
yp = NULL,
dim = dim, # dimension of the x-values
kernelf = kernelf, # kernel function
likelihood = likelihood, # likelihood model (requires a link function)
link = link, # link function
prior.mean = mean, # mean
prior.sigma.L = NULL, # cholesky decomposition of covariance matrix
prior.sigma.Linv = NULL # inverse cholesky decomposition
)
class(gp) <- "gp"
return (gp)
}
#' Show the dimension of the GP
#'
#' @param x Gaussian process
#' @method dim gp
#' @export
dim.gp <- function(x) {
x$dim
}
#' Summarize a distribution
#'
#' @param model probabilistic model
#' @param ... arguments to be passed to methods
#' @export
summarize <- function(model, ...)
{
UseMethod("summarize")
}
#' Compute posterior expectation and variance of a GP
#'
#' @param model object
#' @param x where to evaluate the posterior
#' @param return.covariance if true, return the covariance matrix as well
#' @param ... unused
#' @method summarize gp
#' @export
summarize.gp <- function(model, x, return.covariance = FALSE, ...)
{
gp <- model
if (is.null(x)) {
return (NULL)
}
if (!is.matrix(x)) {
x <- as.matrix(x)
}
# check dimension of the x-values
stopifnot(dim(x)[2] == gp$dim)
# xp and yp can be NULL, then simplify compute the prior
if (is.null(gp$xp) || is.null(gp$yp)) {
mean <- rep(gp$prior.mean, dim(x)[1])
covariance <- gp$kernelf(x)
variance <- diag(covariance)
}
else {
k1 <- gp$kernelf(gp$xp, x) # K(X , X*)
k2 <- t(k1) # K(X*, X )
k3 <- gp$kernelf(x) # K(X*, X*)
# check if a link is available
if (!is.null(gp$link)) {
list[mean, variance] <- approximate.posterior.summary(gp, k1, k2, k3, ...)
# propagate mean and variance through the link function
# (c.f. Rasmussen 2006, Eq. 3.25)
covariance <- NULL
if (!class(gp$link)[[1]] == "link.null"){
list[mean, variance] <- summarize(gp$link, mean, variance, ...)
}
}
else {
# posterior mean
mean <- drop(gp$prior.mean + (k2 %*% gp$prior.sigma.Linv) %*% (t(gp$prior.sigma.Linv) %*% (gp$yp - gp$prior.mean)))
# posterior covariance
covariance <- k3 - (k2 %*% gp$prior.sigma.Linv) %*% (t(gp$prior.sigma.Linv) %*% k1)
variance <- diag(covariance)
}
}
if (return.covariance) {
return (list(mean = mean, variance = variance, covariance = covariance))
}
else {
return (list(mean = mean, variance = variance))
}
}
#' Compute posterior of a model
#'
#' @param model probabilistic model
#' @param ... arguments to be passed to methods
#' @export
posterior <- function(model, ...)
{
UseMethod("posterior")
}
#' Compute posterior of a Gaussian process
#'
#' @param model Gaussian process
#' @param xp positions of measurements
#' @param yp measured values
#' @param ep uncertainty of measurements (optional)
#' @param ... unused
#' @method posterior gp
#' @export
posterior.gp <- function(model, xp, yp, ep=NULL, ...)
{
gp <- model
# check arguments
if (!is.null(xp) && !is.matrix(xp)) {
xp <- as.matrix(xp, 1)
}
# yp is usually a vector unless a link function is used
if (!is.null(yp) && !is.matrix(yp)) {
yp <- as.matrix(yp)
}
if (!is.null(ep) && !is.vector(ep)) {
ep <- as.vector(ep)
}
# check dimension of the x-values
stopifnot(dim(xp)[2] == gp$dim)
# extend covariance matrix
if (is.null(gp$xp)) {
# first time the posterior is computed
A12 <- NULL
A22 <- gp$kernelf(xp)
}
else {
A12 <- gp$kernelf(gp$xp, xp)
A22 <- gp$kernelf(xp)
}
# update gp measurements
gp$xp <- rbind (gp$xp, xp)
gp$yp <- rbind (gp$yp, yp)
# add Gaussian noise to measurements?
if (any(class(gp$likelihood) == "likelihood.normal")) {
if (is.null(ep)) {
# use the variance specified in the likelihood model
A22 <- A22 + diag(gp$likelihood$variance, dim(xp)[1])
}
else {
# ignore the likelihood model and use ep instead
A22 <- A22 + diag(ep, dim(xp)[1])
}
}
else {
if (!is.null(ep) && any(ep != 0.0)) {
warning("Adding Gaussian noise to observations although no Gaussian likelihood is used!")
# numerical stability
A22 <- A22 + diag(ep, dim(xp)[1])
}
}
# update cholesky decomposition and its inverse
tmp <- cholesky.inverse.update(gp$prior.sigma.L, gp$prior.sigma.Linv, A12, A22)#error occures here when t-likelihood is used with ep=NULL.
gp$prior.sigma.L <- tmp$K
gp$prior.sigma.Linv <- tmp$Kinv
# check if a link is available
if (!is.null(gp$link)) {
# if so, then call a specialized method to
# approximate the posterior of the gaussian process
gp <- approximate.posterior(gp, ...)
}
gp
}
#' Draw samples from a probability distribution
#'
#' @param model probabilistic model
#' @param ... arguments to be passed to methods
#' @export
draw.sample <- function(model, ...)
{
UseMethod("draw.sample")
}
#' Draw samples from a Gaussian process
#'
#' @param model Gaussian process
#' @param x locations where to evaluate the Gaussian process
#' @param ep uncertainty of measurements (optional)
#' @param ... arguments to be passed to the algorithms
#' @method draw.sample gp
#' @export
draw.sample.gp <- function(model, x, ep=NULL, ...)
{
gp <- model
x <- as.matrix(x)
y <- rep(0.0, nrow(x))
for (i in 1:length(x)) {
list[mean, variance] <- summarize(gp, x[i,], ...)
y[i] <- rnorm(1, mean = mean, sd = sqrt(variance))
gp <- posterior(gp, x[i,], y[i], ep=ep, ...)
}
y
}
#' Compute marginal likelihood of a model
#'
#' @param model probabilistic model
#' @param ... arguments to be passed to methods
#' @export
marginal.likelihood <- function(model, ...)
{
UseMethod("marginal.likelihood")
}
#' Compute marginal likelihood of a Gaussian process
#'
#' @param model Gaussian process
#' @param xp positions of measurements
#' @param yp measured values
#' @param ep uncertainty of measurements (optional)
#' @param ... unused
#' @method marginal.likelihood gp
#' @export
marginal.likelihood.gp <- function(model, xp, yp, ep=NULL, ...)
{
gp1 <- model
gp2 <- posterior(gp1, xp, yp, ep, ...)
ml <- function(gp) {
if (is.null(gp$xp)) {
return (0.0)
}
# check if a link is available
if (!is.null(gp$link)) {
k <- gp$kernelf(gp$xp)
list[mean, variance] <- approximate.posterior.summary(gp, k, k, k, ...)
list[mean, variance] <- summarize(gp$link, mean, variance, ...)
result <- -1/2*drop((mean %*% gp$prior.sigma.Linv) %*% (t(gp$prior.sigma.Linv) %*% mean))
result <- result + logp(gp$likelihood, gp$yp, mean) - 1/2*log(det(gp$approximation.B))
}
else {
# log marginal likelihood
result <- -1/2*drop((t(gp$yp - gp$prior.mean) %*% gp$prior.sigma.Linv) %*% (t(gp$prior.sigma.Linv) %*% (gp$yp - gp$prior.mean)))
result <- result - sum(log(diag(gp$prior.sigma.L))) - dim(gp$xp)[1]/2*log(2*pi)
}
result
}
ml(gp2) - ml(gp1)
}
#' Compute gradient of the marginal likelihood with respect to model parameters
#'
#' @param model probabilistic model
#' @param ... arguments to be passed to methods
#' @export
gradient.marginal.likelihood <- function(model, ...)
{
UseMethod("gradient.marginal.likelihood")
}
#' Compute gradient of the marginal likelihood of a Gaussian process with respect
#' to the kernel parameters
#'
#' @param model Gaussian process
#' @param n number of parameters of the model
#' @param xp positions of measurements
#' @param yp measured values
#' @param ep uncertainty of measurements (optional)
#' @param ... unused
#' @method gradient.marginal.likelihood gp
#' @export
gradient.marginal.likelihood.gp <- function(model, n, xp, yp, ep=NULL, ...)
{
gp <- posterior(model, xp, yp, ep, ...)
# initialize gradient to zero
gradient <- rep(0.0, n)
alpha <- gp$prior.sigma.Linv %*% (t(gp$prior.sigma.Linv) %*% (gp$yp - gp$prior.mean))
tmp <- alpha%*%t(alpha) - gp$prior.sigma.Linv%*%t(gp$prior.sigma.Linv)
# loop over parameters
for (i in 1:n) {
k0d <- gp$kernelf(xp, gradient=TRUE, i=i)
gradient[i] <- 1/2*sum(diag(tmp%*%k0d))
}
gradient
}
#' Maximize marginal likelihood with respect to the model parameters
#'
#' @param model probabilistic model
#' @param ... arguments to be passed to methods
#' @export
maximize.marginal.likelihood <- function(model, ...)
{
UseMethod("maximize.marginal.likelihood")
}
#' Compute gradient of the marginal likelihood of a Gaussian process with respect
#' to the kernel parameters
#'
#' @param moden Gaussian process
#' @param get.kernel function (specified as a variable or string) that returns the (programatic) kernel function for the given parameters.
#' @param init initial parameters
#' @param xp positions of measurements
#' @param yp measured values
#' @param ep uncertainty of measurements (optional)
#' @param eta adaptive step size parameter for the RProp algorithm
#' @param epsilon stop criterium
#' @param step.init initial step size
#' @param verbose print norm(gradient)^2 and parameter values at every step
#' @param ... unused
#' @method maximize.marginal.likelihood gp
#' @export
maximize.marginal.likelihood.gp <- function(model, get.kernel, init, xp, yp, ep=NULL,
eta=0.01, epsilon=0.01, step.init = 0.1, verbose=FALSE, ...)
{
gp <- model
# conditional marginal likelihood maximization is not supported
stopifnot(is.null(gp$xp))
# GP with link function is not supported
stopifnot(is.null(gp$link))
# initialize vector of parameters
pt <- as.vector(init)
# number of parameters
n <- length(pt)
# initialize adaptive step size
step.size <- rep(step.init, n)
# and the old gradient just to one
gradient.old <- rep(1.0, n)
repeat {
# get kernel for the current parameters
gp$kernelf <- do.call(get.kernel, as.list(pt))
# compute the gradient at the current position pt
gradient.new <- gradient.marginal.likelihood(gp, n, xp, yp, ep=ep, ...)
if (verbose) {
print(sprintf("1/n*norm(gradient)^2 = %f, parameters: %f",
sum(gradient.new^2)/n, pt))
print(sprintf("marginal likelihood: %f",
marginal.likelihood(gp, xp, yp, ep)))
}
# check if stop condition is met
if (sum(gradient.new^2)/n < epsilon) {
return (gp)
}
# update step size
for (i in 1:n) {
if (gradient.old[i]*gradient.new[i] > 0.0) {
step.size[i] = (1.0+eta)*step.size[i]
}
else {
step.size[i] = (1.0-eta)*step.size[i]
}
}
# update parameters
pt <- pt + sign(gradient.new)*step.size
# save current gradient
gradient.old <- gradient.new
}
}
# ------------------------------------------------------------------------------
if (FALSE) {
xp <- c(1, 2, 3)
yp <- c(0.7, 0.7, 0.7)
ep <- c(0.01, 0.01, 0.01)
gp1 <- new.gp(0.5, kernel.squared.exponential(1, 1),
likelihood = new.likelihood("normal", 0.01))
gp1 <- posterior(gp1, xp, yp)
plot(gp1, 0:40/10)
# update in two steps
gp2 <- new.gp(0.5, kernel.squared.exponential(1, 1))
gp2 <- posterior(gp2, xp[ 1], yp[ 1], ep[ 1])
gp2 <- posterior(gp2, xp[2:3], yp[2:3], ep[2:3])
# evaluate marginal likelihood
gp3 <- new.gp(0.5, kernel.squared.exponential(1, 1))
marginal.likelihood(gp3, xp, yp, ep)
# [1] -2.292447
# evaluate conditional marginal likelihood
gp4 <- new.gp(0.5, kernel.squared.exponential(1, 1))
gp4 <- posterior(gp4, xp[1], yp[1], ep[1])
marginal.likelihood(gp3, xp[2:3], yp[2:3], ep[2:3])
# [1] -1.648935
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.