R/lineqGPlikelihoods.R


#' @title Gaussian Process Model Optimizations
#' @description Covariance parameter optimization for \code{"lineqGP"} S3 class objects.
#' @param model a list with the structure of the constrained Kriging model.
#' @param x0 the initial values for the parameters to be optimized over.
#' @param eval_f a function to be minimized, with first argument the vector of parameters
#' over which minimization is to take place. It should return a scalar result.
#' @param lb a vector with the lower bounds of the parameters. The parameters are forced
#' to be positive. See \code{\link{nloptr}}.
#' @param ub a vector with the upper bounds of the parameters. See \code{\link{nloptr}}.
#' @param opts a list of optimizer options (see \code{\link{nl.opts}}). The element
#' \code{parfixed} gives the indices of the parameters that are kept fixed (not optimised).
#' If \code{estim.varnoise} is \code{TRUE}, the noise variance is also estimated.
#' @param seed an optional number. Set a seed to replicate results.
#' @param estim.varnoise an optional logical. If \code{TRUE}, a noise variance is estimated.
#' @param bounds.varnoise a vector with bounds of noise variance.
#' @param add.constr an optional logical. If \code{TRUE}, the inequality constraints are taken
#' into account in the optimisation.
#' @param additive an optional logical. If \code{TRUE}, the likelihood of an additive GP model
#' is computed in the optimisation.
#' @param mcmc.opts if \code{add.constr} is \code{TRUE}, the mcmc options passed to methods.
#' @param max.trials the maximum number of optimization restarts allowed when numerical
#' instabilities produce errors.
#' @param ... further arguments passed to or from other methods.
#' @return An optimized \code{lineqGP} model.
#'
#' @section Comments:
#' This function is to be improved in future releases for more stable procedures.
#' Cross-validation (CV) methods could be implemented in future versions.
#' @seealso \code{\link{nloptr}}
#' @author A. F. Lopez-Lopera.
#'
#' @import nloptr
#' @importFrom  purrr map
#' @export
lineqGPOptim <- function(model,
                         x0 = model$kernParam$par,
                         eval_f = "logLik",
                         lb = rep(0.01, length(x0)),
                         ub = rep(Inf, length(x0)),
                         opts = list(algorithm = "NLOPT_LD_MMA",
                                     print_level = 0,
                                     ftol_abs = 1e-3,
                                     maxeval = 50,
                                     check_derivatives = FALSE,
                                     parfixed = rep(FALSE, length(x0))),
                         seed = 1,
                         estim.varnoise = FALSE,
                         bounds.varnoise = c(0, Inf),
                         add.constr = FALSE,
                         additive = FALSE,
                         mcmc.opts = list(probe = "Genz", nb.mcmc = 1e3),
                         max.trials = 10, ...) {
  if (additive) {
    xmodel <- unlist(purrr::map(model$kernParam, "par"))
    if (length(x0) != length(xmodel)) {
      x0 <- xmodel
      lb <- rep(0.01, length(x0))
      ub <- rep(Inf, length(x0))
      opts$parfixed <-  rep(FALSE, length(x0))
    }
  }
  
  model <- augment(model)
  if (!("parfixed" %in% names(opts)))
    opts$parfixed <- rep(FALSE, length(x0))
  # if (!("bounds.varnoise" %in% names(opts)))
  #   opts$estim.varnoise = FALSE
  
  if (!("algorithm" %in% names(opts)))
    opts$algorithm <- "NLOPT_LD_MMA"
  
  if (!("print_level" %in% names(opts)))
    opts$print_level <- 0
  
  if (!("ftol_abs" %in% names(opts)))
    opts$ftol_abs <- 1e-3
  
  if (!("maxeval" %in% names(opts)))
    opts$maxeval <- 50
  
  if (!("check_derivatives" %in% names(opts)))
    opts$check_derivatives <- FALSE
  
  # changing the functions according to fn
  if (eval_f == "logLik") {
    if (additive) 
      eval_f <- paste(eval_f, "Additive", sep = "")
    fn <- paste(eval_f, "Fun", sep = "")
    gr <- paste(eval_f, "Grad", sep = "")
    if (add.constr) {
      fn <- paste("constr", fn, sep = "")
      gr <- paste("constr", gr, sep = "")
    }
  }   # CV methods will be implemented in future versions

  if (estim.varnoise) {
    if (!("varnoise" %in% names(model)))
      model$varnoise <- 0
    x0 <- c(x0, model$varnoise)
    lb <- c(lb, bounds.varnoise[1])
    ub <- c(ub, bounds.varnoise[2])
  }
  # seed <- 5e1^sum(x0) # to try with different and fixed initial parameters
  trial <- 1
  while(trial <= max.trials) {
    optim <- try(nloptr(x0 = x0,
                        eval_f = get(fn),
                        eval_grad_f = get(gr),
                        lb = lb,
                        ub = ub,
                        opts = opts,
                        model = model,
                        mcmc.opts = mcmc.opts,
                        parfixed = opts$parfixed,
                        estim.varnoise = estim.varnoise, ...))
    if (inherits(optim, "try-error")) {
      set.seed(seed)
      x0Rand <- runif(length(x0), lb, ub)
      x0[!opts$parfixed] <- x0Rand[!opts$parfixed]
      message("Non feasible solution. Re-initialising the optimizer.")
      seed <- seed + 1
      trial <- trial + 1
    } else {
      break
    }
  }
  if (trial > max.trials) {
    stop('Max number of trials has been exceeded with errors.')
  } else {
    # expanding the model according to the optimized parameters
    if (estim.varnoise) {
      model$varnoise <- optim$solution[length(optim$solution)]
      optim$solution <- optim$solution[-length(optim$solution)]
    }
    if (additive) {
      for (k in 1:model$d)
        model$kernParam[[k]]$par <- optim$solution[(k-1)*length(model$kernParam[[k]]$par) + 1:2]
    } else {
      model$kernParam$par <- optim$solution
    }
    model$kernParam$l <- optim$objective # used for multistart procedures
    model <- augment(model)
    return(model)
  }
}
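
## A minimal usage sketch (not run), assuming `model` is a "lineqGP" model built beforehand
## (e.g. with the package's create() constructor) and already carrying the data x and y;
## the bounds and options below are illustrative values, not package defaults:
# model <- lineqGPOptim(model,
#                       x0 = model$kernParam$par,
#                       eval_f = "logLik",
#                       lb = c(0.1, 0.01), ub = c(5, 2),
#                       opts = list(algorithm = "NLOPT_LD_MMA", maxeval = 50))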

#' @title Log-Likelihood of a Gaussian Process.
#' @description Compute the negative log-likelihood of a Gaussian Process.
#' @param par the values of the covariance parameters.
#' @param model an object with \code{"lineqGP"} S3 class.
#' @param parfixed not used.
#' @param mcmc.opts not used.
#' @param estim.varnoise If \code{TRUE}, a noise variance is estimated.
#' @return The value of the negative log-likelihood.
#' @seealso \code{\link{logLikGrad}}, \code{\link{constrlogLikFun}},
#'          \code{\link{constrlogLikGrad}}
#' @author A. F. Lopez-Lopera.
#'
#' @references Rasmussen, C. E. and Williams, C. K. I. (2005),
#' "Gaussian Processes for Machine Learning (Adaptive Computation and Machine Learning)".
#' \emph{The MIT Press}.
#' \href{http://www.gaussianprocess.org/gpml/}{[link]}
#'
#' @export
logLikFun <- function(par = model$kernParam$par, model,
                      parfixed = NULL, mcmc.opts = NULL,
                      estim.varnoise = FALSE) {
  switch (class(model),
    lineqGP = {
      m <- model$localParam$m
      u <- vector("list", model$d) # list with d sets of knots
      for (j in 1:model$d)
        u[[j]] <- matrix(seq(0, 1, by = 1/(m[j]-1)))
    }, lineqSGP = {
      u <- model$u
      m <- c(unlist(model$localParam$m))
    } 
  )
  
  if (estim.varnoise) {
    varnoise <- par[length(par)]
    par <- par[-length(par)]
  } else {
    varnoise <- model$varnoise
  }

  AllGammas <- vector("list", model$d) # list with the d covariance matrices
  if (length(par) == 2 && length(unique(m)) == 1) {
    if (is.list(u)) {
      uBase <- u[[1]]
    } else {
      uBase <- u
    }
    
    GammaBase <- kernCompute(uBase, uBase,
                             model$kernParam$type,
                             par)
    for (j in 1:model$d) {
      AllGammas[[j]] <- GammaBase
    }
  } else {
    if (length(par[-1]) != model$d) {
      par <- c(par[1], rep(par[2], model$d))
      names(par) <- c(names(par)[1],
                      paste(names(par)[2], seq(model$d), sep = ""))
    }
    for (j in 1:model$d) {
      AllGammas[[j]] <- kernCompute(u[[j]], u[[j]],
                                    model$kernParam$type,
                                    par[c(1, j+1)])
    }
  }
  expr <- paste("AllGammas[[", seq(model$d), "]]", collapse = " %x% ")
  Gamma <- eval(parse(text = expr))

  Phi <- model$Phi
  Kyy <- Phi %*% Gamma %*% t(Phi)
  # if (estim.varnoise)
  Kyy <- Kyy + varnoise*diag(nrow(Kyy))
  cholKyy <- chol(Kyy + model$kernParam$nugget*diag(nrow(Kyy)))
  logDetKyy <- 2*sum(diag(log(cholKyy)))
  invKyy <- chol2inv(cholKyy)
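  # negative log marginal likelihood: 0.5*(log|Kyy| + n*log(2*pi) + y^T Kyy^{-1} y)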
  f <- 0.5*(logDetKyy + nrow(Kyy)*log(2*pi) + t(model$y)%*%invKyy%*%model$y)
  return(f)
}

#' @title Gradient of the Log-Likelihood of a Gaussian Process.
#' @description Compute the gradient of the negative log-likelihood of a Gaussian Process.
#' @param par the values of the covariance parameters.
#' @param model an object with \code{"lineqGP"} S3 class.
#' @param parfixed indices of the parameters that are kept fixed (not optimised).
#' @param mcmc.opts not used.
#' @param estim.varnoise If \code{TRUE}, a noise variance is estimated.
#' @return the gradient of the negative log-likelihood.
#'
#' @seealso \code{\link{logLikFun}}, \code{\link{constrlogLikFun}},
#'          \code{\link{constrlogLikGrad}}
#' @author A. F. Lopez-Lopera.
#'
#' @references Rasmussen, C. E. and Williams, C. K. I. (2005),
#' "Gaussian Processes for Machine Learning (Adaptive Computation and Machine Learning)".
#' \emph{The MIT Press}.
#' \href{http://www.gaussianprocess.org/gpml/}{[link]}
#'
#' @export
logLikGrad <- function(par = model$kernParam$par, model,
                       parfixed = rep(FALSE, length(par)), mcmc.opts = NULL,
                       estim.varnoise = FALSE) {
  switch (class(model),
          lineqGP = {
            m <- model$localParam$m
            u <- vector("list", model$d) # list with d sets of knots
            for (j in 1:model$d)
              u[[j]] <- matrix(seq(0, 1, by = 1/(m[j]-1)))
          }, lineqSGP = {
            u <- model$u
            m <- c(unlist(model$localParam$m))
          } 
  )

  if (estim.varnoise) {
    varnoise <- par[length(par)]
    par <- par[-length(par)]
  }
  
  AllGammas <- vector("list", model$d) # list with the d covariance matrices
  if (length(par) == 2 && length(unique(m)) == 1) {
    if (is.list(u)) {
      uBase <- u[[1]]
    } else {
      uBase <- u
    }
    GammaBase <- kernCompute(uBase, uBase,
                             model$kernParam$type,
                             par)
    for (j in 1:model$d) {
      AllGammas[[j]] <- GammaBase
    }
  } else {
    if (length(par[-1]) != model$d) {
      par <- c(par[1], rep(par[2], model$d))
      names(par) <- c(names(par)[1],
                      paste(names(par)[2], seq(model$d), sep = ""))
    }
    for (j in 1:model$d) {
      AllGammas[[j]] <- kernCompute(u[[j]], u[[j]],
                                    model$kernParam$type,
                                    par[c(1, j+1)])
    }
  }
  expr <- paste("AllGammas[[", seq(model$d), "]]", collapse = " %x% ")
  Gamma <- eval(parse(text = expr))

  Phi <- model$Phi
  Kyy <- Phi %*% Gamma %*% t(Phi)
  if (estim.varnoise)
    Kyy <- Kyy + varnoise*diag(nrow(Kyy))
  cholKyy <- chol(Kyy + model$kernParam$nugget*diag(nrow(Kyy)))
  invKyy <- chol2inv(cholKyy)

  gradf <- rep(0, length(par))
  idx_iter <- seq(length(par))
  idx_iter <- idx_iter[parfixed == FALSE]

  expr <- matrix(paste("AllGammas[[", seq(model$d),"]]", sep = ""),
                 model$d, model$d, byrow = TRUE)
  diag(expr) <- paste("attr(AllGammas[[", seq(model$d),
                      "]], 'gradient')[[2]]", sep = "")
  for (i in idx_iter) {
    if (i == 1) {
      gradGammaTemp <- model$d*Gamma/par[1]
    } else {
      expr2 <- paste(expr[i-1, ], collapse = " %x% ")
      gradGammaTemp <- eval(parse(text = expr2))
    }
    gradKyyTemp <- Phi %*% gradGammaTemp %*% t(Phi)
    alpha <- invKyy %*% model$y
    gradf[i] <- 0.5*sum(diag((invKyy - alpha%*%t(alpha)) %*% gradKyyTemp))
  }
  if (estim.varnoise) {
    gradKyynoise <- diag(nrow(Kyy))
    alpha <- invKyy %*% model$y
    gradf <- c(gradf, 
               0.5*sum(diag((invKyy - alpha%*%t(alpha)) %*% gradKyynoise)))
  }
  
  # gradf <- nl.grad(par, logLikFun, heps = 1e-9, model, parfixed, mcmc.opts)
  # gradf[parfixed] <- 0
  return(gradf)
}
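
## A minimal sketch (not run) of checking logLikGrad against a finite-difference
## approximation of logLikFun, assuming `model` is an already-augmented "lineqGP" model:
# par0 <- model$kernParam$par
# gradAnalytic <- logLikGrad(par0, model)
# gradNumeric <- nloptr::nl.grad(par0, logLikFun, heps = 1e-6, model = model)
# max(abs(gradAnalytic - gradNumeric))  # should be close to zero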

#' @title Log-Constrained-Likelihood of a Gaussian Process.
#' @description Compute the negative log-constrained-likelihood of a Gaussian Process
#' conditionally to the inequality constraints (Lopez-Lopera et al., 2018).
#' @param par the values of the covariance parameters.
#' @param model an object with \code{"lineqGP"} S3 class.
#' @param parfixed not used.
#' @param mcmc.opts mcmc options. \code{mcmc.opts$probe} is a character string giving
#' the estimator of the orthant multinormal probabilities:
#' \code{"Genz"} (Genz, 1992) or \code{"ExpT"} (Botev, 2017).
#' If \code{probe == "ExpT"}, \code{mcmc.opts$nb.mcmc} is the number of MCMC
#' samples used for the estimation.
#' @param estim.varnoise If \code{TRUE}, a noise variance is estimated.
#' @return The value of the negative log-constrained-likelihood.
#' @details Orthant multinormal probabilities are estimated according to
#' (Genz, 1992; Botev, 2017). See (Lopez-Lopera et al., 2018).
#'
#' @seealso \code{\link{constrlogLikGrad}}, \code{\link{logLikFun}},
#'          \code{\link{logLikGrad}}
#' @author A. F. Lopez-Lopera.
#'
#' @references Lopez-Lopera, A. F., Bachoc, F., Durrande, N., and Roustant, O. (2018),
#' "Finite-dimensional Gaussian approximation with linear inequality constraints".
#' \emph{SIAM/ASA Journal on Uncertainty Quantification}, 6(3): 1224-1255.
#' \href{https://doi.org/10.1137/17M1153157}{[link]}
#'
#' @references Bachoc, F., Lagnoux, A., and Lopez-Lopera, A. F. (2018),
#' "Maximum likelihood estimation for Gaussian processes under inequality constraints".
#' \emph{ArXiv e-prints}
#' \href{https://arxiv.org/abs/1804.03378}{[link]}
#'
#' Genz, A. (1992),
#' "Numerical computation of multivariate normal probabilities".
#' \emph{Journal of Computational and Graphical Statistics},
#' 1:141-150.
#' \href{https://www.tandfonline.com/doi/abs/10.1080/10618600.1992.10477010}{[link]}
#'
#' Botev, Z. I. (2017),
#' "The normal law under linear restrictions: simulation and estimation via minimax tilting".
#' \emph{Journal of the Royal Statistical Society: Series B (Statistical Methodology)},
#' 79(1):125-148.
#' \href{https://rss.onlinelibrary.wiley.com/doi/pdf/10.1111/rssb.12162}{[link]}
#'
#' @importFrom TruncatedNormal mvrandn mvNcdf
#' @import mvtnorm
#' @export
constrlogLikFun <- function(par = model$kernParam$par, model, parfixed = NULL,
                            mcmc.opts = list(probe = c("Genz"), nb.mcmc = 1e3),
                            estim.varnoise = FALSE) {
  # u <- matrix(seq(0, 1, by = 1/(model$localParam$m-1)), ncol = 1) # discretization vector
  # Gamma <- kernCompute(u, u, model$kernParam$type, par = par)
  # if (model$d == 2)
    # Gamma <- kronecker(Gamma, Gamma)
  m <- model$localParam$m
  u <- vector("list", model$d) # list with d sets of knots
  for (j in 1:model$d)
    u[[j]] <- matrix(seq(0, 1, by = 1/(m[j]-1)))

  if (estim.varnoise) {
    varnoise <- par[length(par)]
    par <- par[-length(par)]
  }
  
  AllGammas <- vector("list", model$d) # list with the d covariance matrices
  if (length(par) == 2 && length(unique(m)) == 1) {
    uBase <- u[[1]]
    GammaBase <- kernCompute(uBase, uBase,
                             model$kernParam$type,
                             par)
    for (j in 1:model$d) {
      AllGammas[[j]] <- GammaBase
    }
  } else {
    if (length(par[-1]) != model$d) {
      par <- c(par[1], rep(par[2], model$d))
      names(par) <- c(names(par)[1],
                      paste(names(par)[2], seq(model$d), sep = ""))
    }
    for (j in 1:model$d) {
      AllGammas[[j]] <- kernCompute(u[[j]], u[[j]],
                                    model$kernParam$type,
                                    par[c(1, j+1)])
    }
  }
  expr <- paste("AllGammas[[", seq(model$d), "]]", collapse = " %x% ")
  Gamma <- eval(parse(text = expr))
  Phi <- model$Phi
  GammaPhit <- Gamma %*% t(Phi)

  # computing the negative of the unconstrained log-likelihood
  Kyy <- Phi %*% GammaPhit
  if (estim.varnoise)
    Kyy <- Kyy + varnoise*diag(nrow(Kyy))
  Kyy <- Kyy + model$kernParam$nugget*diag(nrow(Kyy))
  cholKyy <- chol(Kyy)
  logDetKyy <- 2*sum(diag(log(cholKyy)))
  invKyy <- chol2inv(cholKyy)
  f <- 0.5*(logDetKyy + nrow(Kyy)*log(2*pi) + t(model$y)%*%invKyy%*%model$y)

  # computing the other terms of the constrained log-likelihood
  GammaPhitInvKyy <- GammaPhit %*% invKyy
  mu <- GammaPhitInvKyy %*% model$y
  mu.eta <- model$Lambda %*% mu
  Sigma <- Gamma - GammaPhitInvKyy %*% t(GammaPhit)
  Sigma.eta <- model$Lambda %*% Sigma %*% t(model$Lambda)
  if (min(eigen(Sigma.eta, symmetric = TRUE)$values) <= 0) # numerical stability
    Sigma.eta <- Sigma.eta + 1e-5*diag(nrow(Sigma.eta))
  Gamma.eta <- model$Lambda %*% Gamma %*% t(model$Lambda)
  if (min(eigen(Gamma.eta, symmetric = TRUE)$values) <= 0) # numerical stability
    Gamma.eta <- Gamma.eta + 1e-5*diag(nrow(Gamma.eta))

  set.seed(7)
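  # constrained likelihood (Lopez-Lopera et al., 2018; Bachoc et al., 2018):
  #   L_c = p(y) * P(lb <= Lambda %*% xi <= ub | y) / P(lb <= Lambda %*% xi <= ub),
  # so the negative log adds -log of the conditional orthant probability (mean mu.eta,
  # covariance Sigma.eta) and +log of the prior orthant probability (mean 0, covariance Gamma.eta)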
  switch(mcmc.opts$probe,
         Genz = {
           f <- f - log(mvtnorm::pmvnorm(model$lb, model$ub,
                                         as.vector(mu.eta), sigma = Sigma.eta)[[1]]) +
             log(mvtnorm::pmvnorm(model$lb, model$ub,
                                  rep(0, nrow(Gamma.eta)), sigma = Gamma.eta)[[1]])
         }, ExpT = {
           f <- f - log(TruncatedNormal::mvNcdf(model$lb-mu.eta, model$ub-mu.eta,
                               Sigma.eta, mcmc.opts$nb.mcmc)$prob) +
             log(TruncatedNormal::mvNcdf(model$lb, model$ub, Gamma.eta, mcmc.opts$nb.mcmc)$prob)
         })
  return(f)
}

#' @title Numerical Gradient of the Log-Constrained-Likelihood of a Gaussian Process.
#' @description Numerically compute the gradient of the negative log-constrained-likelihood of a Gaussian Process
#' conditionally to the inequality constraints (Lopez-Lopera et al., 2018).
#' @param par the values of the covariance parameters.
#' @param model an object with class \code{lineqGP}.
#' @param parfixed indices of the parameters that are kept fixed (not optimised).
#' @param mcmc.opts mcmc options. \code{mcmc.opts$probe} is a character string giving
#' the estimator of the orthant multinormal probabilities:
#' \code{"Genz"} (Genz, 1992) or \code{"ExpT"} (Botev, 2017).
#' If \code{probe == "ExpT"}, \code{mcmc.opts$nb.mcmc} is the number of MCMC
#' samples used for the estimation.
#' @param estim.varnoise If \code{TRUE}, a noise variance is estimated.
#' @return The gradient of the negative log-constrained-likelihood.
#' @details Orthant multinormal probabilities are estimated via (Genz, 1992; Botev, 2017).
#'
#' @seealso \code{\link{constrlogLikFun}}, \code{\link{logLikFun}},
#'          \code{\link{logLikGrad}}
#' @section Comments:
#' As orthant multinormal probabilities don't have explicit expressions,
#' the gradient is implemented numerically based on \code{\link{nl.grad}}.
#' @author A. F. Lopez-Lopera.
#'
#' @references Lopez-Lopera, A. F., Bachoc, F., Durrande, N., and Roustant, O. (2018),
#' "Finite-dimensional Gaussian approximation with linear inequality constraints".
#' \emph{SIAM/ASA Journal on Uncertainty Quantification}, 6(3): 1224-1255.
#' \href{https://doi.org/10.1137/17M1153157}{[link]}
#'
#' @references Bachoc, F., Lagnoux, A., and Lopez-Lopera, A. F. (2018),
#' "Maximum likelihood estimation for Gaussian processes under inequality constraints".
#' \emph{ArXiv e-prints}
#' \href{https://arxiv.org/abs/1804.03378}{[link]}
#'
#' Genz, A. (1992),
#' "Numerical computation of multivariate normal probabilities".
#' \emph{Journal of Computational and Graphical Statistics},
#' 1:141-150.
#' \href{https://www.tandfonline.com/doi/abs/10.1080/10618600.1992.10477010}{[link]}
#'
#' Botev, Z. I. (2017),
#' "The normal law under linear restrictions: simulation and estimation via minimax tilting".
#' \emph{Journal of the Royal Statistical Society: Series B (Statistical Methodology)},
#' 79(1):125-148.
#' \href{https://rss.onlinelibrary.wiley.com/doi/pdf/10.1111/rssb.12162}{[link]}
#'
#' @export
constrlogLikGrad <- function(par = model$kernParam$par, model,
                             parfixed = rep(FALSE, length(par)),
                             mcmc.opts = list(probe = "Genz", nb.mcmc = 1e3),
                             estim.varnoise = FALSE) {
  # gradf <- nl.grad(par, constrlogLikFun, heps = 1e-9, model, parfixed, mcmc.opts)
  # gradf[parfixed] <- 0

  if (estim.varnoise)
    parfixed <- c(parfixed, FALSE)
  
  # this is a faster version of "nl.grad" when some parameters are fixed.
  if (!is.numeric(par))
    stop("Argument 'par' must be a numeric value.")
  fun <- match.fun(constrlogLikFun)
  fn <- function(x) fun(x, model, parfixed, mcmc.opts, estim.varnoise)
  if (length(fn(par)) != 1)
    stop("Function 'fn' must return a single numeric value.")
  n <- length(par)
  hh <- rep(0, n)
  gradf <- rep(0, n)

  # the gradient is computed only w.r.t. the parameters to be optimised
  idx_iter <- seq(n)
  idx_iter <- idx_iter[parfixed == FALSE]
  heps <- 1e-9
  for (i in idx_iter) {
    hh[i] <- heps
    gradf[i] <- (fn(par + hh) - fn(par - hh))/(2 * heps)
    hh[i] <- 0
  }
  return(gradf)
}
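
## A minimal sketch (not run) of a constrained-likelihood fit through lineqGPOptim,
## assuming `model` is an already-augmented "lineqGP" model with bounds model$lb,
## model$ub and constraint matrix model$Lambda; the bounds are illustrative values:
# model <- lineqGPOptim(model, add.constr = TRUE,
#                       lb = c(0.1, 0.01), ub = c(5, 2),
#                       mcmc.opts = list(probe = "Genz", nb.mcmc = 1e3))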

#' @title Log-Likelihood of an Additive Gaussian Process.
#' @description Compute the negative log-likelihood of an Additive Gaussian Process.
#' @param par the values of the covariance parameters.
#' @param model an object with \code{"lineqAGP"} S3 class.
#' @param parfixed not used.
#' @param mcmc.opts not used.
#' @param estim.varnoise If \code{TRUE}, a noise variance is estimated.
#' @return The value of the negative log-likelihood.
#' @seealso \code{\link{logLikAdditiveGrad}}
# #'          \code{\link{constrlogLikAdditiveFun}},
# #'          \code{\link{constrlogLikAdditiveGrad}}
#' @author A. F. Lopez-Lopera.
#'
#' @references Rasmussen, C. E. and Williams, C. K. I. (2005),
#' "Gaussian Processes for Machine Learning (Adaptive Computation and Machine Learning)".
#' \emph{The MIT Press}.
#' \href{http://www.gaussianprocess.org/gpml/}{[link]}
#'
#' @export
logLikAdditiveFun <- function(par = unlist(purrr::map(model$kernParam, "par")),
                              model,
                              parfixed = NULL, mcmc.opts = NULL,
                              estim.varnoise = FALSE) {
  m <- model$localParam$m
  u <- Gamma <- vector("list", model$d)
  Phi <- vector("list", model$d)
  logDetGamma <- rep(0, model$d)
  invGamma <- vector("list", model$d)
  
  for (j in 1:model$d)
    u[[j]] <- matrix(seq(0, 1, by = 1/(m[j]-1)), ncol = 1) # discretization vector
  
  if (estim.varnoise) {
    varnoise <- par[length(par)]
    par <- par[-length(par)]
  } else {
    varnoise <- model$varnoise
  }
  
  # computing the kernel matrix for the prior
  for (k in 1:model$d) {
    Gamma[[k]] <- kernCompute(u[[k]], u[[k]], model$kernParam[[k]]$type,
                              par[(k-1)*length(model$kernParam[[k]]$par) + 1:2])
    Phi[[k]] <- basisCompute.lineqGP(model$x[, k], u[[k]])
    # cholGamma <- chol(Gamma[[k]])
    # invGamma[[k]] <- chol2inv(cholGamma)
    # logDetGamma[k] <- 2*sum(diag(log(cholGamma)))
  }
  
  GammaPhitList <- vector("list", model$d)
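  # additive covariance of the observations, accumulated term by term below:
  #   Kyy = sum_k Phi_k %*% Gamma_k %*% t(Phi_k) (+ varnoise * I)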
  GammaPhitList[[1]] <- Gamma[[1]] %*% t(Phi[[1]])
  GammaPhitFull <- GammaPhitList[[1]]
  PhiGammaPhitFull <- Phi[[1]] %*% GammaPhitFull
  for (k in 2:model$d) {
    GammaPhitList[[k]] <- Gamma[[k]] %*% t(Phi[[k]])
    GammaPhitFull <- GammaPhitFull + GammaPhitList[[k]]
    PhiGammaPhitFull <- PhiGammaPhitFull + Phi[[k]] %*% GammaPhitList[[k]]
  }
  if (estim.varnoise)
    PhiGammaPhitFull <- PhiGammaPhitFull + varnoise*diag(nrow(PhiGammaPhitFull))
  Kyy <- PhiGammaPhitFull
  cholKyy <- chol(PhiGammaPhitFull + model$nugget*diag(nrow(Kyy)))
  logDetKyy <- 2*sum(diag(log(cholKyy)))
  invKyy <- chol2inv(cholKyy)
  
  # hfunBigPhi <- parse(text = paste("cbind(",
  #                                  paste("Phi[[", 1:model$d, "]]", sep = "", collapse = ","),
  #                                  ")", sep = ""))
  # hfunBigInvGamma <- parse(text = paste("bdiag(",
  #                                       paste("invGamma[[", 1:model$d, "]]", sep = "", collapse = ","),
  #                                       ")", sep = ""))
  # bigPhi <- eval(hfunBigPhi)
  # bigInvGamma <- eval(hfunBigInvGamma)
  # GammaPhitPhi <- bigInvGamma + t(bigPhi)%*%bigPhi/varnoise
  # invGammaPhitPhi <- chol2inv(chol(GammaPhitPhi))
  # invPhiGammaPhitFull <- (diag(length(model$y)) - bigPhi%*%invGammaPhitPhi%*%t(bigPhi)/varnoise)/varnoise
  # invKyy <- invPhiGammaPhitFull
  # logDetKyy <- log(varnoise^(length(model$y)-nrow(bigInvGamma))) + sum(logDetGamma) +
  #   log(det(as.matrix(GammaPhitPhi)))
  
  f <- 0.5*(logDetKyy + nrow(invKyy)*log(2*pi) + t(model$y)%*%invKyy%*%model$y)
  return(f)
}

#' @title Gradient of the Log-Likelihood of an Additive Gaussian Process.
#' @description Compute the gradient of the negative log-likelihood of an Additive Gaussian Process.
#' @param par the values of the covariance parameters.
#' @param model an object with \code{"lineqAGP"} S3 class.
#' @param parfixed indices of the parameters that are kept fixed (not optimised).
#' @param mcmc.opts not used.
#' @param estim.varnoise If \code{TRUE}, a noise variance is estimated.
#' @return the gradient of the negative log-likelihood.
#'
#' @seealso \code{\link{logLikAdditiveFun}}
# #'          \code{\link{constrlogLikAdditiveFun}},
# #'          \code{\link{constrlogLikAdditiveGrad}}
#' @author A. F. Lopez-Lopera.
#'
#' @references Rasmussen, C. E. and Williams, C. K. I. (2005),
#' "Gaussian Processes for Machine Learning (Adaptive Computation and Machine Learning)".
#' \emph{The MIT Press}.
#' \href{http://www.gaussianprocess.org/gpml/}{[link]}
#'
#' @export
logLikAdditiveGrad <- function(par = unlist(purrr::map(model$kernParam, "par")), 
                               model, parfixed = rep(FALSE, length(par)),
                               mcmc.opts = NULL,
                               estim.varnoise = FALSE) {
  m <- model$localParam$m
  u <- Gamma <- vector("list", model$d)
  Phi <- vector("list", model$d)
  # logDetGamma <- rep(0, model$d)
  invGamma <- vector("list", model$d)
  
  for (j in 1:model$d)
    u[[j]] <- matrix(seq(0, 1, by = 1/(m[j]-1)), ncol = 1) # discretization vector
  
  if (estim.varnoise) {
    varnoise <- par[length(par)]
    par <- par[-length(par)]
  } else {
    varnoise <- model$varnoise
  }
  
  # computing the kernel matrix for the prior
  for (k in 1:model$d) {
    Gamma[[k]] <- kernCompute(u[[k]], u[[k]], model$kernParam[[k]]$type,
                              par[(k-1)*length(model$kernParam[[k]]$par) + 1:2])
    Phi[[k]] <- basisCompute.lineqGP(model$x[, k], u[[k]])
    cholGamma <- chol(Gamma[[k]])
    invGamma[[k]] <- chol2inv(cholGamma)
  }
  
  # GammaPhitList <- vector("list", model$d)
  # GammaPhitList[[1]] <- Gamma[[1]] %*% t(Phi[[1]])
  # GammaPhitFull <- GammaPhitList[[1]]
  # PhiGammaPhitFull <- Phi[[1]] %*% GammaPhitFull
  # for (k in 2:model$d) {
  #   GammaPhitList[[k]] <- Gamma[[k]] %*% t(Phi[[k]])
  #   GammaPhitFull <- GammaPhitFull + GammaPhitList[[k]]
  #   PhiGammaPhitFull <- PhiGammaPhitFull + Phi[[k]] %*% GammaPhitList[[k]]
  # }
  # if (estim.varnoise)
  #   PhiGammaPhitFull <- PhiGammaPhitFull + varnoise*diag(nrow(PhiGammaPhitFull))
  # Kyy <- PhiGammaPhitFull
  # cholKyy <- chol(PhiGammaPhitFull + model$nugget*diag(nrow(Kyy)))
  # invKyy <- chol2inv(cholKyy)
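  # Kyy^{-1} below is obtained via the matrix-inversion lemma (Woodbury), with
  # bigPhi = [Phi_1, ..., Phi_d] and block-diagonal prior precision bigInvGamma:
  #   (Phi Gamma t(Phi) + varnoise*I)^{-1} = (I - bigPhi (varnoise*bigInvGamma + t(bigPhi) bigPhi)^{-1} t(bigPhi)) / varnoise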

  hfunBigPhi <- parse(text = paste("cbind(",
                                   paste("Phi[[", 1:model$d, "]]", sep = "", collapse = ","),
                                   ")", sep = ""))
  hfunBigInvGamma <- parse(text = paste("bdiag(",
                                        paste("invGamma[[", 1:model$d, "]]", sep = "", collapse = ","),
                                        ")", sep = ""))
  bigPhi <- eval(hfunBigPhi)
  bigInvGamma <- eval(hfunBigInvGamma)
  GammaPhitPhi <- varnoise*bigInvGamma + t(bigPhi)%*%bigPhi
  invGammaPhitPhi <- chol2inv(chol(GammaPhitPhi))
  invPhiGammaPhitFull <- (diag(length(model$y)) - bigPhi%*%invGammaPhitPhi%*%t(bigPhi))/varnoise
  invKyy <- invPhiGammaPhitFull

  gradKyyTemp <- c()
  idx_iter <- seq(length(par))
  idx_iter <- idx_iter[parfixed == FALSE]

  alpha <- invKyy %*% model$y
  cteTermLik <- invKyy - alpha%*%t(alpha)
  gradf <- rep(0, length(par))
  for (k in 1:model$d) {
    gradGammaSigma2 <- attr(Gamma[[k]], 'gradient')[[1]]
    gradKyySigma2 <- Phi[[k]] %*% gradGammaSigma2 %*% t(Phi[[k]])
    gradf[(k-1)*length(model$kernParam[[k]]$par) + 1] <- 0.5*sum(diag(cteTermLik %*% gradKyySigma2))
    
    gradGammaTheta <- attr(Gamma[[k]], 'gradient')[[2]]
    gradKyyTheta <- Phi[[k]] %*% gradGammaTheta %*% t(Phi[[k]])
    gradf[(k-1)*length(model$kernParam[[k]]$par) + 2] <- 0.5*sum(diag(cteTermLik %*% gradKyyTheta))
  }
  gradf[parfixed == TRUE] <- 0
  
  if (estim.varnoise) {
    gradKyynoise <- diag(nrow(invKyy))
    gradf <- c(gradf, 0.5*sum(diag(cteTermLik %*% gradKyynoise)))
  }
  
  # gradf <- nl.grad(par, logLikFun, heps = 1e-9, model, parfixed, mcmc.opts)
  # gradf[parfixed] <- 0
  return(gradf)
}
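
## A minimal sketch (not run) of fitting an additive model through lineqGPOptim, assuming
## `model` carries one kernParam entry per input dimension (as expected by the "Additive"
## likelihoods); the noise-variance search range is an illustrative value:
# model <- lineqGPOptim(model, additive = TRUE,
#                       estim.varnoise = TRUE, bounds.varnoise = c(1e-6, 1))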

# #' @title Log-Constrained-Likelihood of an Additive Gaussian Process.
# #' @description Compute the negative log-constrained-likelihood of an Additive
# #' Gaussian Process conditionally to the inequality constraints
# #' (Lopez-Lopera et al., 2018).
# #' @param par the values of the covariance parameters.
# #' @param model an object with \code{"lineqAGP"} S3 class.
# #' @param parfixed not used.
# #' @param mcmc.opts mcmc options. \code{mcmc.opts$probe} A character string corresponding
# #' to the estimator for the orthant multinormal probabilities.
# #' Options: \code{"Genz"} (Genz, 1992), \code{"ExpT"} (Botev, 2017).
# #' If \code{probe == "ExpT"}, \code{mcmc.opts$nb.mcmc}  is the number of MCMC
# #' samples used for the estimation.
# #' @param estim.varnoise If \code{true}, a noise variance is estimated.
# #' @return The value of the negative log-constrained-likelihood.
# #' @details Orthant multinormal probabilities are estimated according to
# #' (Genz, 1992; Botev, 2017). See (Lopez-Lopera et al., 2018).
# #'
# #' @seealso \code{\link{constrlogLikAdditiveGrad}}, \code{\link{logLikAdditiveFun}},
# #'          \code{\link{logLikAdditiveGrad}}
# #' @author A. F. Lopez-Lopera.
# #'
# #' @references Lopez-Lopera, A. F., Bachoc, F., Durrande, N., and Roustant, O. (2018),
# #' "Finite-dimensional Gaussian approximation with linear inequality constraints".
# #' \emph{SIAM/ASA Journal on Uncertainty Quantification}, 6(3): 1224-1255.
# #' \href{https://doi.org/10.1137/17M1153157}{[link]}
# #'
# #' @references Bachoc, F., Lagnoux, A., and Lopez-Lopera, A. F. (2018),
# #' "Maximum likelihood estimation for Gaussian processes under inequality constraints".
# #' \emph{ArXiv e-prints}
# #' \href{https://arxiv.org/abs/1804.03378}{[link]}
# #'
# #' Genz, A. (1992),
# #' "Numerical computation of multivariate normal probabilities".
# #' \emph{Journal of Computational and Graphical Statistics},
# #' 1:141-150.
# #' \href{https://www.tandfonline.com/doi/abs/10.1080/10618600.1992.10477010}{[link]}
# #'
# #' Botev, Z. I. (2017),
# #' "The normal law under linear restrictions: simulation and estimation via minimax tilting".
# #' \emph{Journal of the Royal Statistical Society: Series B (Statistical Methodology)},
# #' 79(1):125-148.
# #' \href{https://rss.onlinelibrary.wiley.com/doi/pdf/10.1111/rssb.12162}{[link]}
# #'
# #' @import TruncatedNormal mvtnorm
# #' @export
# constrlogLikAdditiveFun <- function(par = unlist(purrr::map(model$kernParam, "par")),
#                                     model,
#                                     parfixed = NULL, mcmc.opts = NULL,
#                                     estim.varnoise = FALSE) {
#   m <- model$localParam$m
#   u <- Gamma <- vector("list", model$d)
#   Phi <- vector("list", model$d)
#   
#   for (j in 1:model$d)
#     u[[j]] <- matrix(seq(0, 1, by = 1/(m[j]-1)), ncol = 1) # discretization vector
#   
#   if (estim.varnoise) {
#     varnoise <- par[length(par)]
#     par <- par[-length(par)]
#   }
#   
#   # computing the kernel matrix for the prior
#   for (k in 1:model$d) {
#     Gamma[[k]] <- kernCompute(u[[k]], u[[k]], model$kernParam[[k]]$type,
#                               par[(k-1)*model$d + 1:2])
#     Phi[[k]] <- basisCompute.lineqGP(model$x[, k], u[[k]])
#   }
#   
#   GammaFull <- Gamma[[1]]
#   GammaPhitList <- vector("list", model$d)
#   GammaPhitList[[1]] <- Gamma[[1]] %*% t(Phi[[1]])
#   GammaPhitFull <- GammaPhitList[[1]]
#   PhiGammaPhitFull <- Phi[[1]] %*% GammaPhitFull
#   for (k in 2:model$d) {
#     GammaPhitList[[k]] <- Gamma[[k]] %*% t(Phi[[k]])
#     GammaPhitFull <- GammaPhitFull + GammaPhitList[[k]]
#     PhiGammaPhitFull <- PhiGammaPhitFull + Phi[[k]] %*% GammaPhitList[[k]]
#   }
#   if (estim.varnoise)
#     PhiGammaPhitFull <- PhiGammaPhitFull + varnoise*diag(nrow(PhiGammaPhitFull))
#   
#   Kyy <- PhiGammaPhitFull
#   cholKyy <- chol(PhiGammaPhitFull + model$nugget*diag(nrow(Kyy)))
#   logDetKyy <- 2*sum(diag(log(cholKyy)))
#   invKyy <- chol2inv(cholKyy)
#   f <- 0.5*(logDetKyy + nrow(Kyy)*log(2*pi) + t(model$y)%*%invKyy%*%model$y)
#   
#   # computing the other terms of the constrained log-likelihood
#   GammaPhitInvKyy <- GammaPhit %*% invKyy
#   mu <- GammaPhitInvKyy %*% model$y
#   mu.eta <- model$Lambda %*% mu
#   Sigma <- Gamma - GammaPhitInvKyy %*% t(GammaPhit)
#   Sigma.eta <- model$Lambda %*% Sigma %*% t(model$Lambda)
#   if (min(eigen(Sigma.eta, symmetric = TRUE)$values) <= 0) # numerical stability
#     Sigma.eta <- Sigma.eta + 1e-5*diag(nrow(Sigma.eta))
#   Gamma.eta <- model$Lambda %*% Gamma %*% t(model$Lambda)
#   if (min(eigen(Gamma.eta, symmetric = TRUE)$values) <= 0) # numerical stability
#     Gamma.eta <- Gamma.eta + 1e-5*diag(nrow(Gamma.eta))
#   
#   set.seed(7)
#   switch(mcmc.opts$probe,
#          Genz = {
#            f <- f - log(pmvnorm(model$lb, model$ub,
#                                 as.vector(mu.eta), sigma = Sigma.eta)[[1]]) +
#              log(pmvnorm(model$lb, model$ub,
#                          rep(0, nrow(Gamma.eta)), sigma = Gamma.eta)[[1]])
#          }, ExpT = {
#            f <- f - log(mvNcdf(model$lb-mu.eta, model$ub-mu.eta,
#                                Sigma.eta, mcmc.opts$nb.mcmc)$prob) +
#              log(mvNcdf(model$lb, model$ub, Gamma.eta, mcmc.opts$nb.mcmc)$prob)
#          })
#   
#   
#   return(f)
# }

# #' @title  Numerical Gradient of the Log-Likelihood of a Additive Gaussian Process.
# #' @description Compute the gradient numerically of the negative log-likelihood of an Additive Gaussian Process.
# #' @param par the values of the covariance parameters.
# #' @param model an object with \code{"lineqGP"} S3 class.
# #' @param parfixed indices of fixed parameters to do not be optimised.
# #' @param mcmc.opts not used.
# #' @param estim.varnoise If \code{true}, a noise variance is estimated.
# #' @return the gradient of the negative log-likelihood.
# #'
# #' @seealso \code{\link{logLikAdditiveFun}}, \code{\link{logLikFun}},
# #'          \code{\link{logLikGrad}}
# #' @author A. F. Lopez-Lopera.
# #'
# #' @references Rasmussen, C. E. and Williams, C. K. I. (2005),
# #' "Gaussian Processes for Machine Learning (Adaptive Computation and Machine Learning)".
# #' \emph{The MIT Press}.
# #' \href{http://www.gaussianprocess.org/gpml/}{[link]}
# #'
# #' @export
# constrlogLikAdditiveGrad <- function(par = unlist(purrr::map(model$kernParam, "par")),
#                                model,  parfixed = rep(FALSE, model$d*length(par)),
#                                mcmc.opts = list(probe = "Genz", nb.mcmc = 1e3),
#                                estim.varnoise = FALSE) {
#   # gradf <- nl.grad(par, constrlogLikFun, heps = 1e-9, model, parfixed, mcmc.opts)
#   # gradf[parfixed] <- 0
#   if (estim.varnoise)
#     parfixed <- c(parfixed, FALSE)
# 
#   # this is a faster version of "nl.grad" when some parameters are fixed.
#   if (!is.numeric(par))
#     stop("Argument 'par' must be a numeric value.")
#   fun <- match.fun(logLikAdditiveFun)
#   fn <- function(x) fun(x, model, parfixed, mcmc.opts, estim.varnoise)
#   if (length(fn(par)) != 1)
#     stop("Function 'f' must be a univariate function of 2 variables.")
#   n <- length(par)
#   hh <- rep(0, n)
#   gradf <- rep(0, n)
# 
#   # the gradient is computed only w.r.t. the parameters to be optimised
#   idx_iter <- seq(n)
#   idx_iter <- idx_iter[parfixed == FALSE]
#   heps <- 1e-9
#   dimIdx <- 0
#   for (i in idx_iter) {
#     hh[i] <- heps
#     gradf[i] <- (fn(par + hh) - fn(par - hh))/(2 * heps)
#     hh[i] <- 0
#   }
#   return(gradf)
# }
# 
