R/GP.R
In dynemu: Emulation of Dynamic Simulators via One-Step-Ahead Approach

#' fitting the model with squared exponential kernel.
#'
#' @param X vector or matrix of input locations.
#' @param y vector of response values.
#' @param g nugget parameter. Default is 1.490116e-08.
#' @param lower lower bound of theta. Default if 0.001.
#' @param upper upper bound of theta. Default if 1000.
#' @param Xscale logical indicating whether to scale X or not. Default is TRUE.
#' @param Yscale logical indicating whether to scale y or not. Only used if constant=FALSE. Default is TRUE.
#' @param constant logical indicating for constant mean (constant=TRUE) or zero mean (constant=FALSE). Default is FALSE.
#'
#' @return A list containing hyperparameters, covariance inverse matrix, X, y and logical inputs:
#' \itemize{
#'   \item \code{theta}: vector of lengthscale hyperparameter.
#'   \item \code{g}: copy of g.
#'   \item \code{Ki}: matrix of covariance inverse.
#'   \item \code{mu.hat}: optimized constant mean. If constant=FALSE, 0.
#'   \item \code{X}: copy of X. If Xscale=TRUE, scaled X.
#'   \item \code{y}: copy of y. If Yscale=TRUE, scaled y.
#'   \item \code{tau2hat}: estimated scale hyperparameter.
#'   \item \code{Xscale}: copy of Xscale.
#'   \item \code{Yscale}: copy of Yscale.
#'   \item \code{constant}: copy of constant.
#' }
#'
#' @importFrom plgp distance covar.sep
#' @importFrom stats optim quantile
#' @noRd
#' @keywords internal
#' @examples
#' \dontrun{
#' library(lhs)
#' ### synthetic function ###
#' f1 <- function(x) {
#'   sin(8 * pi * x)
#' }
#'
#' ### training data ###
#' n1 <- 15
#'
#' X1 <- maximinLHS(n1, 1)
#' y1 <- f1(X1)
#'
#' GP(X1, y1)
#' }
GP <- function(X, y, g = sqrt(.Machine$double.eps), constant = FALSE, p=0.05, min_cor = 0.01, max_cor = 0.99) { # p=0.05 for hetGP
  if (constant) {
    if (is.null(dim(X))) X <- matrix(X, ncol = 1)

    Xscaled <- (X - matrix(apply(X, 2, range)[1,], nrow = nrow(X), ncol = ncol(X), byrow = TRUE)) %*%
      diag(1/(apply(X, 2, range)[2,] - apply(X, 2, range)[1,]), ncol(X))
    lower <- -quantile(distance(Xscaled)[lower.tri(distance(Xscaled))], p) / log(min_cor) *
      (apply(X, 2, range)[2,] - apply(X, 2, range)[1,])^2 
    upper <- -quantile(distance(Xscaled)[lower.tri(distance(Xscaled))], 1-p) / log(max_cor) *
      (apply(X, 2, range)[2,] - apply(X, 2, range)[1,])^2 
    init <- sqrt(lower * upper)

    n <- length(y)

    nlsep <- function(par, X, Y) {
      theta <- par # lengthscale
      K <- covar.sep(X, d = theta, g = g)
      Ki <- solve(K)
      ldetK <- determinant(K, logarithm = TRUE)$modulus

      one.vec <- matrix(1, ncol = 1, nrow = n)
      beta <- drop(solve((t(cbind(one.vec, X)) %*% Ki %*% cbind(one.vec, X))) %*% (t(cbind(one.vec, X)) %*% Ki %*% matrix(Y)))

      tau2hat <- drop(t(Y - cbind(1, X) %*% beta) %*% Ki %*% (Y - cbind(1, X) %*% beta) / n)
      ll <- -(n / 2) * log(tau2hat) - (1 / 2) * ldetK
      return(drop(-ll))
    }

    gradnlsep <- function(par, X, Y) {
      theta <- par
      K <- covar.sep(X, d = theta, g = g)
      Ki <- solve(K)

      one.vec <- matrix(1, ncol = 1, nrow = n)
      beta <- drop(solve((t(cbind(one.vec, X)) %*% Ki %*% cbind(one.vec, X))) %*% (t(cbind(one.vec, X)) %*% Ki %*% matrix(Y)))

      KiY <- Ki %*% (Y - cbind(1, X) %*% beta)
      ## loop over theta components
      dlltheta <- rep(NA, length(theta))
      for (k in 1:length(dlltheta)) {
        dotK <- K * distance(X[, k]) / (theta[k]^2)
        dlltheta[k] <- (n / 2) * t(KiY) %*% dotK %*% KiY / (t(Y) %*% KiY) - (1 / 2) * sum(diag(Ki %*% dotK))
      }

      return(-c(dlltheta))
    }

    outg <- optim(init, nlsep, gradnlsep,
      method = "L-BFGS-B", lower = lower, upper = upper, X = X, Y = y
    )

    theta <- outg$par
    K <- covar.sep(X, d = theta, g = g)
    Ki <- solve(K)
    one.vec <- matrix(1, ncol = 1, nrow = n)
    beta <- drop(solve((t(cbind(one.vec, X)) %*% Ki %*% cbind(one.vec, X))) %*% (t(cbind(one.vec, X)) %*% Ki %*% matrix(y)))
    tau2hat <- drop(t(y - cbind(1, X) %*% beta) %*% Ki %*% (y - cbind(1, X) %*% beta) / nrow(X))
    names(theta) <- NULL

    return(list(Ki = Ki, X = X, y = y, theta = theta, beta = beta, g = g, tau2hat = tau2hat, constant = constant))
  } else {
    if (is.null(dim(X))) X <- matrix(X, ncol = 1)

    Xscaled <- (X - matrix(apply(X, 2, range)[1,], nrow = nrow(X), ncol = ncol(X), byrow = TRUE)) %*%
      diag(1/(apply(X, 2, range)[2,] - apply(X, 2, range)[1,]), ncol(X))
    lower <- -quantile(distance(Xscaled)[lower.tri(distance(Xscaled))], p) / log(min_cor) *
      (apply(X, 2, range)[2,] - apply(X, 2, range)[1,])^2
    upper <- -quantile(distance(Xscaled)[lower.tri(distance(Xscaled))], 1-p) / log(max_cor) *
      (apply(X, 2, range)[2,] - apply(X, 2, range)[1,])^2
    init <- sqrt(lower * upper)

    n <- length(y)

    nlsep <- function(par, X, Y) {
      theta <- par # lengthscale
      K <- covar.sep(X, d = theta, g = g)
      Ki <- solve(K)
      ldetK <- determinant(K, logarithm = TRUE)$modulus
      ll <- -(n / 2) * log(t(Y) %*% Ki %*% Y) - (1 / 2) * ldetK
      return(drop(-ll))
    }

    outg <- optim(init, nlsep, method = "L-BFGS-B", lower = lower, upper = upper, X = X, Y = y)

    theta <- outg$par
    K <- covar.sep(X, d = theta, g = g)
    Ki <- solve(K)
    mu.hat <- 0
    tau2hat <- drop(t(y) %*% Ki %*% y / n)
    names(theta) <- NULL

    return(list(Ki = Ki, X = X, y = y, theta = theta, g = g, mu.hat = mu.hat, tau2hat = tau2hat, constant = constant))
  }
}

#' predictive posterior mean and variance with squared exponential kernel.
#'
#' @param fit an object of class GP.
#' @param xnew vector or matrix of new input locations to predict.
#'
#' @return A list predictive posterior mean and variance:
#' \itemize{
#'   \item \code{mu}: vector of predictive posterior mean.
#'   \item \code{sig2}: vector of predictive posterior variance.
#' }
#'
#' @importFrom plgp covar.sep
#' @noRd
#' @keywords internal
#' @examples
#' \dontrun{
#' library(lhs)
#' ### synthetic function ###
#' f1 <- function(x) {
#'   sin(8 * pi * x)
#' }
#'
#' ### training data ###
#' n1 <- 15
#'
#' X1 <- maximinLHS(n1, 1)
#' y1 <- f1(X1)
#'
#' fit1 <- GP(X1, y1)
#'
#' ### test data ###
#' x <- seq(0, 1, 0.01)
#' pred.GP(fit1, x)
#' }
pred.GP <- function(fit, xnew) {
  xnew <- as.matrix(xnew)

  Ki <- fit$Ki
  theta <- fit$theta
  beta <- fit$beta
  g <- fit$g
  X <- fit$X
  y <- fit$y
  tau2hat <- fit$tau2hat
  mu.X <- cbind(1, X) %*% beta
  mu.xnew <- cbind(1, xnew) %*% beta

  KXX <- covar.sep(xnew, d = theta, g = g)
  KX <- covar.sep(xnew, X, d = theta, g = 0)

  mup2 <- mu.xnew + KX %*% Ki %*% (y - mu.X)
  Sigmap2 <- pmax(0, diag(tau2hat * (KXX - KX %*% Ki %*% t(KX))))

  return(list(mu = mup2, sig2 = Sigmap2))
}