dolka: Utility functions for Bayesian optimization

Documented in qEI

## =============================================================================
##
##' @description Analytical expression of the multipoint expected
##'     improvement criterion, also known as the \eqn{q}-\emph{point
##'     expected improvement} and denoted by \eqn{q}-EI or \code{qEI}.
##' 
##' @title Computes the Multipoint Expected Improvement (qEI) Criterion
##'  
##' @param x A numeric matrix representing the set of input points
##'     (one row corresponds to one point) where to evaluate the qEI
##'     criterion.
##'
##' @param model An object of class \code{km}.
##'
##' @param plugin Optional scalar: if provided, it replaces the
##'     minimum of the current observations.
##'
##' @param type \code{"SK"} or \code{"UK"} (by default), depending
##'     whether uncertainty related to trend estimation has to be
##'     taken into account.
##'
##' @param minimization Logical specifying if EI is used in
##'     minimiziation or in maximization.
##'
##' @param fastCompute Logical. If \code{TRUE}, a fast approximation
##'     method based on a semi-analytic formula is used. See the
##'     reference Marmin (2014) for details.
##'
##' @param eps Numeric value of \eqn{epsilon} in the fast computation
##'     trick.  Relevant only if \code{fastComputation} is
##'     \code{TRUE}.
##'
##' @param deriv Logical. When \code{TRUE} the dervivatives of the
##'     kriging mean vector and of the kriging covariance (Jacobian
##'     arrays) are computed and stored in the environment given in
##'     \code{envir}.
##' 
##' @param envir An optional environment specifying where to get
##'     intermediate values calculated in \code{\link{qEI}}.
##'
##' @return The multipoint Expected Improvement, defined as
##'     \deqn{qEI(X_{new}) := E\left[\{ \min Y(X)  - \min Y(X_{new}) \}_{+}
##'     \vert Y(X) = y(X) \right],}{qEI(Xnew) := E[{ min Y(X)  - min Y(Xnew) }_{+}
##'     \vert Y(X) = y(X)],}
##' where \eqn{X} is the current design of experiments,
##' \eqn{X_new}{Xnew} is a new candidate design, and
##' \eqn{Y} is a random process assumed to have generated the
##' objective function \eqn{y}.
##'
##' @author Sebastien Marmin, Clement Chevalier and David Ginsbourger.
##' 
##' @seealso \code{\link{EI}}
##'
##' @references
##' 
##' C. Chevalier and D. Ginsbourger (2014) Learning and Intelligent
##' Optimization - 7th International Conference, Lion 7, Catania,
##' Italy, January 7-11, 2013, Revised Selected Papers, chapter Fast
##' computation of the multipoint Expected Improvement with
##' applications in batch selection, pages 59-69, Springer.
##' 
##' D. Ginsbourger, R. Le Riche, L. Carraro (2007), A Multipoint
##' Criterion for Deterministic Parallel Global Optimization based on
##' Kriging. The International Conference on Non Convex Programming,
##' 2007.
##' 
##' S. Marmin (2014). Developpements pour l'evaluation et la
##' maximisation du critere d'amelioration esperee multipoint en
##' optimisation globale. Master's thesis, Mines Saint-Etienne
##' (France) and University of Bern (Switzerland).
##' 
##' D. Ginsbourger, R. Le Riche, and L. Carraro. Kriging is
##' well-suited to parallelize optimization (2010), In Lim Meng Hiot,
##' Yew Soon Ong, Yoel Tenne, and Chi-Keong Goh, editors,
##' \emph{Computational Intelligence in Expensive Optimization
##' Problems}, Adaptation Learning and Optimization, pages
##' 131-162. Springer Berlin Heidelberg.
##' 
##' J. Mockus (1988), \emph{Bayesian Approach to Global
##' Optimization}. Kluwer academic publishers.
##' 
##' M. Schonlau (1997), \emph{Computer experiments and global
##' optimization}, Ph.D. thesis, University of Waterloo.
##' 
##' @keywords models parallel optimization
##'
##' @importFrom mnormt dmnorm pmnorm
##' @export
##'
##' @examples
##' \donttest{
##' set.seed(007)
##' ## Monte-Carlo validation
##' 
##' ## a 4-d, 81-points grid design, and the corresponding response
##' ## ============================================================
##' d <- 4; n <- 3^d
##' design <- expand.grid(rep(list(seq(0, 1, length = 3)), d))
##' names(design) <- paste0("x", 1:d)
##' y <- apply(design, 1, hartman4)
##' 
##' ## learning
##' ## ========
##' model <- km(~1, design = design, response = y, control = list(trace = FALSE))
##' 
##' ## pick up 10 points sampled from the 1-point expected improvement
##' ## ===============================================================
##' q <- 10
##' X <- sampleFromEI(model, n = q)
##' 
##' ## simulation of the minimum of the kriging random vector at X
##' ## ===========================================================
##' t1 <- proc.time()
##' newdata <- as.data.frame(X)
##' colnames(newdata) <- colnames(model@X)
##' 
##' krig  <- predict(object = model, newdata = newdata, type = "UK",
##'                  se.compute = TRUE, cov.compute = TRUE)
##' mk <- krig$mean
##' Sigma.q <- krig$cov
##' mychol <- chol(Sigma.q)
##' nsim <- 300000
##' white.noise <- rnorm(n = nsim * q)
##' minYsim <- apply(crossprod(mychol, matrix(white.noise, nrow = q)) + mk,
##'                  MARGIN = 2, FUN = min)
##' 
##' ## simulation of the improvement (minimization)
##' ## ============================================
##' qImprovement <- min(model@y) - minYsim
##' qImprovement <- qImprovement * (qImprovement > 0)
##' 
##' ## empirical expectation of the improvement and confidence interval (95%)
##' ## ======================================================================
##' EIMC <- mean(qImprovement)
##' seq <- sd(qImprovement) / sqrt(nsim)
##' confInterv <- c(EIMC - 1.96 * seq, EIMC + 1.96 * seq)
##'
##' ## evaluate time
##' ## =============
##' tMC <- proc.time() - t1
##' 
##' ## MC estimation of the qEI
##' ## ========================
##' print(EIMC) 
##' 
##' ## qEI with analytical formula and with fast computation trick
##' ## ===========================================================
##' tForm <- system.time(qEI(X, model, fastCompute = FALSE))
##' tFast <- system.time(qEI(X, model))
##' 
##' rbind("MC" = tMC, "form" = tForm, "fast" = tFast)
##' 
##' }
##' 
qEI <- function(x, model, plugin = NULL,
                type = c("UK", "SK"),
                minimization = TRUE,
                fastCompute = TRUE,
                eps = 1e-5,
                deriv = TRUE,
                envir = NULL) {

    type <- match.arg(type)
    d <- model@d

    if (!is.matrix(x)) {
        x <- matrix(x, ncol = d)
    }
    
    xb <- rbind(model@X, x)
    nExp <- model@n
    x <- unique(round(xb, digits = 8))
    
    if ((nExp + 1) > length(x[ , 1])) {
        return (0)
    }

    x <- matrix(x[(nExp + 1):length(x[ , 1]), ], ncol = d)

    if (is.null(plugin) &&  minimization) plugin <- min(model@y)
    if (is.null(plugin) && !minimization) plugin <- max(model@y)

    ## if (length(x[ , 1]) == 1) {
    ##     return(EI(x = x, model = model, plugin = plugin,
    ##              type = type, envir = envir))
    ## }

    ## Compute the kriging prediction with derivatives
    pred  <- predict(object = model, newdata = x, type = type,
                     se.compute = TRUE,
                     cov.compute = TRUE,
                     deriv = TRUE,
                     checkNames = FALSE)
    
    if (!is.null(envir)) {
        assign("pred", value = pred, envir = envir) 
    }
    
    sigma <- pred$cov
    mu <- pred$mean
    
    q <- length(mu)
    
    pk <- first_term <- second_term <- rep(0, times = q)
    
    if (!fastCompute) {
        symetric_term <- matrix(0, q, q)
        non_symetric_term <- matrix(0, q, q)
    }
    
    for (k in 1:q){
        ## covariance matrix of the vector Z^(k)
        Sigma_k <- covZk(sigma = sigma, index = k )
        
        ## mean of the vector Z^(k)
        mu_k <- mu - mu[k] 
        mu_k[k] <- -mu[k]
        if (minimization) mu_k <- -mu_k
        
        b_k <- rep(0, times = q)
        b_k[k] <- -plugin
        if(minimization) b_k <- -b_k
        pk[k] <- pmnorm(x = b_k - mu_k, varcov = Sigma_k, maxpts = q * 200)[1]
        
        first_term[k] <- (mu[k] - plugin) * pk[k]
        if (minimization) first_term[k] <- -first_term[k]
        
        if (fastCompute) {
	    second_term[k] <- (pmnorm(x = b_k + eps * Sigma_k[ , k] - mu_k,
                                      varcov = Sigma_k,
                                      maxpts = q * 200)[1] - pk[k]) / eps
        } else {
            for(i in 1:q){
        	non_symetric_term[k, i] <- Sigma_k[i, k]
                if (i >= k) {
                    mik <- mu_k[i]
                    sigma_ii_k <- Sigma_k[i,i]
	            bik <- b_k[i]
	            phi_ik <- dnorm(x = bik, mean = mik, sd = sqrt(sigma_ii_k))
                    
                    ##need c.i^(k) and Sigma.i^(k)
	            cik <- get_cik(b = b_k , m = mu_k , sigma = Sigma_k , i = i)
	            sigmaik <- get_sigmaik(sigma = Sigma_k , i = i)
	            Phi_ik <- pmnorm(x = cik, varcov = sigmaik,
                                     maxpts = (q - 1) * 200)[1]
	            symetric_term[k, i] <- phi_ik * Phi_ik
                }
            }
        }
    }
    if (!fastCompute) {
        symetric_term <- symetric_term + t(symetric_term)
        diag(symetric_term) <- 0.5 * diag(symetric_term) 
        second_term <- sum(symetric_term * non_symetric_term)
    }

    ## XXXY explain this. Where is the 'get'???
    if (!is.null(envir)) {
        
        assign("pk", pk, envir = envir)
        if (fastCompute == FALSE) {
            assign("symetric_term", symetric_term, envir = envir)
        }

        assign("kriging.mean", mu, envir = envir)
        assign("kriging.cov", sigma, envir = envir)
        
        ## assign("Tinv.c", krig$Tinv.c, envir = envir)
        ## assign("c", krig$c, envir = envir)
    }
    
    somFinal <- sum(first_term,second_term)
    return(somFinal)

}


get_sigmaik <- function(sigma, i) {
    
    result <- sigma
    q <- nrow(sigma)
    
    for (u in 1:q) {
        for (v in 1:q) {
            if ((u != i) && (v != i)){
                result[u, v] <- sigma[u, v] -
                    sigma[u,i] * sigma[v, i] / sigma[i, i] 
            } else{
                result[u, v] <- 0
            }
        }
    }
    
    result <- result[-i, -i]
    
    return(result)
}


get_cik <- function(b , m, sigma , i) {
    
    sigmai <- sigma[i, ] / sigma[i, i]
    result <- b - m 
    result <- result - result[i] * sigmai
    result <- result[-i]
    
    return(result)
    
}

covZk <- function(sigma, index){
  
    result <- sigma
    q <- nrow(sigma)
    
    result[index, index] <- sigma[index, index]
    
    for (i in 1:q) {
        if (i != index){
            result[index, i] <- result[i, index] <-
                sigma[index, index] - sigma[index, i]
        }
    }
    
    for(i in 1:q) {
        for(j in i:q) {
            if ((i != index) && (j != index)){
                result[i, j] <- result[j, i] <-
                    sigma[i, j] + sigma[index, index] -
                    sigma[index, i] - sigma[index, j]
            }
        }
    }
    
    return(result)
    
}