decompo <- function(x, mu, Sigma, k) {
## Generic term of the gradient / Hessian of the multivariate normal
## CDF: the joint density of the components of 'x' indexed by 'k',
## multiplied by the CDF, at 'x[-k]', of the conditional distribution
## of the remaining components given 'x[k]'.
if (length(k) == length(x[ , 1])) {
return(dmnorm(c(x), c(mu), Sigma))
}
x1 <- x[k, 1]
x2 <- x[-k, 1]
mu1 <- mu[k, 1]
Sig22 <- Sigma[-k, -k]
Sig21 <- matrix(Sigma[-k, k], ncol = length(k))
Sig12 <- t(Sig21)
Sig11 <- Sigma[k, k]
Sig11Inv <- solve(Sig11)
varcov <- Sig22 - Sig21 %*% Sig11Inv %*% Sig12
varcov <- 0.5 * (varcov + t(varcov)) # enforce symmetry numerically
if (min(diag(varcov)) <= 0) {
## guard against slightly negative conditional variances caused by rounding
diag(varcov) <- diag(varcov) * (diag(varcov) >= 0) + 1e-9
}
moy <- mu[-k, 1] + Sig21 %*% Sig11Inv %*% (x1 - mu1)
return (
dmnorm(x = x1, mu[k, 1], varcov = Sig11) *
pmnorm(x = t(matrix(x2)), mean = t(moy), varcov = varcov,
maxpts = (length(moy) * 200))
)
}
## Gradient of the multivariate normal CDF
GPhi <- function(x, mu, Sigma) {
q <- length(x[ , 1])
res <- matrix(NaN, q, 1)
for (i in 1:q) {
res[i, 1] <- decompo(x, mu, Sigma, i)
}
return( res )
}
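## A minimal sanity check for 'decompo' and 'GPhi' (not run; assumes the
## 'mnormt' package providing 'pmnorm' is attached, as elsewhere in this
## file): the analytic gradient should agree with central finite
## differences of the CDF, up to the integration error of 'pmnorm'.
if (FALSE) {
  set.seed(1)
  q <- 3
  A <- matrix(rnorm(q * q), q, q)
  Sigma <- crossprod(A) + diag(q)  # a valid covariance matrix
  x <- matrix(rnorm(q), q, 1)
  mu <- matrix(0, q, 1)
  h <- 1e-4
  fd <- sapply(1:q, function(i) {
    e <- matrix(0, q, 1)
    e[i, 1] <- h
    (pmnorm(c(x + e), c(mu), Sigma) -
     pmnorm(c(x - e), c(mu), Sigma)) / (2 * h)
  })
  cbind(analytic = c(GPhi(x, mu, Sigma)), numeric = fd)
}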
## Hessian matrix of the multivariate normal CDF
HPhi <- function(x, mu, Sigma, gradient = NULL) {
q <- length(x[ , 1])
if (q == 1) {
res <- -(x - mu) / Sigma * dnorm(x, mu, sqrt(Sigma))
}
else {
res <- matrix(0, q, q)
for (i in 1:(q - 1)) {
for (j in (i + 1):q) {
res[i, j] <- decompo(x, mu, Sigma, c(i, j))
}
}
## The Hessian matrix is symmetric: its diagonal terms can be computed
## from the gradient of the CDF and the off-diagonal Hessian terms.
res <- res + t(res)
if (is.null(gradient)) {
res <- res - diag(((c(x) - c(mu)) * c(GPhi(x, mu, Sigma)) +
diag(Sigma %*% res)) / diag(Sigma))
} else {
res <- res - diag(((c(x) - c(mu)) * c(gradient) +
diag(Sigma %*% res)) / diag(Sigma))
}
}
return(res)
}
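## A companion check for 'HPhi' (not run, same hypothetical setup): the
## analytic Hessian should agree with central finite differences of the
## gradient 'GPhi'.
if (FALSE) {
  set.seed(1)
  q <- 3
  A <- matrix(rnorm(q * q), q, q)
  Sigma <- crossprod(A) + diag(q)
  x <- matrix(rnorm(q), q, 1)
  mu <- matrix(0, q, 1)
  h <- 1e-5
  num <- matrix(NA, q, q)
  for (i in 1:q) {
    e <- matrix(0, q, 1)
    e[i, 1] <- h
    num[ , i] <- (GPhi(x + e, mu, Sigma) - GPhi(x - e, mu, Sigma)) / (2 * h)
  }
  max(abs(num - HPhi(x, mu, Sigma)))  # should be close to 0
}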
## *****************************************************************************
##' @description Compute an exact or approximate gradient of the
##' multipoint expected improvement criterion.
##'
##' @title Gradient of the Multipoint Expected Improvement (qEI) Criterion
##'
##' @param x a matrix representing the set of input points (one row
##' corresponds to one point) where the gradient is evaluated.
##'
##' @param model an object of class \code{\link[DiceKriging]{km}}.
##'
##' @param plugin optional scalar: if provided, it replaces the
##' minimum of the current observations.
##'
##' @param type "SK" or "UK" (by default), depending on whether the
##' uncertainty related to trend estimation has to be taken into
##' account.
##'
##' @param minimization logical specifying whether EI is used in
##' minimization or in maximization.
##'
##' @param fastCompute if TRUE, a fast approximation method based on a
##' semi-analytic formula is used (see [Marmin 2014] for details).
##'
##' @param eps the value of \emph{epsilon} used in the fast computation
##' trick. Relevant only if \code{fastCompute} is \code{TRUE}.
##'
##' @param envir an optional environment specifying where to get
##' intermediate values calculated in \code{\link{qEI}}.
##'
##' @return The gradient of the multipoint expected improvement
##' criterion with respect to \code{x}. A zero matrix is returned if
##' the batch of input points contains the same point twice, or a
##' point from the experimental design of the \code{km} object (the
##' gradient does not exist in these cases).
##'
##' @section Caution: When \code{envir} is not \code{NULL} this
##' environment is assumed to contain the kriging predictions with
##' the derivatives evaluated. The user should make sure that the
##' model used to compute the prediction is the model used here
##' and that the prediction was made with the same value for the
##' argument \code{type}.
##'
##' @author Sebastien Marmin, Clement Chevalier and David Ginsbourger.
##'
##' @seealso \code{\link{qEI}}
##'
##' @references
##'
##' S. Marmin, C. Chevalier and D. Ginsbourger (2019)
##' Differentiating the multipoint Expected Improvement for optimal batch design.
##' arXiv:1503.05509.
##'
##' C. Chevalier and D. Ginsbourger (2014) Learning and Intelligent
##' Optimization - 7th International Conference, Lion 7, Catania, Italy,
##' January 7-11, 2013, Revised Selected Papers, chapter Fast computation of
##' the multipoint Expected Improvement with applications in batch selection,
##' pages 59-69, Springer.
##'
##' D. Ginsbourger, R. Le Riche, L. Carraro (2007), A Multipoint Criterion for
##' Deterministic Parallel Global Optimization based on Kriging. The
##' International Conference on Non Convex Programming, 2007.
##'
##' D. Ginsbourger, R. Le Riche, and L. Carraro. Kriging is well-suited to
##' parallelize optimization (2010), In Lim Meng Hiot, Yew Soon Ong, Yoel
##' Tenne, and Chi-Keong Goh, editors, \emph{Computational Intelligence in
##' Expensive Optimization Problems}, Adaptation Learning and Optimization,
##' pages 131-162. Springer Berlin Heidelberg.
##'
##' S. Marmin. Developpements pour l'evaluation et la maximisation du critere
##' d'amelioration esperee multipoint en optimisation globale (2014). Master's
##' thesis, Mines Saint-Etienne (France) and University of Bern (Switzerland).
##'
##' J. Mockus (1988), \emph{Bayesian Approach to Global Optimization}. Kluwer
##' academic publishers.
##'
##' M. Schonlau (1997), \emph{Computer experiments and global optimization},
##' Ph.D. thesis, University of Waterloo.
##'
##' @keywords models optimize
##'
##' @export
##'
##' @examples
##' set.seed(15)
##' # Example 1 - validation by comparison to finite difference approximations
##'
##' # a 9-points factorial design, and the corresponding response
##' d <- 2
##' n <- 9
##' design <- expand.grid(x1 = seq(0, 1, length = 3), x2 = seq(0, 1, length = 3))
##' y <- apply(design, 1, branin)
##'
##' # learning
##' model <- km(~1, design = design, response = y)
##'
##' # pick up 2 points sampled from the simple expected improvement
##' q <- 2 # increase to 4 for a more meaningful test
##' X <- sampleFromEI(model, n = q)
##'
##' # compute the gradient at the q-point batch
##' grad.analytic <- qEI.grad(X, model)
##'
##' # numerically compute the gradient
##' grad.numeric <- matrix(NaN, q, d)
##' eps <- 1e-6
##' EPS <- matrix(0, q, d)
##' for (i in 1:q) {
##'   for (j in 1:d) {
##'     EPS[i, j] <- eps
##'     grad.numeric[i, j] <- (qEI(X + EPS, model, fastCompute = FALSE) -
##'                            qEI(X, model, fastCompute = FALSE)) / eps
##'     EPS[i, j] <- 0
##'   }
##' }
##' print(grad.numeric)
##' print(grad.analytic)
##'
##' \dontrun{
##' lower <- c(0, 0); upper <- c(1, 1)
##' # graphics: displays the EI criterion, the design points in black,
##' # the batch points in red and the gradient in blue.
##' nGrid <- 50
##' gridAxe1 <- seq(lower[1], upper[1], length = nGrid)
##' gridAxe2 <- seq(lower[2], upper[2], length = nGrid)
##' grid <- expand.grid(gridAxe1, gridAxe2)
##' aa <- apply(grid, 1, EI, model = model)
##' myMat <- matrix(aa, nrow = nGrid)
##' image(x = gridAxe1, y = gridAxe2, z = myMat,
##'       col = colorRampPalette(c("darkgray", "white"))(5 * 10),
##'       xlab = names(design)[1], ylab = names(design)[2],
##'       main = sprintf("qEI - gradient of a batch of %d points", q),
##'       axes = TRUE,
##'       zlim = c(min(myMat), max(myMat)))
##' contour(x = gridAxe1, y = gridAxe2, z = myMat,
##'         add = TRUE, nlevels = 10)
##' points(X[ , 1], X[ , 2], pch = 19, col = 'red')
##' points(model@X[ , 1], model@X[ , 2], pch = 19)
##' arrows(X[ , 1], X[ , 2],
##'        X[ , 1] + 0.012 * grad.analytic[ , 1],
##'        X[ , 2] + 0.012 * grad.analytic[ , 2], col = 'blue')
##' }
qEI.grad <- function(x, model,
plugin = NULL,
type = c("UK", "SK"),
minimization = TRUE, fastCompute = TRUE,
eps = 1e-6,
envir = NULL) {
type <- match.arg(type)
if (!minimization) {
if (is.null(plugin)) {
plugin <- -max(model@y)
}
}
d <- model@d
if (!is.matrix(x)) x <- matrix(x, ncol = d)
q <- nrow(x)
xb <- rbind(model@X, x)
ux <- unique(round(xb, digits = 8))
if(length(xb[ , 1]) != length(ux[ , 1])) {
return (matrix(0, q, d))
}
if (is.null(plugin)) plugin <- min(model@y)
if (q == 1) {
if (!minimization) {
stop("'qEI.grad' doesn't work in \'minimization = FALSE\' when ",
"dim = 1 (in progress).")
}
return(EI.grad(x, model, plugin, type, envir))
}
## ==========================================================================
## Retrieve intermediate results stored by 'qEI': the CDF terms 'pk',
## plus 'symetric_term' when the exact (slow) computation is used.
## ==========================================================================
if (!is.null(envir)) {
pk <- get("pk", envir = envir)
if (!fastCompute) {
symetric_term <- get("symetric_term", envir = envir)
}
}
## ==========================================================================
## Get the kriging (conditional) mean and covariance and their
## derivatives.
## XXXY This code from DiceOptim is deactivated here.
## ==========================================================================
if (FALSE) {
kriging.dx <- krigingDeriv(x = x, model = model,
type = type, envir = envir)
if (is.null(kriging.dx)) {
return (matrix(0, q, d))
}
}
if (!is.null(envir)) {
pred <- get("pred", envir = envir)
} else {
pred <- predict(object = model, newdata = x, type = type,
se.compute = FALSE,
cov.compute = TRUE,
deriv = TRUE,
checkNames = FALSE)
}
## ==========================================================================
## Kriging mean vector and covariance matrix
##
## o kriging.mean: vector with length q
##
## o kriging.cov: 2-dimensional array with dim c(q, q)
##
## ==========================================================================
kriging.mean <- pred[["mean"]]
kriging.cov <- pred[["cov"]]
## =========================================================================
## Jacobian arrays of the mean vector and the covariance matrix
##
## o kriging.mean.jacob: 3-dimensional array with dim c(q, d, q)
##
## kriging.mean.jacob[l, j, k] = d(m_k) / d(x_lj)
##
## o kriging.cov.jacob: 4-dimensional array with dim c(q, d, q, q)
##
## kriging.cov.jacob[l, j, k, i]= d(Sigma_ki) / (d x_lj)
##
## CAUTION: since the indexing used in this code (taken from DiceOptim)
## differs from that of the predict method, 'aperm' is used.
## =========================================================================
kriging.mean.jacob <- aperm(pred[["mean.deriv"]], perm = c(1, 3, 2))
kriging.cov.jacob <- aperm(pred[["cov.deriv"]], perm = c(3, 4, 1, 2))
if (!minimization) {
kriging.mean <- -kriging.mean
kriging.mean.jacob <- -kriging.mean.jacob
}
## Initialisation
EI.grad <- matrix(0, q, d) # result
b <- matrix(rep(0, q), q, 1)
L <- -diag(rep(1, q))
Dpk <- matrix(0, q, d)
if (fastCompute == TRUE) {
termB <- matrix(0, q, d)
}
Sigk_dx <- array(0, dim = c(q, d, q, q))
mk_dx <- array(0, dim = c(q, d, q))
## XXXY Notes YD:
##
## o Since 'bk' and 'mk' are used through indexed forms, they both
## could be vectors.
##
## o Why not use bk - mk?
## First sum of the formula (terms A1 and A2 below)
for (k in 1:q) {
bk <- b
bk[k, 1] <- plugin # creation of vector b^(k)
Lk <- L
Lk[ , k] <- rep(1, q) # linear transform: Y to Zk
tLk <- t(Lk)
mk <- Lk %*% kriging.mean # mean of Zk (m^(k) in the formula)
Sigk <- Lk %*% kriging.cov %*% tLk # covariance of Zk
Sigk <- 0.5 * (Sigk + t(Sigk)) # numerical symmetrization
## term A1
if (is.null(envir)) {
EI.grad[k, ] <- EI.grad[k, ] - kriging.mean.jacob[k, , k] *
pmnorm(x = t(bk), mean = t(mk), varcov = Sigk, maxpts = q * 200)
} else {
EI.grad[k, ] <- EI.grad[k, ] - kriging.mean.jacob[k, , k] * pk[k]
}
## Compute the gradient and Hessian matrix of the CDF term pk.
gradpk <- GPhi(x = bk - mk, mu = b, Sigma = Sigk)
hesspk <- HPhi(x = bk, mu = mk, Sigma = Sigk, gradient = gradpk)
## term A2
for (l in 1:q) {
for (j in 1:d) {
Sigk_dx[l, j, , ] <- Lk %*% kriging.cov.jacob[l, j, , ] %*% tLk
mk_dx[l, j, ] <- Lk %*% kriging.mean.jacob[l, j, ]
Dpk[l, j] <- 0.5 * sum(hesspk * Sigk_dx[l, j, , ]) -
crossprod(gradpk, mk_dx[l, j, ])
}
}
EI.grad <- EI.grad + (plugin - kriging.mean[k]) * Dpk
## term B
if (fastCompute == TRUE) {
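## Fast semi-analytic approximation of term B (see [Marmin 2014]): the
## derivative is obtained as a forward finite difference, with step
## 'eps', of the CDF-based quantity f along the direction Sigk[ , k].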
gradpk1 <- GPhi(x = bk - mk + Sigk[ , k] * eps, mu = b, Sigma = Sigk)
hesspk1 <- HPhi(x = bk + Sigk[ , k] * eps, mu = mk, Sigma = Sigk,
gradient = gradpk1)
for (l in 1:q) {
for (j in 1:d) {
f1 <- -crossprod(mk_dx[l, j, ], gradpk1) +
eps * crossprod(Sigk_dx[l, j, , k], gradpk1) +
0.5 * sum(Sigk_dx[l, j, , ] * hesspk1)
f <- -crossprod(mk_dx[l, j, ], gradpk) +
0.5 * sum(Sigk_dx[l, j, , ] * hesspk)
termB[l, j] <- (f1 - f) / eps
}
}
} else {
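## Exact computation of term B: each summand Sigk[i, k] * phi * Phi is
## differentiated by the product rule. B1 collects the derivative of
## Sigk[i, k], B2 that of the univariate density phi, and B3 that of
## the conditional CDF Phi.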
B1 <- B2 <- B3 <- matrix(0, q, d)
for (i in 1:q) {
Sigk_ik <- Sigk[i, k]
Sigk_ii <- Sigk[i, i]
mk_i <- mk[i]
mk_dx_i <- mk_dx[ , , i]
bk_i <- bk[i, 1]
ck_pi <- bk[-i, 1] - mk[-i, ] - (bk[i, 1] - mk[i, 1]) / Sigk_ii * Sigk[-i, i]
Sigk_pi <- 0.5 * (Sigk[-i, -i] - 1 / Sigk_ii * Sigk[-i, i] %*% t(Sigk[-i, i]) +
t(Sigk[-i, -i] - 1 / Sigk_ii * Sigk[-i, i] %*% t(Sigk[-i, i])))
Sigk_dx_ii <- Sigk_dx[ , , i, i]
Sigk_dx_ik <- Sigk_dx[ , , i, k]
phi_ik <- dnorm(bk[i, 1], mk_i, sqrt(Sigk_ii))
dphi_ik_dSig <- ((bk_i - mk_i)^2 / (2 * Sigk_ii^2) - 0.5 * 1 / Sigk_ii) * phi_ik
dphi_ik_dm <- (bk_i - mk_i) / Sigk_ii * phi_ik
if (is.null(envir)) {
Phi_ik <- pmnorm(x = ck_pi, mean = rep(0, q - 1), varcov = Sigk_pi,
maxpts = (q - 1) * 200)
} else {
Phi_ik <- symetric_term[k, i] / phi_ik
}
GPhi_ik <- GPhi(x = matrix(ck_pi, q - 1, 1), mu = matrix(0, q - 1, 1),
Sigma = Sigk_pi)
HPhi_ik <- HPhi(x = matrix(ck_pi, q - 1, 1), mu = matrix(0, q - 1, 1),
Sigma = Sigk_pi, gradient = GPhi_ik)
Sigk_mi <- Sigk[-i, i]
for (l in 1:q) {
for (j in 1:d) {
## B1
B1[l, j] <- B1[l, j] + Sigk_dx_ik[l, j] * phi_ik * Phi_ik
## B2
B2[l, j] <- B2[l, j] + Sigk_ik *
(mk_dx_i[l,j] * dphi_ik_dm +
dphi_ik_dSig * Sigk_dx_ii[l, j]) * Phi_ik
## B3
dck_pi <- -mk_dx[l, j, -i] +
(mk_dx_i[l, j] * Sigk_ii + (bk_i - mk_i) * Sigk_dx_ii[l, j]) /
Sigk_ii^2 * Sigk_mi -
(bk[i, 1] - mk_i) / Sigk_ii * Sigk_dx[l, j, -i, i]
SigtCross <- tcrossprod(Sigk_dx[l, j, -i, i], Sigk_mi)
dSigk_pi <- Sigk_dx[l, j, -i, -i] + Sigk_dx_ii[l, j] / Sigk_ii^2 *
tcrossprod(Sigk_mi, Sigk_mi) - Sigk_ii^-1 *
(SigtCross + t(SigtCross))
B3[l, j] <- B3[l, j] +
Sigk_ik * phi_ik * (crossprod(GPhi_ik, dck_pi) +
0.5 * sum(HPhi_ik * dSigk_pi))
}
}
}
}
if (fastCompute == TRUE) {
EI.grad <- EI.grad + termB
} else {
EI.grad <- EI.grad + B1 + B2 + B3
}
}
if (is.nan(sum(EI.grad))) {
return(matrix(0, q, d))
}
return (EI.grad)
}
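## A minimal usage sketch (not run), assuming a fitted 'km' model and a
## batch 'X' as in the @examples above: for a small 'eps' the fast
## semi-analytic gradient should stay close to the exact one.
if (FALSE) {
  g.fast  <- qEI.grad(X, model, fastCompute = TRUE, eps = 1e-6)
  g.exact <- qEI.grad(X, model, fastCompute = FALSE)
  max(abs(g.fast - g.exact))  # should be small
}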