Nothing
decompo <- function(x, mu, Sigma,k) {
#####
if (length(k)==length(x[,1])) {
return(dmnorm(c(x),c(mu),Sigma))
}
x1 <- x[k,1]
x2 <- x[-k,1]
mu1 <- mu[k,1]
Sig22 <- Sigma[-k,-k]
Sig21 <- matrix(Sigma[-k,k],ncol=length(k))
Sig12 <- t(Sig21)
Sig11 <- Sigma[k,k]
Sig11Inv <- solve(Sig11)
varcov <- Sig22 - Sig21%*%Sig11Inv%*%Sig12
varcov <- 0.5*(varcov+t(varcov))
if (min(diag(varcov))<=0) {diag(varcov) <- diag(varcov)*(diag(varcov)>=0)+10^(-9)}
moy <- mu[-k,1]+Sig21 %*% Sig11Inv%*%(x1-mu1)
return (
dmnorm(x=x1, mu[k,1], varcov = Sig11)
*
pmnorm(x=t(matrix(x2)), mean = t(moy), varcov = varcov,maxpts=(length(moy)*200))
)
}
# gradient of CDF
GPhi <- function(x,mu,Sigma) {
q <- length(x[,1])
res <- matrix(NaN,q,1)
for (i in 1:q) {
res[i,1] <- decompo(x,mu,Sigma,i)
}
return( res )
}
# hessian matrix of CDF
HPhi <- function(x,mu,Sigma,gradient=NULL) {
q <- length(x[,1])
if (q == 1) {
res <- -(x-mu)/Sigma*dnorm(x,mu,sqrt(Sigma))
}
else {
res <- matrix(0,q,q)
for (i in 1:(q-1)) {
for (j in (i+1):q) {
res[i,j] <- decompo(x,mu,Sigma,c(i,j))
}
}
res <- res + t(res) # hessian matrix is symetric
# diagonal terms can be computed with the gradient of CDF and the other hessian terms
if(is.null(gradient)) {
res <- res - diag(((c(x)-c(mu))*c(GPhi(x,mu,Sigma)) + diag(Sigma%*%res))/diag(Sigma))
} else {
res <- res - diag(((c(x)-c(mu))*c(gradient) + diag(Sigma%*%res))/diag(Sigma))
}
}
return(res)
}
krigingDeriv <- function(x, model, type="UK", envir=NULL){
# Compute from a km object the derivatives of kriging mean and covariance in respect
# of a batch of points.
########################################################################################
q <- length(x[,1])
########################################################################################
# Convert x in proper format(s)
d <- length(x[1,])
if (d != model@d){ stop("x does not have the right size") }
newdata.num <- matrix(as.numeric(x),ncol=d)
newdata <- data.frame((newdata.num))
colnames(newdata) = colnames(model@X)
########################################################################################
# Get quantities related to the model
T <- model@T
X <- model@X
z <- model@z
u <- model@M
covStruct <- model@covariance
F.newdata <- model.matrix(model@trend.formula, data=newdata)
# Get quantities related to the prediction
if (is.null(envir))
{
predx <- predict(object=model, newdata=newdata, type=type,
checkNames = FALSE,se.compute=FALSE,cov.compute=TRUE)
kriging.mean <- predx$mean
kriging.cov <- predx$cov
v <- predx$Tinv.c
c <- predx$c
} else
{ # If uploaded through "envir", no prediction computation is necessary
toget <- matrix(c("kriging.mean","kriging.cov","Tinv.c","c"), 4, 1)
apply(toget, 1, get, envir=envir)
kriging.mean <- envir$kriging.mean
kriging.cov <- envir$kriging.cov
v <- envir$Tinv.c
c <- envir$c
}
########################################################################################
# Pursue calculation only if standard deviation is non-zero
if ( prod(diag(kriging.cov)/(model@covariance@sd2) < 1e-08) )
{ return (NULL)
} else
{
kriging.mean.jacob <- array(0,dim=c(q,d,q)) # three dimensions : d(m_k)/d(x_ij)
kriging.cov.jacob <- array(0,dim=c(q,d,q,q)) # four dimensions : d(Sigma_kl)/d(x_ij)
tuuinv <- solve(t(u)%*%u)
covM <- covMatrix(covStruct, as.matrix(newdata))[[1]]
for(k in 1:q) {
# Compute derivatives of the covariance and trend functions
dc <- covVector.dx(x=newdata.num[k,], X=X, object=covStruct, c=c[,k])
f.deltax <- trend.deltax(x=newdata.num[k,], model=model)
# Compute gradients of the kriging mean at point k
W <- backsolve(t(T), dc, upper.tri=FALSE)
kriging.mean.jacob[k,,k] <- t(z)%*%W + model@trend.coef%*%f.deltax
# Compute gradients of the kriging covariance between point k and point l
for (l in 1:q) {
# Compute gradients of the kernel between point k and point l
ker.grad <- covVector.dx(x=newdata.num[k,], X=newdata[l,],
object=covStruct, c=covM[k,l])
kriging.cov.jacob[k,,k,l] <- ker.grad - t(v[,l])%*%W
if (type=="UK") {
kriging.cov.jacob[k,,k,l] <- kriging.cov.jacob[k,,k,l] + t((t(f.deltax) - t(W)%*%u)%*%tuuinv%*%t(F.newdata[l,] - t(v[,l])%*%u))
}
}
}
}
for(l in 1:q) {
for(j in 1:d) {
kriging.cov.jacob[l,j,,] <- kriging.cov.jacob[l,j,,] + t(kriging.cov.jacob[l,j,,])
}
}
########################################################################################
return(list(kriging.mean,kriging.mean.jacob,kriging.cov,kriging.cov.jacob))
}
##' Computes an exact or approximate gradient of the multipoint expected
##' improvement criterion
##'
##' @title Gradient of the multipoint expected improvement (qEI) criterion
##' @param x a matrix representing the set of input points (one row corresponds
##' to one point) where to evaluate the gradient,
##' @param model an object of class \code{\link[DiceKriging]{km}},
##' @param plugin optional scalar: if provided, it replaces the minimum of the
##' current observations,
##' @param type "SK" or "UK" (by default), depending whether uncertainty
##' related to trend estimation has to be taken into account,
##' @param minimization logical specifying if EI is used in minimiziation or in
##' maximization,
##' @param fastCompute if TRUE, a fast approximation method based on a
##' semi-analytic formula is used (see [Marmin 2014] for details),
##' @param eps the value of \emph{epsilon} of the fast computation trick.
##' Relevant only if \code{fastComputation} is TRUE,
##' @param envir an optional environment specifying where to get intermediate
##' values calculated in \code{\link{qEI}}.
##' @return The gradient of the multipoint expected improvement criterion with
##' respect to x. A 0-matrix is returned if the batch of input points contains
##' twice the same point or a point from the design experiment of the km object
##' (the gradient does not exist in these cases).
##' @author Sebastien Marmin
##'
##' Clement Chevalier
##'
##' David Ginsbourger
##' @seealso \code{\link{qEI}}
##' @references
##'
##' C. Chevalier and D. Ginsbourger (2014) Learning and Intelligent
##' Optimization - 7th International Conference, Lion 7, Catania, Italy,
##' January 7-11, 2013, Revised Selected Papers, chapter Fast computation of
##' the multipoint Expected Improvement with applications in batch selection,
##' pages 59-69, Springer.
##'
##' D. Ginsbourger, R. Le Riche, L. Carraro (2007), A Multipoint Criterion for
##' Deterministic Parallel Global Optimization based on Kriging. The
##' International Conference on Non Convex Programming, 2007.
##'
##' D. Ginsbourger, R. Le Riche, and L. Carraro. Kriging is well-suited to
##' parallelize optimization (2010), In Lim Meng Hiot, Yew Soon Ong, Yoel
##' Tenne, and Chi-Keong Goh, editors, \emph{Computational Intelligence in
##' Expensive Optimization Problems}, Adaptation Learning and Optimization,
##' pages 131-162. Springer Berlin Heidelberg.
##'
##' S. Marmin. Developpements pour l'evaluation et la maximisation du critere
##' d'amelioration esperee multipoint en optimisation globale (2014). Master's
##' thesis, Mines Saint-Etienne (France) and University of Bern (Switzerland).
##'
##' J. Mockus (1988), \emph{Bayesian Approach to Global Optimization}. Kluwer
##' academic publishers.
##'
##' M. Schonlau (1997), \emph{Computer experiments and global optimization},
##' Ph.D. thesis, University of Waterloo.
##' @keywords models optimize
##' @examples
##'
##' set.seed(15)
##' # Example 1 - validation by comparison to finite difference approximations
##'
##' # a 9-points factorial design, and the corresponding response
##' d <- 2
##' n <- 9
##' design <- expand.grid(seq(0,1,length=3), seq(0,1,length=3))
##' names(design)<-c("x1", "x2")
##' design <- data.frame(design)
##' names(design)<-c("x1", "x2")
##' y <- apply(design, 1, branin)
##'y <- data.frame(y)
##' names(y) <- "y"
##'
##' # learning
##' model <- km(~1, design=design, response=y)
##'
##' # pick up 2 points sampled from the simple expected improvement
##' q <- 2 # increase to 4 for a more meaningful test
##' X <- sampleFromEI(model,n=q)
##'
##' # compute the gradient at the 4-point batch
##' grad.analytic <- qEI.grad(X,model)
##' # numerically compute the gradient
##' grad.numeric <- matrix(NaN,q,d)
##' eps <- 10^(-6)
##' EPS <- matrix(0,q,d)
##' for (i in 1:q) {
##' for (j in 1:d) {
##' EPS[i,j] <- eps
##' grad.numeric[i,j] <- 1/eps*(qEI(X+EPS,model,fastCompute=FALSE)-qEI(X,model,fastCompute=FALSE))
##' EPS[i,j] <- 0
##' }
##' }
##' print(grad.numeric)
##' print(grad.analytic)
##'
##' \dontrun{
##' # graphics: displays the EI criterion, the design points in black,
##' # the batch points in red and the gradient in blue.
##' nGrid <- 15
##' gridAxe1 <- seq(lower[1],upper[1],length=nGrid)
##' gridAxe2 <- seq(lower[2],upper[2],length=nGrid)
##' grid <- expand.grid(gridAxe1,gridAxe2)
##' aa <- apply(grid,1,EI,model=model)
##' myMat <- matrix(aa,nrow=nGrid)
##' image(x = gridAxe1, y = gridAxe2, z = myMat,
##' col = colorRampPalette(c("darkgray","white"))(5*10),
##' ylab = names(design)[1], xlab=names(design)[2],
##' main = "qEI-gradient of a batch of 4 points", axes = TRUE,
##' zlim = c(min(myMat), max(myMat)))
##' contour(x = gridAxe1, y = gridAxe2, z = myMat,
##' add = TRUE, nlevels = 10)
##' points(X[,1],X[,2],pch=19,col='red')
##' points(model@@X[,1],model@@X[,2],pch=19)
##' arrows(X[,1],X[,2],X[,1]+0.012*grad.analytic[,1],X[,2]+0.012*grad.analytic[,2],col='blue')
##' }
##'
##' @export qEI.grad
qEI.grad <- function(x, model, plugin=NULL, type="UK", minimization = TRUE, fastCompute = TRUE, eps = 10^(-6), envir=NULL) {
#compteur_global <<- compteur_global + 1
if (!minimization) {
if (is.null(plugin)) {
plugin <- -max(model@y)
}
}
# Compute the multipoint expected improvement from a batch of point and a km object.
#### Arguments:
# x: position of multEI evaluation. One row correspond to one point.
# model: a km object.
# plugin: the threshold used to compute the EI
# type: do we perform a universal krigiging ("UK") ?
d <- model@d
if (!is.matrix(x)) {
x <- matrix(x,ncol = d)
}
q <- nrow(x)
xb <- rbind(model@X,x)
ux <- unique(round(xb,digits=8))
if(length(xb[,1])!=length(ux[,1])) {return (matrix(0,q,d))}
if (is.null(plugin)) plugin <- min(model@y)
if (q == 1) {
if (!minimization) {
stop("qEI.grad doesn't work in \'minimization = FALSE\' when dim = 1 (in progress).")
}
return(EI.grad(x,model,plugin,type,envir))
}
if(!is.null(envir)) {
if (fastCompute == TRUE) {
toget <- matrix(c("pk"), 1, 1)
apply(toget, 1, get, envir=envir)
pk <- envir$pk
} else {
toget <- matrix(c("pk","symetric_term"), 2, 1)
apply(toget, 1, get, envir=envir)
pk <- envir$pk
symetric_term <- envir$symetric_term
}
}
# Get kriging (conditionnal) mean and covariance and their derivatives
kriging.dx <- krigingDeriv(x=x, model=model, type=type, envir=envir)
if (is.null(kriging.dx)) {return (matrix(0,q,d))}
kriging.mean <- kriging.dx[[1]] # 1 dimension
kriging.mean.jacob <- kriging.dx[[2]] # 3 dimensions q*d*q, kriging.mean.jacob[l,j,k] = d(m_k)/d(x_lj)
if (!minimization) {kriging.mean <- -kriging.mean; kriging.mean.jacob <- -kriging.mean.jacob}
kriging.cov <- kriging.dx[[3]] # 2 dimensions q*q
kriging.cov.jacob <- kriging.dx[[4]] # 4 dimensions q*d*q*q : kriging.cov.jacob[l,j,k,i]= d(Sigma_ki)/d(x_lj)
# Initialisation
EI.grad <- matrix(0,q,d) # result
b <- matrix(rep(0,q),q,1)
L <- -diag(rep(1,q))
Dpk <- matrix(0,q,d)
if (fastCompute == TRUE) {
termB <- matrix(0,q,d)
}
Sigk_dx <- array(0,dim=c(q,d,q,q))
mk_dx <- array(0,dim=c(q,d,q))
# First sum of the formula
for (k in 1:q) {
bk <- b
bk[k,1] <- plugin # creation of vector b^(k)
Lk <- L
Lk[,k] <- rep(1,q) # linear application to transform Y to Zk
tLk <- t(Lk)
mk <- Lk %*% kriging.mean # mean of Zk (written m^(k) in the formula)
Sigk <- Lk %*% kriging.cov %*% tLk # covariance of Zk
Sigk <- 0.5*(Sigk+t(Sigk)) # numerical symetrization
# term A1
if (is.null(envir)) {
EI.grad[k,] <- EI.grad[k,] - kriging.mean.jacob[k,,k]*pmnorm(x=t(bk), mean = t(mk), varcov = Sigk,maxpts=q*200)
} else {
EI.grad[k,] <- EI.grad[k,] - kriging.mean.jacob[k,,k]*pk[k]
}
# compute gradient ans hessian matrix of the CDF term pk.
gradpk <- GPhi(bk-mk,b,Sigk)
hesspk <- HPhi(x = bk, mu = mk, Sigma = Sigk, gradient = gradpk)
# term A2
for (l in 1:q) {
for (j in 1:d) {
Sigk_dx[l,j,,] <- Lk%*%kriging.cov.jacob[l,j,,]%*%tLk
mk_dx[l,j,] <- Lk%*%kriging.mean.jacob[l,j,]
Dpk[l,j] <- 0.5*sum(hesspk*Sigk_dx[l,j,,]) - crossprod(gradpk,mk_dx[l,j,])
}
}
EI.grad <- EI.grad + (plugin-kriging.mean[k])*Dpk
# term B
if (fastCompute == TRUE) {
gradpk1<- GPhi(bk-mk+Sigk[,k]*eps,b,Sigk)
hesspk1<- HPhi(x = bk+Sigk[,k]*eps, mu = mk, Sigma = Sigk, gradient = gradpk1)
for (l in 1:q) {
for (j in 1:d) {
f1 <- -crossprod(mk_dx[l,j,],gradpk1) + eps*crossprod(Sigk_dx[l,j,,k],gradpk1) + 0.5*sum(Sigk_dx[l,j,,]*hesspk1)
f <- -crossprod(mk_dx[l,j,],gradpk ) + 0.5*sum(Sigk_dx[l,j,,]*hesspk)
termB[l,j] <- 1/eps*(f1-f)
}
}
} else {
B1 <- B2 <- B3 <- matrix(0,q,d)
for (i in 1:q) {
Sigk_ik <- Sigk[i,k]
Sigk_ii <- Sigk[i,i]
mk_i <- mk[i]
mk_dx_i <- mk_dx[,,i]
bk_i <- bk[i,1]
ck_pi <- bk[-i,1]-mk[-i,]-(bk[i,1]-mk[i,1])/Sigk_ii*Sigk[-i,i]
Sigk_pi <- 0.5*(Sigk[-i,-i] - 1/Sigk_ii*Sigk[-i,i]%*%t(Sigk[-i,i])+t(Sigk[-i,-i] - 1/Sigk_ii*Sigk[-i,i]%*%t(Sigk[-i,i])))
Sigk_dx_ii <- Sigk_dx[,,i,i]
Sigk_dx_ik <- Sigk_dx[,,i,k]
phi_ik <- dnorm(bk[i,1],mk_i,sqrt(Sigk_ii))
dphi_ik_dSig <- ((bk_i-mk_i)^2/(2*Sigk_ii^2)-0.5*1/Sigk_ii)*phi_ik
dphi_ik_dm <- (bk_i-mk_i)/Sigk_ii*phi_ik
if (is.null(envir)) {
Phi_ik <- pmnorm(x=ck_pi, mean = rep(0,q-1), varcov = Sigk_pi,maxpts=(q-1)*200)
} else {
Phi_ik <- symetric_term[k,i]/phi_ik
}
GPhi_ik <- GPhi(matrix(ck_pi,q-1,1),matrix(0,q-1,1),Sigk_pi)
HPhi_ik <- HPhi(matrix(ck_pi,q-1,1),matrix(0,q-1,1),Sigk_pi,GPhi_ik)
Sigk_mi <- Sigk[-i,i]
for (l in 1:q) {
for (j in 1:d) {
# B1
B1[l,j] <- B1[l,j] + Sigk_dx_ik[l,j]*phi_ik*Phi_ik
# B2
B2[l,j] <- B2[l,j] + Sigk_ik*(mk_dx_i[l,j]*dphi_ik_dm + dphi_ik_dSig*Sigk_dx_ii[l,j])*Phi_ik
# B3
dck_pi <- -mk_dx[l,j,-i]+(mk_dx_i[l,j]*Sigk_ii+(bk_i-mk_i)*Sigk_dx_ii[l,j])/Sigk_ii^2*Sigk_mi-(bk[i,1]-mk_i)/Sigk_ii*Sigk_dx[l,j,-i,i]
SigtCross <- tcrossprod(Sigk_dx[l,j,-i,i],Sigk_mi)
dSigk_pi <- Sigk_dx[l,j,-i,-i]+Sigk_dx_ii[l,j]/Sigk_ii^2*tcrossprod(Sigk_mi,Sigk_mi)-Sigk_ii^-1*(SigtCross+t(SigtCross))
B3[l,j] <- B3[l,j] + Sigk_ik*phi_ik*(crossprod(GPhi_ik,dck_pi)+0.5*sum(HPhi_ik*dSigk_pi))
}
}
}
}
if (fastCompute == TRUE) {
EI.grad <- EI.grad + termB
} else {
EI.grad <- EI.grad + B1 + B2 + B3
}
}
if(is.nan(sum(EI.grad))) {return(matrix(0,q,d))}
return (EI.grad)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.