R/testFunctionGeneration.R
In CEGO: Combinatorial Efficient Global Optimization

Documented in createSimulatedTestFunction testFunctionGeneratorSim

###################################################################################
#' Simulation-based Test Function Generator, Object Interface
#' 
#' Generate test functions for assessment of optimization algorithms with
#' non-conditional or conditional simulation, based on real-world data.
#' For a more streamlined interface, see \code{\link{testFunctionGeneratorSim}}.
#'
#' @param xsim list of samples in input space, for simulation
#' @param fit an object generated by \code{\link{modelKriging}}
#' @param nsim the number of simulations, or test functions, to be created
#' @param conditionalSimulation whether (TRUE) or not (FALSE) to use conditional simulation
#' @param seed a random number generator seed. Defaults to NA; which means no seed is set. For sake of reproducibility, set this to some integer value.\cr
#'
#' @return a list of functions, where each function is the interpolation of one simulation realization. The length of the list depends on the nsim parameter.
#' 
#' @seealso \code{\link{modelKriging}}, \code{\link{simulate.modelKriging}}, \code{\link{testFunctionGeneratorSim}}
#' 
#' @references N. A. Cressie. Statistics for Spatial Data. JOHN WILEY & SONS INC, 1993.
#' @references C. Lantuejoul. Geostatistical Simulation - Models and Algorithms. Springer-Verlag Berlin Heidelberg, 2002.
#' @references Zaefferer, M.; Fischbach, A.; Naujoks, B. & Bartz-Beielstein, T. Simulation Based Test Functions for Optimization Algorithms Proceedings of the Genetic and Evolutionary Computation Conference 2017, ACM, 2017, 8.
#'
#' @examples
#' nsim <- 10
#' seed <- 12345
#' n <- 6
#' set.seed(seed)
#' #target function:
#' fun <- function(x){
#'   exp(-20* x) + sin(6*x^2) + x
#' }
#' # "vectorize" target
#' f <- function(x){sapply(x,fun)}
#' # distance function
#' dF <- function(x,y)(sum((x-y)^2)) #sum of squares 
#' #start pdf creation
#' # plot params
#' par(mfrow=c(4,1),mar=c(2.3,2.5,0.2,0.2),mgp=c(1.4,0.5,0))
#' #test samples for plots
#' xtest <- as.list(seq(from=-0,by=0.005,to=1))
#' plot(xtest,f(xtest),type="l",xlab="x",ylab="Obj. function")
#' #evaluation samples (training)
#' xb <- as.list(runif(n)) 
#' yb <- f(xb)
#' # support samples for simulation
#' x <- as.list(sort(c(runif(100),unlist(xb))))
#' # fit the model	
#' fit <- modelKriging(xb,yb,dF,control=list(
#'    algThetaControl=list(method="NLOPT_GN_DIRECT_L",funEvals=100),useLambda=FALSE))
#' fit
#' #predicted obj. function values
#' ypred <- predict(fit,as.list(xtest))$y
#' plot(unlist(xtest),ypred,type="l",xlab="x",ylab="Estimation")
#' points(unlist(xb),yb,pch=19)
#' ##############################	
#' # create test function non conditional
#' ##############################
#' fun <- createSimulatedTestFunction(x,fit,nsim,FALSE,seed=1)
#' ynew <- NULL
#' for(i in 1:nsim)
#'   ynew <- cbind(ynew,fun[[i]](xtest))
#' rangeY <- range(ynew)
#' plot(unlist(xtest),ynew[,1],type="l",ylim=rangeY,xlab="x",ylab="Simulation")
#' for(i in 2:nsim){
#'   lines(unlist(xtest),ynew[,i],col=i,type="l")
#' }
#' ##############################	
#' # create test function conditional
#' ##############################
#' fun <- createSimulatedTestFunction(x,fit,nsim,TRUE,seed=1)
#' ynew <- NULL
#' for(i in 1:nsim)
#'   ynew <- cbind(ynew,fun[[i]](xtest))
#' rangeY <- range(ynew)
#' plot(unlist(xtest),ynew[,1],type="l",ylim=rangeY,xlab="x",ylab="Conditional sim.")
#' for(i in 2:nsim){
#'   lines(unlist(xtest),ynew[,i],col=i,type="l")
#' }
#' points(unlist(xb),yb,pch=19)
#' dev.off()
#' 
#' @export
###################################################################################
createSimulatedTestFunction <- function(xsim, fit, nsim=10, conditionalSimulation=TRUE,seed=NA){
	if(!is.list(xsim))xsim<-list(xsim)
  simfit <- simulate(fit,nsim,seed,xsim,conditionalSimulation,TRUE)
  ynew <- simfit$y
  
  fit$Psi <- simfit$psi
  fit$origPsi <- simfit$origPsi
  fit$origD <- simfit$origD
  fit$D <- simfit$D
  fit$A <- simfit$A
  fit$U <- simfit$U
  fit$a <- simfit$a
  fit$isIndefinite <- simfit$isIndefinite
  fit$isCNSD <- simfit$isCNSD
  fit$maximumDistance <- simfit$maximumDistance
  fit$Psinv <- MASS::ginv(fit$Psi) #should already be fixed in the simulation function, hence ginv should be fine
  fit$x <- xsim
	
	PsinvSaved  <- fit$Psinv
	
	n <- length(xsim)
	
	##precompute transformations
	if(fit$indefiniteType=="PSD" & !fit$indefiniteRepair & fit$isIndefinite & any(fit$indefiniteMethod==c("clip","flip","square","diffusion"))){ #RETRANSFORMATION OF THE SOLUTION ONLY  
    fit$Psinv <- t(fit$A)%*%fit$Psinv #retransform the result	for prediction
		fit$PsinvA <- fit$Psinv %*% fit$A #retransform the result	(for variance estimation only)
	}
	##
	  
  fun <- list()
  for(i in 1:nsim){
    fit$y <- ynew[,i,drop=FALSE]
    fit$yMu <- ynew[,i,drop=FALSE] - fit$mu 
		##
		if(fit$useLambda){ 
			PsiB <- fit$Psi-diag(fit$lambda,n)+diag(.Machine$double.eps,n) 
			fit$SSQReint <- as.numeric((t(fit$yMu)%*%PsinvSaved%*%PsiB%*%PsinvSaved%*%fit$yMu)/n) #PsinvSaved is used intentionally, needs to be untransformed Psinv
			fit$PsinvReint <- try(chol2inv(chol(PsiB)), TRUE) 
			if(class(fit$PsinvReint)[1] == "try-error"){
				fit$PsinvReint <- MASS::ginv(PsiB) 
			}	
			#now apply same transformations as for non-reinterpolating matrices
			if(fit$indefiniteType=="PSD" & fit$isIndefinite  & !fit$indefiniteRepair & any(fit$indefiniteMethod==c("clip","flip","square","diffusion"))){ #RETRANSFORMATION OF THE SOLUTION ONLY  
				fit$PsinvReint <- t(fit$A)%*%fit$PsinvReint %*% fit$A #retransform
			} 
		}
		##
		
    ##create the test function
    #f <- function(x){		
    #	predict(fit,x)$y
    #}
		testFun <- NULL
    assign("testFun", eval(substitute(
      function(x){
        predict(fit,x)$y
      }, 
      list(fit=fit)
    )
    ),
    envir=environment())	
    
    fun[[i]] <- testFun
  }	
  return(fun)
}


###################################################################################
#' Simulation-based Test Function Generator, Data Interface
#' 
#' Generate test functions for assessment of optimization algorithms with
#' non-conditional or conditional simulation, based on real-world data.
# todo extend
#'
#' @param x list of samples in input space, training data
#' @param y column vector of observations for each sample, training data
#' @param xsim list of samples in input space, for simulation
#' @param distanceFunction a suitable distance function of type f(x1,x2), returning a scalar distance value, preferably between 0 and 1.
#'    Maximum distances larger 1 are no problem, but may yield scaling bias when different measures are compared.
#' 		Should be non-negative and symmetric.  It can also be a list of several distance functions. In this case, Maximum Likelihood Estimation (MLE) is used 
#'		to determine the most suited distance measure.
#'		The distance function may have additional parameters. For that case, see distanceParametersLower/Upper in the controls.
#'    If distanceFunction is missing, it can also be provided in the control list.
#' @param controlModel (list), with the options for the model building procedure,
#' it will be passed to the \code{\link{modelKriging}} function. 
#' @param controlSimulation (list), with the parameters of the simulation:
#' \describe{
#' \item{\code{nsim}}{ the number of simulations, or test functions, to be created.}
#' \item{\code{conditionalSimulation}}{ whether (TRUE) or not (FALSE) to use conditional simulation.}
#' \item{\code{simulationSeed}}{ a random number generator seed. Defaults to NA; which means no seed is set. For sake of reproducibility, set this to some integer value.}
#' }
#'
#' @return a list with the following elements: \code{fun} is a list of functions, where each function is the interpolation of one simulation realization. The length of the list depends on the nsim parameter.
#' \code{fit} is the result of the modeling procedure, that is, the model fit of class \code{modelKriging}.
#' 
#' @seealso \code{\link{modelKriging}}, \code{\link{simulate.modelKriging}}, \code{\link{createSimulatedTestFunction}}, 
#' 
#' @references N. A. Cressie. Statistics for Spatial Data. JOHN WILEY & SONS INC, 1993.
#' @references C. Lantuejoul. Geostatistical Simulation - Models and Algorithms. Springer-Verlag Berlin Heidelberg, 2002.
#' @references Zaefferer, M.; Fischbach, A.; Naujoks, B. & Bartz-Beielstein, T. Simulation Based Test Functions for Optimization Algorithms Proceedings of the Genetic and Evolutionary Computation Conference 2017, ACM, 2017, 8.
#'
#' @examples
#' nsim <- 10
#' seed <- 12345
#' n <- 6
#' set.seed(seed)
#' #target function:
#' fun <- function(x){
#'   exp(-20* x) + sin(6*x^2) + x
#' }
#' # "vectorize" target
#' f <- function(x){sapply(x,fun)}
# distance function
#' dF <- function(x,y)(sum((x-y)^2)) #sum of squares 
#' # plot params
#' par(mfrow=c(4,1),mar=c(2.3,2.5,0.2,0.2),mgp=c(1.4,0.5,0))
#' #test samples for plots
#' xtest <- as.list(seq(from=-0,by=0.005,to=1))
#' plot(xtest,f(xtest),type="l",xlab="x",ylab="Obj. function")
#' #evaluation samples (training)
#' xb <- as.list(runif(n))
#' yb <- f(xb)
#' # support samples for simulation
#' x <- as.list(sort(c(runif(100),unlist(xb))))
#' # fit the model	and simulate: 
#' res <- testFunctionGeneratorSim(xb,yb,x,dF,
#'    list(algThetaControl=list(method="NLOPT_GN_DIRECT_L",funEvals=100),
#'      useLambda=FALSE),
#'    list(nsim=nsim,conditionalSimulation=FALSE))
#' fit <- res$fit
#' fun <- res$fun
#' #predicted obj. function values
#' ypred <- predict(fit,as.list(xtest))$y
#' plot(unlist(xtest),ypred,type="l",xlab="x",ylab="Estimation")
#' points(unlist(xb),yb,pch=19)
#' ##############################	
#' # plot non-conditional simulation
#' ##############################
#' ynew <- NULL
#' for(i in 1:nsim)
#'   ynew <- cbind(ynew,fun[[i]](xtest))
#' rangeY <- range(ynew)
#' plot(unlist(xtest),ynew[,1],type="l",ylim=rangeY,xlab="x",ylab="Simulation")
#' for(i in 2:nsim){
#'   lines(unlist(xtest),ynew[,i],col=i,type="l")
#' }
#' ##############################	
#' # create and plot test function, conditional
#' ##############################
#' fun <- testFunctionGeneratorSim(xb,yb,x,dF,
#'    list(algThetaControl=
#'      list(method="NLOPT_GN_DIRECT_L",funEvals=100),
#' 			useLambda=FALSE),
#'    list(nsim=nsim,conditionalSimulation=TRUE))$fun
#' ynew <- NULL
#' for(i in 1:nsim)
#'   ynew <- cbind(ynew,fun[[i]](xtest))
#' rangeY <- range(ynew)
#' plot(unlist(xtest),ynew[,1],type="l",ylim=rangeY,xlab="x",ylab="Conditional sim.")
#' for(i in 2:nsim){
#'   lines(unlist(xtest),ynew[,i],col=i,type="l")
#' }
#' points(unlist(xb),yb,pch=19)
#'
#' @export
###################################################################################
testFunctionGeneratorSim <- function(x,y,xsim,distanceFunction,controlModel=list(),controlSimulation=list()){
	con<-list(nsim=1,conditionalSimulation=FALSE,simulationSeed=NA)
	con[names(controlSimulation)] <- controlSimulation
 	controlSimulation<-con

	fit <- modelKriging(x,y,distanceFunction,control=controlModel)
	testFuns <- createSimulatedTestFunction(xsim=xsim,fit=fit,
			nsim=controlSimulation$nsim,
			conditionalSimulation=controlSimulation$conditionalSimulation,
			seed=controlSimulation$simulationSeed)
	return(list(fit=fit,fun=testFuns))
}