Nothing
## Optimizes the deviance using multiple optim starts
##
## May 8th, 2012
#' Gaussian Process Model fitting
#'
#' For an (\emph{n} x \emph{d}) design matrix, \code{X},
#' and the corresponding (\emph{n} x 1) simulator output \code{Y},
#' this function fits the GP model and returns the parameter estimates.
#' The optimization routine assumes that
#' the inputs are scaled to the unit hypercube \eqn{[0,1]^d}.
#'
#' This function fits the following GP model,
#' \eqn{y(x) = \mu + Z(x)}{y(x) = \mu + Z(x)},
#' \eqn{x \in [0,1]^{d}}{x in [0,1]^d}, where \eqn{Z(x)}{Z(x)} is
#' a GP with mean 0, \eqn{Var(Z(x_i)) = \sigma^2}{Var(Z(x_i)) = \sigma^2}, and
#' \eqn{Cov(Z(x_i),Z(x_j)) = \sigma^2R_{ij}}{Cov(Z(x_i),Z(x_j)) =
#' \sigma^2*R_{ij}}. Entries in covariance matrix R are determined by
#' \code{corr} and parameterized by \code{beta}, a \code{d}-vector of
#' parameters. For computational stability \eqn{R^{-1}}{R^-1} is replaced with
#' \eqn{R_{\delta_{lb}}^{-1}}, where \eqn{R_{\delta{lb}} = R + \delta_{lb}I}
#' and \eqn{\delta_{lb}} is the nugget parameter described in Ranjan et al.
#' (2011).
#'
#' The parameter estimate \code{beta} is obtained by minimizing
#' the deviance using a multi-start gradient based search (L-BFGS-B)
#' algorithm. The starting points are selected using the k-means
#' clustering algorithm on a large maximin LHD for values of
#' \code{beta}, after discarding \code{beta} vectors
#' with high deviance. The \code{control} parameter determines the
#' quality of the starting points of the L-BFGS-B algoritm.
#'
#' \code{control} is a vector of three tunable parameters used
#' in the deviance optimization algorithm. The default values
#' correspond to choosing 2*d clusters (using k-means clustering
#' algorithm) based on 80*d best points (smallest deviance,
#' refer to \code{\link{GP_deviance}}) from a 200*d - point
#' random maximin LHD in \code{beta}. One can change these values
#' to balance the trade-off between computational cost and robustness
#' of likelihood optimization (or prediction accuracy).
#' For details see MacDonald et al. (2015).
#'
#' The \code{nug_thres} parameter is outlined in Ranjan et al. (2011) and is
#' used in finding the lower bound on the nugget
#' (\eqn{\delta_{lb}}{\delta_{lb}}).
#'
#' @param X the (\code{n x d}) design matrix
#' @param Y the (\code{n x 1}) vector of simulator outputs.
#' @param control a vector of parameters used in the search for optimal beta
#' (search grid size, percent, number of clusters). See `Details'.
#' @param nug_thres a parameter used in computing the nugget. See `Details'.
#' @param trace logical, if \code{TRUE}, will provide information on the
#' \code{\link{optim}} runs
#' @param maxit the maximum number of iterations within \code{\link{optim}},
#' defaults to 100
#' @param corr a list of parameters for the specifing the correlation to be
#' used. See \code{\link{corr_matrix}}.
#' @param optim_start a matrix of potentially likely starting values for
#' correlation hyperparameters for the \code{\link{optim}} runs, i.e., initial
#' guess of the d-vector \code{beta}
#' @return an object of class \code{GP} containing parameter estimates
#' \code{beta} and \code{sig2}, nugget parameter \code{delta}, the data
#' (\code{X} and \code{Y}), and a specification of the correlation structure
#' used.
#' @author Blake MacDonald, Hugh Chipman, Pritam Ranjan
#' @seealso \code{\link{plot}} for plotting in 1 and 2 dimensions; \cr
#' \code{\link{predict}} for predicting the response and error surfaces; \cr
#' \code{\link{optim}} for information on the L-BFGS-B procedure; \cr
#' \code{\link{GP_deviance}} for computing the deviance.
#' @references MacDonald, K.B., Ranjan, P. and Chipman, H. (2015). GPfit: An R
#' Package for Fitting a Gaussian Process Model to Deterministic Simulator
#' Outputs. Journal of Statistical Software, 64(12), 1-23.
#' \url{http://www.jstatsoft.org/v64/i12/} \cr
#'
#' Ranjan, P., Haynes, R., and Karsten, R. (2011). A Computationally Stable
#' Approach to Gaussian Process Interpolation of Deterministic Computer
#' Simulation Data, Technometrics, 53(4), 366 - 378.
#' @keywords Gaussian Process
#' @examples
#'
#' ## 1D Example 1
#' n = 5
#' d = 1
#' computer_simulator <- function(x){
#' x = 2 * x + 0.5
#' y = sin(10 * pi * x) / (2 * x) + (x - 1)^4
#' return(y)
#' }
#' set.seed(3)
#' library(lhs)
#' x = maximinLHS(n, d)
#' y = computer_simulator(x)
#' GPmodel = GP_fit(x, y)
#' print(GPmodel)
#'
#'
#' ## 1D Example 2
#' n = 7
#' d = 1
#' computer_simulator <- function(x) {
#' y <- log(x + 0.1) + sin(5 * pi * x)
#' return(y)
#' }
#' set.seed(1)
#' library(lhs)
#' x = maximinLHS(n, d)
#' y = computer_simulator(x)
#' GPmodel = GP_fit(x, y)
#' print(GPmodel, digits = 4)
#'
#'
#' ## 2D Example: GoldPrice Function
#' computer_simulator <- function(x) {
#' x1 = 4 * x[, 1] - 2
#' x2 = 4 * x[, 2] - 2
#' t1 = 1 + (x1 + x2 + 1)^2 * (19 - 14 * x1 + 3 * x1^2 - 14 * x2 +
#' 6 * x1 *x2 + 3 * x2^2)
#' t2 = 30 + (2 * x1 - 3 * x2)^2 * (18 - 32 * x1 + 12 * x1^2 + 48 * x2 -
#' 36 * x1 * x2 + 27 * x2^2)
#' y = t1 * t2
#' return(y)
#' }
#' n = 30
#' d = 2
#' set.seed(1)
#' library(lhs)
#' x = maximinLHS(n, d)
#' y = computer_simulator(x)
#' GPmodel = GP_fit(x, y)
#' print(GPmodel)
#'
#' @export GP_fit
#' @importFrom lhs maximinLHS
#' @importFrom stats kmeans optim predict
GP_fit <- function(
X,
Y,
control = c(200 * d, 80 * d, 2 * d),
nug_thres = 20,
trace = FALSE,
maxit = 100,
corr = list(
type = "exponential",
power = 1.95),
optim_start = NULL) {
if (!is.matrix(X)) {
X = as.matrix(X)
}
## Checking to make sure that the X design is scaled between 0 and 1
if ((min(X) < 0) | (max(X) > 1) | ((max(X) - min(X)) <= 0.5)) {
warning("X should be in range (0, 1)")
}
n <- nrow(X)
d <- ncol(X)
## Checking the dimensions between the different inputs
if (n != length(Y)) {
stop("The dimensions of X and Y do not match. \n")
}
if (nug_thres < 10 || nug_thres > 25) {
warning("nug_thres is outside of the normal range (10, 25)")
}
## Need to make sure that the control values are relatively higher / lower and need
## to print a warning if they are not
if (length(control) != 3) {
stop("control is defined incorrectly. Wrong number of arguments. \n")
}
if (control[1] < control[2]) {
stop("control is defined incorrectly. Need control[1] >= control[2]. \n")
}
if (control[2] < control[3]) {
stop("control is defined incorrectly. Need control[2] >= control[3]. \n")
}
## Checking to see if the vigen starting values
## for the optim runs are in the correct format
if (!is.null(optim_start)) {
if (!is.matrix(optim_start)) {
if (length(optim_start)/d != floor(length(optim_start)/d)) {
stop("The dimension of optim_start does not match the dimension
of the problem \n")
}
optim_start = matrix(optim_start, byrow = TRUE, ncol = d)
} else if (ncol(optim_start) != d) {
stop("The dimension of optim_start does not match the dimension
of the problem \n")
}
}
param_search <- control[1]
param_percent <- control[2]
param_clust <- control[3]
###########################################################
## Using a grid of control[1] points as default, need to find the
## control[2] corresponding values with the lowest deviance
## starting out based on the defined range for beta
beta_range <- switch(corr$type,
"exponential" = c((corr$power - 4) - log10(d), log10(5) + corr$power - log10(d)),
"matern" = c((2) - log10(d), log10(5) + 2 - log10(d)),
stop("corr type must be 'exponential' or 'matern'"))
param_init_ps <- maximinLHS(n = param_search, k = d) *
(beta_range[2] - beta_range[1]) + beta_range[1]
param_lower <- rep(-10, d)
param_upper <- rep(10, d)
##-------------------------------------------------------##
## Need to evaluate the deviance for the control[1] points
deviance1 <- cbind(NA, param_init_ps)
deviance1[, 1L] <- apply(
X = param_init_ps,
MARGIN = 1L,
FUN = function(row) GP_deviance(
beta = row,
X = X,
Y = Y,
nug_thres = nug_thres,
corr = corr))
## Need to order the initial values based on their deviance
deviance2 <- deviance1[order(deviance1[, 1L, drop = TRUE]), , drop = FALSE]
##-------------------------------------------------------##
## Taking the control[2] smallest deviance values
deviance3 <- deviance2[seq_len(param_percent), , drop = FALSE]
##-------------------------------------------------------##
## Going to cluster these control[2] "best" observations into control[3]
## groups using K-means, over 5 iterations and the smallest WSS as
## the criterion
points_percen <- deviance3[, seq_len(d) + 1L, drop = FALSE]
## Taking the best of 5 different runs of K-means
km <- kmeans(
x = points_percen,
centers = param_clust,
nstart = 5L)
##-------------------------------------------------------------------##
## Need to set the control[3] best points in cluster as starting values
param_init <- matrix(0, nrow = param_clust, ncol = d)
for (ipar in seq_len(param_clust)) {
id <- which(km$cluster == ipar)
fid <- which.min(deviance3[id, 1L])
param_init[ipar, ] <- deviance3[id[fid], seq_len(d) + 1L]
}
#############################################################
## For the diagonal search: use 3 points along the diagonal,
## 1 near each of the ends and one near the middle of the range,
## this is only necessary above 1 dimension
if (d >= 2L) {
param_wrap = matrix(
c(0.2, 0.5, 0.8) *
(beta_range[2L] - beta_range[1L]) + beta_range[1L],
byrow = TRUE)
# Need to run optim() on the wrapped function the 3 times
# in order to find the lowest starting points of the 3 values
dev <- matrix(NA_real_, nrow = 3L, ncol = 3L)
for (ipw in seq_len(nrow(param_wrap))) {
temp <- optim(
par = param_wrap[ipw],
fn = dev_wrapper,
X = X,
Y = Y,
nug_thres = nug_thres,
corr = corr,
method = "L-BFGS-B",
lower = param_lower,
upper = param_upper,
control = list(maxit = maxit))
dev[ipw, 1:3] <- c(
temp$par,
temp$value,
param_wrap[ipw, 1L, drop = TRUE])
}
## Take the best of the 3 based on the likelihood value
dev <- dev[order(dev[, 2L]), ]
##---------------------------------------------------------##
## Combining the 2*d centers from the clusters with the
## single point from the diagonal search
param_init <- rbind(rep(dev[1L, 1L], d), param_init)
}
#############################################################
## We now have the control[3]+1 initializing points for the
## optim search, as well as any points that are specified by
## the user
param_init <- rbind(param_init, optim_start)
dev_val <- matrix(NA_real_, nrow = nrow(param_init), ncol = d + 1L)
for (ipi in seq_len(nrow(param_init))) {
temp <- optim(
par = param_init[ipi, , drop = TRUE],
fn = GP_deviance,
X = X,
Y = Y,
nug_thres = nug_thres,
corr = corr,
method = "L-BFGS-B",
lower = param_lower,
upper = param_upper,
control = list(maxit = maxit))
dev_val[ipi, ] <- c(temp$par, temp$value)
}
#############################################################
## Making a print statement of what the progress of the optimizer
## only if trace == TRUE
if (trace) {
optim_result = cbind(param_init,dev_val)
col_name = NULL
if (d==1){
row_name = NULL
} else {
row_name = c("Diagonal Search")
}
for (i in 1:d){
col_name = cbind(col_name, paste("Beta", as.character(i), "Start"))
}
for (i in 1:d){
col_name = cbind(col_name, paste("Beta", as.character(i), "Final"))
}
col_name = cbind(col_name, "Deviance Value")
for (i in seq_len(param_clust)) {
row_name = cbind(row_name, paste("Start", as.character(i)))
}
colnames(optim_result) = col_name
rownames(optim_result) = row_name
print(optim_result)
}
# value
dev_val <- dev_val[order(dev_val[, d + 1L]), ]
beta <- dev_val[1L, seq_len(d), drop = TRUE]
dim(Y) <- c(n, 1L)
One <- rep(1, n)
#### sig_invb ####
sigInvb <- sig_invb(X = X, Y = Y, beta = beta,
corr = corr, nug_thres = nug_thres)
#### carry on ####
sig2 <- t(Y - One %*% sigInvb$mu_hat) %*% sigInvb$Sig_invb / n
GP <- list(
X = X,
Y = Y,
sig2 = as.vector(sig2),
beta = beta,
delta = sigInvb$delta,
nugget_threshold_parameter = nug_thres,
correlation_param = corr)
class(GP) <- "GP"
return(GP)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.