#' @title Fit Mixed Inputs Kriging Model
#'
#' @description Given mixed continuous and categorical design points \code{x}
#' and a continuous output \code{y}, fit an extended Kriging model. There a several
#' methods available for handling the categorical inputs. The optimal parameter set
#' is estimated via Maximum Likelihood Estimation.
#'
#' @param x [\code{data.frame}]\cr
#' Mixed continuous and categorical input points.
#' @param y [\code{data.frame}]\cr
#' Continuous output variable.
#' @param cat.type [\code{character(1)}]\cr
#' Which method should be used for the categorical inputs?
#' Possible choices:\cr
#' \tabular{ll}{
#' \dQuote{EC}: \tab Exchangeable Correlation\cr
#' \dQuote{MC}: \tab Multiplicative Correlation\cr
#' \dQuote{UC}: \tab Hypersphere Decomposition-Based Unrestrictive Correlation\cr
#' \dQuote{TMC}: \tab Toeplitz Matrix Multiplication-Based Correlation
#' }
#' @param config [\code{CCConfig}]\cr
#' Configuration object. See \code{\link{makeCCConfig}}.
#' @param pars.init [\code{numeric}]\cr
#' Optional vector of initial paramters for the optimization. Defaults to \code{NULL},
#' which means the best of 100 randomly generated parameter settings is used as
#' the initial setting.
#' @export
cckm = function(x, y, config, pars.init = NULL) {
if (!config$cat.interaction && config$cat.type == "GMC")
stop("Currently method GMC works only when interactions are considered.")
cat.type = config$cat.type
cont.type = config$cont.type
nu = config$cont.par$nu
info = config$info
d = ncol(x)
n = nrow(x)
if (cat.type == "GMC")
k.GMC = nlevels(as.factor(config$cat.par$perm))
cat.inds = which(sapply(x, is.factor))
cont.inds = setdiff(1:d, cat.inds)
q = length(cont.inds)
m = length(cat.inds)
n.levels = sapply(x[, cat.inds, drop = FALSE], nlevels)
s = prod(n.levels)
if (m >= 2) {
## create a lookup table like
## v1 v2 coded
## a d 1
## b d 2
## a e 3
## b e 4
##
## add this data.frame to the model object
cat.lut = expand.grid(sapply(cat.inds, function(j) levels(x[, j])))
if (config$cat.interaction) {
## transform all categorical inputs into a single one with levels 1, ..., s
## find out the values of the coded categorical variable:
cat.coded = apply(sapply(1:nrow(x), function(j) sapply(1:nrow(cat.lut),
function(i) check.equal(cat.lut[i, ], x[j, cat.inds]))), 2, which)
## replace categorical inputs with 'cat.coded'
x = data.frame(x[, -cat.inds, drop = FALSE], cat.coded = as.factor(cat.coded))
}
} else {
cat.lut = NULL
}
model = makeCCKriging(x = x, y = y, config, cat.lut = cat.lut)
if (is.null(pars.init)) {
if (config$cat.interaction || m == 1) { # FIXME: should we update m after the data has been transformed?
cat.pars.init = switch(cat.type,
"EC" = function() runif(1, min = 0, max = 1),
"MC" = function() runif(s, min = 1e-10, max = 10),
"UC" = function() runif((s^2 - s)/2, min = 1e-10, max = pi),
"TMC" = function() rnorm(s),
"GMC" = function() rnorm(k.GMC)
)
} else {
cat.pars.init = switch(cat.type,
"EC" = function() runif(m, min = 0, max = 1),
"MC" = function() runif(sum(n.levels), min = 1e-10, max = 10),
"UC" = function() runif(sum((n.levels^2 - n.levels)/2), min = 1e-10, max = pi),
"TMC" = function() rnorm(sum(n.levels))
)
}
pars.init = replicate(100, c(runif(q, min = 1e-10, max = 10), cat.pars.init()))
likelihood.vals = apply(pars.init, 2, getLikelihood, model = model, config = config, info = info)
pars.init = pars.init[, which.min(likelihood.vals)]
} else {
likelihood.vals = getLikelihood(model, pars.init, config = config, info = info)
}
ll.before.optim = min(likelihood.vals)
## set bounds for optimization
lower = rep(1e-10, q)
upper = rep(1e10, q)
if (cat.type == "EC") {
lower = c(lower, 1e-10)
upper = c(upper, 1)
} else if (cat.type == "MC") {
lower = c(lower, rep(1e-10, s))
upper = c(upper, rep(1e10, s))
} else if (cat.type == "UC") {
lower = c(lower, rep(1e-10, (s^2-s)/2))
upper = c(upper, rep(pi, (s^2-s)/2))
} else if (cat.type == "TMC") {
lower = c(lower, rep(-1e10, s))
upper = c(upper, rep(1e10, s))
} else if (cat.type == "GMC") {
lower = c(lower, rep(-1e10, k.GMC))
upper = c(upper, rep(1e10, k.GMC))
}
opt = optim(par = pars.init, fn = getLikelihood, lower = lower,
upper = upper, model = model, method = "L-BFGS-B", config = config, info = info)
opt.values = format(c(ll.before.optim, opt$value), digits = 5)
print(sprintf("Negative log likelihood before optimization: %s", opt.values[1]))
print(sprintf("Negative log likelihood after optimization: %s", opt.values[2]))
model$par = opt$par
return(model)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.