R/cckm.R

#' @title Fit Mixed Inputs Kriging Model
#'
#' @description Given mixed continuous and categorical design points \code{x}
#'   and a continuous output \code{y}, fit an extended Kriging model. There a several
#'   methods available for handling the categorical inputs. The optimal parameter set
#'   is estimated via Maximum Likelihood Estimation.
#'
#' @param x [\code{data.frame}]\cr
#'   Mixed continuous and categorical input points.
#' @param y [\code{data.frame}]\cr
#'   Continuous output variable.
#' @param cat.type [\code{character(1)}]\cr
#'   Which method should be used for the categorical inputs?
#'   Possible choices:\cr
#'   \tabular{ll}{
#'     \dQuote{EC}: \tab Exchangeable Correlation\cr
#'     \dQuote{MC}: \tab Multiplicative Correlation\cr
#'     \dQuote{UC}: \tab Hypersphere Decomposition-Based Unrestrictive Correlation\cr
#'     \dQuote{TMC}: \tab Toeplitz Matrix Multiplication-Based Correlation
#'   }
#' @param config [\code{CCConfig}]\cr
#'   Configuration object. See \code{\link{makeCCConfig}}.
#' @param pars.init [\code{numeric}]\cr
#'   Optional vector of initial paramters for the optimization. Defaults to \code{NULL},
#'   which means the best of 100 randomly generated parameter settings is used as
#'   the initial setting.
#' @export
cckm = function(x, y, config, pars.init = NULL) {
  if (!config$cat.interaction && config$cat.type == "GMC")
    stop("Currently method GMC works only when interactions are considered.")

  cat.type = config$cat.type
  cont.type = config$cont.type
  nu = config$cont.par$nu
  info = config$info
  d = ncol(x)
  n = nrow(x)
  if (cat.type == "GMC")
    k.GMC = nlevels(as.factor(config$cat.par$perm))

  cat.inds = which(sapply(x, is.factor))
  cont.inds = setdiff(1:d, cat.inds)

  q = length(cont.inds)
  m = length(cat.inds)
  n.levels = sapply(x[, cat.inds, drop = FALSE], nlevels)
  s = prod(n.levels)

  if (m >= 2) {
    ## create a lookup table like
    ## v1    v2    coded
    ## a     d     1
    ## b     d     2
    ## a     e     3
    ## b     e     4
    ##
    ## add this data.frame to the model object
    cat.lut = expand.grid(sapply(cat.inds, function(j) levels(x[, j])))

    if (config$cat.interaction) {
      ## transform all categorical inputs into a single one with levels 1, ..., s
      ## find out the values of the coded categorical variable:
      cat.coded = apply(sapply(1:nrow(x), function(j) sapply(1:nrow(cat.lut),
        function(i) check.equal(cat.lut[i, ], x[j, cat.inds]))), 2, which)
      ## replace categorical inputs with 'cat.coded'
      x = data.frame(x[, -cat.inds, drop = FALSE], cat.coded = as.factor(cat.coded))
    }
  } else {
    cat.lut = NULL
  }
  model = makeCCKriging(x = x, y = y, config, cat.lut = cat.lut)
  if (is.null(pars.init)) {
    if (config$cat.interaction || m == 1) {  # FIXME: should we update m after the data has been transformed?
      cat.pars.init = switch(cat.type,
        "EC" = function() runif(1, min = 0, max = 1),
        "MC" = function() runif(s, min = 1e-10, max = 10),
        "UC" = function() runif((s^2 - s)/2, min = 1e-10, max = pi),
        "TMC" = function() rnorm(s),
        "GMC" = function() rnorm(k.GMC)
      )
    } else {
      cat.pars.init = switch(cat.type,
        "EC" = function() runif(m, min = 0, max = 1),
        "MC" = function() runif(sum(n.levels), min = 1e-10, max = 10),
        "UC" = function() runif(sum((n.levels^2 - n.levels)/2), min = 1e-10, max = pi),
        "TMC" = function() rnorm(sum(n.levels))
      )
    }
    pars.init = replicate(100, c(runif(q, min = 1e-10, max = 10), cat.pars.init()))
    likelihood.vals = apply(pars.init, 2, getLikelihood, model = model, config = config, info = info)
    pars.init = pars.init[, which.min(likelihood.vals)]
  } else {
    likelihood.vals = getLikelihood(model, pars.init, config = config, info = info)
  }
  ll.before.optim = min(likelihood.vals)

  ## set bounds for optimization
  lower = rep(1e-10, q)
  upper = rep(1e10, q)

  if (cat.type == "EC") {
    lower = c(lower, 1e-10)
    upper = c(upper, 1)
  } else if (cat.type == "MC") {
    lower = c(lower, rep(1e-10, s))
    upper = c(upper, rep(1e10, s))
  } else if (cat.type == "UC") {
    lower = c(lower, rep(1e-10, (s^2-s)/2))
    upper = c(upper, rep(pi, (s^2-s)/2))
  } else if (cat.type == "TMC") {
    lower = c(lower, rep(-1e10, s))
    upper = c(upper, rep(1e10, s))
  } else if (cat.type == "GMC") {
    lower = c(lower, rep(-1e10, k.GMC))
    upper = c(upper, rep(1e10, k.GMC))
  }
  opt = optim(par = pars.init, fn = getLikelihood, lower = lower,
    upper = upper, model = model, method = "L-BFGS-B", config = config, info = info)
  opt.values = format(c(ll.before.optim, opt$value), digits = 5)
  print(sprintf("Negative log likelihood    before   optimization: %s", opt.values[1]))
  print(sprintf("Negative log likelihood    after    optimization: %s", opt.values[2]))

  model$par = opt$par
  return(model)
}
dominikkirchhoff/CCKriging documentation built on May 19, 2019, 4:05 p.m.