CCKriging: Kriging for Mixed Continuous and Categorical Input Variables

library(CCKriging)
library(DiceDesign)
library(checkmate)
library(BBmisc)

##### short example with one qualitative variable

grid = seq(0, 2*pi, length.out = 100)
x1 = sin(grid)
x2 = sin(grid + 1)
v = sample(c("a", "b"), size = 100, replace = TRUE)

x = x1
x[v == "b"] = x2[v == "b"]

w = data.frame(x = x, v = v)
y = sort(runif(100, 0, 1))

d = 2

pars.init = replicate(1, c(runif(d, min = 1e-10, max = 5), 1))#runif(1, 0, 10)))
likelihood.vals = apply(pars.init, 2, getLikelihood, x = w, y = y)
pars.init = pars.init[, which.min(likelihood.vals)]
print(sprintf("Initial likelihood before optimization: %g", min(likelihood.vals)))
optim(par = pars.init, fn = getLikelihood, lower = 1e-10,
  upper = rep(1e10, d), x = w, y = y, method = "L-BFGS-B")

par.grid1 = seq(1e-10, 1e10, length.out = 100)
par.grid2 = par.grid1
par.grid = expand.grid(par.grid1, par.grid2)
par.grid = cbind(par.grid, runif(10000, 0, 5))

all.res = vector("numeric", 10000)
for (i in seq_along(par.grid[, 1])) {
  res = try(getLikelihood(x = w, y = y, par = par.grid[i, ]))
  if (class(res) == "try-error")
    res = 0
  all.res[i] = res
}

##### another example (example 1 from Zhou et al)

X1 = lhsDesign(8, dimension = 1)$design
X1 = maximinESE_LHS(X1)$design
X2 = lhsDesign(8, dimension = 1)$design
X2 = maximinESE_LHS(X2)$design
X3 = lhsDesign(8, dimension = 1)$design
X3 = maximinESE_LHS(X3)$design
## optional fourth factor, NOT in Zhou et al:
X4 = lhsDesign(8, dimension = 1)$design
X4 = maximinESE_LHS(X4)$design

y1 = cos(6.8 * pi/2 * X1)
y2 = -cos(7 * pi/2 * X2)
y3 = cos(7.2 * pi/2 * X3)
## optional
y4 = -cos(-6.6 * pi/2 * X4)

w = data.frame(x = c(X1, X2, X3), v = c(rep("a", 8), rep("b", 8), rep("c", 8)))
y = c(y1, y2, y3)

## with four factors:
w.ext = data.frame(x = c(X1, X2, X3, X4), v = c(rep("a", 8), rep("b", 8), rep("c", 8), rep("d", 8)))
y.ext = c(y1, y2, y3, y4)

### CD
cc.cd = cckm(x = w, y = y, cat.type = "CD")
pars.init = replicate(100, c(runif(d, min = 1e-10, max = 5), c(1.28, 0.2, 1.25)))
likelihood.vals = apply(pars.init, 2, getLikelihood, x = w, y = y, cat.type = "CD")
pars.init = pars.init[, which.min(likelihood.vals)]
print(sprintf("Initial likelihood before optimization: %g", min(likelihood.vals)))
opt = optim(par = pars.init, fn = getLikelihood, lower = c(rep(1e-10, d), rep(1e-1, 3)),
  upper = c(rep(1e10, d), rep(50, length(pars.init) - d)), x = w, y = y, cat.type = "CD",
  method = "L-BFGS-B")
print(sprintf("Likelihood after optimization: %g", opt$value))

corr.matrix.CD = getCorrMatrix(w, par = opt$par, cat.type = "CD")
gplots::heatmap.2(corr.matrix.CD, main = "Categorical Distances", density.info = "none",
  dendrogram = "none", trace = "none", Colv = NA, Rowv = NA)
model.CD = list(x = w, y = y, par = opt$par, cat.type = "CD")

### EC
config = makeCCConfig()
cc.ec = cckm(x = w, y = y, config)
cc.ec
plot(cc.ec)

cc.ec.ext = cckm(x = w.ext, y = y.ext, config)
cc.ec.ext
plot(cc.ec.ext)

### MC
config = makeCCConfig(cat.type = "MC")
cc.mc = cckm(x = w, y = y, config)
cc.mc
plot(cc.mc)

cc.mc.ext = cckm(x = w.ext, y = y.ext, config)
cc.mc.ext
plot(cc.mc.ext)

### UC
config = makeCCConfig(cat.type = "UC")
cc.uc = cckm(x = w, y = y, config)
cc.uc
plot(cc.uc)

cc.uc.ext = cckm(x = w.ext, y = y.ext, config)
cc.uc.ext
plot(cc.uc.ext)

### GMC
config = makeCCConfig("GMC", cat.par = list(perm = as.character(c(1, 3, 1, 1, 2, 2, 1, 3, 1))))
cc.gmc = cckm(x = w, y = y, config)
cc.gmc
plot(cc.gmc)

config = makeCCConfig("GMC", cat.par = list(perm = as.character(c(1, 3, 4, 2, 1, 1, 3, 1, 2, 3, 4, 4, 1, 2, 3, 4))))
cc.gmc.ext = cckm(x = w.ext, y = y.ext, config)
cc.gmc.ext
plot(cc.gmc.ext)

### TMC
config = makeCCConfig("TMC")
cc.tmc = cckm(x = w, y = y, cat.type = "TMC")
cc.tmc
plot(cc.tmc)

cc.tmc.ext = cckm(x = w.ext, y = y.ext, config)
cc.tmc.ext
plot(cc.tmc.ext)

pred.grid.a = data.frame(x = seq(0, 1, length.out = 1000), v = "a")
cc.pred = predict(cc.tmc, newdata = pred.grid.a)
plot(w$x[w$v == "a"], y[w$v == "a"], pch = "x")
for (i in 1:1000) {
  points(cc.pred[[i]]$x0$x, cc.pred[[i]]$pred, pch = ".")
  points(cc.pred[[i]]$x0$x, cc.pred[[i]]$pred - 100*cc.pred[[i]]$s.square, pch = ".", col = "red")
  points(cc.pred[[i]]$x0$x, cc.pred[[i]]$pred + 100*cc.pred[[i]]$s.square, pch = ".", col = "red")
}


##### example for ERCIM-17, composed of six univariate test functions

x = vector("list", 6)
for (i in 1:6) {
  x[[i]] = lhsDesign(10, dimension = 1)$design
  x[[i]] = maximinESE_LHS(x[[i]])$design
}
library(smoof)
schwefel = makeSchwefelFunction(dimensions = 1L)
## redefine for x = 0 ... 1, minimum in x = 0.9209687: y = -1
f1 = function(x) {
  schwefel((x * 1000) - 500) / 418.9829
}

## minimum in x = 0.7917: y = -0.936
alp02 = convertToMinimization(makeAlpine02Function(dimensions = 1L))
f2 = function(x) {
  alp02((x * 10)) / 3
}

## minimum in x = 0.422: y = -0.79
f3 = function(x) {
  12 * (x - 0.55)^2 - sin(19 * x)
}

## constant function, y = 0
f4 = function(x) {
  return(0)
}

## minimum in x = 0.079: y = -0.9
f5 = function(x) {
  -0.9 * f1(x)
}

## minimum in x = 0.398: y = -0.89
f6 = function(x) {
  10 * (x - 0.5)^2 - sin(20 * x)
}

y = sapply(x[[1]], f1)
y = c(y, sapply(x[[2]], f2))
y = c(y, sapply(x[[3]], f3))
y = c(y, sapply(x[[4]], f4))
y = c(y, sapply(x[[5]], f5))
y = c(y, sapply(x[[6]], f6))
x = unlist(x)
w = data.frame(x = x, v = factor(rep(c("a", "b", "c", "d", "e", "f"), each = 10)))

## same example, six lvl combinations are divided into two variables:
w2 = data.frame(x = x, v1 = factor(rep(rep(c("a", "b", "c"), each = 10), 2)),
  v2 = factor(rep(c("a", "b"), each = 30)))

## models
config = makeCCConfig(cat.interaction = FALSE)
cc.ec = cckm(x = w, y = y, config)  # -44.114
cc.ec
plot(cc.ec)

config = makeCCConfig(cat.type = "MC")
cc.mc = cckm(x = w, y = y, config)  # -54.535
cc.mc
plot(cc.mc)  # recognizes positive correlation between c and f (0.68)

config = makeCCConfig(cat.type = "UC")
cc.uc = cckm(x = w, y = y, config)  # -30.767
cc.uc
plot(cc.uc)  # recognizes pos. corr. between c and f (0.40) & neg. corr. between a and e (-0.99)

config = makeCCConfig(cat.type = "TMC")
cc.tmc = cckm(x = w, y = y, config)  # -44.114
cc.tmc
plot(cc.tmc)  # recognizes pos. corr. between c and f (0.70) & neg. corr. between a and e (-0.35)

##### plots
x = seq(0, 1, length.out = 10000)
plot(x, sapply(x, f1), type = "l", ylim = c(-1, 3), ylab = "test functions")
lines(x, sapply(x, f2), col = "red")
lines(x, sapply(x, f3), col = "blue")
lines(x, sapply(x, f4))
lines(x, sapply(x, f5), col = "green")
lines(x, sapply(x, f6), col = "orange")
#####

# a = Schwefel((x * 1000) - 500) / 418.9829
# b = - Alpine02(x * 10)/3
# c = 12 * (x - 0.55)^2 - sin(19 * x)
# d = 0
# e = -0.9 * Schwefel((x * 1000) - 500) / 418.9829
# f = 10 * (x - 0.5)^2 - sin(20 * x)

##### another example (example 3 from Zhou et al., random quadratic curve in Han et al., 2009)

proc1coef.mu = c(1, 0, -1, 6, 4, 5, -6, -6, -6)

b = sapply(proc1coef.mu, rnorm, n = 1, sd = 0.01)

f = function(x, b) {
  return(b %*% c(1, x, x^2))
}

X = seq(0, 1, by = 0.25)
X2 = X[-c(1, 2)]

F1 = sapply(X, f, b = b[1:3])
F2 = sapply(X, f, b = b[4:6])
F3 = sapply(X2, f, b = b[7:9])

w = data.frame(x = c(X, X, X2), v = c(rep("a", 5), rep("b", 5), rep("c", 3)))
y = c(F1, F2, F3)

plot(X, F1, type = "l", ylim = c(-20, 20))
points(X, F2, type = "l", col = "red")
points(X2, F3, type = "l", col = "blue")

### EC
cc.ec = cckm(x = w, y = y, cat.type = "EC")
cc.ec
plot(cc.ec)

### MC
cc.mc = cckm(x = w, y = y, cat.type = "MC")
cc.mc
plot(cc.mc)

### UC
cc.uc = cckm(x = w, y = y, cat.type = "UC")
cc.uc
plot(cc.uc)

### TMC
cc.tmc = cckm(x = w, y = y, cat.type = "TMC")
cc.tmc
plot(cc.tmc)

newdata = data.frame(x = seq(0, 1, length.out = 200), v = "a")
cc.tmc.pred = predict(cc.tmc, newdata)
plot(cc.tmc.pred$model$x$x[cc.tmc.pred$model$x$v == "a"],
  cc.tmc.pred$model$y[cc.tmc.pred$model$x$v == "a"], pch = "x")
lines(cc.tmc.pred$x0$x, cc.tmc.pred$pred)
lines(cc.tmc.pred$x0$x, cc.tmc.pred$pred - 150 * cc.tmc.pred$s.square, col = "red")
lines(cc.tmc.pred$x0$x, cc.tmc.pred$pred + 150 * cc.tmc.pred$s.square, col = "red")

##### plot models #####

grid = seq(0, 1, length.out = 1000)

par(mfrow = c(2,2))

plot(grid, cos(6.8 * pi/2 * grid), type = "l")
grid.pred = sapply(grid,
  function(x) predict.cdKriging(model, newdata = data.frame(x = x, v = "a"))$prediction)
lines(grid, grid.pred, col = "red")
points(X1, y1, pch = "x")

plot(grid, -cos(7 * pi/2 * grid), type = "l")
grid.pred = sapply(grid,
  function(x) predict.cdKriging(model, newdata = data.frame(x = x, v = "b"))$prediction)
lines(grid, grid.pred, col = "red")
points(X2, y2, pch = "x")

plot(grid, cos(7.2 * pi/2 * grid), type = "l")
grid.pred = sapply(grid,
  function(x) predict.cdKriging(model, newdata = data.frame(x = x, v = "c"))$prediction)
lines(grid, grid.pred, col = "red")
points(X3, y3, pch = "x")


##### #####

newdata = data.frame(x = 0.4, v = "c")
prediction = predict.cdKriging(model, newdata = newdata)$prediction

grid = seq(0, 1, length.out = 1000)
plot(grid, cos(6.8 * pi/2 * grid), type = "l")
lines(grid, -cos(7 * pi/2 * grid), type = "l", col = "red")
lines(grid, cos(7.2 * pi/2 * grid), type = "l", col = "blue")
points(X1, y1, pch = "x")
points(X2, y2, pch = "x", col = "red")
points(X3, y3, pch = "x", col = "blue")

if (newdata$v == "a")
  col = "black"
if (newdata$v == "b")
  col = "red"
if (newdata$v == "c")
  col = "blue"
points(newdata$x, prediction, col = col, pch = "x", cex = 1.5)

##### ######

par.grid1 = seq(1e-10, 1e2, length.out = 100)
par.grid2 = par.grid1
par.grid3 = 1:10
par.grid = expand.grid(par.grid1, par.grid2, par.grid3)
# par.grid = cbind(par.grid, runif(10000, 0, 5))

all.res = vector("numeric", 10000)
for (i in seq_along(par.grid[, 1])) {
  res = try(getLikelihood(x = w, y = y, par = par.grid[i, ]))
  if (class(res) == "try-error")
    res = 0
  all.res[i] = res
}


# covStruct.create(covtype = "qualitative", known.covparam = "All", d = d,
#   var.names = c("x", "v"), coef.cov = opt$par)
# covMatrix(model@covariance, X = w, noise.var = )

dominikkirchhoff/CCKriging documentation built on May 19, 2019, 4:05 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

dominikkirchhoff/CCKriging
Kriging for Mixed Continuous and Categorical Input Variables

examples/examples.R
In dominikkirchhoff/CCKriging: Kriging for Mixed Continuous and Categorical Input Variables

R Package Documentation

Browse R Packages

We want your feedback!

dominikkirchhoff/CCKriging Kriging for Mixed Continuous and Categorical Input Variables

examples/examples.R In dominikkirchhoff/CCKriging: Kriging for Mixed Continuous and Categorical Input Variables

R Package Documentation

Browse R Packages

We want your feedback!

dominikkirchhoff/CCKriging
Kriging for Mixed Continuous and Categorical Input Variables

examples/examples.R
In dominikkirchhoff/CCKriging: Kriging for Mixed Continuous and Categorical Input Variables