# Chunk options for the rendered document: merge source and output into one
# block and prefix printed output lines with "#>".
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
# NOTE(review): `run_long` is not referenced anywhere in the visible part of
# this file; presumably it gated the long-running chunks in the original
# R Markdown source -- confirm before relying on it.
run_long = FALSE
# Bind reticulate to the 'mlr3keras' conda environment; `required = TRUE`
# makes this an error instead of a silent fallback when the env is missing.
reticulate::use_condaenv('mlr3keras', required = TRUE)
# Load the package under development from the current working directory.
devtools::load_all()
library("bbotk")
library("data.table")
library("ggplot2")

# Current performances

# Build a lookup table with one row per (config, task level) combination,
# together with the target variable that is optimised for that config.
# NOTE(review): `workdir` is not defined in the visible part of this file; it
# must exist in the calling environment.
keys = cfgs()$keys()
instances = rbindlist(map(keys, function(x) {
  cf = cfgs(x, workdir = workdir)
  # "task_set" is optimised for its second target variable, every other config
  # for its first. A plain if/else is used because this is a scalar choice.
  tgt_idx = if (x == "task_set") 2L else 1L
  data.table(cfg = x, level = cf$task_levels, tgt = cf$target_variables[tgt_idx])
}), fill = TRUE)
instances
# Create the objective for row `i` of the global `instances` table.
#
# @param i Row index into `instances`.
# @return The object returned by the config's `get_objective()` method.
#
# Relies on the globals `instances` and `workdir`.
make_objective = function(i) {
  cfg_key = instances$cfg[i]
  level = instances$level[i]
  target = instances$tgt[i]
  cf = cfgs(cfg_key, workdir = workdir)

  if (cfg_key == "nb301") {
    # nb301 is requested without a task and with `retrafo = TRUE`.
    cf$get_objective(task = NULL, target, retrafo = TRUE)
  } else if (is.na(level)) {
    # No task level recorded for this config: use the method's defaults.
    cf$get_objective()
  } else {
    cf$get_objective(level, target)
  }
}
# Run random search on instance `i` and return its incumbent trajectory.
#
# @param i Row index into the global `instances` table.
# @return A data.table holding the archive rows at which the best-so-far value
#   of the target changed, with the extra columns `evals` (1-based evaluation
#   counter), `objective_bbotk` (best-so-far target value) and the columns of
#   `instances[i, ]`; a missing `level` is reported as "none".
#
# Side effect: adds the `evals` column to the instance's archive by reference.
run_rs = function(i) {
  n_evals = 10^6
  batch_size = 10^4
  target = instances$tgt[i]

  # Stop after `n_evals` evaluations or once the stagnation terminator
  # triggers, whichever happens first (`any = TRUE`).
  ins = OptimInstanceSingleCrit$new(
    objective = make_objective(i),
    terminator = trm("combo",
      terminators = list(
        trm("evals", n_evals = n_evals),
        trm("stagnation_batch", n = 5L, threshold = 10^-3)
      ),
      any = TRUE
    )
  )
  opt("random_search", batch_size = batch_size)$optimize(ins)

  ins$archive$data[, evals := seq_len(.N)]

  # Direction of optimisation. `%in%` keeps the condition scalar even when the
  # target carries more than one tag.
  tags = ins$objective$codomain$tags[[target]]
  minimize = "minimize" %in% tags
  if (!minimize && !("maximize" %in% tags)) {
    stop("Target '", target, "' is tagged neither 'minimize' nor 'maximize'.",
      call. = FALSE)
  }
  running_best = if (minimize) cummin else cummax

  # Work on a copy so only `evals` is written back into the archive.
  dt = copy(ins$archive$data)
  set(dt, j = "objective_bbotk", value = running_best(dt[[target]]))

  # Keep only the evaluations at which the incumbent changed. shift() pads the
  # first position with NA, so the comparison is NA there and which() drops the
  # very first evaluation as well.
  improved = which(dt$objective_bbotk != shift(dt$objective_bbotk))
  dt = dt[improved]

  dt = cbind(dt, instances[i, ])
  dt[, level := ifelse(is.na(level), "none", level)]
  dt
}

# Run random search for every instance of one config and stack the results.
#
# @param cfg Config key, matched against `instances$cfg`.
# @return The row-bound output of `run_rs()` over the selected instances.
#
# Configs with more than five instances are reduced to a random sample of five.
run_cfg = function(cfg) {
  max_instances = 5
  idx = which(instances$cfg == cfg)
  if (length(idx) > max_instances) {
    idx = sample(idx, max_instances)
  }
  rbindlist(map(idx, run_rs))
}
# Plot the incumbent trajectories returned by `run_cfg()`.
#
# @param dt Data with columns `evals`, `objective_bbotk` and `level`.
# @param title Plot title; only used when `annotate` is TRUE.
# @param annotate If TRUE, label every point with its rounded value and add
#   `title` as the plot title.
# @return A ggplot object.
plot_random_search = function(dt, title = NULL, annotate = FALSE) {
  p = ggplot(dt, aes(x = evals, y = objective_bbotk, color = level)) +
    geom_point() +
    geom_line()
  if (annotate) {
    p = p + geom_text(aes(label = round(objective_bbotk, 3)), vjust = -.5)
  }
  p = p +
    scale_x_log10() +
    theme_bw()
  if (annotate) {
    p = p + ggtitle(title)
  }
  p
}

# Configs to run, in order. Only the first two get value labels and a title.
random_search_cfgs = c(
  "branin", "fcnet", "nb301",
  "rbv2_aknn", "rbv2_rpart", "rbv2_ranger", "rbv2_glmnet",
  "rbv2_svm", "rbv2_super", "rbv2_xgboost",
  "lcbench"
)
annotated_cfgs = c("branin", "fcnet")

# `cfg`, `dt` and `p` stay top-level variables, so after the loop they hold
# the values of the last config.
for (cfg in random_search_cfgs) {
  dt = run_cfg(cfg)
  p = plot_random_search(dt, title = cfg, annotate = cfg %in% annotated_cfgs)
  ggsave(paste0("attic/random_search/", cfg, "_random_search.pdf"), p)
}

# Original Scale

# Score the ONNX model of the "lcbench" config on its test split.
cfg = cfgs("lcbench", workdir = workdir)

y_ids = cfg$codomain$ids()
x_ids = cfg$param_set$ids()

# Restrict the test features to the parameters of the search space.
xdt = cfg$data$xtest[, ..x_ids, drop = FALSE]

# Model inputs: one entry per character/factor column, converted by
# `char_to_int()` with the stored dictionaries, plus a single float32 matrix
# named "continuous" holding all numeric columns.
is_categorical = function(col) is.character(col) || is.factor(col)
categorical_cols = mlr3misc::keep(xdt, is_categorical)
numeric_cols = keep(xdt, is.numeric)
li = c(
  mlr3misc::imap(categorical_cols, char_to_int, readRDS(cfg$dicts_path)),
  continuous = list(reticulate::r_to_py(as.matrix(numeric_cols))$astype("float32"))
)

# Run the model through onnxruntime and take the first output.
rt = reticulate::import("onnxruntime")
session = rt$InferenceSession(cfg$onnx_model_path)
dt = session$run(NULL, li)[[1L]]
colnames(dt) = colnames(cfg$data$ytest)

# NOTE(review): `ytest[, y_ids]` selects columns only if `ytest` is a matrix or
# data.frame; on a data.table it would return `y_ids` itself -- confirm.
compute_metrics(
  as.matrix(cfg$data$ytest[, y_ids]),
  as.matrix(dt)
)

# Score the Keras model of the same config on the inputs `li` that were built
# for the ONNX model, so both sets of predictions can be compared.
model = keras::load_model_hdf5(cfg$keras_model_path)
ll = predict(model, li)

# Eyeball the Keras predictions (`ll`) against the ONNX predictions (`dt`).
head(ll)
head(dt)

# Ground-truth targets for the test split.
yy = cfg$data$ytest[, y_ids]

# The first argument was an empty `as.matrix()` call, which fails because `x`
# is missing; the ground truth `yy` is passed instead. The predictions are
# given the same column names as the ONNX output.
compute_metrics(
  as.matrix(yy),
  as.matrix(setNames(data.frame(ll), colnames(dt)))
)
library("ggplot2")
library("data.table")

# NOTE(review): `oo` is not defined in the visible part of this file; its
# second element is expected to provide the columns used below.
df = oo[[2]]
df = data.table(df)
df[, grp := NULL]

# Average every remaining column within each (variable, cfg, level) group.
df = df[, lapply(.SD, mean), by = c("variable", "cfg", "level")]

# Lay the levels out on a roughly square grid: number the levels, then derive
# a column (`x`) and a row (`y`) position from that number.
df[, xx := as.integer(as.factor(level))]
max_x = ceiling(sqrt(max(df$xx)))
df[, `:=`(
  x = xx %% max_x,
  y = ceiling(xx / max_x)
)]

# One tile per level, coloured and labelled with the value of `rsq` for the
# "logloss" variable.
ggplot(df[variable == "logloss"], aes(x = x, y = y, fill = rsq)) +
  geom_tile(color = "black", size = .1) +
  theme_minimal() +
  scale_fill_gradient(low = "yellow", high = "red") +
  geom_text(aes(x = x, y = y, label = paste(level, "\n", round(rsq, 2)))) +
  theme(
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    axis.title = element_blank()
  ) +
  ggtitle("RSq for Logloss")


# slds-lmu/paper_2021_multi_fidelity_surrogates documentation built on Feb. 20, 2022, 11:53 a.m.