library(hash)          # hash(), has.key() used for the model cache below
library(ParamHelpers)  # getParamLengths(), generateDesign()

Q_table_Env = R6::R6Class(
"Q_table_Env",
inherit = rlR::Environment,
public = list(
step_cnt = NULL,
s_r_d_info = NULL,
task = NULL,
mbo_cache = NULL, # store pipeline, hyperparameter set and corresponding performance for MBO
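# mbo_cache layout: keyed by model_id (pipeline operators joined by "\t");
# each entry holds the MBO opt.path data frame (one row per evaluated
# hyperparameter configuration, performance in column "y") plus an
# "epis_unimproved" column counting episodes without improvement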
model_best_perf = NULL, # best performance of the sampled model so far
model_trained = NULL, # performances of all models trained so far (their count is charged against the budget)
budget = NULL, # maximum number of model evaluations
measure = NULL,
cv_instance = NULL,
initialize = function(task, budget, measure, cv_instance){
self$flag_continous = FALSE # discrete (non-continuous) action space
self$flag_tensor = FALSE # no tensor/image input, so no CNN is used
self$act_cnt = g_act_cnt # 5 available operators/actions at each stage
self$state_dim = g_state_dim
self$step_cnt = 0L
self$s_r_d_info = list(
state = "s",
reward = 0,
done = FALSE,
info = list())
self$task = task
self$mbo_cache = hash()
self$model_trained = NULL
self$budget = budget
self$measure = measure
self$cv_instance = cv_instance
},
evaluateArm = function(vec_arm) { # prints and returns the arm unchanged
print(vec_arm)
return(vec_arm)
},
# This function is called at each step of the learning process
step = function(action) {
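# map the integer action onto the operator name for the current pipeline
# stage (columns of g_operators index stages, rows the available actions)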
action = g_operators[, self$step_cnt + 1][action]
self$s_r_d_info[["state"]] = paste0(self$s_r_d_info[["state"]], "-[", action, "]")
print(self$s_r_d_info[["state"]])
self$s_r_d_info[["reward"]] = 0
self$step_cnt = self$step_cnt + 1L
if (self$step_cnt >= g_max_depth) {
model = g_getRLPipeline(self$s_r_d_info[["state"]])
print(paste(model, collapse = " --> "))
# check whether enough budget remains to tune this pipeline in the current episode:
model_id = paste(model, collapse = "\t")
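# required budget scales with the pipeline's number of hyperparameters:
# g_mbo_iter MBO iterations per parameter, plus the initial design
# (g_init_design points per parameter) if the pipeline is not cached yet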
if (has.key(model_id, self$mbo_cache)){
require_budget = g_mbo_iter*sum(getParamLengths(g_getParamSetFun(model)))
} else {
require_budget = (g_init_design + g_mbo_iter)*sum(getParamLengths(g_getParamSetFun(model)))
}
if (self$budget < require_budget) stop("total budget too small for reinbo table!")
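# not enough budget left for a full tuning run: force the agent into its
# final episode and end this episode without training a model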
if (self$budget - length(self$model_trained) < require_budget) {
self$agent$interact$idx_episode = self$agent$interact$maxiter
self$s_r_d_info[["done"]] = TRUE
} else {
# train model with hyperparameter tuning:
self$tuning(model)
self$s_r_d_info[["reward"]] = self$model_best_perf # best performance of the model until now
self$s_r_d_info[["done"]] = TRUE
print(paste("Best Perfomance:", self$model_best_perf))
}
}
return(self$s_r_d_info)
},
# This function will be called at the beginning of the learning and at the end of each episode
reset = function() {
self$step_cnt = 0L
self$s_r_d_info[["state"]] = "s"
self$s_r_d_info[["done"]] = FALSE
self$s_r_d_info
},
# Hyperparameter tuning for the generated model: returns the best performance (used as reward) and updates mbo_cache
tuning = function(model) {
model_id = paste(model, collapse = "\t") # model_id used as lookup key in mbo_cache
ps = g_getParamSetFun(model) # generate parameter set
# check if we have already evaluated this model
# if already in mbo_cache:
if (has.key(model_id, self$mbo_cache)){
previous_perf = max(self$mbo_cache[[model_id]][ , "y"]) # best performance until now
epis_unimproved = self$mbo_cache[[model_id]][1, "epis_unimproved"] # episodes without improvement so far
# if the performance of this model has not improved for more than 2 episodes,
# stop further hyperparameter tuning and just return the cached optimum:
if (epis_unimproved > 2) {
self$model_best_perf = previous_perf
} else {
# else: reuse the cached configurations and performances as the initial design,
# dropping the last column (epis_unimproved), which is bookkeeping, not a parameter:
design = self$mbo_cache[[model_id]][, -length(self$mbo_cache[[model_id]])]
# run several iterations of MBO:
run = mbo_fun(self$task, model, design, self$measure, self$cv_instance)
# best performance found by MBO:
self$model_best_perf = run$y
# update mbo_cache:
self$mbo_cache[[model_id]] = run$opt.path$env$path
# record only the newly evaluated points in self$model_trained
# (dob != 0 excludes the cached initial design, which was already counted):
new = run$opt.path$env$path$y[run$opt.path$env$dob != 0]
self$model_trained = c(self$model_trained, new)
# check if the performance of this model has been improved in this episode:
if (run$y <= previous_perf) {
self$mbo_cache[[model_id]]["epis_unimproved"] = epis_unimproved + 1
} else {
self$mbo_cache[[model_id]]["epis_unimproved"] = 0
}
}
} else {
# if not in mbo_cache:
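# no cached evaluations: start from a fresh initial design whose size
# scales with the number of hyperparameters (g_init_design points per param)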
design = generateDesign(n = g_init_design*sum(getParamLengths(ps)), par.set = ps)
run = mbo_fun(self$task, model, design, self$measure, self$cv_instance)
self$model_best_perf = run$y
self$mbo_cache[[model_id]] = run$opt.path$env$path
self$mbo_cache[[model_id]]["epis_unimproved"] = 0
new = run$opt.path$env$path$y
self$model_trained = c(self$model_trained, new)
}
}
)
)
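# --------------------------------------------------------------------------
# Minimal usage sketch (illustration only, not part of the class). It
# assumes the package-level globals referenced above (g_act_cnt, g_state_dim,
# g_operators, g_max_depth, g_mbo_iter, g_init_design) and the helpers
# g_getRLPipeline(), g_getParamSetFun() and mbo_fun() are defined elsewhere;
# the task and resampling setup uses the standard mlr API.
library(mlr)

task = makeClassifTask(data = iris, target = "Species")
cv_instance = makeResampleInstance(makeResampleDesc("CV", iters = 5L), task = task)

env = Q_table_Env$new(task = task,
                      budget = 1000L,      # maximum number of model evaluations
                      measure = acc,       # mlr accuracy measure
                      cv_instance = cv_instance)

# an agent (e.g. from rlR) would then drive the usual interaction loop:
s_r_d = env$reset()
# s_r_d = env$step(action)   # action: integer in 1..g_act_cnt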