SurroNN = R6::R6Class("SurroNN",
inherit = Surrogate,
public = list(
lr = NULL,
conf = NULL,
agent = NULL,
custom_flag = NULL,
action_input = NULL,
sess = NULL,
np = NULL,
act_cnt = NULL,
state_dim = NULL,
model = NULL,
initialize = function(agent) {
self$np = reticulate::import("numpy", convert = FALSE) # keep numpy objects un-converted
self$agent = agent
self$act_cnt = self$agent$act_cnt
self$custom_flag = FALSE
self$state_dim = self$agent$state_dim
self$conf = self$agent$conf
self$lr = self$conf$get("agent.lr")
self$sess = agent$sess
self$model = self$makeModel()
},
#@depend self$agent$task
makeModel = function() {
do.call(self$agent$network_build_funs[[self$agent$task]], args = list(state_dim = self$state_dim, act_cnt = self$act_cnt))
},
setModel = function(obj) {
self$model = obj
self$custom_flag = TRUE
},
getWeights = function() {
keras::get_weights(self$model)
},
setWeights = function(weights) {
keras::set_weights(self$model, weights)
},
persist = function(file_path) {
keras::save_model_hdf5(object = self$model, file_path = file_path)
},
train_weighted = function(X_train, Y_train, sample_weight) {
keras::train_on_batch(object = self$model, x = X_train, y = Y_train, sample_weight = sample_weight)
},
train = function(X_train, Y_train, batchsize = NULL, epochs = 1L) {
keras::fit(object = self$model, x = X_train, y = Y_train, batch_size = batchsize, epochs = epochs, verbose = 0)
},
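# Note: train() runs keras::fit for `epochs` full passes over the data
# (optionally mini-batched), whereas batch_update() below performs a single
# gradient step via keras::train_on_batch, the usual choice inside replay loops.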
batch_update = function(X_train, Y_train) {
keras::train_on_batch(object = self$model, x = X_train, y = Y_train)
},
pred = function(X) {
res = keras::predict_on_batch(self$model, X)
res # FIXME: prediction might be NA from Keras
},
afterEpisode = function() {
lr = keras::k_get_value(self$model$optimizer$lr)
self$agent$interact$toConsole("learning rate: %s \n", lr)
# decay the learning rate after each episode
self$lr = self$lr * self$agent$lr_decay
keras::k_set_value(self$model$optimizer$lr, self$lr)
}
),
private = list(
deep_clone = function(name, value) {
# When x$clone(deep = TRUE) is called, deep_clone is invoked once per field, with the field's name and value.
if (name == "model") {
weights = self$getWeights()
if (self$custom_flag) {
model = keras::clone_model(self$model)
} else {
model = self$makeModel()
}
keras::set_weights(model, weights)
return(model)
} else {
# For all other fields, just return the value
value
}
}
)
)
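# Usage sketch (hypothetical; assumes an `agent` exposing act_cnt, state_dim,
# conf, sess and network_build_funs, as required by initialize() above):
#   surro = SurroNN$new(agent)
#   target = surro$clone(deep = TRUE)      # independent model, same weights
#   surro$train(X_train, Y_train, batchsize = 32L)
#   target$setWeights(surro$getWeights())  # sync a target network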
SurroTF = R6::R6Class("SurroTF",
inherit = SurroNN,
public = list(
tf_states = NULL,
tf_acts = NULL,
tf_return = NULL, # per-step return
train_op = NULL,
all_act_prob = NULL,
makeModel = function() {
tf = reticulate::import("tensorflow")
with(tf$name_scope("inputs"), {
self$tf_states = tf$placeholder(tf$float32, shape(NULL, self$state_dim), name = "state")
self$tf_acts = tf$placeholder(tf$int32, shape(NULL), name = "actions_num")
self$tf_return = tf$placeholder(tf$float32, shape(NULL), name = "actions_value")
})
# fc1
layer = tf$layers$dense(
inputs = self$tf_states,
units = 10L,
activation = tf$nn$tanh, # tanh activation
kernel_initializer = tf$random_normal_initializer(mean = 0, stddev = 0.3),
bias_initializer = tf$constant_initializer(0.1),
name = "fc1"
)
# fc2
all_act = tf$layers$dense(
inputs = layer,
units = self$act_cnt,
activation = NULL,
kernel_initializer = tf$random_normal_initializer(mean = 0, stddev = 0.3),
bias_initializer = tf$constant_initializer(0.1),
name = "fc2"
)
self$all_act_prob = tf$nn$softmax(all_act, name = "act_prob") # use softmax to convert logits to probabilities
with(tf$name_scope("loss"), {
neg_log_prob = tf$nn$sparse_softmax_cross_entropy_with_logits(logits = all_act, labels = self$tf_acts) # the negative log-probability of the chosen action
#neg_log_prob = tf$reduce_sum(-tf$log(self$all_act_prob) * tf$one_hot(self$tf_acts, self$agent$act_cnt), axis = 1)
loss = tf$reduce_mean(neg_log_prob * self$tf_return) # reward guided loss
})
with(tf$name_scope("train"), {
self$train_op = tf$train$AdamOptimizer(self$lr)$minimize(loss)
})
self$sess$run(tensorflow::tf$global_variables_initializer())
invisible(NULL) # the model lives in the TF graph/session, not in a Keras object
},
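# The methods below feed the placeholders by their graph names
# ("inputs/state:0", "inputs/actions_num:0", "inputs/actions_value:0"),
# which avoids keeping tensor handles around between calls.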
batch_update = function(X_train, Y_train) {
# Y_train is unused here: the targets are the recorded actions and returns
# convert 1-based R action indices to 0-based Python indices
acts = unlist(self$agent$list.acts) - 1L
acts = self$np$array(acts)
returns = self$np$array(self$agent$amf) # per-step returns
self$sess$run(self$train_op, dict("inputs/state:0" = X_train, "inputs/actions_num:0" = acts, "inputs/actions_value:0" = returns))
},
pred = function(X) {
obs = self$np$array(X)
self$sess$run(self$all_act_prob, dict("inputs/state:0" = obs))
# py_dict does not work in this case self$sess$run(self$all_act_prob, py_dict(self$tf_obs$name, obs))
},
afterEpisode = function() {
# no learning-rate decay for the TensorFlow surrogate
}
)
)
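# Note on the loss above: sparse_softmax_cross_entropy_with_logits(logits,
# labels) equals -log(softmax(logits)[label]), so
#   loss = mean_t( -log pi(a_t | s_t) * G_t )
# is the REINFORCE (vanilla policy-gradient) objective, with tf_return
# supplying the per-step return G_t.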
SurroGrad = R6::R6Class("SurroGrad",
inherit = SurroNN,
public = list(
loss2WeightGrad = function(state, action) {
input = self$model$input
loss = self$model$total_loss
targets = self$model$targets
sample_weights = self$model$sample_weights[[1L]]
weight = self$model$trainable_weights
# symbolic gradients of the total loss w.r.t. the trainable weights
tf_grad = keras::k_gradients(loss, weight)
self$sess$run(tensorflow::tf$global_variables_initializer())
sstate = self$np$array(state)
sweights = self$np$array(rep(1, nrow(state))) # one unit sample weight per sample (row)
saction = self$np$array(action)
feed_dict = reticulate::py_dict(c(input, targets, sample_weights), c(sstate, saction, sweights))
self$sess$run(tf_grad, feed_dict)
},
getGradients = function(state) {
yhat = self$pred(state)
self$loss2WeightGrad(state = state, action = yhat)
}
)
)
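# Usage sketch (hypothetical shapes; requires a compiled Keras model so that
# model$total_loss, model$targets and model$sample_weights are defined):
#   sg = SurroGrad$new(agent)
#   grads = sg$getGradients(state = matrix(rnorm(4), nrow = 1L))
#   # one gradient array per entry of model$trainable_weights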
SurroDDPG = R6::R6Class("SurroDDPG",
inherit = SurroNN,
public = list(
makeModel = function() {
model = self$agent$createBrain() # the agent itself is responsible for creating the brain
return(model)
},
# calculate gradients with respect to the action input instead of the weights
calGradients2Action = function(state_input, action_input) {
output = self$model$output
# FIXME: hard-coded input ordering: input[[1]] is the action, input[[2]] the state.
input_action = self$model$input[[1L]]
tf_grad = keras::k_gradients(output, input_action) # gradients of the output w.r.t. the action input
aname = input_action$name
sname = self$model$input[[2L]]$name
sstate = self$np$array(state_input)
saction = self$np$array(action_input)
feed_dict = reticulate::py_dict(c(sname, aname), c(sstate, saction))
# FIXME: do we need to provide the output as well?
self$sess$run(tf_grad, feed_dict)
},
calGradients = function(state, action) {
output = self$model$output
mweights = self$model$trainable_weights
# gradients of the model output w.r.t. the trainable weights
tf_grad = keras::k_gradients(output, mweights)
iname = self$model$input$name
tf = reticulate::import("tensorflow")
ph_a = tf$placeholder(tf$float32, shape(NULL, self$act_cnt), name = "action")
self$sess$run(tensorflow::tf$global_variables_initializer())
sstate = self$np$array(state)
saction = self$np$array(action)
feed_dict = reticulate::py_dict(c(iname, ph_a$name), c(sstate, saction))
self$sess$run(tf_grad, feed_dict)
},
loss2WeightGrad = function(state, action) {
input = self$model$input
loss = self$model$total_loss
targets = self$model$targets
sample_weights = self$model$sample_weights[[1L]]
weight = self$model$trainable_weights
# symbolic gradients of the total loss w.r.t. the trainable weights
tf_grad = keras::k_gradients(loss, weight)
self$sess$run(tensorflow::tf$global_variables_initializer())
sstate = self$np$array(state)
sweights = self$np$array(rep(1, nrow(state))) # one unit sample weight per sample (row)
saction = self$np$array(action)
feed_dict = reticulate::py_dict(c(input, targets, sample_weights), c(sstate, saction, sweights))
self$sess$run(tf_grad, feed_dict)
},
getGradients = function(state) {
yhat = self$pred(state)
self$loss2WeightGrad(state = state, action = yhat)
}
)
)
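# In DDPG, calGradients2Action() yields dQ/da from the critic; the actor is
# then updated with the deterministic policy gradient
#   grad_theta J ~ mean_t( dQ/da |_{a = mu(s_t)} * dmu(s_t)/dtheta ),
# i.e. the chain rule through the action input rather than through a loss.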
# continuous_cross_entropy = function(act_chosen) {
#
# }
#
# library(keras)
# quantile <- 0.5
# tilted_loss <- function(q, y, f) {
# e <- y - f
# k_mean(k_maximum(q * e, (q - 1) * e), axis = 2)
# }
#
# sess <- k_get_session()
# ys <- k_constant(c(1,2,3,4), shape = c(2,2))
# yhats <- k_constant(c(1,3,3,4), shape = c(2,2))
# sess$run(tilted_loss(quantile, ys, yhats))
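# tilted_loss above is the pinball (quantile) loss: for q = 0.5 it reduces to
# 0.5 * |y - f| averaged over the last axis, i.e. half the MAE, while other
# values of q penalize over- and under-prediction asymmetrically.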