# Hyper-parameter metadata ("list of dicts"): each entry records the parameter
# name, a human-readable note and its default value; some entries also carry a
# `range` of admissible values. Entries lacking optional fields are padded with
# NA when tabulated via data.table::rbindlist(fill = TRUE) below.
rlR.conf.lod = list(
  render = list(name = "render", note = "Whether to show rendering video or not", value = FALSE),
  log = list(name = "log", note = "Whether to log important information on drive", value = FALSE),
  console = list(name = "console", note = "Whether to enable debug info output to console", value = FALSE),
  agent.gamma = list(name = "agent.gamma", note = "The discount factor in reinforcement learning", value = 0.99),
  agent.flag.reset.net = list(name = "agent.flag.reset.net", note = "Whether to reset the neural network", value = TRUE), # FIXME: should this be configurable per agent?
  agent.lr.decay = list(name = "agent.lr.decay", note = "The decay factor of the learning rate at each step", value = exp(-0.001)), # decaying per step (not per episode) is better since some episodes can be very long
  agent.lr = list(name = "agent.lr", note = "learning rate for the agent", value = 1e-3),
  agent.lr.min = list(name = "agent.lr.min", note = "minimum learning rate", value = 0),
  agent.store.model = list(name = "agent.store.model", note = "whether to store the model of the agent or not", value = FALSE), # FIXME: exclude this
  agent.update.target.freq = list(name = "agent.update.target.freq", note = "How often should the target network be set", value = 2000L),
  agent.start.learn = list(name = "agent.start.learn", note = "after how many transitions should replay begin", value = 64L),
  agent.clip.td = list(name = "agent.clip.td", note = "whether to clip TD error", value = FALSE),
  policy.maxEpsilon = list(name = "policy.maxEpsilon", note = "The maximum epsilon exploration rate", value = 1.0),
  policy.minEpsilon = list(name = "policy.minEpsilon", note = "The minimum epsilon exploration rate", value = 0.01),
  policy.decay.rate = list(name = "policy.decay.rate", note = "the decay rate", value = 1.0),
  policy.decay.type = list(name = "policy.decay.type", note = "the way to decay epsilon, can be decay_geo, decay_exp, decay_linear", value = "decay_geo"),
  # NOTE(review): key spelling "aneal" (sic, for "anneal") kept for backward compatibility with callers.
  policy.aneal.steps = list(name = "policy.aneal.steps", note = "only valid when policy.decay.type = 'decay_linear'", value = 1e6),
  policy.softmax.magnify = list(name = "policy.softmax.magnify", value = 1), # TODO: add a `note`; presumably a scaling factor for the softmax policy -- confirm
  replay.batchsize = list(name = "replay.batchsize", note = "how many samples to take from replay memory each time", value = 64),
  replay.memname = list(name = "replay.memname", range = c("Uniform"), note = "The type of replay memory", value = "Uniform"),
  replay.mem.size = list(name = "replay.mem.size", note = "The size of the replay memory", value = 2e4),
  replay.epochs = list(name = "replay.epochs", note = "How many gradient descent epochs to carry out for one replay", value = 1L),
  replay.freq = list(name = "replay.freq", note = "how many steps to wait until one replay", value = 1L)
)
# Tabular views of the hyper-parameter metadata above.
# rbindlist(fill = TRUE) pads entries that lack optional fields
# (e.g. `range`, `note`) with NA instead of erroring.
rlR.conf.dt = data.table::rbindlist(rlR.conf.lod, fill = TRUE)
rlR.conf.df = as.data.frame(rlR.conf.dt)
# Default hyper-parameter values, keyed by parameter name: extract only the
# `value` field from each metadata entry in rlR.conf.lod.
rlR.conf.default = lapply(rlR.conf.lod, function(entry) entry[["value"]])
#' @title listAvailConf
#' @description List the available hyper-parameter names together with their
#'   notes and default values.
#' @return A data.table with one row per hyper-parameter (built from
#'   \code{rlR.conf.lod} at load time).
#' @export
listAvailConf = function() {
rlR.conf.dt
}
# Default configuration for the Actor-Critic agent.
# Returns an RLConf object. invisible() keeps the original return visibility:
# the old `conf = RLConf$new(...)` last expression also returned invisibly,
# but assigned to a local that was never used.
rlR.conf.AgentActorCritic = function() {
invisible(RLConf$new(
render = FALSE,
log = FALSE,
agent.lr = 1e-2,
agent.gamma = 0.9,
agent.lr.decay = 1,
console = TRUE,
policy.name = "Prob",
policy.maxEpsilon = 0,
policy.minEpsilon = 0,
replay.epochs = 1L,
replay.memname = "Latest"
#agent.nn.arch.actor = list(nhidden = 64, act1 = "tanh", act2 = "softmax", loss = "categorical_crossentropy", lr = 1e-4, kernel_regularizer = "regularizer_l2(l=0.0001)", bias_regularizer = "regularizer_l2(l=1e-4)", decay = 0.9, clipnorm = 5),
#agent.nn.arch.critic = list(nhidden = 64, act1 = "tanh", act2 = "linear", loss = "mse", lr =1e-4, kernel_regularizer = "regularizer_l2(l=0.0001)", bias_regularizer = "regularizer_l2(l=1e-4)", decay = 0.9, clipnorm = 5)
))
}
# Default configuration for the DDPG agent.
# Returns an RLConf object. invisible() keeps the original return visibility:
# the old `conf = RLConf$new(...)` last expression also returned invisibly,
# but assigned to a local that was never used.
rlR.conf.AgentDDPG = function() {
invisible(RLConf$new(
render = FALSE,
log = FALSE,
agent.lr = 1e-2,
agent.gamma = 0.9,
agent.lr.decay = 1,
console = TRUE,
policy.name = "Prob",
policy.maxEpsilon = 0,
policy.minEpsilon = 0,
replay.batchsize = 32, # saves a lot of time compared to when batchsize = 64
replay.epochs = 1L,
replay.memname = "Uniform"
))
}
#' @title get Default Configuration according to agent name
#' @description Look up and invoke the per-agent default configuration
#'   factory \code{rlR.conf.<agent_name>}.
#' @param agent_name The name for Agent, e.g. "AgentDQN".
#' @return The configuration object produced by the agent's factory function.
#' @export
#' @examples
#' conf = rlR::getDefaultConf("AgentDQN")
getDefaultConf = function(agent_name) {
fname = paste0("rlR.conf.", agent_name)
# Fail with an informative message instead of get()'s cryptic
# "object '...' not found" when no factory exists for this agent.
if (!exists(fname, mode = "function")) {
stop(sprintf("no default configuration available for agent '%s'", agent_name), call. = FALSE)
}
get(fname, mode = "function")()
}
#' @title show Default Configuration
#' @description List defaults hyper-parameters in dataframe
#' @export
#' @examples
#' df = rlR::showDefaultConf()
showDefaultConf = function() {
# Flatten the defaults list to a named vector; the names become row names
# of the one-column data frame. Local variable renamed so it no longer
# shadows the module-level `rlR.conf.df`.
flat = unlist(rlR.conf.default)
tab = data.frame(flat)
colnames(tab) = NULL  # suppress the column header when printed
tab
}
# Internal constants used for logging and episode monitoring
# (not exposed through the hyper-parameter table above).
rlR.conf4log = list(
policy.epi_wait_ini = 5L, # initially the performance should increase
policy.epi_wait_middle = 25L,
policy.epi_wait_expl = 40L,
replay.mem.dt = FALSE,
replay.mem.laplace.smoother = 0.001,
resultTbPath = "Perf.RData", # presumably the file performance results are saved to -- confirm against caller
LOGGERNAMENN = "nn.logger", # logger name for the neural-network logger
LOGGERNAMERL = "rl.logger", # logger name for the RL logger
NNSufix = "nn.log", # log-file suffix for the neural-network logger
RLSufix = "rl.log.R" # log-file suffix for the RL logger
)
# Per-agent "brain" dictionaries: each factory returns a named list mapping
# network roles (value_fun / policy_fun) to network constructor functions.
# The make*Net* constructors are defined elsewhere in the package.
# DQN, FDQN and DDQN share a single value network constructor.
agent.brain.dict.AgentDQN = agent.brain.dict.AgentFDQN = agent.brain.dict.AgentDDQN = function() list(value_fun = makeValueNet.DQN)
agent.brain.dict.AgentPG = function() list(policy_fun = makePolicyNet)
agent.brain.dict.AgentPGBaseline = function() list(policy_fun = makePolicyNet, value_fun = makeValueNet)
agent.brain.dict.AgentActorCritic = function() list(policy_fun = makePolicyNet2, value_fun = makeValueNet2)
# NOTE(review): the two lines below are rdrr.io page boilerplate accidentally
# captured with the source; commented out so the file parses as valid R.
# "Add the following code to your website."
# "For more information on customizing the embed code, read Embedding Snippets."