## ----setup, include=FALSE------------------------------------------------
knitr::opts_chunk$set(message = TRUE, eval = TRUE, collapse = TRUE, comment = "#>")
## ------------------------------------------------------------------------
set.seed(12)
library(reinforcelearn)
## ------------------------------------------------------------------------
env = makeEnvironment("gridworld", shape = c(3, 3), goal.states = 0L)
agent = makeAgent(policy = "softmax", val.fun = "table", algorithm = "qlearning")
## ------------------------------------------------------------------------
interact(env, agent, n.episodes = 5L)
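## Optional sketch (not part of the original chunk): capture interact()'s return
## value and inspect it with str(); no particular structure of the returned object
## is assumed here.
# res = interact(env, agent, n.episodes = 5L)
# str(res)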
## ------------------------------------------------------------------------
getValueFunction(agent)
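## Optional sketch: reshape the per-state values of the learned action value table
## into the 3 x 3 grid layout, mirroring the experience replay chunk at the end of
## this script (assumes the same row-per-state layout used there).
# matrix(getStateValues(getValueFunction(agent)), ncol = 3L)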
## ------------------------------------------------------------------------
# Uniform random policy
makePolicy("random")
# Epsilon-greedy policy
makePolicy("epsilon.greedy", epsilon = 0.2)
# Softmax policy
makePolicy("softmax")
## ------------------------------------------------------------------------
makeValueFunction("table", n.states = 9L, n.actions = 4L)
## ---- eval = FALSE-------------------------------------------------------
# library(keras)
# model = keras_model_sequential() %>%
#   layer_dense(units = 10L, input_shape = 4L, activation = "linear") %>%
#   compile(optimizer = optimizer_sgd(lr = 0.1), loss = "mae")
# makeValueFunction("neural.network", model)
## ------------------------------------------------------------------------
makeAlgorithm("qlearning")
## ------------------------------------------------------------------------
policy = makePolicy("epsilon.greedy", epsilon = 0.2)
val.fun = makeValueFunction("table", n.states = 9L, n.actions = 4L)
algorithm = makeAlgorithm("qlearning")
agent = makeAgent(policy, val.fun, algorithm)
## ------------------------------------------------------------------------
agent = makeAgent("epsilon.greedy", "table", "qlearning",
policy.args = list(epsilon = 0.2))
## ------------------------------------------------------------------------
env = makeEnvironment("gridworld", shape = c(3, 2), goal.states = 0L)
agent = makeAgent("random")
interact(env, agent, n.steps = 3L, visualize = TRUE)
## ------------------------------------------------------------------------
env = makeEnvironment("gridworld", shape = c(4, 4), goal.states = 0L,
initial.state = 15L)
agent = makeAgent("random")
for (i in 1:3L) {
  ## Uncomment the next line to wait for an enter press before taking the next action.
  # invisible(readline(prompt = "Press [enter] to take the next action"))
  interact(env, agent, n.steps = 1L, learn = FALSE, visualize = TRUE)
}
## ------------------------------------------------------------------------
(memory = makeReplayMemory(size = 2L, batch.size = 1L))
agent = makeAgent("random", replay.memory = memory)
interact(env, agent, n.steps = 2L, visualize = TRUE)
getReplayMemory(agent)
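## Note (assumption): with size = 2L the replay memory can hold at most the two most
## recently observed transitions, so after two steps of interaction it should be full.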
## ---- message = FALSE----------------------------------------------------
env = makeEnvironment("gridworld", shape = c(4, 4), goal.states = c(0, 15))
policy = makePolicy("epsilon.greedy", epsilon = 0.1)
memory = makeReplayMemory(size = 100L, batch.size = 20L)
agent = makeAgent(policy, "table", "qlearning", replay.memory = memory)
for (i in 1:100) {
  # 20 steps of interaction with learning switched off, then a single step
  # with learning enabled.
  interact(env, agent, n.steps = 20L, learn = FALSE)
  interact(env, agent, n.steps = 1L, learn = TRUE)
}
action.vals = getValueFunction(agent)
matrix(getStateValues(action.vals), ncol = 4L)
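## Optional follow-up sketch: the greedy action in each state of the 4 x 4 grid,
## taken from the learned action values (assumes rows index states and columns
## index actions, consistent with the reshaping above).
# matrix(max.col(action.vals), ncol = 4L)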