```r
knitr::opts_chunk$set(
  eval = TRUE, echo = TRUE, cache = FALSE,
  message = FALSE, comment = "#>", collapse = TRUE,
  size = "footnotesize", fig.path = "Images/",
  fig.asp = 0.618, fig.width = 4.5, fig.align = "center"
)
```
We use the windy gridworld environment:

```r
library(reinforcelearn)

# Create the windy gridworld environment.
env = makeEnvironment("windy.gridworld")
```
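Before training an agent you can step through the environment by hand. A minimal sketch, assuming the `reset()` and `step()` methods and the `state`/`reward` fields that `makeEnvironment` objects expose:

```r
# Reset to the start state, then take one action
# (actions are integers starting at 0).
env$reset()
env$step(0L)

# Current state and last reward are stored on the environment object.
env$state
env$reward
```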
The optimal policy reaches the goal in 15 steps; this optimum is drawn as a dashed horizontal line in the plots below.
We start with tabular Q-Learning and an epsilon-greedy behaviour policy:

```r
policy = makePolicy("epsilon.greedy", epsilon = 0.1)
agent = makeAgent(policy, "table", "qlearning")

res = interact(env, agent, n.episodes = 500L)
```
```r
library(ggplot2)

df = data.frame(episode = seq_along(res$steps), steps = res$steps)
ggplot(df, aes(episode, steps)) +
  geom_point(alpha = 0.2) +
  theme_bw() +
  labs(title = "Q-Learning",
    x = "Episode", y = "Steps per episode") +
  coord_cartesian(ylim = c(0, 200)) +
  geom_smooth(se = FALSE, size = 1, col = "brown1") +
  geom_hline(yintercept = 15, size = 1, col = "black", lty = 2)
```
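The `steps` element returned by `interact()` also allows a quick numeric check of convergence:

```r
# Best episode so far and average over the last 50 episodes:
# both should approach the 15-step optimum.
min(res$steps)
mean(tail(res$steps, 50))
```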
Q-Learning can be combined with eligibility traces, here accumulating traces with `lambda = 0.8`:

```r
# Reset the environment before the next experiment.
env$resetEverything()

policy = makePolicy("epsilon.greedy", epsilon = 0.1)
alg = makeAlgorithm("qlearning", lambda = 0.8, traces = "accumulate")
agent = makeAgent(policy, "table", alg)

res = interact(env, agent, n.episodes = 500L)
```
```r
df = data.frame(episode = seq_along(res$steps), steps = res$steps)
ggplot(df, aes(episode, steps)) +
  geom_point(alpha = 0.2) +
  theme_bw() +
  labs(title = "Q-Learning", subtitle = "Eligibility traces",
    x = "Episode", y = "Steps per episode") +
  coord_cartesian(ylim = c(0, 200)) +
  geom_smooth(se = FALSE, size = 1, col = "brown1") +
  geom_hline(yintercept = 15, size = 1, col = "black", lty = 2)
```
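Replacing traces are a common alternative to accumulating traces. Assuming `makeAlgorithm()` also accepts `traces = "replace"` (an assumption here; check its documentation), the switch is a single argument:

```r
# Hypothetical variant with replacing traces instead of
# accumulating traces (the argument value is an assumption).
alg = makeAlgorithm("qlearning", lambda = 0.8, traces = "replace")
```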
Another extension is experience replay: transitions are stored in a memory and replayed in minibatches during learning. Here both the memory size and the batch size are 10:

```r
env$resetEverything()

policy = makePolicy("epsilon.greedy", epsilon = 0.1)
mem = makeReplayMemory(size = 10L, batch.size = 10L)
agent = makeAgent(policy, "table", "qlearning", replay.memory = mem)

res = interact(env, agent, n.episodes = 500L)
```
```r
df = data.frame(episode = seq_along(res$steps), steps = res$steps)
ggplot(df, aes(episode, steps)) +
  geom_point(alpha = 0.2) +
  theme_bw() +
  labs(title = "Q-Learning", subtitle = "Experience replay",
    x = "Episode", y = "Steps per episode") +
  coord_cartesian(ylim = c(0, 200)) +
  geom_smooth(se = FALSE, size = 1, col = "brown1") +
  geom_hline(yintercept = 15, size = 1, col = "black", lty = 2)
```
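Memory size and batch size are the main tuning knobs of the replay memory; larger values reuse more past experience per update. A sketch with made-up, untuned values:

```r
# Hypothetical larger buffer: keep the last 1000 transitions and
# replay 32 of them per update.
mem.large = makeReplayMemory(size = 1000L, batch.size = 32L)
```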
Instead of a table, the action value function can also be represented by a neural network, here a keras model with a single linear layer (equivalent to linear function approximation on one-hot encoded states):

```r
env$resetEverything()

library(keras)

# One dense linear layer mapping a one-hot encoded state to one
# Q-value per action.
model = keras_model_sequential() %>%
  layer_dense(units = env$n.actions, activation = "linear",
    input_shape = c(env$n.states),
    kernel_initializer = initializer_zeros(), use_bias = FALSE) %>%
  compile(loss = "mae", optimizer = optimizer_sgd(lr = 1))

mem = makeReplayMemory(size = 2L, batch.size = 2L)
val = makeValueFunction("neural.network", model = model)
policy = makePolicy("epsilon.greedy", epsilon = 0.1)

# States are integers, so one-hot encode them before feeding
# them to the network.
preprocess = function(x) to_categorical(x, num_classes = env$n.states)

agent = makeAgent(policy, val, "qlearning",
  preprocess = preprocess, replay.memory = mem)

res = interact(env, agent, n.episodes = 500L)
```
```r
df = data.frame(episode = seq_along(res$steps), steps = res$steps)
ggplot(df, aes(episode, steps)) +
  geom_point(alpha = 0.2) +
  theme_bw() +
  labs(title = "Q-Learning",
    subtitle = "Experience replay and neural network",
    x = "Episode", y = "Steps per episode") +
  coord_cartesian(ylim = c(0, 200)) +
  geom_smooth(se = FALSE, size = 1, col = "brown1") +
  geom_hline(yintercept = 15, size = 1, col = "black", lty = 2)
```
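To see exactly what the network receives as input, call the preprocessing function defined above directly; `to_categorical()` returns a one-hot matrix:

```r
# One-hot encoding of state 0: a 1 x env$n.states matrix with a
# single 1 in the first column.
preprocess(0L)
```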