# Gridworld environments for reinforcement learning.
#' Gridworld
#'
#' Creates gridworld environments.
#'
#' A gridworld is an episodic navigation task, the goal is to get from start state to goal state.
#'
#' Possible actions include going left, right, up or down. If `diagonal.moves = TRUE` diagonal
#' moves are also possible, leftup, leftdown, rightup and rightdown.
#'
#' When stepping into a cliff state you get a reward of `reward.cliff`,
#' usually a high negative reward and transition to a state specified by `cliff.transition.states`.
#'
#' In each column a deterministic wind specified via `wind` pushes you up a specific number of
#' grid cells (for the next action).
#'
#' A stochastic gridworld is a gridworld where with probability `stochasticity` the next state
#' is chosen at random from all neighbor states independent of the actual action.
#'
#' If an action would take you off the grid, the new state is the nearest cell inside the grid.
#' For each step you get a reward of `reward.step`, until you reach a goal state,
#' then the episode is done.
#'
#' States are enumerated row-wise and numbering starts with 0.
#' Here is an example 4x4 grid:
#' \tabular{rrrr}{
#' 0 \tab 1 \tab 2 \tab 3 \cr
#' 4 \tab 5 \tab 6 \tab 7 \cr
#' 8 \tab 9 \tab 10 \tab 11 \cr
#' 12 \tab 13 \tab 14 \tab 15 \cr
#' }
#' So a board position could look like this (G: goal state, x: current state, C: cliff state):
#' \tabular{rrrr}{
#' G \tab o \tab o \tab o \cr
#' o \tab o \tab o \tab o \cr
#' o \tab x \tab o \tab o \cr
#' o \tab o \tab o \tab C \cr
#' }
#'
#' @section Usage:
#' `makeEnvironment("gridworld", shape = NULL, goal.states = NULL, cliff.states = NULL,
#' reward.step = -1, reward.cliff = -100, diagonal.moves = FALSE, wind = rep(0, shape[2]),
#' cliff.transition.states = NULL, cliff.transition.done = FALSE, stochasticity = 0, ...)`
#'
#' @param shape \[`integer(2)`] \cr
#' Shape of the gridworld (number of rows x number of columns).
#' @param goal.states \[`integer`] \cr
#' Goal states in the gridworld.
#' @param cliff.states \[`integer`] \cr
#' Cliff states in the gridworld.
#' @param reward.step \[`integer(1)`] \cr
#' Reward for taking a step.
#' @param cliff.transition.states \[`integer`] \cr
#' States to which the environment transitions if stepping into the cliff.
#' If it is a vector, all states will have equal probability.
#' Only used when `cliff.transition.done == FALSE`,
#' else specify the `initial.state` argument.
#' @param reward.cliff \[`integer(1)`] \cr
#' Reward for taking a step in the cliff state.
#' @param diagonal.moves \[`logical(1)`] \cr
#' Should diagonal moves be allowed?
#' @param wind \[`integer`] \cr
#' Strength of the upward wind in each cell.
#' @param cliff.transition.done \[`logical(1)`] \cr
#' Should the episode end after stepping into the cliff?
#' @param stochasticity \[`numeric(1)`] \cr
#' Probability of random transition to any of the neighboring states when taking any action.
#' @param ... \[`any`] \cr Arguments passed on to [makeEnvironment].
#'
#' @name Gridworld
#' @md
#' @inheritSection Environment Methods
#' @export
#'
#' @examples
#' # Gridworld Environment (Sutton & Barto Example 4.1)
#' env1 = makeEnvironment("gridworld", shape = c(4L, 4L), goal.states = 0L,
#' initial.state = 15L)
#' env1$reset()
#' env1$visualize()
#' env1$step(0L)
#' env1$visualize()
#'
#' # Windy Gridworld (Sutton & Barto Example 6.5)
#' env2 = makeEnvironment("gridworld", shape = c(7, 10), goal.states = 37L,
#' reward.step = -1, wind = c(0, 0, 0, 1, 1, 1, 2, 2, 1, 0),
#' initial.state = 30L)
#'
#' # Cliff Walking (Sutton & Barto Example 6.6)
#' env3 = makeEnvironment("gridworld", shape = c(4, 12), goal.states = 47L,
#' cliff.states = 37:46, reward.step = -1, reward.cliff = -100,
#' cliff.transition.states = 36L, initial.state = 36L)
NULL
Gridworld = R6::R6Class("Gridworld",
  inherit = MdpEnvironment,
  public = list(

    # Construct a gridworld MDP (see the roxygen block above for the
    # user-facing description of every argument). Builds the complete
    # transition array and reward matrix and passes them to MdpEnvironment.
    initialize = function(shape = NULL, goal.states = NULL, cliff.states = NULL,
      reward.step = -1, reward.cliff = -100, diagonal.moves = FALSE, wind = rep(0, shape[2]),
      cliff.transition.states = NULL, cliff.transition.done = FALSE, stochasticity = 0, ...) {

      # ---- argument checking ----
      checkmate::assertIntegerish(shape, len = 2)
      if (prod(shape) <= 1) {
        stop("A gridworld with only one state is not allowed!")
      }
      checkmate::assertIntegerish(goal.states)
      # User-facing states are numbered row-wise starting at 0; shift to
      # 1-based indices for internal matrix indexing.
      goal.states = goal.states + 1
      checkmate::assertIntegerish(cliff.states, null.ok = TRUE)
      if (!is.null(cliff.states)) {
        cliff.states = cliff.states + 1
      }
      checkmate::assertIntegerish(cliff.transition.states, null.ok = TRUE)
      if (!is.null(cliff.transition.states)) {
        cliff.transition.states = cliff.transition.states + 1
      }
      # BUGFIX: the original mixed the vectorized `|` with the scalar `||`;
      # each any() yields a scalar, so use `||` throughout.
      if (any(goal.states > prod(shape)) || any(cliff.states > prod(shape)) ||
          any(cliff.transition.states > prod(shape))) {
        stop("All states must be inside the grid! States are numerated row-wise starting with 0, check Details!")
      }
      checkmate::assertIntegerish(wind, len = shape[2])
      checkmate::assertNumber(reward.step)
      checkmate::assertNumber(reward.cliff)
      checkmate::assertFlag(diagonal.moves)
      checkmate::assertFlag(cliff.transition.done)
      checkmate::assertNumber(stochasticity, lower = 0, upper = 1)

      n.states = prod(shape)
      states = seq_len(n.states)
      n.col = shape[2]

      # ---- action space ----
      if (diagonal.moves) {
        n.actions = 8
        action.names = c("left" = 0L, "right" = 1L, "up" = 2L, "down" = 3L,
          "leftup" = 4L, "leftdown" = 5L, "rightup" = 6L, "rightdown" = 7L)
      } else {
        n.actions = 4
        action.names = c("left" = 0L, "right" = 1L, "up" = 2L, "down" = 3L)
      }

      rewards = makeRewardMatrix(reward.step, reward.cliff, n.states, n.actions,
        cliff.states, goal.states)

      # Transitions are always built for all 8 moves and cut down to
      # n.actions at the very end.
      transitions = array(matrix(0, nrow = n.states, ncol = n.states),
        dim = c(n.states, n.states, 8))
      border.states = list(left = seq(1, n.states - n.col + 1, n.col),
        right = seq(n.col, n.states, n.col),
        up = seq(1, n.col),
        down = seq(n.states - n.col + 1, n.states))
      non.terminal.states = setdiff(states, c(goal.states, cliff.states))

      # Dummy objects whose class name drives S3 dispatch of go() to the
      # matching go.<action> method.
      actions = list("left", "right", "up", "down", "leftup",
        "leftdown", "rightup", "rightdown")
      actions = lapply(actions, function(x) {class(x) = x; x})

      m.cliff = NULL
      if (cliff.transition.done) {
        # The cliff ends the episode: treat cliff states like goal states.
        goal.states = c(goal.states, cliff.states)
      } else {
        if (!is.null(cliff.states)) {
          if (!is.null(cliff.transition.states)) {
            # Stepping into a cliff state teleports uniformly to one of the
            # cliff.transition.states.
            cliff.pairs = as.matrix(expand.grid(cliff.states, cliff.transition.states))
            cliff.prob = 1 / length(cliff.transition.states)
            m.cliff = cbind(cliff.pairs, cliff.prob)
          } else {
            # No explicit cliff transition: cliff states behave like
            # ordinary non-terminal states.
            non.terminal.states = c(non.terminal.states, cliff.states)
          }
        }
      }

      # NOTE: from here on n.states is the number of NON-TERMINAL states.
      n.states = length(non.terminal.states)
      # One column per action: deterministic successor of every
      # non-terminal state under that move (before wind is applied).
      new.states = vapply(actions, go, states = non.terminal.states, border.states = border.states,
        n.col = n.col, FUN.VALUE = numeric(n.states))
      if (!is.matrix(new.states)) {
        # Only one non-terminal state: vapply returned a length-8 vector.
        # BUGFIX: the original called as.matrix(new.states, nrow = 1), but
        # as.matrix() has no nrow argument (it was silently ignored),
        # yielding an 8x1 matrix instead of the intended 1x8
        # (states x actions) layout.
        new.states = matrix(new.states, nrow = 1)
      }

      # ---- stochastic transitions: with probability `stochasticity` a
      # random one of the 8 neighbor moves is taken ----
      m.stoch = matrix(0, nrow = n.states * 8, ncol = 3)
      m.stoch[, 1] = rep(non.terminal.states, 8)
      m.stoch[, 2] = c(new.states)
      m.stoch[, 3] = stochasticity / 8

      # Terminal states are absorbing (self-transition with probability 1).
      m.goal = matrix(c(goal.states, goal.states, rep(1, length(goal.states))), ncol = 3)

      m = rbind(m.cliff, m.goal, m.stoch)
      # Replicate each (row, col, prob) triple once per action 1..8.
      m = m[rep(seq_len(nrow(m)), each = 8), ]
      m = cbind(m, action = rep(1:8, nrow(m) / 8))

      # ---- deterministic transitions: intended move plus wind push,
      # clamped back into the grid ----
      new.states = c(apply(new.states, 2, applyWind, states = non.terminal.states, n.col = n.col, wind = wind))
      new.states = getIntoBounds(new.states, n.col = n.col)
      m2 = matrix(c(rep(non.terminal.states, 8), new.states, rep(1 - stochasticity, length(new.states)),
        rep(1:8, each = length(non.terminal.states))), ncol = 4)
      m = rbind(m, m2)

      colnames(m) = c("row", "col", "prob", "action")
      # Several moves can land in the same cell (e.g. at borders): sum the
      # probabilities per (row, col, action) before writing them.
      m = as.matrix(aggregate(prob ~ row + col + action, data = as.data.frame(m), FUN = "sum"))
      transitions[m[, c("row", "col", "action")]] = m[, "prob"]
      transitions = transitions[, , seq_len(n.actions)]

      # NOTE(review): cat() prints the grid and returns NULL, so message()
      # only emits the trailing newline — odd but functional as-is.
      visualize = function(env) {
        message(cat(visualizeGridworld(shape, env$state), "\n"))
      }

      # fixme: make action.names and visualize overwriteable
      super$initialize(transitions = transitions, rewards = rewards,
        visualize = visualize, action.names = action.names, ...)
    }
  )
)
# Render the grid as a single character string: every cell is drawn as "-"
# (separated by spaces), rows are joined with newlines, and the agent's
# current cell is marked with "o".
#
# shape:         integer(2), number of rows and columns.
# current.state: 0-based state index of the agent.
# Returns the picture as a string (not printed).
visualizeGridworld = function(shape, current.state) {
  row.str = paste(rep("-", shape[2]), collapse = " ")
  # The first row carries a leading blank; further rows are joined " \n ".
  grid.vis = paste0(" ", row.str)
  if (shape[1] > 1L) {
    grid.vis = paste(c(grid.vis, rep(row.str, shape[1] - 1L)), collapse = " \n ")
  }
  # Locate the (current.state + 1)-th "-" and overwrite it with "o".
  mark.at = gregexpr("-", grid.vis, fixed = TRUE)[[1]][current.state + 1]
  substr(grid.vis, mark.at, mark.at) = "o"
  grid.vis
}
# Build the (n.states x n.actions) immediate-reward matrix: reward.step
# everywhere, reward.cliff in cliff-state rows, 0 in goal-state rows.
# The reward depends only on the state, so each row is constant.
makeRewardMatrix = function(reward.step, reward.cliff, n.states, n.actions,
  cliff.states, goal.states) {
  per.state = rep(reward.step, n.states)
  per.state[cliff.states] = reward.cliff
  per.state[goal.states] = 0
  # Column-wise recycling replicates the per-state rewards across actions.
  matrix(per.state, nrow = n.states, ncol = n.actions)
}
# S3 dispatcher for grid moves: `x` is a dummy object whose class encodes
# the action name ("left", "rightup", ...), so calls dispatch to the
# matching go.<action> method below.
go = function(x, ...) {
UseMethod("go", x)
}
#' @export
go.left = function(x, states, border.states, ...) {
  # Move one cell left (index - 1); states on the left border stay put.
  at.border = states %in% border.states[["left"]]
  moved = states - 1
  moved[at.border] = states[at.border]
  moved
}
#' @export
go.right = function(x, states, border.states, ...) {
  # Move one cell right (index + 1); states on the right border stay put.
  at.border = states %in% border.states[["right"]]
  moved = states + 1
  moved[at.border] = states[at.border]
  moved
}
#' @export
go.up = function(x, states, border.states, n.col) {
  # Move one row up (index - n.col); states in the top row stay put.
  at.border = states %in% border.states[["up"]]
  moved = states - n.col
  moved[at.border] = states[at.border]
  moved
}
#' @export
go.down = function(x, states, border.states, n.col) {
  # Move one row down (index + n.col); states in the bottom row stay put.
  at.border = states %in% border.states[["down"]]
  moved = states + n.col
  moved[at.border] = states[at.border]
  moved
}
#' @export
go.leftup = function(x, states, border.states, n.col) {
  # Diagonal move: first one row up, then one cell left; each leg is
  # blocked independently by its own border.
  moved.up = go.up(x, states, border.states, n.col)
  go.left(x, moved.up, border.states)
}
#' @export
go.leftdown = function(x, states, border.states, n.col) {
  # Diagonal move: first one row down, then one cell left; each leg is
  # blocked independently by its own border.
  moved.down = go.down(x, states, border.states, n.col)
  go.left(x, moved.down, border.states)
}
#' @export
go.rightup = function(x, states, border.states, n.col) {
  # Diagonal move: first one row up, then one cell right; each leg is
  # blocked independently by its own border.
  moved.up = go.up(x, states, border.states, n.col)
  go.right(x, moved.up, border.states)
}
#' @export
go.rightdown = function(x, states, border.states, n.col) {
  # Diagonal move: first one row down, then one cell right; each leg is
  # blocked independently by its own border.
  moved.down = go.down(x, states, border.states, n.col)
  go.right(x, moved.down, border.states)
}
# Shift already-moved states upward by the per-column wind strength. The
# wind column is taken from the ORIGINAL state (where the action started),
# and going one row up subtracts n.col from the row-wise state index.
applyWind = function(states, new.states, wind, n.col) {
  # 1-based column index of each original state (row-wise numbering).
  col.idx = ((states - 1) %% n.col) + 1
  new.states - wind[col.idx] * n.col
}
# Clamp states that the wind pushed above the top of the grid back inside:
# any non-positive index is moved down one row (+ n.col) until it is a
# valid (>= 1) state again.
getIntoBounds = function(new.states, n.col) {
  repeat {
    off.grid = new.states <= 0
    if (!any(off.grid)) {
      break
    }
    new.states[off.grid] = new.states[off.grid] + n.col
  }
  new.states
}
#' Cliff Walking
#'
#' Gridworld environment for reinforcement learning from Sutton & Barto (2017).
#' Grid of shape 4x12 with a goal state in the bottom right of the grid.
#' Episodes start in the lower left state. Possible actions include going left, right, up and down.
#' Some states in the lower part of the grid are a cliff,
#' so taking a step into the cliff will yield a high negative reward of -100 and move the agent
#' back to the starting state.
#' Otherwise rewards are -1; for the goal state the reward is 0.
#'
#' This is the gridworld (goal state denoted G, cliff states denoted C, start state denoted S):
#' \tabular{rrrrrrrrrrrr}{
#' . \tab . \tab . \tab . \tab . \tab . \tab . \tab . \tab . \tab . \tab . \tab . \cr
#' . \tab . \tab . \tab . \tab . \tab . \tab . \tab . \tab . \tab . \tab . \tab . \cr
#' . \tab . \tab . \tab . \tab . \tab . \tab . \tab . \tab . \tab . \tab . \tab . \cr
#' S \tab C \tab C \tab C \tab C \tab C \tab C \tab C \tab C \tab C \tab C \tab G \cr
#' }
#'
#' @section Usage:
#' `makeEnvironment("cliff.walking", ...)`
#'
#' @param ... \[`any`] \cr Arguments passed on to [makeEnvironment].
#'
#' @md
#'
#' @name CliffWalking
#' @aliases cliff.walking
#'
#' @references Sutton and Barto (Book draft 2017): Reinforcement Learning: An Introduction Example 6.6
#' @export
#'
#' @inheritSection Environment Methods
#' @examples
#' env = makeEnvironment("cliff.walking")
NULL
CliffWalking = R6::R6Class("CliffWalking",
  inherit = Gridworld,
  public = list(
    # Fixed 4x12 cliff-walking gridworld (Sutton & Barto, Example 6.6):
    # episodes start in state 36, the goal is state 47, and stepping into a
    # cliff cell (states 37..46) gives -100 and teleports back to the start.
    # Extra arguments in `...` are forwarded to the Gridworld constructor.
    initialize = function(...) {
      super$initialize(shape = c(4, 12), initial.state = 36,
        goal.states = 47, cliff.states = 37:46,
        cliff.transition.states = 36,
        reward.step = -1, reward.cliff = -100, ...)
    }
  )
)
#' Windy Gridworld
#'
#' Windy Gridworld problem for reinforcement learning. Actions include
#' going left, right, up and down. In each column the wind pushes you up a
#' specific number of steps (for the next action). If an action would
#' take you off the grid, you remain in the previous state. For each step you
#' get a reward of -1, until you reach a terminal state.
#'
#' This is the gridworld (goal state denoted G, start state denoted S).
#' The last row specifies the upward wind in each column.
#' \tabular{rrrrrrrrrr}{
#' . \tab . \tab . \tab . \tab . \tab . \tab . \tab . \tab . \tab . \cr
#' . \tab . \tab . \tab . \tab . \tab . \tab . \tab . \tab . \tab . \cr
#' . \tab . \tab . \tab . \tab . \tab . \tab . \tab . \tab . \tab . \cr
#' S \tab . \tab . \tab . \tab . \tab . \tab . \tab G \tab . \tab . \cr
#' . \tab . \tab . \tab . \tab . \tab . \tab . \tab . \tab . \tab . \cr
#' . \tab . \tab . \tab . \tab . \tab . \tab . \tab . \tab . \tab . \cr
#' . \tab . \tab . \tab . \tab . \tab . \tab . \tab . \tab . \tab . \cr
#' . \tab . \tab . \tab . \tab . \tab . \tab . \tab . \tab . \tab . \cr
#' . \tab . \tab . \tab . \tab . \tab . \tab . \tab . \tab . \tab . \cr
#' . \tab . \tab . \tab . \tab . \tab . \tab . \tab . \tab . \tab . \cr
#' 0 \tab 0 \tab 0 \tab 1 \tab 1 \tab 1 \tab 2 \tab 2 \tab 1 \tab 0 \cr
#' }
#'
#' @section Usage:
#' `makeEnvironment("windy.gridworld", ...)`
#'
#' @param ... \[`any`] \cr Arguments passed on to [makeEnvironment].
#'
#' @md
#'
#' @name WindyGridworld
#' @aliases windy.gridworld
#'
#' @references Sutton and Barto (Book draft 2017): Reinforcement Learning: An Introduction Example 6.5
#' @export
#'
#' @inheritSection Environment Methods
#' @examples
#' env = makeEnvironment("windy.gridworld")
NULL
WindyGridworld = R6::R6Class("WindyGridworld",
  inherit = Gridworld,
  public = list(
    # Fixed 7x10 windy gridworld (Sutton & Barto, Example 6.5): episodes
    # start in state 30, the goal is state 37, and each column's upward wind
    # strength is given by the `wind` vector.
    # Extra arguments in `...` are forwarded to the Gridworld constructor.
    initialize = function(...) {
      super$initialize(shape = c(7, 10), initial.state = 30,
        goal.states = 37, reward.step = -1,
        wind = c(0, 0, 0, 1, 1, 1, 2, 2, 1, 0), ...)
    }
  )
)
# fixme: add character actions (e.g. "left")
# (removed: rdrr.io page footer text accidentally pasted into the source)