#' Create reinforcement learning environment.
#'
#' This function creates an environment for reinforcement learning.
#'
#' Use the `step` method to interact with the environment.
#'
#' Note that all states and actions are numbered starting with 0!
#'
#' For a detailed explanation and more examples
#' have a look at the vignette "How to create an environment?".
#'
#' @param class \[`character(1)`] \cr
#' Class of environment. One of `c("custom", "mdp", "gym", "gridworld",
#' "windy.gridworld", "cliff.walking", "mountain.car",
#' "mountain.car.continuous")`.
#' @param discount \[`numeric(1)` in `[0, 1]`] \cr Discount factor (default 1).
#' @param ... \[`any`] \cr Arguments passed on to the specific environment.
#'
#' @md
#'
#' @return R6 object of class Environment.
#'
#' @section Methods:
#' * `$step(action)` \cr
#' Take action in environment.
#' Returns a list with `state`, `reward`, `done`.
#' * `$reset()` \cr
#' Resets the `done` flag of the environment and returns an initial state.
#' Useful when starting a new episode.
#' * `$visualize()` \cr
#' Visualizes the environment (if there is a visualization function).
#'
#' @section Environments:
#' * [Environment]
#' * [GymEnvironment]
#' * [MdpEnvironment]
#' * [Gridworld]
#' * [MountainCar]
#'
#' @export
#' @examples
#' step = function(self, action) {
#' state = list(mean = action + rnorm(1), sd = runif(1))
#' reward = rnorm(1, state[[1]], state[[2]])
#' done = FALSE
#' list(state, reward, done)
#' }
#'
#' reset = function(self) {
#' state = list(mean = 0, sd = 1)
#' state
#' }
#'
#' env = makeEnvironment(step = step, reset = reset, discount = 0.9)
#' env$reset()
#' env$step(100)
#'
#' # Create a Markov Decision Process.
#' P = array(0, c(2, 2, 2))
#' P[, , 1] = matrix(c(0.5, 0.5, 0, 1), 2, 2, byrow = TRUE)
#' P[, , 2] = matrix(c(0, 1, 0, 1), 2, 2, byrow = TRUE)
#' R = matrix(c(5, 10, -1, 2), 2, 2, byrow = TRUE)
#' env = makeEnvironment("mdp", transitions = P, rewards = R)
#'
#' env$reset()
#' env$step(1L)
#'
#' # Create a Gridworld.
#' grid = makeEnvironment("gridworld", shape = c(4, 4),
#' goal.states = 15, initial.state = 0)
#' grid$visualize()
#'
#' \dontrun{
#' # Create an OpenAI Gym environment.
#' # Make sure you have Python, gym and reticulate installed.
#' env = makeEnvironment("gym", gym.name = "MountainCar-v0")
#'
#' # Take random actions for 200 steps.
#' env$reset()
#' for (i in 1:200) {
#' action = sample(env$actions, 1)
#' env$step(action)
#' env$visualize()
#' }
#' env$close()
#' }
makeEnvironment = function(class = "custom", discount = 1, ...) {
  # Identifiers of every environment type this factory knows how to build.
  supported = c(
    "custom", "mdp", "gym", "gridworld", "windy.gridworld", "cliff.walking",
    "mountain.car", "mountain.car.continuous"
  )
  checkmate::assertChoice(class, supported)

  # Select the R6 generator for the requested type. `switch` evaluates only the
  # matching branch, so generators of other types are never touched.
  generator = switch(class,
    custom = Environment, # default
    mdp = MdpEnvironment,
    gym = GymEnvironment,
    gridworld = Gridworld,
    windy.gridworld = WindyGridworld,
    cliff.walking = CliffWalking,
    mountain.car = MountainCar,
    mountain.car.continuous = MountainCarContinuous
  )

  # Every environment receives the discount plus whatever the caller passed on.
  generator$new(discount = discount, ...)
}
#' Custom Reinforcement Learning Environment
#'
#' @section Usage:
#' `makeEnvironment("custom", step, reset, visualize = NULL, discount = 1, action.names = NULL)`
#'
#' @param step \[`function(self, action)`] \cr
#' Custom step function.
#' @param reset \[`function(self)`] \cr
#' Custom reset function.
#' @param visualize \[`function(self)`] \cr
#' Optional custom visualization function.
#' @param discount \[`numeric(1)` in `[0, 1]`] \cr Discount factor.
#' @param action.names \[`named integer`] \cr
#' Optional action names for a discrete action space.
#'
#' @md
#'
#' @inheritSection makeEnvironment Methods
#'
#' @name Environment
#' @export
#'
#' @examples
#' step = function(self, action) {
#' state = list(mean = action + rnorm(1), sd = runif(1))
#' reward = rnorm(1, state[[1]], state[[2]])
#' done = FALSE
#' list(state, reward, done)
#' }
#'
#' reset = function(self) {
#' state = list(mean = 0, sd = 1)
#' state
#' }
#'
#' env = makeEnvironment(step = step, reset = reset)
#' env$reset()
#' env$step(100)
NULL
# R6 generator for a custom reinforcement learning environment.
#
# The dynamics are supplied by the user as plain functions, `step(self, action)`
# and `reset(self)`, with an optional `visualize(self)`. The public methods wrap
# those functions and maintain the step/episode counters and the discounted
# return of the current episode.
Environment = R6::R6Class("Environment",
  public = list(
    # Optional named integer vector used to translate action names in `step`.
    action.names = NULL,
    # Steps taken in total; only `resetEverything` sets this back to zero.
    n.step = 0L,
    # Number of steps that reported `done`; zeroed by `resetEverything`.
    episode = 0L,
    # Steps taken since the last `reset`.
    episode.step = 0L,
    # Discounted sum of rewards since the last `reset`.
    episode.return = 0,
    # State the environment was in before the most recent `step`.
    previous.state = NULL,
    # Results of the most recent `step` (state also set by `reset`).
    state = NULL,
    reward = NULL,
    done = FALSE,
    # Discount factor used when accumulating `episode.return`.
    discount = NULL,

    # Store the user-supplied functions and bring the environment into its
    # initial state.
    #
    # step:         function(self, action) returning a list whose first three
    #               elements are next state, reward and done flag.
    # reset:        function(self) returning an initial state.
    # visualize:    optional function(self); NULL installs a no-op.
    # discount:     number between 0 and 1 (inclusive).
    # action.names: optional named integer-like vector of action codes.
    initialize = function(step, reset, visualize = NULL, discount = 1,
      action.names = NULL) {
      checkmate::assertFunction(step)
      checkmate::assertFunction(reset)
      checkmate::assertFunction(visualize, null.ok = TRUE)
      checkmate::assertNumber(discount, lower = 0, upper = 1)
      checkmate::assertIntegerish(action.names, null.ok = TRUE)
      private$step_ = step
      private$reset_ = reset
      self$discount = discount
      self$action.names = action.names
      # Test for NULL rather than missing(): an explicitly passed
      # `visualize = NULL` must behave like an omitted argument.
      if (is.null(visualize)) {
        private$visualize_ = function(self) {}
      } else {
        private$visualize_ = visualize
      }
      self$reset()
    },

    # Zero the global counters as well as the per-episode ones; returns the
    # initial state produced by `reset`.
    resetEverything = function() {
      self$n.step = 0L
      self$episode = 0L
      self$reset()
    },

    # Start a new episode: clear the per-episode bookkeeping and the done flag,
    # ask the user's reset function for a state, store and return it.
    reset = function() {
      self$episode.step = 0L
      self$episode.return = 0
      self$done = FALSE
      self$state = private$reset_(self)
      self$state
    },

    # Apply `action` through the user's step function and update bookkeeping.
    # A character `action` is looked up in `action.names` first.
    # Returns list(state, reward, done).
    step = function(action) {
      if (is.character(action)) {
        # Fail loudly instead of handing NA/NULL to the step function.
        unknown = setdiff(action, names(self$action.names))
        if (length(unknown) > 0L) {
          stop("Unknown action name(s): ", paste(unknown, collapse = ", "),
            call. = FALSE)
        }
        action = self$action.names[action]
      }
      self$previous.state = self$state
      res = private$step_(self, action)
      # The exponent is the step count before incrementing, so the first
      # reward of an episode enters undiscounted.
      self$episode.return = self$episode.return +
        self$discount ^ self$episode.step * res[[2]]
      self$n.step = self$n.step + 1L
      self$episode.step = self$episode.step + 1L
      self$state = res[[1]]
      self$reward = res[[2]]
      self$done = res[[3]]
      if (self$done) {
        self$episode = self$episode + 1L
      }
      list(state = res[[1]], reward = res[[2]], done = res[[3]])
    },

    # Delegate to the visualization function chosen at construction time.
    visualize = function() {
      private$visualize_(self)
    }
  ),
  private = list(
    # step_: custom step method depending on problem that returns list with
    # next state, reward, done
    step_ = NULL,
    # reset_: custom reset method depending on problem that returns state
    reset_ = NULL,
    # visualize_: custom visualization function, or a no-op if none was given
    visualize_ = NULL
  )
)
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.