#' MDP Environment
#'
#' Markov Decision Process environment.
#'
#' @section Usage:
#' `makeEnvironment("MDP", transitions, rewards, initial.state, ...)`
#'
#' @param transitions \[`array (n.states x n.states x n.actions)`] \cr
#' State transition array.
#' @param rewards \[`matrix (n.states x n.actions)`] \cr
#' Reward array.
#' @param initial.state \[`integer`] \cr
#' Optional starting state.
#' If a vector is given a starting state will be
#' randomly sampled from this vector whenever `reset` is called.
#' Note that states are numerated starting with
#' 0. If `initial.state = NULL` all non-terminal states are
#' possible starting states.
#' @param ... \[`any`] \cr Arguments passed on to [makeEnvironment].
#'
#' @md
#'
#' @name MdpEnvironment
#' @inheritSection Environment Methods
#' @export
#'
#' @examples
#' # Create a Markov Decision Process.
#' P = array(0, c(2, 2, 2))
#' P[, , 1] = matrix(c(0.5, 0.5, 0, 1), 2, 2, byrow = TRUE)
#' P[, , 2] = matrix(c(0, 1, 0, 1), 2, 2, byrow = TRUE)
#' R = matrix(c(5, 10, -1, 2), 2, 2, byrow = TRUE)
#' env = makeEnvironment("mdp", transitions = P, rewards = R)
#' env$reset()
#' env$step(1L)
NULL
MdpEnvironment = R6::R6Class("MdpEnvironment",
inherit = Environment,
public = list(
action.space = NULL,
actions = NULL,
initial.state = NULL,
n.actions = NULL,
n.states = NULL,
rewards = NULL,
state.space = NULL,
states = NULL,
terminal.states = NULL,
transitions = NULL,
initialize = function(transitions, rewards, initial.state, ...) {
checkmate::assertArray(transitions, any.missing = FALSE, d = 3L)
checkmate::assertArray(rewards, any.missing = FALSE, d = 2L)
self$state.space = "Discrete"
self$action.space = "Discrete"
self$n.actions = dim(transitions)[3]
self$n.states = dim(transitions)[1]
self$actions = seq_len(self$n.actions) - 1L
self$states = seq_len(self$n.states) - 1L
self$transitions = transitions
self$rewards = rewards
terminal.states = apply(transitions, 3L, function(x) diag(x))
self$terminal.states = which(apply(terminal.states, 1L, function(x) all(x == 1L))) - 1L
if (length(self$terminal.states) == 0) {
warning("There are no terminal states in the MDP!")
self$terminal.states = -1L
}
if (missing(initial.state)) {
self$initial.state = setdiff(self$states, self$terminal.states)
} else {
checkmate::assertIntegerish(initial.state, upper = self$n.states - 1L)
self$initial.state = initial.state
}
step_ = function(env, action) {
# if (is.character(action)) {
# action = self$action.names[action]
# }
reward = self$rewards[self$state + 1L, action + 1L] # use old state here!
state = sample(self$states, size = 1L,
prob = self$transitions[self$state + 1L, , action + 1L])
if (state %in% self$terminal.states) {
done = TRUE
} else {
done = FALSE
}
list(state, reward, done)
}
reset_ = function(env) {
state = ifelse(length(self$initial.state) > 1L,
sample(self$initial.state, size = 1L), self$initial.state)
state
}
# call initialize of superclass with mdp step and reset function
super$initialize(step_, reset_, ...)
}
)
)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.