# "Nothing" -- stray page text from the HTML extraction; commented out so the script parses.
## ---- include = FALSE----------------------------------------------------
# Vignette-wide knitr options: "collapse" merges source and output chunks,
# and printed output lines are prefixed with "#>".
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
## ----setup---------------------------------------------------------------
# Attach matricks: provides m(), sv(), at(), with_same_dims(), matrix_idx(),
# is_idx_possible() and seq_matrix() used throughout this script.
library(matricks)
## ----init.gid.world------------------------------------------------------
# Defining possible actions
# FALSE = unavailable field
# Use TRUE/FALSE rather than T/F: T and F are ordinary bindings that can be
# reassigned -- this very script later rebinds the single letter L to "L".
actions <- m(TRUE,  TRUE,  TRUE,  FALSE, TRUE |
             TRUE,  TRUE,  FALSE, TRUE,  TRUE |
             FALSE, TRUE,  TRUE,  TRUE,  TRUE |
             TRUE,  TRUE,  TRUE,  TRUE,  TRUE |
             FALSE, FALSE, TRUE,  TRUE,  TRUE |
             FALSE, TRUE,  TRUE,  TRUE,  FALSE)
plot(actions)
# Defining rewards matrix with two terminal states
rewards <- with_same_dims(actions, 0)
rewards <- sv(rewards,
              c(4, 4) ~ 1,  # Win
              c(5, 3) ~ -1) # Lose
# We add small penalties, which encourage the model
# to reduce the number of steps it takes
rewards[rewards == 0] <- -0.1
plot(rewards)
# List of possible state indices
states <- matrix_idx(actions, actions)
## ----fixed.policy--------------------------------------------------------
# Single-letter symbols for the four possible moves, so the policy matrix
# below reads like a picture of the grid.
U <- "U"  # Up
D <- "D"  # Down
L <- "L"  # Left
R <- "R"  # Right
# Hand-crafted deterministic policy: one move per reachable field,
# NA on fields the agent never occupies.
fixed.policy <- m(
  R,  D,  L,  NA, D  |
  R,  D,  NA, D,  L  |
  NA, R,  D,  D,  D  |
  R,  R,  D,  NA, D  |
  NA, NA, NA, U,  L  |
  NA, R,  U,  L,  NA)
## ----as.idx--------------------------------------------------------------
#' Convert a symbolic policy matrix into a matrix of target-cell indices.
#'
#' Each "U"/"D"/"L"/"R" entry becomes the c(row, col) index of the field
#' that move leads to; NA entries and moves leading off the grid stay NULL.
#'
#' @param x Character matrix of moves ("U", "D", "L", "R" or NA).
#' @return A list-matrix with the same dims as `x`; each cell holds the
#'   c(row, col) target of the move, or NULL where no valid move exists.
as_idx <- function(x){
  n.row <- nrow(x)
  n.col <- ncol(x)
  result <- matrix(list(), nrow = n.row, ncol = n.col)
  # seq_len() is safe for zero-row/zero-column matrices, unlike 1:n.
  for (i in seq_len(n.row)){
    for (j in seq_len(n.col)){
      coords <- c(i, j)
      if (is.na(at(x, coords)))
        next
      x.val <- x[i, j]
      # No trailing comma in switch(): an unmatched value now returns NULL
      # instead of hitting a missing unnamed default and erroring.
      move.idx <- switch(x.val,
        U = c(i - 1, j),
        D = c(i + 1, j),
        L = c(i, j - 1),
        R = c(i, j + 1)
      )
      # Skip unknown symbols and moves that would leave the grid.
      if (is.null(move.idx) || !is_idx_possible(x, move.idx))
        next
      result[i, j] <- list(move.idx)
    }
  }
  result
}
# Convert the symbolic policy into target-cell indices and show the result.
fixed.policy.idx <- as_idx(fixed.policy)
fixed.policy.idx
## ----policy.evaluation---------------------------------------------------
#' Evaluate a fixed policy by iterative Bellman backups.
#'
#' Repeatedly sweeps every (state, move-target) pair of the policy and
#' updates V(s) <- r(a) + gamma * V(a), stopping once the largest
#' single-state change in a full sweep drops below `epsilon`.
#'
#' @param actions Logical matrix; FALSE marks unavailable fields.
#' @param rewards Numeric matrix of per-field rewards.
#' @param policy  Character matrix of moves ("U"/"D"/"L"/"R", NA elsewhere).
#' @param epsilon Convergence threshold on the largest per-sweep change.
#' @param gamma   Discount factor for future values.
#' @param verbose Print the largest change after each sweep? Defaults to
#'   TRUE, preserving the original behaviour.
#' @return Numeric matrix of state values, same dims as `policy`.
evaluate_policy <- function(actions, rewards, policy,
                            epsilon = 1e-3, gamma = 0.9,
                            verbose = TRUE){
  V <- with_same_dims(policy, 0)
  policy.idx <- as_idx(policy)
  repeat {
    biggest.change <- 0
    for (move in seq_matrix(policy.idx)) {
      s <- move[[1]] # state index c(row, col)
      a <- move[[2]] # index of the field the policy moves to (or NULL)
      old.v <- at(V, s)
      # Skip unavailable fields and states with no defined move.
      if (!at(actions, s))
        next
      if (is.null(a))
        next
      # Deterministic policy: value backup from the single successor.
      r <- at(rewards, a)
      at(V, s) <- r + gamma * at(V, a)
      biggest.change <- max(biggest.change, abs(old.v - at(V, s)))
    }
    if (verbose)
      print(biggest.change)
    if (biggest.change < epsilon)
      break
  }
  V
}
## ----experiment----------------------------------------------------------
# Run policy evaluation on the grid world, then show and plot the
# resulting value matrix.
V1 <- evaluate_policy(actions, rewards, fixed.policy)
print(V1)
plot(V1)
# The following lines are website footer text captured by the HTML
# extraction (not part of the vignette code); commented out so the
# script parses:
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.