# inst/doc/policy_evaluation.R In matricks: Useful Tricks for Matrix Manipulation

```## ---- include = FALSE----------------------------------------------------
knitr::opts_chunk\$set(
collapse = TRUE,
comment = "#>"
)

## ----setup---------------------------------------------------------------
library(matricks)

## ----init.gid.world------------------------------------------------------
# Defining possible actions
# FALSE = unavailable field
actions <- m(T, T, T, F, T|
T, T, F, T, T|
F, T, T, T, T|
T, T, T, T, T|
F, F, T, T, T|
F, T, T, T, F)
plot(actions)
# Defining rewards matrix with two terminal states
rewards <- with_same_dims(actions, 0)
rewards <- sv(rewards,
c(4, 4) ~ 1., # Win
c(5, 3) ~ -1) # Lose
# We add small penalties, which encourages models
# to reduce number of steps it passes
rewards[rewards == 0] <- -0.1
plot(rewards)
# List of possible state indices
states <- matrix_idx(actions, actions)

## ----fixed.policy--------------------------------------------------------
# Symbols for moves
U <- "U" # Up
D <- "D" # Down
L <- "L" # Left
R <- "R" # Right

fixed.policy <- m(R , D , L , NA, D |
R , D , NA, D , L |
NA, R , D , D , D |
R , R , D , NA, D |
NA, NA, NA, U , L |
NA, R , U , L , NA)

## ----as.idx--------------------------------------------------------------
as_idx <- function(x){
n.row <- nrow(x)
n.col <- ncol(x)

result <- matrix(list(), nrow = n.row, ncol = n.col)

for (i in 1:n.row){
for (j in 1:n.col){
coords <- c(i, j)

if (is.na(at(x, coords)))
next

x.val <- x[i, j]

switch (x.val,
U = c(i - 1, j),
D = c(i + 1, j),
L = c(i, j - 1),
R = c(i, j + 1),
) -> move.idx

if (!is_idx_possible(x, move.idx))
next

result[i, j] <- list(move.idx)
}
}
result
}

fixed.policy.idx <- as_idx(fixed.policy)
fixed.policy.idx

## ----policy.evaluation---------------------------------------------------
evaluate_policy <- function(actions, rewards, policy,
epsilon = 1e-3, gamma = 0.9){

V <- with_same_dims(policy, 0)
policy.idx <- as_idx(policy)

while (TRUE) {
biggest.change <- 0

for (move in seq_matrix(policy.idx)) {
s <- move[[1]] # Action, value at index s
a <- move[[2]]

old.v <- at(V, s)

if(!at(actions, s))
next

if(is.null(a))
next

r <- at(rewards, a)
at(V, s) <- r + gamma * at(V, a)
biggest.change <- max(biggest.change, abs(old.v - at(V, s)))
}

print(biggest.change)

if (biggest.change < epsilon){
break
}
}
return(V)
}

## ----experiment----------------------------------------------------------
V1 <- evaluate_policy(actions = actions,
rewards = rewards,
policy  = fixed.policy)
V1
plot(V1)
```

## Try the matricks package in your browser

Any scripts or data that you put into this service are public.

matricks documentation built on March 26, 2020, 6:22 p.m.