In boettiger-lab/pomdpplus: Planning and Learning in Uncertain Systems

```r
library("sarsop")   # POMDP solver and policy utilities
library("ggplot2")
library("tidyr")
library("dplyr")
```
```r
# Archive of pre-computed POMDP solutions
log_dir <- "https://raw.githubusercontent.com/cboettig/pomdp-solutions-library/master/archive"

# Query parameters identifying which model run to load from the log.
who <- data.frame(model = "allen", C = 15, n_states = 61, r = 1)
# Alternative queries:
# who <- data.frame(model = "allen")
# who <- data.frame(model = "allen", r = 1, K = 50, C = 15)
# who <- data.frame(model = "allen", r = 1, K = 30, C = 15)
# who <- data.frame(model = "allen", r = 1, K = 40, C = 10, sigma_m = 0.05)

meta <- meta_from_log(who, log = log_dir)
meta
```

Import a model solution from the library

```r
i <- 1  # just use first match from the log

alpha <- alphas_from_log(meta, log = log_dir)[[i]]  # alpha vectors of the solved POMDP
f <- f_from_log(meta)[[i]]                          # growth/transition function
m <- models_from_log(meta)[[i]]                     # transition, observation, reward matrices

n_states <- meta$n_states[[i]]
discount <- meta$discount[[i]]
```

Find the deterministic optimal solution for this model

```r
# Deterministic optimum: the escapement S* maximizing discounted growth.
S_star <- round(optimize(function(x) -f(x, 0) + x / discount, c(1, n_states))$minimum)
# Constant-escapement policy; adjusted for index values, starting at 1.
det_policy <- function(x) if (x <= S_star) 1 else x - S_star
```

Experiment 1:

If we believe the prior state was certainly s, the resulting policy is more conservative than the deterministic one for observations above f(s), and less conservative for observations below f(s):

```r
s <- S_star
a0 <- 0  # action we took

# Point-mass prior: all belief on state s (shifted by 1 for R's 1-based indexing).
certain_prior <- numeric(length(m$observation[, 1, 1]))
certain_prior[s + 1] <- 1

df <- compute_policy(alpha, m$transition, m$observation, m$reward,
                     certain_prior, a0 + 1)  # action as index
```
```r
# Compare the POMDP policy against the deterministic policy; the vertical
# line marks the expected growth f(s, a0) from the prior state.
df %>%
  rowwise() %>%
  mutate(det = det_policy(state)) %>%
  ggplot(aes(state, state - policy)) +
  geom_line() +
  geom_point() +
  geom_line(aes(state, state - det)) +
  geom_vline(xintercept = f(s, a0))
```

Experiment 2:

Role of different prior beliefs on the optimal policy:

```r
# Policies under different priors, taken from observation-matrix columns
# (low / average / high states), plus the default uniform prior.
low  <- compute_policy(alpha, m$transition, m$observation, m$reward, m$observation[, 4, 1])
ave  <- compute_policy(alpha, m$transition, m$observation, m$reward, m$observation[, 20, 1])
unif <- compute_policy(alpha, m$transition, m$observation, m$reward)
high <- compute_policy(alpha, m$transition, m$observation, m$reward, m$observation[, 35, 1])
df <- dplyr::bind_rows(low, ave, unif, high, .id = "prior")

ggplot(df, aes(state, state - policy, col = prior, pch = prior)) +
  geom_point(alpha = 0.5, size = 3) +
  geom_line()
```

Experiment 3:

Simulate dynamics under the policy

```r
set.seed(1234)
sim <- sim_pomdp(m$transition, m$observation, m$reward, discount,
                 x0 = 25, Tmax = 50, alpha = alpha)

# Plot true state, observation, and action trajectories over time.
sim$df %>%
  select(-value) %>%
  gather(variable, state, -time) %>%
  ggplot(aes(time, state, color = variable)) +
  geom_line() +
  geom_point()
```

Posterior is not stationary:

```r
Tmax <- length(sim$df$time)
# NOTE(review): time = 0:Tmax assumes state_posterior has Tmax + 1 rows
# (belief at t = 0 through t = Tmax) — confirm against sim_pomdp's output.
data.frame(time = 0:Tmax, sim$state_posterior) %>%
  gather(state, belief, -time, factor_key = TRUE) %>%
  mutate(state = as.numeric(state)) %>%
  filter(time > 25) %>%
  ggplot(aes(state, belief, group = time, alpha = time)) +
  geom_line()
```

Experiment 4:

Do replicate simulations avoid crashes?

```r
# Run one replicate simulation, tagging the output rows with replicate id n.
# Uses Tmax from the previous chunk and starts at the midpoint state.
reps <- function(n) {
  data.frame(sim = n,
             sim_pomdp(m$transition, m$observation, m$reward, discount,
                       alpha = alpha, x0 = round(n_states / 2), Tmax = Tmax)$df)
}

set.seed(12345)
sim_df <- purrr::map_df(1:100, reps)

ggplot(sim_df, aes(time, state, group = sim)) +
  geom_line()
```

boettiger-lab/pomdpplus documentation built on Jan. 29, 2018, 3:59 a.m.