# Dependencies: appl supplies the POMDP log/solver interface;
# ggplot2, tidyr, and dplyr handle plotting and data manipulation.
library("appl")
library("ggplot2")
library("tidyr")
library("dplyr")
# Remote archive of pre-computed POMDP solutions.
log_dir <- "https://raw.githubusercontent.com/cboettig/pomdp-solutions-library/master/archive"

# Query used to match solutions in the log; alternate queries kept for reference:
# who <- data.frame(model = "ricker", r = 0.6, K = 30)
# who <- data.frame(model = "ricker", r = 0.6, K = 50, n_states = 61)
who <- data.frame(model = "ricker")

# Metadata for all log entries matching the query.
meta <- meta_from_log(who, log = log_dir)
meta
# Load the solution components for the first matching log entry.
i <- 1 # just use first match from the log
alpha <- alphas_from_log(meta, log = log_dir)[[i]] # alpha vectors of the POMDP solution
f <- f_from_log(meta)[[i]]                         # growth/recruitment function f(x, a)
m <- models_from_log(meta)[[i]]                    # transition / observation / reward matrices
# Use <- (not =) for assignment, per R convention.
n_states <- meta$n_states[[i]]
discount <- meta$discount[[i]]
First, find the deterministic optimal solution for this model:
# Deterministic optimum: the escapement S* that minimizes
# x / discount - f(x, 0) over the state grid [1, n_states].
S_star <- round(
  optimize(function(x) -f(x, 0) + x / discount, c(1, n_states))$minimum
)

# Deterministic policy in index space (states start at 1):
# take no harvest at or below S*, otherwise harvest down to S*.
det_policy <- function(x) {
  if (x <= S_star) 1 else x - S_star
}
If we believe the prior value was certainly s, we are then more conservative with anything observed above f(s), and less conservative with anything observed below f(s):
s <- S_star # believed (certain) stock state
a0 <- 0     # action we took

# Point-mass prior: all belief concentrated on state s
# (s + 1 because state indices start at 1).
certain_prior <- numeric(length(m$observation[, 1, 1]))
certain_prior[s + 1] <- 1

# Optimal policy under that prior; the action is passed as a 1-based index.
df <- compute_policy(alpha, m$transition, m$observation, m$reward,
                     certain_prior, a0 + 1)
# Escapement (state - policy) under the POMDP rule vs the deterministic rule;
# the vertical line marks the expected recruitment f(s, a0).
df %>%
  rowwise() %>%
  mutate(det = det_policy(state)) %>%
  ggplot(aes(state, state - policy)) +
  geom_line() +
  geom_point() +
  geom_line(aes(state, state - det)) +
  geom_vline(xintercept = f(s, a0))
Role of different prior beliefs on the optimal policy:
# Policies under four different prior beliefs: a low-state prior, an
# average-state prior, the default uniform prior, and a high-state prior.
low  <- compute_policy(alpha, m$transition, m$observation, m$reward,
                       m$observation[, 4, 1])
ave  <- compute_policy(alpha, m$transition, m$observation, m$reward,
                       m$observation[, 20, 1])
unif <- compute_policy(alpha, m$transition, m$observation, m$reward)
high <- compute_policy(alpha, m$transition, m$observation, m$reward,
                       m$observation[, 35, 1])

# Stack the four policies; .id labels them "1".."4" in the order given.
df <- dplyr::bind_rows(low, ave, unif, high, .id = "prior")

ggplot(df, aes(state, state - policy, col = prior, pch = prior)) +
  geom_point(alpha = 0.5, size = 3) +
  geom_line()
Simulate dynamics under the policy
# Simulate the system forward under the POMDP policy.
set.seed(1234)
sim <- sim_pomdp(m$transition, m$observation, m$reward, discount,
                 x0 = 25, Tmax = 50, alpha = alpha)

# Plot each trajectory column against time.
# pivot_longer() replaces the superseded gather(); same plot results.
sim$df %>%
  select(-value) %>%
  pivot_longer(-time, names_to = "variable", values_to = "state") %>%
  ggplot(aes(time, state, color = variable)) +
  geom_line() +
  geom_point()
Posterior is not stationary:
# Belief distributions over time; the posterior keeps shifting rather than
# settling to a stationary distribution.
Tmax <- length(sim$df$time)

# NOTE(review): assumes sim$state_posterior has Tmax + 1 rows (one per time
# step 0:Tmax) -- confirm against sim_pomdp's output.
data.frame(time = 0:Tmax, sim$state_posterior) %>%
  gather(state, belief, -time, factor_key = TRUE) %>%
  mutate(state = as.numeric(state)) %>% # factor level index = state index
  filter(time > 25) %>%
  ggplot(aes(state, belief, group = time, alpha = time)) +
  geom_line()
Do replicate simulations avoid crashes?
# One replicate simulation, started from the midpoint of the state grid,
# tagged with its replicate number n.
reps <- function(n) {
  run <- sim_pomdp(m$transition, m$observation, m$reward, discount,
                   alpha = alpha, x0 = round(n_states / 2), Tmax = Tmax)
  data.frame(sim = n, run$df)
}

set.seed(12345)
sim_df <- purrr::map_df(1:100, reps)

# Overlay all replicate state trajectories to check for crashes.
ggplot(sim_df, aes(time, state, group = sim)) +
  geom_line()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.