In boettiger-lab/pomdpplus: Planning and Learning in Uncertain Systems

library("appl")
library("pomdpplus")
library("ggplot2")
library("tidyr")
library("purrr")
library("dplyr")
knitr::opts_chunk$set(cache = TRUE)

log_dir <- "https://raw.githubusercontent.com/cboettig/pomdp-solutions-library/master/library"
meta <- appl::meta_from_log(data.frame(model = "allen", r = 0.5, K = 30), log_dir) 

meta <- meta[1:5,]

meta

Import parameters from log

setup <- meta[1,]

states <- 0:(setup$n_states - 1)
actions <- states
obs <- states

sigma_g <- setup$sigma_g
sigma_m <- setup$sigma_m

reward_fn <- function(x,h) pmin(x,h)
discount <- setup$discount 

models <- models_from_log(meta, reward_fn)
alphas <- alphas_from_log(meta, log_dir)

Policy based on a uniform prior belief over the models:

C0 <- compute_plus_policy(alphas, models, model_prior  = c(1,0,0,0,0))
C5 <- compute_plus_policy(alphas, models, model_prior  = c(0,1,0,0,0))
C10 <- compute_plus_policy(alphas, models, model_prior = c(0,0,1,0,0))
C15 <- compute_plus_policy(alphas, models, model_prior = c(0,0,0,1,0))
C20 <- compute_plus_policy(alphas, models, model_prior = c(0,0,0,0,1))
unif <- compute_plus_policy(alphas, models)

df <- dplyr::bind_rows(C0, C5, C10, C15, unif, .id = "prior")

ggplot(df, aes(states[state], states[state] - actions[policy], col = prior, pch = prior)) + 
  geom_point(alpha = 0.5, size = 3) + 
  geom_line()

set.seed(1234)
out <- sim_plus(models = models, discount = discount,
                x0 = 20, a0 = 1, Tmax = 100, 
                true_model = models[[2]], 
                alphas = alphas)


out$df %>% 
  dplyr::select(-value) %>% 
  tidyr::gather(variable, stock, -time) %>% 
  ggplot(aes(time, stock, color = variable)) + geom_line()  + geom_point()

Evolution of the belief state:

Tmax <-length(out$state_posterior[,1])
out$state_posterior %>% data.frame(time = 1:Tmax) %>% 
  tidyr::gather(state, probability, -time, factor_key =TRUE) %>% 
  dplyr::mutate(state = as.numeric(state)) %>% 
  ggplot(aes(state, probability, group = time, alpha = time)) + geom_line()

out$model_posterior %>% data.frame(time = 1:Tmax) %>% 
  tidyr::gather(model, probability, -time, factor_key =TRUE) %>% 
  ggplot(aes(model, probability, group = time, alpha = time)) + geom_point()

Replicates:

sims <- 
purrr::map_df(1:25, 
       function(i) sim_plus(models = models, discount = discount,
                x0 = 20, a0 = 1, Tmax = 100, 
                true_model = models[[3]], 
                alphas = alphas)$df,
  .id = "rep")

sims %>% 
  ggplot(aes(time, state, group = rep)) + geom_line(alpha = 0.5)

boettiger-lab/pomdpplus documentation built on May 24, 2019, 3:05 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

boettiger-lab/pomdpplus
Planning and Learning in Uncertain Systems

In boettiger-lab/pomdpplus: Planning and Learning in Uncertain Systems

Import parameters from log

R Package Documentation

Browse R Packages

We want your feedback!

boettiger-lab/pomdpplus Planning and Learning in Uncertain Systems

In boettiger-lab/pomdpplus: Planning and Learning in Uncertain Systems

Import parameters from log

R Package Documentation

Browse R Packages

We want your feedback!

boettiger-lab/pomdpplus
Planning and Learning in Uncertain Systems