library("sarsop") library("pomdpplus") library("ggplot2") library("tidyr") library("dplyr") knitr::opts_chunk$set(cache = TRUE)
## Not run, all parameters read from log instead states <- 0:21 actions <- states obs <- states sigma_g <- 0.1 sigma_m <- sigma_g reward_fn <- function(x,h) pmin(x,h) discount <- 0.95 K1 <- function(x, h, r = 1, K = 10){ s <- pmax(x - h, 0) s * exp(r * (1 - s / K) ) } K2 <- function(x, h, r = 1, K = 15){ s <- pmax(x - h, 0) s * exp(r * (1 - s / K) ) } models <- lapply(list(K1, K2), function(f) fisheries_matrices(states, actions, obs, reward_fn, f, sigma_g, sigma_m)) ## Potentially intensive step alphas <- sarsop_plus(models, discount, precision = 1)
Alternately, we could import parameters from log
log_dir <- "https://raw.githubusercontent.com/cboettig/pomdp-solutions-library/master/library" meta <- sarsop::meta_from_log(data.frame(model ="ricker", r = 1, K = c(10, 15)), log_dir)[1:2,] meta setup <- meta[1,] states <- 0:(setup$n_states - 1) actions <- states obs <- states sigma_g <- setup$sigma_g sigma_m <- setup$sigma_m reward_fn <- function(x,h) pmin(x,h) discount <- setup$discount models <- models_from_log(meta, reward_fn) alphas <- alphas_from_log(meta, log_dir)
We can compare results for a different priors over states. For simplicity of interpretation, we assume the model is known to be model 2 (model prior (0,1)
)
As expected, the policy is much more conservative when prior belief is lower!
low <- compute_plus_policy(alphas, models, c(0, 1), models[[2]]$observation[,4,1]) ave <- compute_plus_policy(alphas, models, c(0, 1), models[[2]]$observation[,10,1]) unif <- compute_plus_policy(alphas, models, c(0, 1)) high <- compute_plus_policy(alphas, models, c(0, 1), models[[2]]$observation[,15,1]) df <- dplyr::bind_rows(low, ave, unif, high, .id = "prior") ggplot(df, aes(states[state], states[state] - actions[policy], col = prior, pch = prior)) + geom_point(alpha = 0.5, size = 3) + geom_line()
Use alphas to compute policy given model priors, for comparison:
compare_policies <- function(alphas, models){ low <- compute_plus_policy(alphas, models, c(1, 0)) unif <- compute_plus_policy(alphas, models, c(1/2, 1/2)) high <- compute_plus_policy(alphas, models, c(0, 1)) dplyr::bind_rows(low, unif, high, .id = "prior") } df <- compare_policies(alphas, models) ggplot(df, aes(states[state], states[state] - actions[policy], col = prior, pch = prior)) + geom_point(alpha = 0.5, size = 3) + geom_line()
set.seed(123) out <- sim_plus(models = models, discount = discount, x0 = 20, a0 = 1, Tmax = 100, true_model = models[[2]], alphas = alphas) out$df %>% dplyr::select(-value) %>% tidyr::gather(variable, stock, -time) %>% ggplot(aes(time, stock, color = variable)) + geom_line() + geom_point()
Evolution of the belief state:
Tmax <-length(out$state_posterior[,1]) out$state_posterior %>% data.frame(time = 1:Tmax) %>% tidyr::gather(state, probability, -time, factor_key =TRUE) %>% dplyr::mutate(state = as.numeric(state)) %>% ggplot(aes(state, probability, group = time, alpha = time)) + geom_line()
Final 20 belief states continue to move around:
Tmax <-length(out$state_posterior[,1]) out$state_posterior %>% data.frame(time = 1:Tmax) %>% tidyr::gather(state, probability, -time, factor_key =TRUE) %>% dplyr::mutate(state = as.numeric(state)) %>% dplyr::filter(time > 80) %>% ggplot(aes(state, probability, group = time, alpha = time)) + geom_line()
Model posterior converges more quickly to the true model (examining first 15 probabilities already shows model 2 probability nearly 1)
out$model_posterior %>% data.frame(time = 1:Tmax) %>% tidyr::gather(model, probability, -time, factor_key =TRUE) %>% dplyr::filter(time < 50) %>% ggplot(aes(model, probability, group = time, alpha = time)) + geom_point()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.