Setup

library("mdplearning")
library("appl")
library("pomdpplus")

library("ggplot2")
library("tidyr")
library("purrr")
library("dplyr")

library("seewave") ## For KL divergence only

knitr::opts_chunk$set(cache = FALSE, message = FALSE)
theme_set(theme_bw())

(The scaled data were created from the RAM data via the script in mdplearning/data-raw/scaled_data.R.)

library("nimble")
data("scaled_data")
ricker_code <- nimbleCode({
  x[1] <- x0
  for(t in 1:(N-1)){
    s[t] <- x[t] - min(x[t], a[t])           # escapement: stock remaining after harvest a[t]
    mu[t] <- log(s[t]) + r * (1 - s[t] / K)  # Ricker growth on the log scale
    x[t+1] ~ dlnorm(mu[t], sd = sigma)       # lognormal growth noise
  }
})

ricker_model <- nimbleModel(code = ricker_code,
                            constants = list(N = length(scaled_data$a), a = scaled_data$a),
                            inits = list(K = 1, r = 0.1, sigma = 0.1, x0 = scaled_data$y[1]),
                            data = data.frame(x = scaled_data$y))
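
## Sanity check: log-probability of the data at the initial parameter values
## (calculate() sums the log densities over the model's stochastic nodes)
calculate(ricker_model)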

## Negative log-likelihood of the Ricker model, to be minimized by optim()
mloglik <- function(pars){
    if(any(pars < 0)) return(1e12)   # heavily penalize invalid (negative) parameters
    ricker_model$r <- pars[1]
    ricker_model$K <- pars[2]
    ricker_model$sigma <- pars[3]
    - calculate(ricker_model)        # calculate() returns the model log-probability
}

ricker_fit <- optim(c(r = 0.1, K = 1, sigma = 0.1), mloglik, control = list(maxit = 20000))
ricker_fit$par
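
It is worth confirming that the optimizer actually converged before relying on these estimates:

ricker_fit$convergence  # 0 indicates successful convergence of optim()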

Problem definition

(Code to compute the alpha vectors, with results logged to disk; this chunk is cached and not evaluated here, as it is very slow.)

mc.cores <- round(parallel::detectCores() / 2)  ## use half the available cores

log_dir <- "tuna"
states <- seq(0, 1.2, length.out = 50)  # Vector of all possible states
actions <- states                       # Vector of actions: harvest levels
obs <- states                           # Vector of possible observations
K <- 0.9903371
r <- 0.05699246
sigma_g <- 0.01720091
discount <- 0.99

vars <- expand.grid(r = rev(seq(0.025, 0.2, by = 0.025)), sigma_m = c(0.1, 0.2, 0.3, 0.4, 0.5))
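## 8 values of r crossed with 5 values of sigma_m:
nrow(vars)  # 40 candidate models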

# With a 150-point state grid, initialization alone takes ~100,000 seconds, exceeding the runtime limit

## Detect available memory (linux servers only)
memory <- round(0.95 * as.numeric(gsub(".* (\\d+) .*", "\\1", system("cat /proc/meminfo", intern=TRUE)[1])))
## Bind this to a data.frame listing each of the fixed parameters across all runs
fixed <- data.frame( K = K, C = NA, sigma_g = sigma_g, discount = discount, model = "ricker", 
                     precision = 0.0000001, memory = memory / mc.cores, timeout = 250000, timeInterval = 25000,
                     max_state = max(states), max_obs = max(obs), max_action = max(actions), 
                     min_state = min(states), min_obs = min(obs), min_action = min(actions))
pars <- data.frame(vars, fixed)
## The usual assumption for the reward function: realized harvest is capped by the available stock
reward_fn <- function(x, h) pmin(x, h)
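## For example, the quota is fully taken when the stock covers it, and capped otherwise:
reward_fn(x = 0.5, h = 0.2)  # 0.2: quota fully harvested
reward_fn(x = 0.1, h = 0.2)  # 0.1: harvest limited by the stock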
## Compute alphas for the above examples
models <- lapply(seq_len(nrow(pars)), function(i){
  ## Select the growth model
  f <- switch(as.character(pars[i, "model"]),
              allen = appl:::allen(pars[i, "r"], pars[i, "K"], pars[i, "C"]),
              ricker = appl:::ricker(pars[i, "r"], pars[i, "K"])
  )
  ## Compute the matrices for this parameter combination
  fisheries_matrices(states, actions, obs,
                     reward_fn, f = f,
                     sigma_g = pars[i, "sigma_g"],
                     sigma_m = pars[i, "sigma_m"])
})
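## Quick structural check on the first model's components; fisheries_matrices is
## assumed here to return the POMDP matrices (transition, observation, reward)
str(models[[1]], max.level = 1)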
alphas <- sarsop_plus(models, 
                      discount = pars[1, "discount"], 
                      precision = pars[1, "precision"], 
                      timeout = pars[1, "timeout"],
                      timeInterval = pars[1, "timeInterval"],
                      log_dir = log_dir, 
                      log_data = pars,
                      mc.cores = mc.cores)
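
Because these runs are very slow, it can also help to cache the returned alpha vectors directly, alongside the log files sarsop_plus writes to log_dir; a minimal sketch in base R:

saveRDS(alphas, file.path(log_dir, "alphas.rds"))
## alphas <- readRDS(file.path(log_dir, "alphas.rds"))  # restore in a later session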

