library("mdplearning") library("appl") library("pomdpplus") library("ggplot2") library("tidyr") library("purrr") library("dplyr") library("seewave") ## For KL divergence only knitr::opts_chunk$set(cache = FALSE, message=FALSE) theme_set(theme_bw())
(Scaled data created from the RAM data via the script in mdplearning/data-raw/scaled_data.R.)
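Before fitting, it can help to eyeball the two series. A minimal sketch in base R, assuming (as the fitting code below does) that `scaled_data` carries a scaled stock index `y` and a harvest series `a`:

```r
data("scaled_data")

## Plot the scaled stock index (y) and harvest (a) series over time
t <- seq_along(scaled_data$y)
plot(t, scaled_data$y, type = "l", xlab = "time", ylab = "scaled value")
lines(t, scaled_data$a, lty = 2)
legend("topright", c("stock index y", "harvest a"), lty = c(1, 2))
```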
library("nimble") data("scaled_data") ricker_code <- nimbleCode({ x[1] <- x0 for(t in 1:(N-1)){ s[t] <- x[t] - min(x[t], a[t]) mu[t] <- log(s[t]) + r * (1 - s[t] / K) x[t+1] ~ dlnorm(mu[t], sd = sigma) } }) ricker_model <- nimbleModel(code = ricker_code, constants = list(N = length(scaled_data$a), a = scaled_data$a), inits = list(K = 1, r = 0.1, sigma = 0.1, x0 = scaled_data$y[1]), data = data.frame(x = scaled_data$y)) mloglik <- function(pars){ if(any(pars < 0 )) return(1e12) ricker_model$r <- pars[1] ricker_model$K <- pars[2] ricker_model$sigma <- pars[3] - calculate(ricker_model) } ricker_fit <- optim(c(r = .1, K = 1, sigma = 0.1), mloglik, control = list(maxit = 20000)) ricker_fit$par
(Code to create the logged alpha vectors; cached and not evaluated here, as it is very slow.)
```r
mc.cores <- round(parallel::detectCores() / 2)
log_dir <- "tuna"

states <- seq(0, 1.2, len = 50)  # Vector of all possible states
actions <- states                # Vector of actions: harvest
obs <- states

## Maximum-likelihood estimates from the fit above
K <- 0.9903371
r <- 0.05699246
sigma_g <- 0.01720091
discount <- 0.99

## Grid of (r, sigma_m) values over which to compute alpha vectors.
## At 150 grid points, initialization time is ~100,000 seconds and exceeds the runtime.
vars <- expand.grid(r = rev(seq(0.025, 0.2, by = 0.025)),
                    sigma_m = c(0.1, 0.2, 0.3, 0.4, 0.5))

## Detect available memory (Linux servers only)
memory <- round(0.95 * as.numeric(
  gsub(".* (\\d+) .*", "\\1", system("cat /proc/meminfo", intern = TRUE)[1])))

## Bind this to a data.frame listing each of the fixed parameters across all runs
fixed <- data.frame(
  K = K, C = NA, sigma_g = sigma_g, discount = discount,
  model = "ricker", precision = 0.0000001,
  memory = memory / mc.cores,
  timeout = 250000, timeInterval = 25000,
  max_state = max(states), max_obs = max(obs), max_action = max(actions),
  min_state = min(states), min_obs = min(obs), min_action = min(actions))
pars <- data.frame(vars, fixed)

## Usual assumption at the moment for the reward function
reward_fn <- function(x, h) pmin(x, h)

## Compute the POMDP matrices for each parameter combination
models <- lapply(1:dim(pars)[1], function(i) {
  ## Select the population growth model
  f <- switch(as.character(pars[i, "model"]),
              allen  = appl:::allen(pars[i, "r"], pars[i, "K"], pars[i, "C"]),
              ricker = appl:::ricker(pars[i, "r"], pars[i, "K"]))
  ## Compute matrices
  fisheries_matrices(states, actions, obs, reward_fn, f = f,
                     sigma_g = pars[i, "sigma_g"],
                     sigma_m = pars[i, "sigma_m"])
})
```
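Before launching the long SARSOP runs, a cheap structural check can catch mis-specified matrices. This sketch assumes `fisheries_matrices()` returns a list with elements `transition`, `observation`, and `reward`, with the first array dimension indexing the current state and the third the action; verify the layout against your installed version:

```r
## Structural check on the first model's matrices (layout assumed as above)
m <- models[[1]]
## Transition and observation probabilities should sum to 1 over their
## second dimension, for every (state, action) pair
stopifnot(all(abs(apply(m$transition, c(1, 3), sum) - 1) < 1e-8))
stopifnot(all(abs(apply(m$observation, c(1, 3), sum) - 1) < 1e-8))
dim(m$reward)  # expected: n_states x n_actions
```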
```r
alphas <- sarsop_plus(models,
                      discount = pars[1, "discount"],
                      precision = pars[1, "precision"],
                      timeout = pars[1, "timeout"],
                      timeInterval = pars[1, "timeInterval"],
                      log_dir = log_dir,
                      log_data = pars,
                      mc.cores = mc.cores)
```
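Each alpha vector is the value-function gradient on some region of belief space and is tied to one action, so the value of a belief b is V(b) = max_i b · α_i and the policy is the action of the maximizing vector. The helper below is a hypothetical illustration of that lookup (not the pomdpplus API), assuming a matrix `alpha_mat` with one column per alpha vector and a vector `alpha_action` of associated action indices:

```r
## Hypothetical illustration: pick the optimal action for a belief vector b
## given alpha vectors (columns of alpha_mat) and their associated actions
policy_from_alphas <- function(b, alpha_mat, alpha_action) {
  values <- as.vector(t(alpha_mat) %*% b)  # b . alpha_i for each vector i
  alpha_action[which.max(values)]          # action of the maximizing vector
}

## Example with made-up numbers: 3 states, 2 alpha vectors
alpha_mat <- cbind(c(1.0, 0.5, 0.2), c(0.2, 0.6, 1.0))
alpha_action <- c(1, 2)
policy_from_alphas(c(0.7, 0.2, 0.1), alpha_mat, alpha_action)  # -> action 1
```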