Carl Boettiger
library("mdplearning")
library("appl")
library("ggplot2")
library("tidyr")
library("purrr")
library("dplyr")
knitr::opts_chunk$set(cache = TRUE)
# Alternative state grids tried earlier (kept for reference):
#   states <- seq(0, 1.2, length = 200)
#   states <- c(seq(0, 0.12, length = 30), seq(0.12, 1.2, length = 200))

# Discretisation: an evenly spaced grid on [0, 1.2^(1/p)] raised to the power
# p, which packs grid points more densely near zero when p > 1.
p <- 2
states <- seq(from = 0, to = 1.2^(1 / p), length.out = 150)^p # all possible states
actions <- states # harvest levels use the same grid as the states
obs <- states     # observations use the same grid as well

# Parameters used for the "true" model below, and the discount factor.
r <- 0.05699246
K <- 0.9903371
sigma_g <- 0.01720091
discount <- 0.99

# Candidate-model table: one row per value of r, with every other parameter
# held fixed (the single-row `fixed` frame is recycled across the rows of
# `vars`).
vars <- expand.grid(r = seq(0.025, 0.2, by = 0.025), sigma_m = 0.4)
fixed <- data.frame(
  model = "ricker",
  sigma_g = sigma_g,
  discount = discount,
  K = K,
  C = NA
)
models <- data.frame(vars, fixed)

## Usual assumption at the moment for reward fn:
## the reward is the smaller of stock x and harvest quota h.
reward_fn <- function(x, h) {
  pmin(x, h)
}

# Matrices for the "true" model. Note that sigma_m is 0.3 here, whereas the
# candidate models above all use sigma_m = 0.4.
true_m <- appl::fisheries_matrices(
  states, actions, obs, reward_fn,
  f = appl:::ricker(r, K),
  sigma_g = sigma_g,
  sigma_m = 0.3
)
# Build the matrices for every candidate model (one row of `models` each),
# in parallel where the platform allows it.
#
# detectCores() is documented to return NA when the core count is unknown,
# and mclapply() cannot fork on Windows, so fall back to a single core in
# either case instead of failing.
n_cores <- parallel::detectCores()
if (is.na(n_cores) || .Platform$OS.type == "windows") {
  n_cores <- 1L
}

tuna_models <- parallel::mclapply(seq_len(nrow(models)), function(i) {
  # switch() treats a factor as an integer index, so a factor `model` column
  # (the data.frame() default before R 4.0) would pick a branch by position
  # rather than by label. Convert to character to dispatch on the name.
  model_name <- as.character(models[i, "model"])
  f <- switch(model_name,
    allen = appl:::allen(models[i, "r"], models[i, "K"], models[i, "C"]),
    ricker = appl:::ricker(models[i, "r"], models[i, "K"]),
    # Fail loudly instead of passing f = NULL on to fisheries_matrices().
    stop("Unknown model type: ", model_name, call. = FALSE)
  )
  appl::fisheries_matrices(states, actions, obs, reward_fn,
    f = f,
    sigma_g = models[i, "sigma_g"], sigma_m = models[i, "sigma_m"]
  )
}, mc.cores = n_cores)
# From here on `models` is the list of per-model matrix sets, no longer the
# parameter table it held above.
models <- tuna_models

# Each model contributes its own transition matrices; the reward and
# observation matrices are taken from the first model.
transitions <- lapply(models, function(m) m[["transition"]])
reward <- models[[1]]$reward
observation <- models[[1]]$observation

# Policy with no explicit prior over models (named `unif`; presumably the
# default prior is uniform -- confirm against mdp_compute_policy()).
unif <- mdp_compute_policy(transitions, reward, discount)

# Policy when all prior weight sits on the first model.
prior <- replace(numeric(length(models)), 1, 1)
low <- mdp_compute_policy(transitions, reward, discount, prior)

# Policy when all prior weight sits on the second model, which this script
# treats as the "true" one.
prior <- replace(numeric(length(models)), 2, 1)
true <- mdp_compute_policy(transitions, reward, discount, prior)

# Compare the three policies: state minus chosen action, plotted against state.
list(unif = unif, low = low, true = true) %>%
  bind_rows(.id = "model") %>%
  ggplot(aes(
    x = states[state],
    y = states[state] - actions[policy],
    colour = model
  )) +
  geom_line()
set.seed(123)

# Map each historical value in `scaled_data` onto the index of the nearest
# grid point (y on the state grid, a on the action grid).
data("scaled_data")
y <- sapply(scaled_data$y, function(obs_y) which.min(abs(states - obs_y)))
a <- sapply(scaled_data$a, function(obs_a) which.min(abs(actions - obs_a)))
Tmax <- length(y)

# Rescale the grids by the maximum of the recorded `total` series.
data("bluefin_tuna")
to_mt <- max(bluefin_tuna$total) # 1178363 # scaling factor for data
states_mt <- states * to_mt
actions_mt <- actions * to_mt

year <- 1952:2009
future <- 2009:2067

# Replay the historical states and actions through the candidate models.
mdp_hindcast <- mdp_historical(transitions, reward, discount,
  state = y, action = a
)

# Plot actual catch, estimated stock and the optimal action over the
# historical years, converting grid indices back to rescaled values.
mdp_hindcast$df %>%
  mutate(
    `actual catch` = actions_mt[action],
    `estimated stock` = states_mt[state],
    optimal = actions_mt[optimal],
    time = year[time]
  ) %>%
  select(-state, -action) %>%
  gather(key = variable, value = stock, -time) %>%
  ggplot(aes(x = time, y = stock, colour = variable)) +
  geom_line(lwd = 1) # + geom_point()
# Recorded output when this plot was rendered:
## Warning: Removed 1 rows containing missing values (geom_path).
Show the final posterior belief over the candidate models (here for the MDP hindcast):
# Posterior over the candidate models in the last row of the hindcast
# (Tmax is the length of the historical series at this point).
barplot(as.numeric(mdp_hindcast$posterior[Tmax, ]))

# delta function for true model distribution: all mass on model 2
h_star <- array(0, dim = length(models))
h_star[2] <- 1

## Fn for the base-2 KL divergence from true model, in a friendly format.
## Takes a vector of model probabilities and returns the second element of
## seewave::kl.dist()'s result (presumably the divergence taken with h_star
## as the reference distribution -- confirm against the seewave docs).
kl2 <- function(value) seewave::kl.dist(value, h_star, base = 2)[[2]]

# KL divergence of the posterior from h_star at each time step.
mdp_hindcast$posterior %>%
  mutate(time = year[seq_len(Tmax)], rep = 1) %>%
  gather(model, value, -time, -rep) %>%
  group_by(time, rep) %>%
  summarise(kl = kl2(value)) %>%
  ungroup() %>%
  ggplot(aes(time, kl)) +
  # `fun` replaces the deprecated `fun.y` argument (ggplot2 >= 3.3.0).
  stat_summary(geom = "line", fun = mean, lwd = 1)
All forecasts start from the final observed stock and run forward for an equal length of time:
# Alternative initial stock, for hindcasts (grid point nearest to 1):
#   x0 <- which.min(abs(1 - states))
# Initial stock for the forecasts: the last historical state index
# (noted as 9 in the original run).
x0 <- tail(y, 1)
Tmax <- length(y)
set.seed(123)
We simulate replicates under MDP learning (with observation uncertainty):
# Forecast horizon, and the calendar years that the simulation steps map to.
Tmax <- 150
future <- 2009:(2009 + Tmax)

# 25 replicate simulations of MDP learning. Each starts from the final
# historical state, uses the hindcast posterior as the prior over models, and
# is driven by the "true" transition matrix.
# (plus_replicate() presumably re-evaluates the expression once per replicate
# -- confirm against the pomdpplus documentation.)
mdp_forecast <-
  pomdpplus::plus_replicate(25,
    mdp_learning(
      transition = transitions, reward = reward,
      model_prior = as.numeric(mdp_hindcast$posterior[length(y), ]),
      discount = discount, x0 = x0, Tmax = Tmax,
      true_transition = true_m$transition,
      observation = observation
    ),
    mc.cores = parallel::detectCores()
  )

# Historical record, with columns renamed to match the simulation output.
historical <- bluefin_tuna[c("tsyear", "total", "catch_landings")] %>%
  rename(time = tsyear, state = total, action = catch_landings)

# Stack history and forecasts, then plot the individual replicates (faint
# lines), their mean (solid line) and a mean +/- 1 sd ribbon, with one panel
# per variable. The historical rows have no `rep`, so it is NA for them.
bind_rows(
  historical = historical,
  mdp = mdp_forecast$df %>%
    select(-value, -obs) %>%
    mutate(
      state = states_mt[state],
      action = actions_mt[action],
      time = future[time]
    ),
  .id = "method"
) %>%
  rename("catch (MT)" = action, "stock (MT)" = state) %>%
  gather(variable, stock, -time, -rep, -method) %>%
  ggplot(aes(time, stock)) +
  geom_line(aes(group = interaction(rep, method), color = method), alpha = 0.1) +
  # `fun` replaces the deprecated `fun.y` argument (ggplot2 >= 3.3.0).
  stat_summary(aes(color = method), geom = "line", fun = mean, lwd = 1) +
  stat_summary(aes(fill = method),
    geom = "ribbon", fun.data = mean_sdl,
    fun.args = list(mult = 1), alpha = 0.25
  ) +
  facet_wrap(~variable, ncol = 1, scales = "free_y")
# Planning without further learning: compute one policy from the hindcast
# posterior, then simulate it against the "true" transition matrix.
model_prior <- as.numeric(mdp_hindcast$posterior[length(y), ])
planning <- mdp_compute_policy(transitions, reward, discount, model_prior)
df <- mdp_planning(
  true_m$transition, reward, discount,
  x0 = x0, Tmax = Tmax,
  policy = planning$policy, observation = observation
)

# Convert grid indices back to grid values and plot each series over time.
df %>%
  select(-value) %>%
  mutate(
    state = states[state],
    obs = states[obs],
    action = actions[action]
  ) %>%
  gather(key = series, value = stock, -time) %>%
  ggplot(aes(x = time, y = stock, colour = series)) +
  geom_line()
# Deterministic benchmark: a constant-escapement policy built from the model
# with the "true" parameters r and K.
f <- appl:::ricker(r, K)

# Escapement level S_star: the point on the state range that minimises
# x / discount - f(x, 0), i.e. maximises f(x, 0) - x / discount.
S_star <- optimize(
  function(x) x / discount - f(x, 0),
  c(min(states), max(states))
)$minimum

# Harvest everything above S_star and nothing below it.
h <- pmax(states - S_star, 0)

# Index of the nearest action-grid point for each target harvest. vapply()
# guarantees an integer vector (sapply() could silently return a list).
policy <- vapply(h, function(h_i) which.min(abs(h_i - actions)), integer(1))
det <- data.frame(policy, value = seq_along(states), state = seq_along(states))

# Simulate the deterministic policy against the "true" transition matrix.
df <- mdp_planning(true_m$transition, reward, discount,
  x0 = x0, Tmax = Tmax,
  policy = det$policy, observation = observation
)

# Convert grid indices back to grid values and plot each series over time.
df %>%
  select(-value) %>%
  mutate(state = states[state], obs = states[obs], action = actions[action]) %>%
  gather(series, stock, -time) %>%
  ggplot(aes(time, stock, color = series)) +
  geom_line()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.