library("mdplearning") library("appl") library("ggplot2") library("tidyr") library("purrr") library("dplyr") knitr::opts_chunk$set(cache = TRUE)
# states <- seq(0, 1.2, length = 200)
# states <- c(seq(0, 0.12, length = 30), seq(0.12, 1.2, length = 200))
p <- 2
states <- seq(0, 1.2^(1/p), len = 150)^p  # Vector of all possible states
actions <- states                         # Vector of actions: harvest
obs <- states
K <- 0.9903371
r <- 0.05699246
sigma_g <- 0.01720091
discount <- 0.99

vars <- expand.grid(r = seq(0.025, 0.2, by = 0.025), sigma_m = 0.4)
fixed <- data.frame(model = "ricker", sigma_g = sigma_g, discount = discount, K = K, C = NA)
pars <- data.frame(vars, fixed)

## Usual assumption at the moment for the reward function: realized catch cannot exceed stock
reward_fn <- function(x, h) pmin(x, h)
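With p = 2 the grid points are squares of an equally spaced sequence, so resolution is concentrated at low stock sizes, where fine discretization matters most for a depleted stock. A quick, purely illustrative check of the spacing:

# Spacing is fine near zero and coarse near the maximum stock size
head(diff(states), 3)
tail(diff(states), 3)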
true_m <- appl::fisheries_matrices(states, actions, obs, reward_fn,
                                   f = appl:::ricker(r, K),
                                   sigma_g = sigma_g,
                                   sigma_m = pars[1, "sigma_m"])

models <- parallel::mclapply(1:dim(pars)[1], function(i) {
  f <- switch(pars[i, "model"],
              allen  = appl:::allen(pars[i, "r"], pars[i, "K"], pars[i, "C"]),
              ricker = appl:::ricker(pars[i, "r"], pars[i, "K"]))
  appl::fisheries_matrices(states, actions, obs, reward_fn,
                           f = f,
                           sigma_g = pars[i, "sigma_g"],
                           sigma_m = pars[i, "sigma_m"])
}, mc.cores = parallel::detectCores())
reward <- models[[1]]$reward
transitions <- lapply(models, `[[`, "transition")
observation <- models[[1]]$observation
unif <- mdp_compute_policy(transitions, reward, discount)

prior <- numeric(length(models))
prior[1] <- 1
low <- mdp_compute_policy(transitions, reward, discount, prior)

prior <- numeric(length(models))
prior[2] <- 1
true <- mdp_compute_policy(transitions, reward, discount, prior)

bind_rows(unif = unif, low = low, true = true, .id = "model") %>%
  ggplot(aes(states[state], states[state] - actions[policy], col = model)) +
  geom_line()
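mdp_compute_policy solves for the optimal harvest policy by dynamic programming; when given a model prior, it optimizes against the prior-weighted mixture of transition models. As a rough illustration of the underlying computation, here is a minimal value-iteration sketch for a single model. It assumes a transition array indexed [state, next state, action] and a reward matrix indexed [state, action], as in the fisheries_matrices output above; it is not mdplearning's actual implementation.

# Minimal value-iteration sketch (illustrative only)
value_iteration <- function(P, R, discount, tol = 1e-8) {
  n_s <- dim(P)[1]
  n_a <- dim(P)[3]
  V <- numeric(n_s)
  repeat {
    # Q[s, a] = immediate reward + discounted expected future value
    Q <- sapply(seq_len(n_a), function(a) R[, a] + discount * P[, , a] %*% V)
    V_new <- apply(Q, 1, max)
    if (max(abs(V_new - V)) < tol) break
    V <- V_new
  }
  list(policy = apply(Q, 1, which.max), value = V)
}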
set.seed(123)

data("scaled_data")
y <- sapply(scaled_data$y, function(y) which.min(abs(states - y)))
a <- sapply(scaled_data$a, function(a) which.min(abs(actions - a)))
Tmax <- length(y)

data("bluefin_tuna")
to_mt <- max(bluefin_tuna$total)  # 1178363; scaling factor for the data
states_mt <- to_mt * states
actions_mt <- to_mt * actions
year <- 1952:2009
future <- 2009:2067
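The continuous observations and catches in scaled_data are snapped to the index of their nearest grid point, so all later indexing is in grid units. For example (illustrative):

# Grid value closest to a continuous stock of 0.5
states[which.min(abs(states - 0.5))]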
mdp_hindcast <- mdp_historical(transitions, reward, discount, state = y, action = a)
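mdp_historical replays the observed state/action sequence and tracks the posterior probability of each candidate model as it goes. The underlying Bayesian update is standard; a minimal sketch, again assuming transition arrays indexed [state, next state, action] (illustrative, not mdplearning's internal code):

# One Bayesian update of model weights after observing s -> s_next under action a
update_belief <- function(prior, transitions, s, a, s_next) {
  lik <- vapply(transitions, function(P) P[s, s_next, a], numeric(1))
  posterior <- prior * lik
  posterior / sum(posterior)
}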
mdp_hindcast$df %>%
  mutate("actual catch" = actions_mt[action],
         "estimated stock" = states_mt[state],
         optimal = actions_mt[optimal],
         time = year[time]) %>%
  select(-state, -action) %>%
  gather(variable, stock, -time) %>%
  ggplot(aes(time, stock, color = variable)) +
  geom_line(lwd = 1) # + geom_point()
Show the final belief over models from the MDP hindcast:
barplot(as.numeric(mdp_hindcast$posterior[Tmax,]))
# Delta function for the true model distribution
h_star <- array(0, dim = length(models))
h_star[2] <- 1

## Base-2 KL divergence from the true model, in a friendly format
kl2 <- function(value) seewave::kl.dist(value, h_star, base = 2)[[2]]

mdp_hindcast$posterior %>%
  mutate(time = year[1:Tmax], rep = 1) %>%
  gather(model, value, -time, -rep) %>%
  group_by(time, rep) %>%
  summarise(kl = kl2(value)) %>%
  ggplot(aes(time, kl)) +
  stat_summary(geom = "line", fun.y = mean, lwd = 1)
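Because h_star is a point mass on model 2, the divergence from the true model reduces to minus the log of the posterior weight assigned to that model. If kl2 is computing KL(h_star || posterior), an equivalent closed form (assuming that argument order in seewave::kl.dist) would be:

# Equivalent closed form under a point-mass truth (assumed argument order):
# KL(h_star || posterior) = -log2(posterior weight on the true model)
kl2_closed <- function(value) -log2(value[2])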
All forecasts start from the final observed stock and run forward for an equal length of time:
# x0 <- which.min(abs(1 - states))  # Initial stock, for hindcasts
x0 <- y[length(y)]  # Final stock, used to start the forecasts (= 9)
Tmax <- length(y)
set.seed(123)
We simulate replicates under MDP learning (with observation uncertainty):
Tmax <- 150
future <- 2009:(2009 + Tmax)

mdp_forecast <-
  pomdpplus::plus_replicate(25,
                            mdp_learning(transition = transitions,
                                         reward = reward,
                                         model_prior = as.numeric(mdp_hindcast$posterior[length(y), ]),
                                         discount = discount,
                                         x0 = x0,
                                         Tmax = Tmax,
                                         true_transition = true_m$transition,
                                         observation = observation),
                            mc.cores = parallel::detectCores())
historical <- bluefin_tuna[c("tsyear", "total", "catch_landings")] %>%
  rename(time = tsyear, state = total, action = catch_landings)

bind_rows(historical = historical,
          mdp = mdp_forecast$df %>%
            select(-value, -obs) %>%
            mutate(state = states_mt[state],
                   action = actions_mt[action],
                   time = future[time]),
          .id = "method") %>%
  rename("catch (MT)" = action, "stock (MT)" = state) %>%
  gather(variable, stock, -time, -rep, -method) %>%
  ggplot(aes(time, stock)) +
  geom_line(aes(group = interaction(rep, method), color = method), alpha = 0.1) +
  stat_summary(aes(color = method), geom = "line", fun.y = mean, lwd = 1) +
  stat_summary(aes(fill = method), geom = "ribbon",
               fun.data = mean_sdl, fun.args = list(mult = 1), alpha = 0.25) +
  facet_wrap(~variable, ncol = 1, scales = "free_y")
model_prior <- as.numeric(mdp_hindcast$posterior[length(y), ])
planning <- mdp_compute_policy(transitions, reward, discount, model_prior)

df <- mdp_planning(true_m$transition, reward, discount,
                   x0 = x0, Tmax = Tmax,
                   policy = planning$policy,
                   observation = observation)
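mdp_planning simulates the system forward under the true transition while applying a fixed, precomputed policy, with no further learning. A minimal sketch of that simulation loop, ignoring observation error and assuming [state, next state, action] indexing (illustrative only):

# Sketch: simulate a fixed policy under a known transition array
simulate_policy <- function(P, policy, x0, Tmax) {
  s <- integer(Tmax)
  s[1] <- x0
  for (t in 1:(Tmax - 1)) {
    a <- policy[s[t]]
    s[t + 1] <- sample(seq_len(dim(P)[1]), 1, prob = P[s[t], , a])
  }
  s
}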
df %>%
  select(-value) %>%
  mutate(state = states[state], obs = states[obs], action = actions[action]) %>%
  gather(series, stock, -time) %>%
  ggplot(aes(time, stock, color = series)) +
  geom_line()
f <- appl:::ricker(r, K)

# Deterministic optimal escapement S*
S_star <- optimize(function(x) x / discount - f(x, 0),
                   c(min(states), max(states)))$minimum
h <- pmax(states - S_star, 0)
policy <- sapply(h, function(h) which.min(abs(h - actions)))
det <- data.frame(policy, value = 1:length(states), state = 1:length(states))

df <- mdp_planning(true_m$transition, reward, discount,
                   x0 = x0, Tmax = Tmax,
                   policy = det$policy,
                   observation = observation)
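The objective x/discount - f(x, 0) is minimized where f'(S*) = 1/discount, i.e. where the marginal growth rate just offsets the discount rate: the classic constant-escapement condition. A quick numerical sanity check (illustrative):

# At S_star, the marginal growth rate should approximately equal 1/discount
eps <- 1e-6
fprime <- (f(S_star + eps, 0) - f(S_star - eps, 0)) / (2 * eps)
c(fprime = fprime, target = 1 / discount)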
df %>%
  select(-value) %>%
  mutate(state = states[state], obs = states[obs], action = actions[action]) %>%
  gather(series, stock, -time) %>%
  ggplot(aes(time, stock, color = series)) +
  geom_line()