setwd("~/Documents/code/nonparametric-bayes/inst/examples/BUGS/")
Plotting and knitr options, (can generally be ignored)
library(knitcitations) opts_chunk$set(tidy=FALSE, warning=FALSE, message=FALSE, cache=FALSE, comment=NA, fig.width=6, fig.height=4) library(ggplot2) # plotting opts_knit$set(upload.fun = socialR::flickr.url) theme_set(theme_bw(base_size=10)) theme_update(panel.background = element_rect(fill = "transparent", colour = NA), plot.background = element_rect(fill = "transparent", colour = NA)) cbPalette <- c("#000000", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")
Load necessary libraries,
library(nonparametricbayes) # loads the rest as dependencies
Uses the model derived in citet("10.1080/10236190412331335373")
, of a Ricker-like growth curve with an allee effect, defined in the pdgControl package,
f <- Ricker p <- c(.5, 10) K <- p[2] C <- 5 # policy threshold
Various parameters defining noise dynamics, grid, and policy costs.
sigma_g <- 0.05 sigma_m <- 0.0 z_g <- function() rlnorm(1, 0, sigma_g) z_m <- function() 1+(2*runif(1, 0, 1)-1) * sigma_m x_grid <- seq(0, 1.5 * K, length=50) h_grid <- x_grid profit <- function(x,h) pmin(x, h) - 10*(x > C) # big penalty for violating threshold delta <- 0.01 OptTime <- 50 # stationarity with unstable models is tricky thing reward <- 0 xT <- 0 Xo <- K # observations start from x0 <- Xo # simulation under policy starts from Tobs <- 40
set.seed(1234) #harvest <- sort(rep(seq(0, .5, length=7), 5)) x <- numeric(Tobs) x[1] <- 6 nz <- 1 for(t in 1:(Tobs-1)) x[t+1] = z_g() * f(x[t], h=0, p=p) obs <- data.frame(x = c(rep(0,nz), pmax(rep(0,Tobs-1), x[1:(Tobs-1)])), y = c(rep(0,nz), x[2:Tobs])) # plot(x)
est <- par_est(obs, init = c(r = p[1], K = mean(obs$x[obs$x>0]), s = sigma_g))
#inv gamma has mean b / (a - 1) (assuming a>1) and variance b ^ 2 / ((a - 2) * (a - 1) ^ 2) (assuming a>2) s2.p <- c(5,5) d.p = c(10, 1/0.1)
Estimate the Gaussian Process (nonparametric Bayesian fit)
gp <- gp_mcmc(obs$x, y=obs$y, n=1e5, s2.p = s2.p, d.p = d.p) gp_dat <- gp_predict(gp, x_grid, burnin=1e4, thin=300)
Show traces and posteriors against priors
plots <- summary_gp_mcmc(gp)
# Summarize the GP model tgp_dat <- data.frame( x = x_grid, y = gp_dat$E_Ef, ymin = gp_dat$E_Ef - 2 * sqrt(gp_dat$E_Vf), ymax = gp_dat$E_Ef + 2 * sqrt(gp_dat$E_Vf) )
We initiate the MCMC chain (init_p
) using the true values of the parameters p
from the simulation. While impossible in real data, this gives the parametric Bayesian approach the best chance at succeeding. y
is the timeseries (recall obs
has the $x_t$, $x_{t+1}$ pairs)
# a bit unfair to start with the correct values, but anyhow... init_p = p names(init_p) = c("r0", "K", "theta") y <- obs$x[-1] N=length(y);
We'll be using the JAGS Gibbs sampler, a recent open source BUGS implementation with an R interface that works on most platforms. We initialize the usual MCMC parameters; see ?jags
for details.
jags.data <- list("N","y") n.chains = 1 n.iter = 40000 n.burnin = floor(10000) n.thin = max(1, floor(n.chains * (n.iter - n.burnin)/1000))
The actual model is defined in a model.file
that contains an R function that is automatically translated into BUGS code by R2WinBUGS. The file defines the priors and the model, as seen when read in here
cat(readLines(con="bugmodel-UPrior.txt"), sep="\n")
We define which parameters to keep track of, and set the initial values of parameters in the transformed space used by the MCMC. We use logarithms to maintain strictly positive values of parameters where appropriate.
# Uniform priors on standard deviation terms jags.params=c("K","logr0","logtheta","stdQ", "stdR") jags.inits <- function(){ list("K"=init_p["K"],"logr0"=log(init_p["r0"]),"logtheta"=log(init_p["theta"]), "stdQ"=sqrt(0.05),"stdR"=sqrt(0.1),"x"=y,.RNG.name="base::Wichmann-Hill", .RNG.seed=123) } set.seed(12345) time_jags <- system.time( jagsfit <- jags(data=jags.data, inits=jags.inits, jags.params, n.chains=n.chains, n.iter=n.iter, n.thin=n.thin, n.burnin=n.burnin,model.file="bugmodel-UPrior.txt") ) time_jags <- unname(time_jags["elapsed"]);
jags_matrix <- as.data.frame(as.mcmc.bugs(jagsfit$BUGSoutput)) par_posteriors <- melt(cbind(index = 1:dim(jags_matrix)[1], jags_matrix), id = "index")
# Traces ggplot(par_posteriors) + geom_line(aes(index, value)) + facet_wrap(~ variable, scale="free", ncol=1)
## priors (untransformed variables) K_prior <- function(x) dunif(x, 0.01, 40) logr_prior <- function(x) dunif(x, -6, 6) logtheta_prior <- function(x) dunif(x, -6, 6) stdQ_prior <- function(x) dunif(x, 0.001, 100) stdR_prior <- function(x) dunif(x, 0.001, 100) par_priors <- list(K = K_prior, deviance = function(x) 0 * x, logr0 = logr_prior, logtheta = logtheta_prior, stdQ = stdQ_prior, stdR = stdR_prior) par_prior_curves <- ddply(par_posteriors, "variable", function(dd){ grid <- seq(min(dd$value), max(dd$value), length = 100) data.frame(value = grid, density = par_priors[[dd$variable[1]]](grid)) }) # posterior distributions ggplot(par_posteriors, aes(value)) + stat_density(geom="path", position="identity", alpha=0.7) + geom_line(data=par_prior_curves, aes(x=value, y=density), col="red") + facet_wrap(~ variable, scale="free", ncol=2)
# um, cleaner if we were just be using the long form, par_posterior mcmc <- as.mcmc(jagsfit) mcmcall <- mcmc[,-2] who <- colnames(mcmcall) who mcmcall <- cbind(mcmcall[,1],mcmcall[,2],mcmcall[,3],mcmcall[,4],mcmcall[,5]) colnames(mcmcall) <- who pardist <- mcmcall pardist[,2] = exp(pardist[,2]) # transform model parameters back first pardist[,3] = exp(pardist[,3]) bayes_coef <- apply(pardist,2,mean) bayes_pars <- unname(c(bayes_coef[2], bayes_coef[1], bayes_coef[3])) bayes_pars
par_bayes_means <- sapply(x_grid, f, 0, bayes_pars)
We initiate the MCMC chain (init_p
) using the true values of the parameters p
from the simulation. While impossible in real data, this gives the parametric Bayesian approach the best chance at succeeding. y
is the timeseries (recall obs
has the $x_t$, $x_{t+1}$ pairs)
init_p = p names(init_p) = c("r0", "K") y <- obs$x[-1] N=length(y);
We'll be using the JAGS Gibbs sampler, a recent open source BUGS implementation with an R interface that works on most platforms. We initialize the usual MCMC parameters; see ?jags
for details.
jags.data <- list("N","y") n.chains = 1 n.iter = 40000 n.burnin = floor(10000) n.thin = max(1, floor(n.chains * (n.iter - n.burnin)/1000))
The actual model is defined in a model.file
that contains an R function that is automatically translated into BUGS code by R2WinBUGS. The file defines the priors and the model, as seen when read in here
cat(readLines(con="ricker-UPrior.txt"), sep="\n")
We define which parameters to keep track of, and set the initial values of parameters in the transformed space used by the MCMC. We use logarithms to maintain strictly positive values of parameters where appropriate.
# Uniform priors on standard deviation terms jags.params=c("K","logr0", "stdQ", "stdR") jags.inits <- function(){ list("K"=init_p["K"],"logr0"=log(init_p["r0"]), "stdQ"=sqrt(0.05),"stdR"=sqrt(0.1),"x"=y,.RNG.name="base::Wichmann-Hill", .RNG.seed=123) } set.seed(12345) time_jags <- system.time( jagsfit <- jags(data=jags.data, inits=jags.inits, jags.params, n.chains=n.chains, n.iter=n.iter, n.thin=n.thin, n.burnin=n.burnin,model.file="ricker-UPrior.txt") ) time_jags <- unname(time_jags["elapsed"]);
jags_matrix <- as.data.frame(as.mcmc.bugs(jagsfit$BUGSoutput)) par_posteriors <- melt(cbind(index = 1:dim(jags_matrix)[1], jags_matrix), id = "index") # Traces ggplot(par_posteriors) + geom_line(aes(index, value)) + facet_wrap(~ variable, scale="free", ncol=1)
## priors (untransformed variables) K_prior <- function(x) dunif(x, 0.01, 40) logr_prior <- function(x) dunif(x, -6, 6) stdQ_prior <- function(x) dunif(x, 0.001, 100) stdR_prior <- function(x) dunif(x, 0.001, 100) par_priors <- list(K = K_prior, deviance = function(x) 0 * x, logr0 = logr_prior, stdQ = stdQ_prior, stdR = stdR_prior) par_prior_curves <- ddply(par_posteriors, "variable", function(dd){ grid <- seq(min(dd$value), max(dd$value), length = 100) data.frame(value = grid, density = par_priors[[dd$variable[1]]](grid)) }) # posterior distributions ggplot(par_posteriors, aes(value)) + stat_density(geom="path", position="identity", alpha=0.7) + geom_line(data=par_prior_curves, aes(x=value, y=density), col="red") + facet_wrap(~ variable, scale="free", ncol=2)
ricker_pardist <- jags_matrix[! names(jags_matrix) == "deviance" ] ricker_pardist[,"logr0"] = exp(ricker_pardist[,"logr0"]) # transform model parameters back first posterior.mode <- function(x) { ux <- unique(x) ux[which.max(tabulate(match(x, ux)))] } apply(ricker_pardist,2,mean) bayes_coef <- apply(ricker_pardist,2, posterior.mode) # much better estimates ricker_bayes_pars <- unname(c(bayes_coef[2], bayes_coef[1])) ricker_bayes_pars
logr0_prior_p <- c(-6.0, 6.0) logtheta_prior_p <- c(-6.0, 6.0) logK_prior_p <- c(-6.0, 6.0) stdQ_prior_p <- c(0.0001, 100) stdR_prior_p <- c(0.0001, 100) bugs.model <- paste(sprintf( "model{ logr0 ~ dunif(%s, %s) logtheta ~ dunif(%s, %s) logK ~ dunif(%s, %s) stdQ ~ dunif(%s, %s) stdR ~ dunif(%s, %s)", logr0_prior_p[1], logr0_prior_p[2], logtheta_prior_p[1], logtheta_prior_p[2], logK_prior_p[1], logK_prior_p[2], stdQ_prior_p[1], stdQ_prior_p[2], stdR_prior_p[1], stdR_prior_p[2]), " iQ <- 1 / (stdQ * stdQ); iR <- 1 / (stdR * stdR); r0 <- exp(logr0) theta <- exp(logtheta) K <- exp(logK) x[1] ~ dunif(0, 10) for(t in 1:(N-1)){ mu[t] <- r0 * pow(abs(x[t]), theta) / (1 + pow(abs(x[t]), theta) / K) x[t+1] ~ dnorm(mu[t], iQ) } for(t in 1:(N)){ y[t] ~ dnorm(x[t], iR) } }") writeLines(bugs.model, "myers.bugs")
## priors (untransformed variables) logK_prior <- function(x) dunif(x, logK_prior_p[1], logK_prior_p[2]) logr_prior <- function(x) dunif(x, logr0_prior_p[1], logr0_prior_p[2]) logtheta_prior <- function(x) dunif(x, logtheta_prior_p[1], logtheta_prior_p[2]) stdQ_prior <- function(x) dunif(x, stdQ_prior_p[1], stdQ_prior_p[2]) stdR_prior <- function(x) dunif(x, stdR_prior_p[1], stdR_prior_p[2])
true_means <- sapply(x_grid, f, 0, p) mle_means <- sapply(x_grid, est$f, 0, est$p) ricker_means <- sapply(x_grid, est$f, 0, ricker_bayes_pars[c(1,2)]) allen_means <- sapply(x_grid, f, 0, bayes_pars) # myers_means <- sapply(x_grid, Myer_harvest, 0, myers_bayes_pars) models <- data.frame(x=x_grid, GP=tgp_dat$y, True=true_means, MLE=mle_means, Ricker = ricker_means, Allen = allen_means) models <- melt(models, id="x") names(models) <- c("x", "method", "value")
plot_gp <- ggplot(tgp_dat) + geom_ribbon(aes(x,y,ymin=ymin,ymax=ymax), fill="gray80") + geom_line(data=models, aes(x, value, col=method), lwd=1, alpha=0.8) + geom_point(data=obs, aes(x,y), alpha=0.8) + xlab(expression(X[t])) + ylab(expression(X[t+1])) + scale_colour_manual(values=cbPalette) print(plot_gp)
Compute the optimal policy under each model using stochastic dynamic programming. We begin with the policy based on the GP model,
MaxT = 1000 # uses expected values from GP, instead of integrating over posterior #matrices_gp <- gp_transition_matrix(gp_dat$E_Ef, gp_dat$E_Vf, x_grid, h_grid) # Integrate over posteriors matrices_gp <- gp_transition_matrix(gp_dat$Ef_posterior, gp_dat$Vf_posterior, x_grid, h_grid) # Solve the SDP using the GP-derived transition matrix opt_gp <- value_iteration(matrices_gp, x_grid, h_grid, MaxT, xT, profit, delta, reward)
Determine the optimal policy based on the true and MLE models
matrices_true <- f_transition_matrix(f, p, x_grid, h_grid, sigma_g) opt_true <- value_iteration(matrices_true, x_grid, h_grid, OptTime=MaxT, xT, profit, delta=delta) matrices_estimated <- f_transition_matrix(est$f, est$p, x_grid, h_grid, est$sigma_g) opt_estimated <- value_iteration(matrices_estimated, x_grid, h_grid, OptTime=MaxT, xT, profit, delta=delta)
Determine the optimal policy based on parametric Bayesian model
allen_f <- function(x,h,p) unname(f(x,h,p[c(2, 1, 3)])) matrices_par_bayes <- parameter_uncertainty_SDP(allen_f, x_grid, h_grid, pardist, 4) opt_par_bayes <- value_iteration(matrices_par_bayes, x_grid, h_grid, OptTime=MaxT, xT, profit, delta=delta)
Bayesian Ricker
ricker_f <- function(x, h, p) est$f(x, h, unname(p[c(2, 1)])) matrices_alt <- parameter_uncertainty_SDP(ricker_f, x_grid, h_grid, as.matrix(ricker_pardist), 3) opt_alt <- value_iteration(matrices_alt, x_grid, h_grid, OptTime=MaxT, xT, profit, delta=delta)
Assemble the data
OPT = data.frame(GP = opt_gp$D, True = opt_true$D, MLE = opt_estimated$D, Ricker = opt_alt$D, Allen = opt_par_bayes$D) colorkey=cbPalette names(colorkey) = names(OPT)
policies <- melt(data.frame(stock=x_grid, sapply(OPT, function(x) x_grid[x])), id="stock") names(policies) <- c("stock", "method", "value") ggplot(policies, aes(stock, stock - value, color=method)) + geom_line(lwd=1.2, alpha=0.8) + xlab("stock size") + ylab("escapement") + scale_colour_manual(values=colorkey)
sims <- lapply(OPT, function(D){ set.seed(1) lapply(1:100, function(i) ForwardSimulate(f, p, x_grid, h_grid, x0, D, z_g, profit=profit, OptTime=OptTime) ) }) dat <- melt(sims, id=names(sims[[1]][[1]])) dt <- data.table(dat) setnames(dt, c("L1", "L2"), c("method", "reps")) # Legend in original ordering please, not alphabetical: dt$method = factor(dt$method, ordered=TRUE, levels=names(OPT))
ggplot(dt) + geom_line(aes(time, fishstock, group=interaction(reps,method), color=method), alpha=.1) + scale_colour_manual(values=colorkey, guide = guide_legend(override.aes = list(alpha = 1)))
ggplot(dt[method %in% c("True", "Ricker", "MLE")]) + geom_line(aes(time, fishstock, group=interaction(reps,method), color=method), alpha=.1) + scale_colour_manual(values=colorkey, guide = guide_legend(override.aes = list(alpha = 1)))
Profit <- dt[, sum(profit), by=c("reps", "method")] Profit[, mean(V1), by="method"]
ggplot(Profit, aes(V1)) + geom_histogram() + facet_wrap(~method, scales = "free_y") + guides(legend.position = "none")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.