knitr::opts_chunk$set(echo = TRUE)

Setting

This script applies the proposed ABC with Wasserstein distance, to approximate the posterior distribution in a cosine trend model.

The model specifies $Y_t \sim \mathcal{N}(\cos(2\pi \omega t + \phi), \sigma^2)$, the parameters are $(\omega, \phi, \sigma^2)$, the prior is uniform on $[0,0.1]$ for $\omega$, uniform on $[0,2\pi]$ for $\phi$, and $\mathcal{N}(0,1)$ for $\log(\sigma)$. The data are generated from $\omega_\star = 1/80$, $\phi_\star = \pi/4$ and $\sigma_\star = 1$.

We begin by loading the package, registering multiple cores, setting the random number generator, etc.

# load package
library(winference)
# register parallel cores
registerDoParallel(cores = detectCores())
# remove all
rm(list = ls())
# apply preferences for ggplotting
require(gridExtra)
theme_set(theme_bw())
# set RNG seed
set.seed(11)

Data and model

We define the model and generate some data from it.

nobservations <- 100

rprior <- function(nparticles, ...){
  omegas <- runif(nparticles, min = 0, max = 1/10)
  phis <- runif(nparticles, min = 0, max = 2*pi)
  logsigma <- rnorm(nparticles)
  return(cbind(omegas, phis, logsigma))
}

# evaluate the log-density of the prior, for each particle
dprior <- function(thetas, ...){
  logdensities <- dnorm(thetas[,3], 0, 1, log = TRUE)
  logdensities <- logdensities + dunif(thetas[,1], min = 0, max = 1/10, log = TRUE)
  logdensities <- logdensities + dunif(thetas[,2], min = 0, max = 2*pi, log = TRUE)
  return(logdensities)
}
#
# function to generate a dataset for each theta value
simulate <- function(theta){
  observations <- cos(2*pi*theta[1]*(1:nobservations) + theta[2]) + rnorm(nobservations, mean = 0, sd = exp(theta[3]))
  return(observations)
}
# model in a list
target <- list(rprior = rprior,
              dprior = dprior,
              simulate = simulate,
              parameter_names = c("omega", "phi", "logsigma"),
              thetadim = 3, ydim = 1,
              parameters = list())
# data-generating parameter
theta_star <- c(1/80, pi/4, 0)
# generate observations
obs <- target$simulate(theta_star)
# plot observations
plot(obs, type = "l", ylab = "y")

Distance calculation and Monte Carlo algorithm

We define a way of calculating a distance between fake data and the observed data. Here we use curve matching: we augment each observation $y_t$ with the time index $t$, and then define a ground metric on the space of $(t,y_t)$. We are also replacing the Wasserstein distance by the Swapping distance.

lambda <- 1
multiplier <- lambda*(max(obs) - min(obs))
augment <- function(series) rbind(series, multiplier * (1:length(series))/length(series))
augmented_obs <- augment(obs)

# use the swapping distance instead of the exact Wasserstein
compute_distance <- function(y_fake){
  augmented_y_fake <- augment(y_fake)
  return(swap_distance(augmented_obs, augmented_y_fake, p = 1, ground_p = 2, tolerance = 1e-5)$distance)
}
# test: compute_d(target$simulate(target$rprior(1)))

We specify algorithmic parameters in a list: $1024$ particles, one move per juvenation step, a mixture of Gaussian as a proposal distribution, etc.

# algorithmic parameters: number of particles, number of moves per rejuvenation step,
# proposal distribution, the number of steps to perform in total, the diversity parameter
# used in the threshold adaptation, the number of hits to use in the r-hit kernel,
# and the maximum number of trials to use in the r-hit kernel before rejecting.
param_algo <- list(nthetas = 1024, nmoves = 1, proposal = mixture_rmixmod(),
                   minimum_diversity = 0.5, R = 2, maxtrials = 1e5)

We now run the algorithm, for a certain budget of model simulations. This might take a few minutes.

# now run the algorithm until 3e5 model simulations have been performed
wsmcresults <- wsmc(compute_distance, target, param_algo, maxsimulation = 3e5)

Now we can look at the ouput, with various plots, for instance the thresholds against the number of model simulations.

# names(wsmcresults)
qplot(x = cumsum(wsmcresults$ncomputed), y = wsmcresults$threshold_history, geom = "line") + scale_y_log10() + scale_x_log10() + xlab("# model simulations") + geom_point()

We can look at the marginal distributions of parameters, with the data-generating parameters indicated by vertical lines.

# and let's look at the parameters themselves
grid.arrange(plot_marginal(wsmcresults, i = 1) + geom_vline(xintercept = theta_star[1]),
             plot_marginal(wsmcresults, i = 2) + geom_vline(xintercept = theta_star[2]),
             plot_marginal(wsmcresults, i = 3) + geom_vline(xintercept = theta_star[3]), ncol=3)

We see that the marginals start concentrating around the data-generating parameters, as expected from the theory.

Posterior samples via MCMC

We use the WABC output to initialize a Metropolis-Hastings algorithm, targeting the actual posterior distribution.

# define log-likelihood function
target$loglikelihood <- function(thetas, ys, ...){
  evals <- rep(0, nrow(thetas))
  for (itheta in 1:nrow(thetas)){
    backbone <- cos(2 * pi * thetas[itheta,1] * (1:nobservations) + thetas[itheta,2])
    evals[itheta] <- sum(dnorm(ys, mean = backbone, sd = exp(thetas[itheta,3]), log = TRUE))
  }
  return(evals)
}
thetas <- wsmcresults$thetas_history[[length(wsmcresults$thetas_history)]]
thetas_cov <- cov(thetas)
# initial states of the Markov chain
theta_init <- thetas[sample(x = 1:nrow(thetas), 4),]
# tuning parameters
tuning_parameters <- list(niterations = 50000, nchains = nrow(theta_init),
                          cov_proposal = thetas_cov,
                          adaptation = 10000, init_chains = theta_init)
# run adaptive MH scheme
mh <- metropolishastings(obs, target, tuning_parameters)
burnin <- 0
chain.df <- mhchainlist_to_dataframe(mh$chains)

# trace plot, to check MCMC convergence
g1 <- ggplot(chain.df %>% filter(iteration > burnin, iteration %% 100 == 1), aes(x = iteration, y = X.1, group = ichain, colour = factor(ichain))) + geom_line() + theme(legend.position = "none")
g1 <- g1 + geom_hline(yintercept = theta_star[1], col = "red")
g1

# let's look at the marginal distributions
wsmc.df <- wsmc_to_dataframe(wsmcresults)
g1 <- ggplot(chain.df, aes(x = X.1)) + geom_density(aes(y = ..density.., fill = "Posterior"), alpha = 0.5) +
  geom_density(data = wsmc.df %>% filter(step == length(wsmcresults$thetas_history)), aes(x = omega, y = ..density.., fill = "ABC"), alpha = 0.5) + scale_fill_manual(name = "", values = c("Posterior" = "black", "ABC" = "darkblue")) + xlab(expression(omega))

g2 <- ggplot(chain.df, aes(x = X.2)) + geom_density(aes(y = ..density.., fill = "Posterior"), alpha = 0.5) +
  geom_density(data = wsmc.df %>% filter(step == length(wsmcresults$thetas_history)), aes(x = phi, y = ..density.., fill = "ABC"), alpha = 0.5) + scale_fill_manual(name = "", values = c("Posterior" = "black", "ABC" = "darkblue")) + xlab(expression(phi))

g3 <- ggplot(chain.df, aes(x = X.3)) + geom_density(aes(y = ..density.., fill = "Posterior"), alpha = 0.5) +
  geom_density(data = wsmc.df %>% filter(step == length(wsmcresults$thetas_history)), aes(x = logsigma, y = ..density.., fill = "ABC"), alpha = 0.5) + scale_fill_manual(name = "", values = c("Posterior" = "black", "ABC" = "darkblue")) + xlab(expression(log(sigma)))

grid.arrange(g1, g2, g3, ncol = 1)

We see that the WABC posterior matches the actual posterior. It would be a closer match if we had run it for more steps.



pierrejacob/winference documentation built on Feb. 17, 2020, 10:28 p.m.