# knitr chunk options: echo the R code of every chunk in the rendered document
knitr::opts_chunk$set(echo = TRUE)
This script applies the proposed ABC with Wasserstein distance, to approximate the posterior distribution in a cosine trend model.
The model specifies $Y_t \sim \mathcal{N}(\cos(2\pi \omega t + \phi), \sigma^2)$, the parameters are $(\omega, \phi, \sigma^2)$, the prior is uniform on $[0,0.1]$ for $\omega$, uniform on $[0,2\pi]$ for $\phi$, and $\mathcal{N}(0,1)$ for $\log(\sigma)$. The data are generated from $\omega_\star = 1/80$, $\phi_\star = \pi/4$ and $\sigma_\star = 1$.
We begin by loading the package, registering multiple cores, setting the random number generator, etc.
# Session setup: load packages, register parallel cores, clear the
# workspace, set plotting preferences and fix the RNG seed so that the
# results of this script are reproducible.
# load package
library(winference)
# register parallel cores for foreach-based computations
registerDoParallel(cores = detectCores())
# remove all
rm(list = ls())
# apply preferences for ggplotting
# (library() errors on a missing package, unlike require())
library(gridExtra)
theme_set(theme_bw())
# set RNG seed
set.seed(11)
We define the model and generate some data from it.
# Cosine trend model: Y_t ~ N(cos(2*pi*omega*t + phi), sigma^2),
# parameters theta = (omega, phi, log(sigma)).
nobservations <- 100
# sample nparticles parameter vectors from the prior:
# omega ~ Uniform(0, 0.1), phi ~ Uniform(0, 2*pi), log(sigma) ~ N(0, 1);
# returns an nparticles x 3 matrix, one row per particle
rprior <- function(nparticles, ...){
  omegas <- runif(nparticles, min = 0, max = 1/10)
  phis <- runif(nparticles, min = 0, max = 2*pi)
  logsigma <- rnorm(nparticles)
  return(cbind(omegas, phis, logsigma))
}
# evaluate the log-density of the prior, for each particle (row of thetas);
# returns -Inf for particles outside the prior support
dprior <- function(thetas, ...){
  logdensities <- dnorm(thetas[,3], 0, 1, log = TRUE)
  logdensities <- logdensities + dunif(thetas[,1], min = 0, max = 1/10, log = TRUE)
  logdensities <- logdensities + dunif(thetas[,2], min = 0, max = 2*pi, log = TRUE)
  return(logdensities)
}
# function to generate a dataset of length nobservations
# for a given theta = (omega, phi, logsigma)
simulate <- function(theta){
  observations <- cos(2*pi*theta[1]*seq_len(nobservations) + theta[2]) +
    rnorm(nobservations, mean = 0, sd = exp(theta[3]))
  return(observations)
}
# model in a list, in the format expected by winference
target <- list(rprior = rprior, dprior = dprior, simulate = simulate,
               parameter_names = c("omega", "phi", "logsigma"),
               thetadim = 3, ydim = 1, parameters = list())
# data-generating parameter: omega = 1/80, phi = pi/4, log(sigma) = 0
theta_star <- c(1/80, pi/4, 0)
# generate observations
obs <- target$simulate(theta_star)
# plot observations
plot(obs, type = "l", ylab = "y")
We define a way of calculating a distance between fake data and the observed data. Here we use curve matching: we augment each observation $y_t$ with the time index $t$, and then define a ground metric on the space of $(t,y_t)$. We are also replacing the Wasserstein distance by the Swapping distance.
# Curve matching: augment each observation y_t with its (scaled, normalized)
# time index t, so that the ground metric acts on points (y_t, t).
lambda <- 1
# scale of the time component, relative to the range of the observations
multiplier <- lambda*(max(obs) - min(obs))
# stack the series on top of the scaled time index
augment <- function(series) {
  rbind(series, multiplier * seq_along(series)/length(series))
}
augmented_obs <- augment(obs)
# distance between fake data and observed data:
# use the swapping distance instead of the exact Wasserstein
compute_distance <- function(y_fake){
  augmented_y_fake <- augment(y_fake)
  return(swap_distance(augmented_obs, augmented_y_fake, p = 1,
                       ground_p = 2, tolerance = 1e-5)$distance)
}
# test: compute_distance(target$simulate(target$rprior(1)))
We specify algorithmic parameters in a list: $1024$ particles, one move per rejuvenation step, a mixture of Gaussians as a proposal distribution, etc.
# Algorithmic tuning parameters for the Wasserstein SMC sampler,
# gathered in a single list.
param_algo <- list(
  nthetas = 1024,                # number of particles
  nmoves = 1,                    # moves per rejuvenation step
  proposal = mixture_rmixmod(),  # mixture-of-Gaussians proposal distribution
  minimum_diversity = 0.5,       # diversity parameter for threshold adaptation
  R = 2,                         # number of hits to use in the r-hit kernel
  maxtrials = 1e5                # max trials in the r-hit kernel before rejecting
)
We now run the algorithm, for a certain budget of model simulations. This might take a few minutes.
# Run the Wasserstein SMC algorithm with the model, distance and tuning
# parameters defined above, until 3e5 model simulations have been performed.
# now run the algorithm until 3e5 model simulations have been performed
wsmcresults <- wsmc(compute_distance, target, param_algo, maxsimulation = 3e5)
Now we can look at the output, with various plots, for instance the thresholds against the number of model simulations.
# names(wsmcresults)
# Plot the ABC thresholds against the cumulative number of model
# simulations, on log-log scales.
# (explicit ggplot() call: qplot() is deprecated since ggplot2 3.4.0)
threshold.df <- data.frame(nsimulations = cumsum(wsmcresults$ncomputed),
                           threshold = wsmcresults$threshold_history)
ggplot(threshold.df, aes(x = nsimulations, y = threshold)) +
  geom_line() +
  geom_point() +
  scale_y_log10() +
  scale_x_log10() +
  xlab("# model simulations")
We can look at the marginal distributions of parameters, with the data-generating parameters indicated by vertical lines.
# Marginal distributions of the three parameters, with the data-generating
# values indicated by vertical lines.
marginal_plots <- lapply(1:3, function(i) {
  plot_marginal(wsmcresults, i = i) + geom_vline(xintercept = theta_star[i])
})
grid.arrange(marginal_plots[[1]], marginal_plots[[2]], marginal_plots[[3]],
             ncol = 3)
We see that the marginals start concentrating around the data-generating parameters, as expected from the theory.
We use the WABC output to initialize a Metropolis-Hastings algorithm, targeting the actual posterior distribution.
# Exact posterior inference via adaptive Metropolis-Hastings,
# initialized from the WABC output.

# log-likelihood of the cosine trend model, evaluated for each particle
# (row of thetas); ys is the vector of observations
target$loglikelihood <- function(thetas, ys, ...){
  evals <- rep(0, nrow(thetas))
  for (itheta in seq_len(nrow(thetas))){
    # mean function cos(2*pi*omega*t + phi) at times 1, ..., nobservations
    backbone <- cos(2 * pi * thetas[itheta,1] * seq_len(nobservations) + thetas[itheta,2])
    evals[itheta] <- sum(dnorm(ys, mean = backbone, sd = exp(thetas[itheta,3]), log = TRUE))
  }
  return(evals)
}
# particles from the final WABC step
thetas <- wsmcresults$thetas_history[[length(wsmcresults$thetas_history)]]
thetas_cov <- cov(thetas)
# initial states of the Markov chain, sampled among the WABC particles
theta_init <- thetas[sample(x = seq_len(nrow(thetas)), 4),]
# tuning parameters of the adaptive MH scheme
tuning_parameters <- list(niterations = 50000, nchains = nrow(theta_init),
                          cov_proposal = thetas_cov, adaptation = 10000,
                          init_chains = theta_init)
# run adaptive MH scheme
mh <- metropolishastings(obs, target, tuning_parameters)
burnin <- 0
chain.df <- mhchainlist_to_dataframe(mh$chains)
# trace plot (thinned to every 100th iteration), to check MCMC convergence
g1 <- ggplot(chain.df %>% filter(iteration > burnin, iteration %% 100 == 1),
             aes(x = iteration, y = X.1, group = ichain, colour = factor(ichain))) +
  geom_line() + theme(legend.position = "none")
g1 <- g1 + geom_hline(yintercept = theta_star[1], col = "red")
g1
# compare the marginal posterior densities (MH) with the WABC approximations
wsmc.df <- wsmc_to_dataframe(wsmcresults)
g1 <- ggplot(chain.df, aes(x = X.1)) +
  geom_density(aes(y = ..density.., fill = "Posterior"), alpha = 0.5) +
  geom_density(data = wsmc.df %>% filter(step == length(wsmcresults$thetas_history)),
               aes(x = omega, y = ..density.., fill = "ABC"), alpha = 0.5) +
  scale_fill_manual(name = "", values = c("Posterior" = "black", "ABC" = "darkblue")) +
  xlab(expression(omega))
g2 <- ggplot(chain.df, aes(x = X.2)) +
  geom_density(aes(y = ..density.., fill = "Posterior"), alpha = 0.5) +
  geom_density(data = wsmc.df %>% filter(step == length(wsmcresults$thetas_history)),
               aes(x = phi, y = ..density.., fill = "ABC"), alpha = 0.5) +
  scale_fill_manual(name = "", values = c("Posterior" = "black", "ABC" = "darkblue")) +
  xlab(expression(phi))
g3 <- ggplot(chain.df, aes(x = X.3)) +
  geom_density(aes(y = ..density.., fill = "Posterior"), alpha = 0.5) +
  geom_density(data = wsmc.df %>% filter(step == length(wsmcresults$thetas_history)),
               aes(x = logsigma, y = ..density.., fill = "ABC"), alpha = 0.5) +
  scale_fill_manual(name = "", values = c("Posterior" = "black", "ABC" = "darkblue")) +
  xlab(expression(log(sigma)))
grid.arrange(g1, g2, g3, ncol = 1)
We see that the WABC posterior matches the actual posterior. It would be a closer match if we had run it for more steps.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.