knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
library(SampleR)
library(ggplot2)
library(stats)

This vignette aims to show how different sampling algorithms may navigate a 'patchy' environment. Let's start by creating such an environment.

First, we create a matrix with what will be the means of 15 different Gaussians

set.seed(1)
# Create a matrix with the means of 15 different Gaussians
M <- matrix(0, nrow = 15, ncol = 2)
for (i in 1:15){
  M[i,] <- runif(2) * 18 - 9
}

We'll also require a set of weights (which in this case will all be the same), which we will use in our probability density function, as well as a set of cumulative weights, for our random sampler.

weights <- rep(1/15, length.out = 15)
cumulative_weights <- vector()
for (i in 1:length(weights)){
  if (i == 1){
    cumulative_weights[i] <- weights[i]
  } else {
    cumulative_weights[i] <- weights[i] + cumulative_weights[i-1]
  }
}

The probability density function is the weighted sum of each of the density functions

pd_func <- function(x, log = FALSE){
  densities <- vector()
  for (i in 1:length(weights)){
    densities[i] <- mvtnorm::dmvnorm(x, mean = c(M[i,1], M[i,2]), log = log)
  }
  return(sum(densities * weights))
}

The random function chooses a random distribution from the mixture to draw a sample from (with weighted probabilities)

rand_func <- function(x){
  randNums <- matrix(ncol = dims, nrow = x)
  for (j in 1:x){
    randomN <- stats::runif(1)
    finish = F
    i = 0
    while (!finish){
      i = i + 1
      if (randomN <= cumulative_weights[i]){
        dist <- listDistr[[i]]
        randNums[j,] <- unlist(unname(dist$rand(1)))
        finish = T
      }
    }
  }
  return(randNums)
}

We can now draw a map of the density

mapDensity <- function(pdf, start, size, cellsPerRow = 50){
  # start is a vector <- c(x, y)
  #  is a number n so that the map ranges from x, y to x + n, y + n
  xRange <- seq(from = start[1], to = start[1] + size, length.out = cellsPerRow)
  xxRange <- rep(xRange, cellsPerRow)

  yRange <- seq(from = start[2], to = start[2] + size, length.out = cellsPerRow)

  for (i in 1:cellsPerRow){
    if (i == 1){
      yyRange <- rep(yRange[i], cellsPerRow)
    } else {
      yyRange <- c(yyRange, rep(yRange[i], cellsPerRow))
    }
  }

  density <- vector()


  for (i in 1:length(yyRange)){
    density[i] <- pdf(c(xxRange[i],yyRange[i]))
    }

  df <- data.frame(x = xxRange, y = yyRange, density = density)
  return(df)
}



hills_df <- mapDensity(pd_func, c(-10,-10), 20, 150)

hill_map <- ggplot(hills_df) + 
  geom_raster(mapping = aes(x = x, y = y, fill = density)) + 
  scale_fill_viridis_c() + 
  theme_void()
print(hill_map)

An MCMC sampler may not be able to explore the whole of the space, as it is unable to make long jumps

MCMC <- sampler_mcmc(pd_func, start = c(0,0), sigma_prop = diag(2) / 8)
MCMC_df <- data.frame(x = MCMC[[1]][,1], y = MCMC[[1]][,2])
MCMC_path <- hill_map + 
  geom_path(MCMC_df, mapping = aes(x,y), colour = "red", linetype = "dashed", size = .3) + 
  geom_point(MCMC_df, mapping = aes(x,y), colour = "white",size =.1)
print(MCMC_path)

An MC3 sampler, on the other hand, runs hotter chains under the hood with which it switches stochastically, which allows it to visit far-off regions and thus explore the whole hypothesis space

MC3 <- sampler_mc3(pd_func, start = c(0,0), sigma_prop = diag(2) / 8, swap_all = FALSE)
MC3_df <- data.frame(x = MC3[[1]][,1,1], y = MC3[[1]][,2,1])
MC3_path <- hill_map + 
  geom_path(MC3_df, mapping = aes(x, y), colour = "red", linetype = "dashed", size = .3) + 
  geom_point(MC3_df, mapping = aes(x,y), colour = "white", size =.1)
print(MC3_path)


lucas-castillo/SampleR documentation built on Jan. 1, 2021, 8:25 a.m.