scripts/ess_old.R

library(exp4FusionRCPP)

set.seed(1912)

########################################

split_data <- function(samples, k) {
  # check that k divides n = length(samples)
  if (length(samples)%%k != 0) {
    stop('split_data: length of samples is not divisible by k')
  }

  # split data into (n/k) batches
  batches <- split(samples, ceiling(seq_along(samples)/k))
  names(batches) <- NULL

  return(batches)
}

calculate_batch_means <- function(batches) {
  # check that batches is a list
  if (class(batches) != "list") {
    stop('calculate_batch_means: batches is not a list')
  }

  # calculate the means for each of the batches
  batch_means <- sapply(batches, mean)
  # calculate the variance of the batch m
  var_batch_means <- var(batch_means)

  return(list('means' = batch_means,
              'var' = var_batch_means))
}

calculate_ESS <- function(samples, k, random = FALSE) {
  # check that k divides n = length(samples)
  if (length(samples)%%k != 0) {
    stop('calculate_ESS: length of samples is not divisible by k')
  }

  # if random == TRUE, we randomly swap around the indicies of the samples
  if (random == TRUE) {
    samples <- samples[sample.int(n = length(samples),
                                  size = length(samples),
                                  replace = FALSE)]
  }

  # split the data into (n/k) batches
  batches <- split_data(samples, k)
  # calculate batch means
  batch_means <- calculate_batch_means(batches)

  # obtain an approximation to variance of samples
  variance <- var(samples)
  # obtain an approximation to tau (autocorrelation time)
  tau <- (k*batch_means$var)/variance

  # obtain an approximation to ESS
  ESS <- length(samples)/tau

  return(ESS)
}

ESS_study <- function(m_choices, n, k, input_samples) {
  print(paste('##### n: ', n))

  # set variables to store results
  hierarchical_samples <- list()
  ESS_results <- list()
  computational_cost <- list()

  # iterate through the values of m
  for (i in 1:length(m_choices)) {
    hierarchical_samples[[i]] <- parallel_h_fusion_exp_4_rcpp(N_schedule = c(m_choices[i], n),
                                                              mean = 0,
                                                              time_schedule = rep(1, 2),
                                                              start_beta = 1/4,
                                                              C_schedule = rep(2, 2),
                                                              L = 3,
                                                              base_samples = input_samples)

    curve(target_density_exp_4(x, 0), -4, 4, main = paste('m = ', m_choices[i]))
    lines(density(  hierarchical_samples[[i]]$samples[[1]]), col = 'blue')

    # save the ESS and also iterate the values 100 times to double check
    MC_ESS <- sapply(1:100, function(j) {
      calculate_ESS(hierarchical_samples[[i]]$samples[[1]], k, random = T)
    })

    ESS_results[[i]] <- list('m' = m_choices[i],
                             'ESS' = calculate_ESS(hierarchical_samples[[i]]$samples[[1]], k),
                             'MC_ESS' = MC_ESS,
                             'mean_MC_ESS' = mean(MC_ESS),
                             'sd_MC_ESS' = sd(MC_ESS))

    # save the computational cost (in time)
    computational_cost[[i]] <- sum(hierarchical_samples[[i]]$time)

    print(paste('m: ', m_choices[i], ' || ESS: ', ESS_results[[i]]$ESS, ' || MC_ESS: ', ESS_results[[i]]$mean_MC_ESS,
                ' +/- ', ESS_results[[i]]$sd_MC_ESS))
  }

  return(list('hierarchical_results' = hierarchical_samples,
              'ESS_results' = ESS_results))
}

plot_kdes <- function(list_of_h_results, m_choices, n_values, mean, k,
                      colours, plot_dim = c(1,1), ylim, adjust) {
  if (length(list_of_h_results) != length(n_values)) {
    stop('plot_kdes: list_of_h_results and n_values should be lists of equal length')
  }
  if (length(list_of_h_results) != length(colours)) {
    stop('plot_kdes: list_of_h_results and colours should be of equal length')
  }
  # change plot dimensions (i.e. how many plots on a page?)
  par(mfrow=plot_dim)
  for (i in 1:length(list_of_h_results[[1]])) {
    curve(expr = target_density_exp_4(x, mean = mean),
          from = mean - 4,
          to = mean + 4,
          main = paste('k = ', k, ' || m = ', m_choices[i]),
          ylab = 'pdf',
          ylim = ylim)
    for (j in 1:length(list_of_h_results)) {
      lines(density(list_of_h_results[[j]][[i]]$samples[[1]], adjust = adjust), col = colours[j], lty = j, lwd = 1.5)
    }
    legend('topright', legend = paste('n = ', n_values), col = colours, lwd = 1.5, lty = 1:length(list_of_h_results))
  }
  # reset plot dimensions
  par(mfrow=c(1,1))
}

print_ESS_results <- function(ESS_results, m_choices) {
  for (i in 1:length(ESS_results)) {
    print(paste('m: ', m_choices[i], ' || ESS: ', ESS_results[[i]]$ESS, ' || MC_ESS: ', ESS_results[[i]]$mean_MC_ESS,
                ' +/- ', ESS_results[[i]]$sd_MC_ESS))
  }
}

plot_ESS_results <- function(ESS_results, m_choices, n) {
  plot(x = m_choices, y = sapply(1:length(m_choices), function(i) ESS_results[[i]]$mean_MC_ESS),
       xlab = '', ylab = 'ESS', pch = 4, main = paste('n = ', n), xaxt = "n")

  points(x = m_choices, y = sapply(1:length(m_choices), function(i) {
    ESS_results[[i]]$mean_MC_ESS + 3*ESS_results[[i]]$sd_MC_ESS
  }), pch = '-', col = 'blue', lwd = 2)
  points(x = m_choices, y = sapply(1:length(m_choices), function(i) {
    ESS_results[[i]]$mean_MC_ESS - 3*ESS_results[[i]]$sd_MC_ESS
  }), pch = '-', col = 'blue', lwd = 2)
  points(x = m_choices, y = sapply(1:length(m_choices), function(i) ESS_results[[i]]$ESS),
         pch = 20, col = 'red')
  lines(x = c(0, 50000), y = c(0, 50000), lty = 2)
  axis(1, at=m_choices, lwd.ticks=0.5, labels=m_choices, las = 3)
  axis(2, at=m_choices, lwd.ticks=0.5, labels=rep("", length(m_choices)))
  mtext("m", side=1, line=4)
}

########################################

m_choices <- c(100, 1000, 2500, 5000, 7500, 10000, 15000, 20000, 25000, 50000)

########################################

input_samples <- base_rejection_sampler_exp_4(beta = 1/4,
                                              nsamples = 100000,
                                              proposal_mean = 0,
                                              proposal_sd = 1.5,
                                              dominating_M = 1.4)
curve(tempered_target_density_exp_4(x, 0, beta = 1/4), -4, 4)
# check the samples look okay
for (samples in input_samples) {
  lines(density(samples), col = 'blue')
}

########################################

results_10000_2 <- ESS_study(m_choices = m_choices, n = 10000, k = 2, input_samples = input_samples)
results_1000_2 <- ESS_study(m_choices = m_choices, n = 1000, k = 2, input_samples = input_samples)
results_100_2 <- ESS_study(m_choices = m_choices, n = 100, k = 2, input_samples = input_samples)

########################################

dev.new()
plot_ESS_results(results_10000_2$ESS_results, m_choices, 10000)
plot_ESS_results(results_1000_2$ESS_results, m_choices, 1000)
plot_ESS_results(results_100_2$ESS_results, m_choices, 100)

list_of_h_2 <- list(results_10000_2$hierarchical_results,
                    results_1000_2$hierarchical_results,
                    results_100_2$hierarchical_results)
plot_kdes(list_of_h_results = list_of_h_2,
          m_choices = m_choices,
          n_values = c(10000, 1000, 100),
          mean = 0,
          k = 2,
          colours = c('blue', 'red', 'forestgreen'),
          plot_dim = c(1, 1),
          ylim = c(0, 0.6), adjust = 0.5)


########################################

results_10000_5 <- ESS_study(m_choices = m_choices, n = 10000, k = 5, input_samples = input_samples)
results_1000_5 <- ESS_study(m_choices = m_choices, n = 1000, k = 5, input_samples = input_samples)
results_100_5 <- ESS_study(m_choices = m_choices, n = 100, k = 5, input_samples = input_samples)

########################################

dev.new()
plot_ESS_results(results_10000_5$ESS_results, m_choices, 10000)
plot_ESS_results(results_1000_5$ESS_results, m_choices, 1000)
plot_ESS_results(results_100_5$ESS_results, m_choices, 100)

list_of_h_5 <- list(results_10000_5$hierarchical_results,
                    results_1000_5$hierarchical_results,
                    results_100_5$hierarchical_results)
plot_kdes(list_of_h_results = list_of_h_5,
          m_choices = m_choices,
          n_values = c(10000, 1000, 100),
          mean = 0,
          k = 5,
          colours = c('blue', 'red', 'forestgreen'),
          plot_dim = c(1, 1),
          ylim = c(0, 0.6), adjust = 0.5)
rchan26/exp4FusionRCPP documentation built on Nov. 6, 2019, 7:01 p.m.