## EXPERIMENTS (new tidier source!)
## code used to run the experiments (in parallel) that were actually used to make the plots
## in the report. Yes, believe it or not, it was this terrible (no longer tidy!) code.
rm(list = ls())
## Required packages ---------------------
library(foreach)
library(doParallel)
library(future)
library(doRNG)
## Function dependencies -----------------
standard_SMC <- function(n_particles, observations, emission_density, transition_sam, initial_sam, ...){
n_obs <- length(observations)
ancestors <- matrix(NA, nrow = n_obs - 1, ncol = n_particles)
positions <- matrix(NA, nrow = n_obs, ncol = n_particles)
weights <- matrix(NA, nrow = n_obs, ncol = n_particles)
## Initial states
positions[1, ] <- initial_sam(n_particles, ...)
weights_tmp <- emission_density(observations[1], positions[1, ], ...)
weights[1, ] <- weights_tmp / sum(weights_tmp)
for (t in 2:(n_obs)){
## Resample
ancestors[t-1, ] <- sample(1:n_particles, n_particles, prob=weights_tmp, replace=TRUE)
positions_tmp <- positions[t-1, ancestors[t-1, ]]
## Propagate
positions[t, ] <- transition_sam(positions_tmp, ...)
## Calculate importance weights
weights_tmp <- emission_density(observations[t], positions[t, ], ...)
weights[t, ] <- weights_tmp / sum(weights_tmp)
}
list(positions = positions, weights = weights, ancestors = ancestors)
}
standard_SMC_anc <- function(n_particles, observations, emission_density, transition_sam, initial_sam, ...){
n_obs <- length(observations)
ancestors <- matrix(NA, nrow = n_obs - 1, ncol = n_particles)
positions <- numeric(n_particles)
## Initial states
positions <- initial_sam(n_particles, ...)
weights_tmp <- emission_density(observations[1], positions, ...)
for (t in 2:(n_obs)){
## Resample
ancestors[t-1, ] <- sample(1:n_particles, n_particles, prob=weights_tmp, replace=TRUE)
positions_tmp <- positions[ancestors[t-1, ]]
## Propagate
positions <- transition_sam(positions_tmp, ...)
## Calculate importance weights
weights_tmp <- emission_density(observations[t], positions, ...)
}
ancestors
}
conditional_SMC_anc <- function(n_particles, observations, cond_trajectory, emission_density, transition_sam, initial_sam, ...){
n_obs <- length(observations)
ancestors <- matrix(NA, nrow = n_obs - 1, ncol = n_particles)
positions <- numeric(n_particles)
## Initial states
positions[1] <- cond_trajectory[1]
positions[2:n_particles] <- initial_sam(n_particles - 1, ...)
weights_tmp <- emission_density(observations[1], positions, ...)
for (t in 2:(n_obs)){
## Resample
ancestors[t-1, ] <- sample(1:n_particles, n_particles, prob=weights_tmp, replace=TRUE)
ancestors[t-1, 1] <- 1
positions_tmp <- positions[ancestors[t-1, ]]
## Propagate
positions[2:n_particles] <- transition_sam(positions_tmp[2:n_particles], ...)
positions[1] <- cond_trajectory[t]
## Calculate importance weights
weights_tmp <- emission_density(observations[t], positions, ...)
}
ancestors
}
ancestrySize_treeht <- function(history, sampl=NULL, maxgen=NULL){
N <- attr(history, 'N')
N.gen <- (attr(history, 'Ngen'))
min.gen <- max(1, N.gen-maxgen)
if (is.null(sampl)){
sampl <- 1:N
n <- N
}
else{
n <- length(sampl)
}
Size <- rep(NA, N.gen)
Size[N.gen] <- n
ancestry <- sampl
MRCA <- NA
for (t in ((N.gen-1):min.gen)){
ancestry <- history[t,unique(ancestry)] ## find ancestors of current sample
Size[t] <- length(unique(ancestry))
if (is.na(MRCA)){ ## once MRCA is reached, can exit loop & set all previous generations to 1
if (Size[t]==1){
MRCA <- t
break
}
}
}
if (!is.na(MRCA)){ ## set size to 1 for all generations above MRCA
Size[min.gen:(MRCA-1)] <- rep(1, MRCA-1)
}
N.gen-MRCA
}
traceAncestry <- function(history, sampl, maxgen=NULL){
n <- length(sampl)
N <- attr(history, 'N')
N.gen <- (attr(history, 'Ngen'))
min.gen <- max(1, N.gen-maxgen)
ancestry <- matrix(NA, nrow=N.gen, ncol=n)
ancestry[N.gen,] <- sort(sampl)
MRCA <- NA
for (t in ((N.gen-1):min.gen)){ ## try eliminating the for loops
for (i in 1:n){
ancestry[t,i] <- history[t,ancestry[t+1,i]]
}
if (is.na(MRCA)){
if (length(unique(ancestry[t,]))==1){
MRCA <- t ## could leave NAs above once MRCA is found?
}
}
}
class(ancestry) <- 'samplegenealogy'
ancestry@samplesize <- n
ancestry@sample <- sampl
ancestry@MRCA <- as.integer(MRCA)
ancestry@history <- unclass(history)
ancestry
}
ou_emission_density <- function(observation, position, delta, sigma) {
(2 * pi) ^ (-0.5) * sigma ^ (-1) * exp(-(observation - position) ^ 2 / (2 * sigma ^ 2))
}
ou_transition_sam <- function(pos_old, delta, sigma) {
rnorm(length(pos_old), (1 - delta) * pos_old, delta ^ 0.5)
}
ou_initial_sam <- function(n_particles, delta, sigma) {
rnorm(n_particles)
}
ou_rts_smooth <- function(observations, delta, sigma, prior_mean = 0, prior_var = 1) {
n_obs <- length(observations) - 1
## initialise variables
kal_mean <- numeric(n_obs + 1) ## states x(t|t) after time & measurement updates
kal_var <- numeric(n_obs + 1)
kal_mean_tmp <- numeric(n_obs + 1) ## 'tmp' = states x(t|t-1) after time update and before measurement update
kal_var_tmp <- numeric(n_obs + 1)
## Kalman filter recursion
kal_mean_tmp[1] <- prior_mean
kal_var_tmp[1] <- prior_var
for (t in 1:(n_obs)) {
## measurement update:
kal_mean[t] <- kal_mean_tmp[t] + kal_var_tmp[t] * (observations[t] - kal_mean_tmp[t]) / (kal_var_tmp[t] + sigma ^ 2)
kal_var[t] <- kal_var_tmp[t] - kal_var_tmp[t] ^ 2 / (kal_var_tmp[t] + sigma ^ 2)
## time update:
kal_mean_tmp[t + 1] <- (1 - delta) * kal_mean[t]
kal_var_tmp[t + 1] <- delta + kal_var[t] * (1 - delta) ^ 2
}
kal_mean[n_obs + 1] <- kal_mean_tmp[n_obs + 1] + kal_var_tmp[n_obs + 1] * (observations[n_obs + 1] - kal_mean_tmp[n_obs + 1]) / (kal_var_tmp[n_obs + 1] + sigma ^ 2)
kal_var[n_obs + 1] <- kal_var_tmp[n_obs + 1] - kal_var_tmp[n_obs + 1] ^ 2 / (kal_var_tmp[n_obs + 1] + sigma ^ 2)
## initialise variables
rts_mean <- numeric(n_obs + 1)
rts_var <- numeric(n_obs + 1)
pbar <- numeric(n_obs + 1)
g <- numeric(n_obs)
## final state
rts_mean[n_obs + 1] <- kal_mean[n_obs + 1]
rts_var[n_obs + 1] <- kal_var[n_obs + 1]
## RTS smoother recursion
for (t in n_obs:1) {
pbar[t + 1] <- ((1 - delta) ^ 2) * kal_var[t] + delta
g[t] <- kal_var[t] * (1 - delta) / pbar[t + 1]
rts_mean[t] <- kal_mean[t] + g[t] * (rts_mean[t + 1] - (1 - delta) * kal_mean[t])
rts_var[t] <- kal_var[t] + (g[t] ^ 2) * (rts_var[t + 1] - pbar[t + 1])
}
list(mean = rts_mean, variance = rts_var)
}
setClass("genealogy",representation(history="matrix", N="integer", Ngen="integer", model="character"))
setClass("samplegenealogy", representation(ancestry="matrix", samplesize="integer", sample="integer", MRCA="integer"),contains="genealogy")
## Initialise variables ------------------
## logarithmic series:
# log_n_particles_vals <- 4:12
# n_particles_vals <- 2 ^ (log_n_particles_vals)
# n_leaves <- n_particles_vals[1]
## linear series:
n_leaves <- 2
n_particles_vals <- seq(from = 256, to = 4096, by = 256)
n_reps <- 1000
## Generate (now read in) synthetic data ---------------
delta <- 0.1
sigma <- 0.1
n_obs <- 6 * n_particles_vals[length(n_particles_vals)]
ou_data <- read.csv("oudata.csv", header = FALSE)[[1]]
## Run simulations -----------------------
no_cores <- future::availableCores()
registerDoParallel(makeCluster(no_cores, type='FORK', outfile="debug_file.txt"))
for (n_sd_away in 0:3) {
## Generate immortal trajectory (using the Kalman filter solution)
n_sd_away <- 3 ## conditioned trajectory should be how many SDs away from the MAP smoothed trajectory?
rts <- ou_rts_smooth(ou_data, delta, sigma)
rts_mean <- rts$mean
rts_sd <- (rts$variance) ^ (0.5)
imtl_states <- rts_mean + n_sd_away * rts_sd
treeht_iters <- function(data, j, n_particles, imtl_states){
anc <- conditional_SMC_anc(n_particles, data, imtl_states, ou_emission_density, ou_transition_sam, ou_initial_sam, sigma = sigma, delta = delta)
class(anc) <- 'genealogy'
anc@N <- as.integer(n_particles)
anc@Ngen <- as.integer(n_obs - 1)
ancestrySize_treeht(anc, sampl=sample(1:n_particles, n_leaves, replace=F))
}
for (i in 1:length(n_particles_vals)) {
tree_height <- foreach(j = 1:n_reps, .combine = c) %dorng% treeht_iters(ou_data, j, n_particles_vals[i], imtl_states)
write.table(t(tree_height), file="treeht_out_n2.csv", sep=",", append=TRUE, row.names=FALSE, col.names=FALSE)
}
}
stopImplicitCluster()
# ### Standard SMC ======================
# ## Run simulations -----------------------
#
# no_cores <- future::availableCores()
# registerDoParallel(makeCluster(no_cores, type='FORK', outfile="debug_file.txt"))
#
# treeht_iters <- function(data, j, n_particles){
# anc <- standard_SMC_anc(n_particles, data, ou_emission_density, ou_transition_sam, ou_initial_sam, sigma = sigma, delta = delta)
#
# class(anc) <- 'genealogy'
# anc@N <- as.integer(n_particles)
# anc@Ngen <- as.integer(n_obs - 1)
#
# ancestrySize_treeht(anc, sampl=sample(1:n_particles, n_leaves, replace=F))
# }
#
# for (i in 1:length(n_particles_vals)) {
# tree_height <- foreach(j = 1:n_reps, .combine = c) %dorng% treeht_iters(ou_data, j, n_particles_vals[i])
# write.table(t(tree_height), file="treeht_out_std.csv", sep=",", append=TRUE, row.names=FALSE, col.names=FALSE)
# }
#
#
# stopImplicitCluster()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.