fusion_RIS_TA_BLR <- function(N,
dim,
y_split,
X_split,
prior_means,
prior_variances,
time,
m,
C,
K,
power,
precondition_matrices,
samples_to_fuse,
sub_posterior_weights,
level = 1,
node = 1,
core = 1) {
# non-parallel version of time-adapting fusion (run on a single core)
# initialise variables
i <- 0
# for rejection sampling step
iters_rho <- 0
x_samples <- rep(list(matrix(nrow = m, ncol = dim, data = 0)), N)
# for importance sampling step
y_samples <- matrix(data = NA, nrow = N, ncol = dim)
Q_weights <- rep(1, N)
# finding T_{c}'s and storing them in new_times
new_times <- time / sub_posterior_weights
# setting T^{*} (T-star) as the largest T_{c} = (time / w_{c}) for c = 1, ..., m
T_star <- max(new_times)
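# e.g. (illustrative values) with time = 1 and sub_posterior_weights = c(2, 2),
# new_times = c(0.5, 0.5) and T_star = 0.5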
while (i < N) {
# logging the number of iterations for rho probability
iters_rho <- iters_rho + 1
# sampling from each of the m components
x <- matrix(nrow = m, ncol = dim)
for (j in 1:m) {
x[j,] <- samples_to_fuse[[j]][sample(nrow(samples_to_fuse[[j]]), 1),]
}
# calculating the weighted mean of the sampled values
weighted_avg <- BayesLogitFusion:::weighted_column_mean(matrix = x,
weights = sub_posterior_weights)
# finding log(rho)
logrho <- BayesLogitFusion:::log_rho_time_adapting(x = x,
weighted_mean = weighted_avg,
sub_posterior_weights = sub_posterior_weights,
time = time)
if (log(runif(1, 0, 1)) < logrho) {
i <- i+1
# save the accepted x values into x_samples
x_samples[[i]] <- x
# simulate proposed value y from a Gaussian distribution
y_samples[i,] <- BayesLogitFusion:::mvrnormArma(N = 1,
mu = weighted_avg,
Sigma = diag(time/sum(sub_posterior_weights), dim))
# simulate the m diffusions and obtain their probabilities
for (c in 1:m) {
Q_weights[i] <- Q_weights[i]*BayesLogitFusion:::diffusion_probability_BLR(dim = dim,
x0 = x[c,],
y = y_samples[i,],
s = T_star - new_times[c],
t = T_star,
K = K[c],
initial_parameters = weighted_avg,
y_labels = y_split[[c]],
X = X_split[[c]],
prior_means = prior_means,
prior_variances = prior_variances,
C = C,
power = power,
precondition_mat = precondition_matrices[[c]])
}
# print progress to file
cat('Level:', level, '|| Node:', node, '|| Core:', core, '||', i, '/', N, '\n',
file = 'fusion_RIS_TA_BLR_progress.txt', append = T)
}
}
# print completion
cat('Node', node, '|| Core:', core, '|| Completed fusion for level', level, '\n',
file = 'fusion_RIS_TA_BLR_progress.txt', append = T)
return(list('y_samples' = y_samples,
'Q_weights' = Q_weights,
'x_samples' = x_samples,
'iters_rho' = iters_rho))
}
######################################## parallel time adapting RIS fusion ########################################
#' Parallel Time-adapting Rejection-Importance Sampling Monte Carlo Fusion for Bayesian Logistic Regression model
#'
#' @param N number of samples
#' @param dim dimension of the predictors (= p+1)
#' @param y_split list of length m, where y_split[[c]] is the y responses for sub-posterior c
#' @param X_split list of length m, where X_split[[c]] is the design matrix for sub-posterior c
#' @param prior_means prior for means of predictors
#' @param prior_variances prior for variances of predictors
#' @param time time T for fusion algorithm
#' @param m number of sub-posteriors to combine
#' @param C overall number of sub-posteriors
#' @param power exponent of target distribution
#' @param precondition boolean value determining whether or not a preconditioning matrix is to be used
#' @param samples_to_fuse list of length m, where samples_to_fuse[[c]] contains the samples for the c-th sub-posterior
#' @param sub_posterior_weights vector of length m, where sub_posterior_weights[c] is the weight for sub-posterior c
#' @param ESS_threshold number between 0 and 1 defining the proportion of the number of samples that ESS needs to be
#' lower than for resampling (i.e. resampling is carried out only when ESS < N*ESS_threshold)
#' @param seed seed number - default is NULL, meaning there is no seed
#' @param level indicates which level this is for the hierarchy (default 1)
#' @param node indicates which node this is for the hierarchy (default 1)
#' @param n_cores number of cores to use
#'
#' @return A list with components:
#' \describe{
#' \item{samples}{samples from fusion (resampled if ESS < N*ESS_threshold)}
#' \item{weighted_samples}{weighted samples}
#' \item{norm_weights}{normalised weights (after Q step)}
#' \item{ESS}{effective sample size}
#' \item{x_samples}{samples after the first fusion step (after rho step)}
#' \item{rho_acc}{rho acceptance rate}
#' \item{combined_y}{combined y responses after fusion}
#' \item{combined_X}{combined design matrix after fusion}
#' \item{time}{time taken for fusion}
#' \item{resampled}{boolean value to indicate whether or not samples were resampled}
#' \item{precondition_matrices}{pre-conditioning matrices that were used}
#' }
#'
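#' @examples
#' \dontrun{
#' # Illustrative sketch only: the simulated data, the placeholder sub-posterior
#' # samples and the tuning values below are arbitrary assumptions (in particular,
#' # prior_means and prior_variances are assumed to be length-dim vectors), not a
#' # recommended configuration
#' m <- 2; dim <- 3; n_per_sub <- 100
#' X_split <- lapply(1:m, function(c) cbind(1, matrix(rnorm(n_per_sub*(dim-1)), ncol = dim-1)))
#' y_split <- lapply(1:m, function(c) rbinom(n_per_sub, size = 1, prob = 0.5))
#' # in practice these would be MCMC samples from each sub-posterior
#' samples_to_fuse <- lapply(1:m, function(c) matrix(rnorm(1000*dim), ncol = dim))
#' fused <- par_fusion_RIS_TA_BLR(N = 1000,
#'                                dim = dim,
#'                                y_split = y_split,
#'                                X_split = X_split,
#'                                prior_means = rep(0, dim),
#'                                prior_variances = rep(1, dim),
#'                                time = 1,
#'                                m = m,
#'                                C = m,
#'                                power = 1,
#'                                precondition = TRUE,
#'                                samples_to_fuse = samples_to_fuse,
#'                                sub_posterior_weights = rep(1, m),
#'                                ESS_threshold = 0.5,
#'                                seed = 21,
#'                                n_cores = 2)
#' }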
#' @export
par_fusion_RIS_TA_BLR <- function(N,
dim,
y_split,
X_split,
prior_means,
prior_variances,
time,
m,
C,
power,
precondition = FALSE,
samples_to_fuse,
sub_posterior_weights,
ESS_threshold = 0.5,
seed = NULL,
level = 1,
node = 1,
n_cores = parallel::detectCores()) {
if (length(samples_to_fuse)!=m) {
stop("par_fusion_RIS_TA_BLR: samples_to_fuse must be a list of length m")
} else if (length(y_split)!=m) {
stop("par_fusion_RIS_TA_BLR: y_split must be a list of length m")
} else if (length(X_split)!=m) {
stop("par_fusion_RIS_TA_BLR: X_split must be a list of length m")
} else if (length(sub_posterior_weights)!=m) {
stop("par_fusion_RIS_TA_BLR: sub_posterior_weights must be a vector of length m")
}
# check that all the samples in samples_to_fuse are matrices with dim columns
for (c in 1:length(samples_to_fuse)) {
if (ncol(samples_to_fuse[[c]])!=dim) {
stop("par_fusion_RIS_TA_BLR: check that samples_to_fuse contains matrices with dim columns")
}
}
# if using a preconditioning matrix, we need to calculate them
if (precondition) {
precondition_matrices <- lapply(1:m, function(c) {
# take the square root of the diagonal of the sample covariance matrix for the c-th sub-posterior
sqrt(diag(diag(cov(samples_to_fuse[[c]]))))
})
} else {
# don't want to use a preconditioning matrix, i.e. use the identity matrix
precondition_matrices <- lapply(1:m, function(c) diag(1, dim))
}
# need to find the lower bound K for the Exact Algorithm
K <- rep(0, m)
for (c in 1:m) {
K[c] <- BayesLogitFusion:::phi_LB_BLR(X = X_split[[c]],
prior_variances = prior_variances,
C = C,
power = power,
precondition_mat = precondition_matrices[[c]])
}
cat('Level:', level, '|| Node:', node, '|| Starting fusion for level',
level, '( node', node, ') trying to get', N, 'samples\n',
file = 'par_fusion_RIS_TA_BLR_progress.txt', append = T)
######### creating parallel cluster
cl <- parallel::makeCluster(n_cores)
# creating variable and functions list to pass into cluster using clusterExport
varlist <- list("phi_LB_BLR",
"bound_phi_BLR",
"diffusion_probability_BLR",
"fusion_RIS_TA_BLR",
"precondition_matrices")
parallel::clusterExport(cl, envir = environment(), varlist = varlist)
# exporting functions from layeredBB package to simulate layered Brownian bridges
parallel::clusterExport(cl, varlist = ls("package:layeredBB"))
if (!is.null(seed)) {
# setting seed for the cores in the cluster
parallel::clusterSetRNGStream(cl, iseed = seed)
}
# how many samples do we need for each core?
if (N < n_cores) {
samples_per_core <- rep(1, N)
} else {
samples_per_core <- rep(floor(N/n_cores), n_cores)
if (sum(samples_per_core) != N) {
remainder <- N %% n_cores
samples_per_core[1:remainder] <- samples_per_core[1:remainder] + 1
}
}
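# e.g. (illustrative values) N = 10 with n_cores = 4 gives samples_per_core = c(3, 3, 2, 2)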
# run fusion in parallel
pcm <- proc.time()
fused <- parallel::parLapply(cl, X = 1:length(samples_per_core), fun = function(i) {
fusion_RIS_TA_BLR(N = samples_per_core[i],
dim = dim,
y_split = y_split,
X_split = X_split,
prior_means = prior_means,
prior_variances = prior_variances,
time = time,
m = m,
C = C,
K = K,
power = power,
precondition_matrices = precondition_matrices,
samples_to_fuse = samples_to_fuse,
sub_posterior_weights = sub_posterior_weights,
level = level,
node = node,
core = i)
})
final <- proc.time() - pcm
# stop cluster
parallel::stopCluster(cl)
########## return samples and acceptance probabilities
# samples from first rejection step in fusion
x_samples <- unlist(lapply(1:length(samples_per_core), function(i) fused[[i]]$x_samples),
recursive = FALSE)
rho_acc <- N / sum(sapply(1:length(samples_per_core), function(i) fused[[i]]$iters_rho))
# samples from second importance sampling step in fusion
y_samples <- do.call(rbind, lapply(1:length(samples_per_core), function(i) fused[[i]]$y_samples))
Q_weights <- unlist(lapply(1:length(samples_per_core), function(i) fused[[i]]$Q_weights))
# normalise weights and resample
Q_weights <- Q_weights / sum(Q_weights)
# check that the weights are not NaN (can happen if we have numerical error)
if (any(is.na(Q_weights))) {
stop("par_fusion_RIS_TA_BLR: Q_weights are NaN - potential numerical error. Try decreasing value for T")
}
# only resample if ESS < N*ESS_threshold
ESS <- 1 / sum(Q_weights^2)
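# e.g. uniform normalised weights of 1/N give ESS = N, while a single dominant weight gives ESS close to 1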
if (ESS < N*ESS_threshold) {
resampled <- TRUE
fusion_samples <- y_samples[sample(1:nrow(y_samples), size = N, replace = T, prob = Q_weights),]
} else {
resampled <- FALSE
fusion_samples <- y_samples
}
# return the full data set combined together
combined_y <- unlist(y_split)
combined_X <- do.call(rbind, X_split)
# print completion
cat('Node', node, '|| Completed fusion for level', level, '\n',
file = 'par_fusion_RIS_TA_BLR_progress.txt', append = T)
return(list('samples' = fusion_samples,
'weighted_samples' = y_samples,
'norm_weights' = Q_weights,
'ESS' = ESS,
'x_samples' = x_samples,
'rho_acc' = rho_acc,
'combined_y' = combined_y,
'combined_X' = combined_X,
'time' = final['elapsed'],
'resampled' = resampled,
'precondition_matrices' = precondition_matrices))
}
######################################## time adapting hierarchical fusion ########################################
#' Time-adapting Hierarchical Rejection-Importance Sampling Monte Carlo Fusion for Bayesian Logistic Regression model
#'
#' @param N_schedule vector of length (L-1), where N_schedule[l] is the number of samples per node at level l
#' @param dim dimension of the predictors (= p+1)
#' @param y_split list of length C, where y_split[[c]] is the y responses for sub-posterior c
#' @param X_split list of length C, where X_split[[c]] is the design matrix for sub-posterior c
#' @param prior_means prior for means of predictors
#' @param prior_variances prior for variances of predictors
#' @param global_T time T for time-adapting fusion algorithm
#' @param m_schedule vector of length (L-1), where m_schedule[l] is the number of sub-posteriors fused at each node of level l
#' @param C number of sub-posteriors at the base level
#' @param power exponent of target distribution
#' @param precondition boolean value determining whether or not a preconditioning matrix is to be used
#' @param L total number of levels in the hierarchy
#' @param base_samples list of length C, where base_samples[[c]] contains the samples for the c-th node at the base level
#' @param ESS_threshold number between 0 and 1 defining the proportion of the number of samples that ESS needs to be
#' lower than for resampling (i.e. resampling is carried out only when ESS < N*ESS_threshold)
#' @param seed seed number - default is NULL, meaning there is no seed
#' @param n_cores number of cores to use
#'
#' @return A list with components:
#' \describe{
#' \item{samples}{list of length (L-1), where samples[[l]][[i]] are the samples for level l, node i}
#' \item{weighted_samples}{list of length (L-1), where weighted_samples[[l]][[i]] are the weighted samples for level l, node i}
#' \item{normalised_weights}{list of length (L-1), where normalised_weights[[l]][[i]] are the normalised weights for level l, node i}
#' \item{ESS}{list of length (L-1), where ESS[[l]][[i]] is the ESS for samples in level l, node i}
#' \item{x_samples}{list of length (L-1), where x_samples[[l]][[i]] are the samples from first fusion step (rho step) for level l, node i}
#' \item{rho_acc}{list of length (L-1), where rho_acc[[l]][[i]] is the acceptance rate for the first fusion step for level l, node i}
#' \item{time}{list of length (L-1), where time[[l]][[i]] is the run time for level l, node i}
#' \item{resampled}{list of length (L-1), where resampled[[l]][[i]] is a boolean value to indicate if samples in level, node i were resampled}
#' \item{y_inputs}{input y data for each level and node}
#' \item{X_inputs}{input X data for each level and node}
#' \item{C_inputs}{vector of length (L-1), where C_inputs[l] is the number of sub-posteriors at level l+1 (the input for C to get to level l)}
#' \item{sub_posterior_weight_inputs}{list of length (L-1), where sub_posterior_weight_inputs[[l]] is the input for the sub-posterior weights for level l}
#' \item{diffusion_times}{list of length (L-1), where diffusion_times[[l]] contains the times for fusion in level l}
#' \item{power}{exponent of target distributions in the hierarchy}
#' \item{precondition_matrices}{pre-conditioning matrices that were used}
#' }
#'
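#' @examples
#' \dontrun{
#' # Illustrative sketch only: the simulated data, the placeholder base-level
#' # samples and the two-level schedule below are arbitrary assumptions (in
#' # particular, prior_means and prior_variances are assumed to be length-dim
#' # vectors), not a recommended configuration
#' C <- 4; dim <- 3; n_per_sub <- 100
#' X_split <- lapply(1:C, function(c) cbind(1, matrix(rnorm(n_per_sub*(dim-1)), ncol = dim-1)))
#' y_split <- lapply(1:C, function(c) rbinom(n_per_sub, size = 1, prob = 0.5))
#' # in practice these would be MCMC samples from each base-level sub-posterior
#' base_samples <- lapply(1:C, function(c) matrix(rnorm(1000*dim), ncol = dim))
#' hier <- hierarchical_fusion_RIS_TA_BLR(N_schedule = rep(1000, 2),
#'                                        dim = dim,
#'                                        y_split = y_split,
#'                                        X_split = X_split,
#'                                        prior_means = rep(0, dim),
#'                                        prior_variances = rep(1, dim),
#'                                        global_T = 1,
#'                                        m_schedule = rep(2, 2),
#'                                        C = C,
#'                                        power = 1,
#'                                        precondition = TRUE,
#'                                        L = 3,
#'                                        base_samples = base_samples,
#'                                        ESS_threshold = 0.5,
#'                                        seed = 21,
#'                                        n_cores = 2)
#' }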
#' @export
hierarchical_fusion_RIS_TA_BLR <- function(N_schedule,
dim,
y_split,
X_split,
prior_means,
prior_variances,
global_T,
m_schedule,
C,
power,
precondition = FALSE,
L,
base_samples,
ESS_threshold = 0.5,
seed = NULL,
n_cores = parallel::detectCores()) {
# check variables are of the right length
if (length(N_schedule) != (L-1)) {
stop('hierarchical_fusion_RIS_TA_BLR: length of N_schedule must be equal to (L-1)')
} else if (!is.list(y_split) || length(y_split)!= C) {
stop('hierarchical_fusion_RIS_TA_BLR: check that y_split is a list of length C')
} else if (!is.list(X_split) || length(X_split)!= C) {
stop('hierarchical_fusion_RIS_TA_BLR: check that X_split is a list of length C')
}
# warn if the top level will not have C=1
if (C != prod(m_schedule)) {
warning('hierarchical_fusion_RIS_TA_BLR: C != prod(m_schedule) - the top level does not have C=1')
}
# check that at each level, we are fusing a suitable number
if (length(m_schedule) == (L-1)) {
for (l in (L-1):1) {
if ((C/prod(m_schedule[(L-1):l]))%%1 != 0) {
stop('hierarchical_fusion_RIS_TA_BLR: check that C/prod(m_schedule[(L-1):l]) is an integer for l=L-1,...,1')
}
}
} else {
stop('hierarchical_fusion_RIS_TA_BLR: m_schedule must be a vector of length (L-1)')
}
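# e.g. (illustrative values) C = 8, L = 4 and m_schedule = c(2, 2, 2) fuses pairs of nodes
# at every level (8 -> 4 -> 2 -> 1 nodes)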
# check ESS_threshold is between 0 and 1
if (ESS_threshold < 0 || ESS_threshold > 1) {
stop('hierarchical_fusion_RIS_TA_BLR: ESS_threshold must be between 0 and 1')
}
# we append 1 to the vector m_schedule to make the indices work later on when we call fusion
m_schedule <- c(m_schedule, 1)
# initialising study results
hier_samples <- list()
hier_samples[[L]] <- base_samples # base level
x_samples <- list()
weighted_samples <- list()
norm_weights <- list()
ESS <- list()
rho_acc <- list()
y_inputs <- list()
y_inputs[[L]] <- y_split # base level input samples for y
X_inputs <- list()
X_inputs[[L]] <- X_split # base level input samples for X
C_inputs <- rep(0, L-1)
sub_posterior_weight_inputs <- list()
diffusion_times <- list()
time <- list()
resampled <- list()
precondition_matrices <- list()
# add to output file that starting hierarchical fusion
cat('Starting hierarchical fusion \n', file = 'hierarchical_fusion_RIS_TA_BLR.txt')
# loop through the levels
for (k in ((L-1):1)) {
# the previous level has C/prod(m_schedule[L:(k+1)]) nodes and we fuse m_schedule[k] of these into each node at this level
n_nodes <- C/prod(m_schedule[L:k])
# performing Fusion for this level
# printing out some stuff to log file to track the progress
cat('########################\n', file = 'hierarchical_fusion_RIS_TA_BLR.txt', append = T)
cat('Starting to fuse', m_schedule[k], 'sub-posteriors for level', k, 'with time',
global_T/prod(m_schedule[L:(k+1)]), ', which is using', n_cores, 'cores\n',
file = 'hierarchical_fusion_RIS_TA_BLR.txt', append = T)
cat('At this level, the data is split up into', (C / prod(m_schedule[L:(k+1)])), 'subsets\n',
file = 'hierarchical_fusion_RIS_TA_BLR.txt', append = T)
cat('There are', n_nodes, 'nodes at this level, each giving', N_schedule[k],
'samples \n', file = 'hierarchical_fusion_RIS_TA_BLR.txt', append = T)
cat('########################\n', file = 'hierarchical_fusion_RIS_TA_BLR.txt', append = T)
# starting fusion
fused <- lapply(X = 1:n_nodes, FUN = function(i) {
par_fusion_RIS_TA_BLR(N = N_schedule[k],
dim = dim,
y_split = y_inputs[[k+1]][((m_schedule[k]*i)-(m_schedule[k]-1)):(m_schedule[k]*i)],
X_split = X_inputs[[k+1]][((m_schedule[k]*i)-(m_schedule[k]-1)):(m_schedule[k]*i)],
prior_means = prior_means,
prior_variances = prior_variances,
time = global_T,
m = m_schedule[k],
C = (C / prod(m_schedule[L:(k+1)])),
power = power,
precondition = precondition,
samples_to_fuse = hier_samples[[k+1]][((m_schedule[k]*i)-(m_schedule[k]-1)):(m_schedule[k]*i)],
sub_posterior_weights = rep(prod(m_schedule[L:(k+1)]), m_schedule[k]),
ESS_threshold = ESS_threshold,
seed = seed,
level = k,
node = i,
n_cores = n_cores)
})
# need to combine the correct samples
hier_samples[[k]] <- lapply(1:n_nodes, function(i) fused[[i]]$samples)
weighted_samples[[k]] <- lapply(1:n_nodes, function(i) fused[[i]]$weighted_samples)
norm_weights[[k]] <- lapply(1:n_nodes, function(i) fused[[i]]$norm_weights)
ESS[[k]] <- lapply(1:n_nodes, function(i) fused[[i]]$ESS)
x_samples[[k]] <- lapply(1:n_nodes, function(i) fused[[i]]$x_samples)
rho_acc[[k]] <- lapply(1:n_nodes, function(i) fused[[i]]$rho_acc)
y_inputs[[k]] <- lapply(1:n_nodes, function(i) fused[[i]]$combined_y)
X_inputs[[k]] <- lapply(1:n_nodes, function(i) fused[[i]]$combined_X)
time[[k]] <- lapply(1:n_nodes, function(i) fused[[i]]$time)
resampled[[k]] <- lapply(1:n_nodes, function(i) fused[[i]]$resampled)
precondition_matrices[[k]] <- lapply(1:n_nodes, function(i) fused[[i]]$precondition_matrices)
# track the inputs for the level
C_inputs[k] <- (C / prod(m_schedule[L:(k+1)]))
sub_posterior_weight_inputs[[k]] <- rep(prod(m_schedule[L:(k+1)]), m_schedule[k])
diffusion_times[[k]] <- global_T / rep(prod(m_schedule[L:(k+1)]), m_schedule[k])
}
# print completion
cat('Completed hierarchical fusion\n', file = 'hierarchical_fusion_RIS_TA_BLR.txt', append = T)
if (C == prod(m_schedule)) {
hier_samples[[1]] <- hier_samples[[1]][[1]]
weighted_samples[[1]] <- weighted_samples[[1]][[1]]
norm_weights[[1]] <- norm_weights[[1]][[1]]
ESS[[1]] <- ESS[[1]][[1]]
x_samples[[1]] <- x_samples[[1]][[1]]
rho_acc[[1]] <- rho_acc[[1]][[1]]
y_inputs[[1]] <- y_inputs[[1]][[1]]
X_inputs[[1]] <- X_inputs[[1]][[1]]
time[[1]] <- time[[1]][[1]]
resampled[[1]] <- resampled[[1]][[1]]
precondition_matrices[[1]] <- precondition_matrices[[1]][[1]]
}
return(list('samples' = hier_samples,
'weighted_samples' = weighted_samples,
'normalised_weights' = norm_weights,
'ESS' = ESS,
'x_samples' = x_samples,
'rho_acc' = rho_acc,
'time' = time,
'resampled' = resampled,
'y_inputs' = y_inputs,
'X_inputs' = X_inputs,
'C_inputs' = C_inputs,
'sub_posterior_weight_inputs' = sub_posterior_weight_inputs,
'diffusion_times' = diffusion_times,
'power' = power,
'precondition_matrices' = precondition_matrices))
}
######################################## miscellaneous ########################################
#' Get resampled top level for RIS hierarchy
#'
#' Function resamples the top level of the hierarchy if it has not been resampled
#'
#' @param hierarchical_result hierarchy returned by 'hierarchical_fusion_RIS_TA_BLR' function
#' @param seed seed number - default is NULL, meaning there is no seed
#'
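#' @examples
#' \dontrun{
#' # Illustrative sketch only: 'hier' is assumed to be the output of a previous
#' # call to hierarchical_fusion_RIS_TA_BLR (see its examples)
#' top_level_samples <- get_resampled_top_level_RIS(hierarchical_result = hier, seed = 21)
#' }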
#' @export
get_resampled_top_level_RIS <- function(hierarchical_result, seed = NULL) {
if (!is.null(seed)) {
# setting seed if given
set.seed(seed)
}
# if the top level got resampled, then we keep that, otherwise resample according to weights
if (hierarchical_result$resampled[[1]]) {
samples <- hierarchical_result$samples[[1]]
} else {
samples <- hierarchical_result$samples[[1]][sample(x = 1:nrow(hierarchical_result$samples[[1]]),
size = nrow(hierarchical_result$samples[[1]]),
replace = T,
prob = hierarchical_result$normalised_weights[[1]]),]
}
return(samples)
}