scripts/logistic_regression/smart_grid/SG128.R

library(DCFusion)
library(HMCBLR)

##### Initialise example #####
seed <- 2022
set.seed(seed)
nsamples <- 10000
prior_means <- rep(0, 13)
prior_variances <- rep(1, 13)
C <- 128
n_cores <- parallel::detectCores()
ESS_threshold <- 0.5
CESS_0_threshold <- 0.2
CESS_j_threshold <- 0.05
diffusion_estimator <- 'NB'

##### Loading in Data #####

load_smart_grid_data <- function(file, seed = NULL) {
  original_data <- read.csv(file)
  original_data$stabf <- as.numeric(original_data$stabf == 'stable')
  original_data <- original_data[complete.cases(original_data),]
  if (!is.null(seed)) {
    set.seed(seed)
    original_data <- original_data[sample(1:nrow(original_data)),]
  }
  X <- subset(original_data, select = -c(stab, stabf))
  design_mat <- as.matrix(cbind(rep(1, nrow(X)), X))
  colnames(design_mat)[1] <- 'intercept'
  return(list('data' = original_data,
              'y' = original_data$stabf,
              'X' = design_mat))
}

smart_grid <- load_smart_grid_data('scripts/logistic_regression/smart_grid/smart_grid_stability_augmented.csv')

##### Sampling from full posterior #####

full_posterior <- hmc_sample_BLR_2(y = smart_grid$y,
                                   X = smart_grid$X,
                                   C = 1,
                                   prior_means = prior_means,
                                   prior_variances = prior_variances,
                                   iterations = nsamples + 10000,
                                   warmup = 10000,
                                   chains = 1,
                                   seed = seed,
                                   output = T)

##### Sampling from sub-posterior C=128 #####

data_split_128 <- split_data(smart_grid$data, y_col_index = 14, X_col_index = 1:12, C = C, as_dataframe = F)
sub_posteriors_128 <- hmc_base_sampler_BLR_2(nsamples = nsamples,
                                             data_split = data_split_128,
                                             C = C,
                                             prior_means = prior_means,
                                             prior_variances = prior_variances,
                                             warmup = 10000,
                                             seed = seed,
                                             output = T)

##### Applying other methodologies #####

print('Applying other methodologies')
consensus_mat_128 <- consensus_scott(S= C, samples_to_combine = sub_posteriors_128, indep = F)
consensus_sca_128 <- consensus_scott(S= C, samples_to_combine = sub_posteriors_128, indep = T)
neiswanger_true_128 <- neiswanger(S= C,
                                  samples_to_combine = sub_posteriors_128,
                                  anneal = TRUE)
neiswanger_false_128 <- neiswanger(S= C,
                                   samples_to_combine = sub_posteriors_128,
                                   anneal = FALSE)
weierstrass_importance_128 <- weierstrass(Samples = sub_posteriors_128,
                                          method = 'importance')
weierstrass_rejection_128 <- weierstrass(Samples = sub_posteriors_128,
                                         method = 'reject')

integrated_abs_distance(full_posterior, consensus_mat_128$samples)
integrated_abs_distance(full_posterior, consensus_sca_128$samples)
integrated_abs_distance(full_posterior, neiswanger_true_128$samples)
integrated_abs_distance(full_posterior, neiswanger_false_128$samples)
integrated_abs_distance(full_posterior, weierstrass_importance_128$samples)
integrated_abs_distance(full_posterior, weierstrass_rejection_128$samples)

##### bal binary combining two sub-posteriors at a time #####

# regular mesh
balanced_C128 <- list('reg' = bal_binary_GBF_BLR(N_schedule = rep(nsamples, 7),
                                                 m_schedule = rep(2, 7),
                                                 time_mesh = NULL,
                                                 base_samples = sub_posteriors_128,
                                                 L = 8,
                                                 dim = 13,
                                                 data_split = data_split_128,
                                                 prior_means = prior_means,
                                                 prior_variances = prior_variances,
                                                 C = C,
                                                 precondition = TRUE,
                                                 resampling_method = 'resid',
                                                 ESS_threshold = ESS_threshold,
                                                 adaptive_mesh = FALSE,
                                                 mesh_parameters = list('condition' = 'SH',
                                                                        'CESS_0_threshold' = CESS_0_threshold,
                                                                        'CESS_j_threshold' = CESS_j_threshold,
                                                                        'vanilla' = FALSE),
                                                 diffusion_estimator = diffusion_estimator,
                                                 seed = seed,
                                                 print_progress_iters = 100))
balanced_C128$reg$particles <- resample_particle_y_samples(particle_set = balanced_C128$reg$particles[[1]],
                                                           multivariate = TRUE,
                                                           resampling_method = 'resid',
                                                           seed = seed)
balanced_C128$reg$proposed_samples <- balanced_C128$reg$proposed_samples[[1]]
print(integrated_abs_distance(full_posterior, balanced_C128$reg$particles$y_samples))
save.image('SG128.RData')

# adaptive mesh
balanced_C128$adaptive <- bal_binary_GBF_BLR(N_schedule = rep(nsamples, 7),
                                             m_schedule = rep(2, 7),
                                             time_mesh = NULL,
                                             base_samples = sub_posteriors_128,
                                             L = 8,
                                             dim = 13,
                                             data_split = data_split_128,
                                             prior_means = prior_means,
                                             prior_variances = prior_variances,
                                             C = C,
                                             precondition = TRUE,
                                             resampling_method = 'resid',
                                             ESS_threshold = ESS_threshold,
                                             adaptive_mesh = TRUE,
                                             mesh_parameters = list('condition' = 'SH',
                                                                    'CESS_0_threshold' = CESS_0_threshold,
                                                                    'CESS_j_threshold' = CESS_j_threshold,
                                                                    'vanilla' = FALSE),
                                             diffusion_estimator = diffusion_estimator,
                                             seed = seed,
                                             print_progress_iters = 100)
balanced_C128$adaptive$particles <- resample_particle_y_samples(particle_set = balanced_C128$adaptive$particles[[1]],
                                                                multivariate = TRUE,
                                                                resampling_method = 'resid',
                                                                seed = seed)
balanced_C128$adaptive$proposed_samples <- balanced_C128$adaptive$proposed_samples[[1]]
print(integrated_abs_distance(full_posterior, balanced_C128$adaptive$particles$y_samples))
save.image('SG128.RData')
rchan26/hierarchicalFusion documentation built on Sept. 11, 2022, 10:30 p.m.