Nothing
## ----setup, include=FALSE-----------------------------------------------------
# Seed R's random number generator so that the random draws made later in
# this script (e.g. the rnorm() call for the batch shifts) are repeatable.
set.seed(1)
# Show the source code of every chunk in the rendered document.
knitr::opts_chunk$set(echo = TRUE)
## ----dataGen------------------------------------------------------------------
library(ggplot2)
library(batchmix)
# Data dimensions. N and P are handed to generateBatchData() below; K and B
# fix the lengths of the per-group and per-batch parameter vectors.
N <- 600
P <- 4
K <- 5
B <- 7

# Generating model parameters.
# mean_dist is the spacing between consecutive group means; batch_dist is
# used as both the mean and the standard deviation of the batch shift draws.
mean_dist <- 2.25
batch_dist <- 0.3

# Per-group quantities (one entry per group).
group_means <- mean_dist * seq_len(K)
std_dev <- rep(2, times = K)
group_weights <- rep(1 / K, times = K)
# Passed as `group_dfs` below (presumably the per-group degrees of freedom);
# hard-coded, so its length must be kept equal to K by hand.
dfs <- c(4, 7, 15, 60, 120)

# Per-batch quantities (one entry per batch).
batch_shift <- rnorm(n = B, mean = batch_dist, sd = batch_dist)
batch_var <- rep(1.2, times = B)
batch_weights <- rep(1 / B, times = B)
# Simulate the dataset from the parameters defined above. The first eight
# arguments are matched by position, so their order must follow the
# signature of generateBatchData(); only the density type and the
# per-group `group_dfs` vector are passed by name.
my_data <- generateBatchData(
  N, P,
  group_means, std_dev,
  batch_shift, batch_var,
  group_weights, batch_weights,
  type = "MVT", group_dfs = dfs
)
## ----dataClean----------------------------------------------------------------
# Pull out of the simulated object the pieces the rest of the script uses:
# the observed data, the batch and group identifiers, and the `fixed`
# indicator (compared with 0 further down to select the test items).
X <- my_data$observed_data
batch_vec <- my_data$batch_IDs
true_labels <- my_data$group_IDs
fixed <- my_data$fixed

# Starting labels for the sampler, built from alpha, the number of groups K,
# the `fixed` indicator and the true labels.
alpha <- 1
initial_labels <- generateInitialLabels(alpha, K, fixed, true_labels)
## ----runMCMCChains------------------------------------------------------------
# Sampler settings: number of chains, value passed as `R` (presumably the
# number of iterations per chain) and the thinning value.
n_chains <- 4
R <- 1000
thin <- 50

# Density choice; the same "MVT" type was used to generate the data.
type <- "MVT"

# Run the chains. The first six arguments are matched by position; the
# starting labels and the `fixed` indicator are passed by name.
mcmc_output <- runMCMCChains(
  X, n_chains, R, thin, batch_vec, type,
  initial_labels = initial_labels,
  fixed = fixed
)
## ----plotAcceptanceRatesEarly-------------------------------------------------
# Acceptance-rate plot for the initial (short) run.
mcmc_output |>
  plotAcceptanceRates()
## ----likelihood---------------------------------------------------------------
# Likelihood plot for the initial (short) run.
mcmc_output |>
  plotLikelihoods()
## ----continueChains-----------------------------------------------------------
# Value passed as `R` when extending the existing chains.
R_new <- 9000

# Extend the chains held in `mcmc_output` rather than starting afresh.
# keep_old_samples = TRUE presumably retains the draws of the initial run in
# the returned object -- confirm against the batchmix documentation.
new_output <- continueChains(
  mcmc_output, X, fixed, batch_vec, R_new,
  keep_old_samples = TRUE
)
## ----continuedLikelihood------------------------------------------------------
# Likelihood plot for the extended chains.
new_output |>
  plotLikelihoods()
## ----plotAcceptanceRates------------------------------------------------------
# Acceptance-rate plot for the extended chains.
new_output |>
  plotAcceptanceRates()
## ----processChains------------------------------------------------------------
# Burn-in handed to processMCMCChains().
# NOTE(review): presumably counted in iterations, i.e. half of the
# R + R_new = 10000 run in total -- confirm the unit in the package docs.
burn <- 5000

# Post-process the extended chains; the result is indexed per chain below.
processed_samples <- new_output |>
  processMCMCChains(burn)
## ----pca----------------------------------------------------------------------
# Only the first processed chain is used for the plots and the accuracy.
chain_used <- processed_samples[[1]]

# PCA of the observed data with each column scaled to unit variance.
# `scale.` is spelled out in full (the original relied on partial argument
# matching) and TRUE replaces the reassignable alias T.
pc <- prcomp(X, scale. = TRUE)

# PCA of the dataset stored in the chain as `inferred_dataset`.
# NOTE(review): this PCA is left unscaled, exactly as in the original code,
# whereas the one above is scaled -- confirm the asymmetry is intended.
pc_batch_corrected <- prcomp(chain_used$inferred_dataset)

# One row per item: the first two principal components of each PCA, the
# predicted and true labels, the chain's `prob` value and the batch label.
plot_df <- data.frame(
  PC1 = pc$x[, 1],
  PC2 = pc$x[, 2],
  PC1_bf = pc_batch_corrected$x[, 1],
  PC2_bf = pc_batch_corrected$x[, 2],
  pred_labels = factor(chain_used$pred),
  true_labels = factor(true_labels),
  prob = chain_used$prob,
  batch = factor(batch_vec)
)

# Observed data in PC space, coloured by the true label; point transparency
# follows `prob`.
plot_df |>
  ggplot(aes(
    x = PC1,
    y = PC2,
    colour = true_labels,
    alpha = prob
  )) +
  geom_point()

# Inferred dataset in PC space, coloured by the predicted label; point
# transparency follows `prob`.
plot_df |>
  ggplot(aes(
    x = PC1_bf,
    y = PC2_bf,
    colour = pred_labels,
    alpha = prob
  )) +
  geom_point()

# Proportion of the test items (those with fixed == 0) whose predicted label
# equals the true label. mean() of the logical vector is the same quantity
# as sum(matches) / number of test items.
test_inds <- which(fixed == 0)
mean(true_labels[test_inds] == chain_used$pred[test_inds])
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.