inst/doc/Bayesian_LCA_mixtures.R

## -----------------------------------------------------------------------------
library("telescope")

## -----------------------------------------------------------------------------
data("SimData", package = "telescope")
y <- as.matrix(SimData[, 1:30])
z <- SimData[, 31]

## -----------------------------------------------------------------------------
dim(y)

## -----------------------------------------------------------------------------
table(z)

## -----------------------------------------------------------------------------
N <- nrow(y)
r <- ncol(y)
cat <- apply(y, 2, max)

## -----------------------------------------------------------------------------
Mmax <- 300
thin <- 1
burnin <- 100

## -----------------------------------------------------------------------------
M <- Mmax/thin

## -----------------------------------------------------------------------------
Kmax <- 50  
Kinit <- 10

## -----------------------------------------------------------------------------
L <- 2

## -----------------------------------------------------------------------------
G <- "MixDynamic"

## -----------------------------------------------------------------------------
priorOnAlpha <- priorOnAlpha_spec("gam_1_2")

## -----------------------------------------------------------------------------
priorOnK <- priorOnK_spec("BNB_143")

## -----------------------------------------------------------------------------
d0 <- 1  

## -----------------------------------------------------------------------------
a_mu <- rep(20, sum(cat)) 
mu_0 <- matrix(rep(rep(1/cat, cat), Kinit),
               byrow = TRUE, nrow = Kinit)

c_phi <- 30
d_phi <- 1
b_phi <- rep(10, r)

a_phi <- rep(1, r)  
phi_0 <- matrix(cat, Kinit, r, byrow = TRUE)

## regularizing constant
a_00 <- 0.05    

## proposal standard deviations for MH steps for sampling mu and phi, and regularizing constant eps to bound the Dirichlet proposal mu away from the boundary of the simplex
s_mu <- 2  
s_phi <- 2 
eps <- 0.01

## -----------------------------------------------------------------------------
set.seed(1234)
cl_y <- kmeans(y, centers = Kinit, nstart = 100, iter.max = 50)
S_0 <- cl_y$cluster
eta_0 <- cl_y$size/N

## -----------------------------------------------------------------------------
I_0 <- rep(1L, N)
if (L > 1) {
  for (k in 1:Kinit) {
    cl_size <- sum(S_0 == k)
    I_0[S_0 == k] <- rep(1:L, length.out = cl_size)
  }
} 

## -----------------------------------------------------------------------------
index <- c(0, cumsum(cat))
low <- (index + 1)[-length(index)]
up <- index[-1]

pi_km <- array(NA_real_, dim = c(Kinit, L, sum(cat)))
rownames(pi_km) <- paste0("k_", 1:Kinit)
for (k in 1:Kinit) {
    for (l in 1:L) {
        index <- (S_0 == k) & (I_0 == l)
        for (j in 1:r) {
            pi_km[k, l, low[j]:up[j]] <- tabulate(y[index, j], cat[j]) / sum(index)
        }
    }
}
pi_0 <- pi_km 

## -----------------------------------------------------------------------------
result <- sampleLCAMixture(y, S_0, L, 
                           pi_0, eta_0, mu_0, phi_0,
                           a_00, a_mu, a_phi, b_phi, c_phi, d_phi,
                           M, burnin, thin, Kmax,
                           s_mu, s_phi, eps,
                           G, priorOnAlpha, d0, priorOnK)

## -----------------------------------------------------------------------------
Eta <- result$Eta
S <- result$S
K <- result$K
Kplus <- result$Kplus

Nk <- result$Nk
Nl <- result$Nl
acc <- result$acc
e0 <- result$e0
alpha <- result$alpha

Mu <- result$Mu
Phi <- result$Phi
B_phi <- result$B_phi
acc_mu <- result$acc_mu
acc_phi <- result$acc_phi
nonnormpost_mode <- result$nonnormpost_mode
Pi_k <- result$Pi_k

## ----fig.height = 5, fig.width = 7--------------------------------------------
acc <- sum(acc)/M
acc
plot(1:length(alpha), alpha, type = "l", 
     ylim = c(0, max(alpha)), 
     xlab = "iterations", ylab = expression(alpha))

## ----fig.height = 5, fig.width = 7--------------------------------------------
hist(alpha, freq = FALSE, breaks = 50)
mean(alpha)
quantile(alpha, probs = c(0.25, 0.5, 0.75))

## ----fig.height = 5, fig.width = 7--------------------------------------------
hist(e0, freq = FALSE, breaks = 50)
mean(e0)
quantile(e0, probs = c(0.25, 0.5, 0.75))

## -----------------------------------------------------------------------------
k <- 1  # component
j <- 1  # variable
sum(acc_mu[, k, j])/M

## ----fig.height = 5, fig.width = 7--------------------------------------------
boxplot(Mu[, k, seq(2, 2*r, 2)], xlab = "mu_j")

## ----fig.height = 5, fig.width = 7--------------------------------------------
j <- 1  # variable
d <- 2  # category
k <- 1  # component
plot(Mu[, k, low[j]+(d-1)], type = "l", 
     ylab = paste0("mu_jkd, j=", j, ",d=", d," (k=", k, ")"))

## ----fig.height = 5, fig.width = 7--------------------------------------------
k <- 1  # component
j <- 1  # variable
sum(acc_phi[, k, j])/M

boxplot(Phi[, k, ], xlab = "phi_j",
        ylim = quantile(Phi[, k, ], c(0, 0.95)))

j <- 3  # variable
k <- 1  # component
plot(Phi[, k, j], type = "l",
     ylab = paste0("phi_jkd, j=", j, ", (k=", k, ")"))

## ----fig.height = 5, fig.width = 7--------------------------------------------
plot(K, type = "l", ylim = c(0, max(K)), 
     xlab = "iteration", main = "", ylab = "count",
     lwd = 0.5, col = "grey")
points(Kplus, type = "l", col = "red3",
       lwd = 2, lty = 1)
legend("topright", legend = c("K", "K+"), col = c("grey", "red3"),
       lwd = 2)

## ----fig.height = 5, fig.width = 7--------------------------------------------
k <- 3   # component
matplot(Nl[seq(100, M, 10), ((k-1)*L+1):(k*L)],
        type = "l", ylab = "Nl")

## ----fig.height = 5, fig.width = 7--------------------------------------------
p_Kplus <- tabulate(Kplus, nbins = max(Kplus))/M 
barplot(p_Kplus/sum(p_Kplus), xlab = expression(K["+"]), names = 1:length(p_Kplus),
        ylab = expression("p(" ~ K["+"] ~ "|" ~ bold(y) ~ ")"))

## -----------------------------------------------------------------------------
quantile(Kplus, probs = c(0.25, 0.5, 0.75))

## -----------------------------------------------------------------------------
Kplus_hat <- which.max(p_Kplus)
Kplus_hat
M0 <- sum(Kplus == Kplus_hat)
M0          

## -----------------------------------------------------------------------------
p_K <- tabulate(K, nbins = max(K))/M
quantile(K, probs = c(0.25, 0.5, 0.75))

## ----fig.height = 5, fig.width = 7--------------------------------------------
barplot(p_K/sum(p_K), names = 1:length(p_K), xlab = "K", 
        ylab = expression("p(" ~ K ~ "|" ~ bold(y) ~ ")"))
which.max(tabulate(K, nbins = max(K)))   

## -----------------------------------------------------------------------------
index <- Kplus == Kplus_hat
Nk[is.na(Nk)] <- 0
Nk_Kplus <- (Nk[Kplus == Kplus_hat, ] > 0)

## -----------------------------------------------------------------------------
index <- Kplus == Kplus_hat
Nk[is.na(Nk)] <- 0
Nk_Kplus <- (Nk[Kplus == Kplus_hat, ] > 0)

Pi_k_inter <- Pi_k[index,,] 
Pi_k_Kplus <- array(0, dim = c(M0, Kplus_hat, sum(cat)))  
for (j in 1:sum(cat)) {
  Pi_k_Kplus[, , j] <- Pi_k_inter[, , j][Nk_Kplus]
}

Mu_inter <- Mu[index, , ]       
Mu_Kplus <- array(0, dim = c(M0, Kplus_hat, sum(cat)))  
for (j in 1:sum(cat)) {
  Mu_Kplus[, , j] <- Mu_inter[, , j][Nk_Kplus]
}

Phi_inter <- Phi[index, , ]
Phi_Kplus <- array(0, dim = c(M0, Kplus_hat, r))
for (j in 1:r) {
  Phi_Kplus[, , j] <- Phi_inter[, , j][Nk_Kplus]
}

Eta_inter <- Eta[index, ]
Eta_Kplus <- matrix(Eta_inter[Nk_Kplus], ncol = Kplus_hat)

v <- which(index)
S_Kplus <- matrix(0, M0, N)
for (i in seq_along(v)) {
  m <- v[i]
  perm_S <- rep(0, Kmax)
  perm_S[Nk[m, ] != 0] <- 1:Kplus_hat
  S_Kplus[i, ] <- perm_S[S[m, ]]
}


## -----------------------------------------------------------------------------
Func_init <- nonnormpost_mode[[Kplus_hat]]$pi_k
identified_Kplus <- identifyLCAMixture(
    Pi_k_Kplus, Mu_Kplus, Phi_Kplus, Eta_Kplus, S_Kplus, Func_init)

## -----------------------------------------------------------------------------
identified_Kplus$non_perm_rate

## -----------------------------------------------------------------------------
colMeans(identified_Kplus$Eta)

## -----------------------------------------------------------------------------
z_sp <- apply(identified_Kplus$S, 2,
              function(x) which.max(tabulate(x, Kplus_hat)))
table(z_sp)
library("mclust")
classError(z_sp, z)$errorRate
adjustedRandIndex(z, z_sp)

## ----fig.height = 5, fig.width = 7--------------------------------------------
k <- 1  # component
boxplot(identified_Kplus$Mu[, k, seq(2, 2*r, 2)],
        ylab = "mu_j")
boxplot(identified_Kplus$Phi[, k, ], ylab = "phi_j",
        ylim = quantile(identified_Kplus$Phi[, k, ], c(0, 0.95)))

Try the telescope package in your browser

Any scripts or data that you put into this service are public.

telescope documentation built on April 4, 2025, 2:40 a.m.