update_all_y <- function(x_data, mu, SigmaINV, Lambda, z){
p <- dim(Lambda)[2]
q <- dim(Lambda)[3]
n <- dim(x_data)[1]
# 2. gibbs for y_i|...
n_k <- table(z)
alive <- as.numeric(names(n_k))
y_new <- array(data = 0, dim = c(n, q))
for(k in alive){
ind <- which(z == k)
center_x <- x_data[ind, ] - matrix(mu[k,], nrow = as.numeric(n_k[as.character(k)]), ncol = p, byrow=TRUE)
Alpha <- t(Lambda[k, , ]) %*% SigmaINV %*% Lambda[k, , ]
diag(Alpha) <- diag(Alpha) + 1
Alpha <- solve(Alpha)
tmp <- Alpha %*% t(Lambda[k, , ]) %*% SigmaINV
y_mean <- t(apply(center_x, 1, function(tk){return(tmp %*% tk)} ))
if(q == 1){ y_mean <- t(y_mean) }
y_new[ind, ] <- t( apply( y_mean, 1, function( tk ){ return( mvrnorm(n = 1, mu = tk, Sigma = Alpha) ) } ) )
}
results <- vector("list", length=1)
results[[1]] <- y_new
names(results) <- c("y")
return(results)
}
update_all_y_Sj <- function(x_data, mu, SigmaINV, Lambda, z){
p <- dim(Lambda)[2]
q <- dim(Lambda)[3]
n <- dim(x_data)[1]
# 2. gibbs for y_i|...
n_k <- table(z)
alive <- as.numeric(names(n_k))
y_new <- array(data = 0, dim = c(n, q))
for(k in alive){
ind <- which(z == k)
center_x <- x_data[ind, ] - matrix(mu[k,], nrow = as.numeric(n_k[as.character(k)]), ncol = p, byrow=TRUE)
Alpha <- t(Lambda[k, , ]) %*% SigmaINV[k, ,] %*% Lambda[k, , ]
diag(Alpha) <- diag(Alpha) + 1
Alpha <- solve(Alpha)
tmp <- Alpha %*% t(Lambda[k, , ]) %*% SigmaINV[k,,]
y_mean <- t(apply(center_x, 1, function(tk){return(tmp %*% tk)} ))
if(q == 1){ y_mean <- t(y_mean) }
y_new[ind, ] <- t( apply( y_mean, 1, function( tk ){ return( mvrnorm(n = 1, mu = tk, Sigma = Alpha) ) } ) )
}
results <- vector("list", length=1)
results[[1]] <- y_new
names(results) <- c("y")
return(results)
}
# compute sufficient statistics given y, z, K (and x_data)
compute_sufficient_statistics <- function(y, z, K, x_data){
cluster_size <- numeric(K)
p <- dim(x_data)[2]
q <- dim(y)[2]
sx <- array(data = 0, dim = c(K,p))
sy <- array(data = 0, dim = c(K,q))
# sxx <- array(data = 0, dim = c(K,p,p)) #this is not needed at all.
sxx <- 0
syy <- array(data = 0, dim = c(K,q,q))
sxy <- array(data = 0, dim = c(K,p,q))
for(k in 1:K){
index <- which(z == k)
cluster_size[k] <- length(index)
if( cluster_size[k] > 0){
sx[k,] <- colSums(array(x_data[index,],dim = c(cluster_size[k],p)))
sy[k,] <- colSums(array(y[index,],dim = c(cluster_size[k],q)))
if(cluster_size[k] > 1){
syy[k,,] <- crossprod(y[index, ])
sxy[k,,] <- crossprod( x_data[index, ], y[index, ])
}else{
syy[k,,] <- y[index,] %*% t(y[index,])
sxy[k,,] <- x_data[index,] %*% t(y[index,])
}
}
}
results <- vector("list", length=6)
names(results) <- c("cluster_size","sx","sy","sxx","syy","sxy")
results[[1]] <- cluster_size
results[[2]] <- sx
results[[3]] <- sy
results[[4]] <- sxx
results[[5]] <- syy
results[[6]] <- sxy
return(results)
}
compute_sufficient_statistics_given_mu <- function(y, z, K, x_data, mu){
cluster_size <- numeric(K)
p <- dim(x_data)[2]
q <- dim(y)[2]
sx <- array(data = 0, dim = c(K,p))
sy <- array(data = 0, dim = c(K,q))
# sxx <- array(data = 0, dim = c(K,p,p))
sxx <- 0
syy <- array(data = 0, dim = c(K,q,q))
sxy <- array(data = 0, dim = c(K,p,q))
for(k in 1:K){
index <- which(z == k)
cluster_size[k] <- length(index)
if( cluster_size[k] > 0){
sx[k,] <- colSums(array(x_data[index,],dim = c(cluster_size[k],p)))
sy[k,] <- colSums(array(y[index,],dim = c(cluster_size[k],q)))
if(cluster_size[k] > 1){
xNEW <- matrix( apply(x_data[index, ], 1, function(tk){return(tk - mu[k,])}), nrow = cluster_size[k], ncol = p, byrow = TRUE)
syy[k,,] <- crossprod(y[index, ])
sxy[k,,] <- crossprod( xNEW, y[index, ])
}else{
xNEW <- x_data[index, ] - mu[k, ]
syy[k,,] <- y[index,] %*% t(y[index,])
sxy[k,,] <- xNEW %*% t(y[index,])
}
# for( i in index){
# syy[k,,] <- syy[k,,] + y[i,] %*% t(y[i,])
# sxy[k,,] <- sxy[k,,] + (x_data[i,] - mu[k, ]) %*% t(y[i,]) #v3: edw afairw to mu
# }
}
}
results <- vector("list", length=6)
names(results) <- c("cluster_size","sx","sy","sxx","syy","sxy")
results[[1]] <- cluster_size
results[[2]] <- sx
results[[3]] <- sy
results[[4]] <- sxx
results[[5]] <- syy
results[[6]] <- sxy
return(results)
}
# dirichlet function
myDirichlet <- function(alpha){
k <- length(alpha)
theta <- rgamma(k, shape = alpha, rate = 1)
return(theta/sum(theta))
}
#simulate z and mixture weights from standard Gibbs
update_z_b <- function(w, mu, Lambda, y, SigmaINV, K, x_data){
n <- dim(x_data)[1]
p <- dim(x_data)[2]
q <- dim(y)[2]
probs <- array(data = 0, dim =c(n,K))
x_var <- array(data = 0, dim = c(p,p))
diag(x_var) <- 1/diag(SigmaINV)
for(k in 1:K){
center_x <- x_data - t(apply(y,1,function(tmp){return(mu[k,] + array(Lambda[k,,], dim = c(p,q)) %*% tmp)}))
probs[,k] <- log(w[k]) + dmvnorm(center_x, mean = rep(0, p), sigma = x_var, log = TRUE)
}
probs <- array(t(apply(probs, 1, function(tmp){return(exp(tmp - max(tmp)))} )),dim = c(n,K))
z <- apply(probs,1,function(tmp){if(anyNA(tmp)){tmp <- rep(1,K)};return(sample(K,1,prob = tmp))})
# apply(probs,1,function(tmp){return(sample(K,1,prob = tmp))})
results <- vector("list", length=2)
names(results) <- c("w","z")
results[[1]] <- w
results[[2]] <- z
return(results)
}
update_z_b_Sj <- function(w, mu, Lambda, y, SigmaINV, K, x_data){
n <- dim(x_data)[1]
p <- dim(x_data)[2]
q <- dim(y)[2]
probs <- array(data = 0, dim =c(n,K))
for(k in 1:K){
x_var <- array(data = 0, dim = c(p,p))
diag(x_var) <- 1/diag(SigmaINV[k,,])
center_x <- x_data - t(apply(y,1,function(tmp){return(mu[k,] + array(Lambda[k,,], dim = c(p,q)) %*% tmp)}))
probs[,k] <- log(w[k]) + dmvnorm(center_x, mean = rep(0, p), sigma = x_var, log = TRUE)
}
probs <- array(t(apply(probs, 1, function(tmp){return(exp(tmp - max(tmp)))} )),dim = c(n,K))
z <- apply(probs,1,function(tmp){if(anyNA(tmp)){tmp <- rep(1,K)};return(sample(K,1,prob = tmp))})
# apply(probs,1,function(tmp){return(sample(K,1,prob = tmp))})
results <- vector("list", length=2)
names(results) <- c("w","z")
results[[1]] <- w
results[[2]] <- z
return(results)
}
# using dmvnorm from package mvtnorm
update_z4 <- function(w, mu, Lambda, SigmaINV, K, x_data){
n <- dim(x_data)[1]
p <- dim(x_data)[2]
probs <- array(data = 0, dim =c(n,K))
for(k in 1:K){
center_x <- x_data - matrix(mu[k,], nrow = n, ncol = p, byrow=TRUE)
x_var <- Lambda[k,,] %*% t(Lambda[k,,])
diag(x_var) <- diag(x_var) + 1/diag(SigmaINV)
lpdf <- log(w[k]) + dmvnorm(center_x, mean = rep(0, p), sigma = x_var, log = TRUE)
probs[,k] <- lpdf
}
probs <- array(t(apply(probs, 1, function(tmp){return(exp(tmp - max(tmp)))} )),dim = c(n,K))
z <- apply(probs,1,function(tmp){if(anyNA(tmp)){tmp <- rep(1,K)};return(sample(K,1,prob = tmp))})
# apply(probs,1,function(tmp){return(sample(K,1,prob = tmp))})
results <- vector("list", length=2)
names(results) <- c("w","z")
results[[1]] <- w
results[[2]] <- z
return(results)
}
update_z4_Sj <- function(w, mu, Lambda, SigmaINV, K, x_data){
n <- dim(x_data)[1]
p <- dim(x_data)[2]
probs <- array(data = 0, dim =c(n,K))
for(k in 1:K){
center_x <- x_data - matrix(mu[k,], nrow = n, ncol = p, byrow=TRUE)
x_var <- Lambda[k,,] %*% t(Lambda[k,,])
diag(x_var) <- diag(x_var) + 1/diag(SigmaINV[k,,])
lpdf <- log(w[k]) + dmvnorm(center_x, mean = rep(0, p), sigma = x_var, log = TRUE)
probs[,k] <- lpdf
}
probs <- array(t(apply(probs, 1, function(tmp){return(exp(tmp - max(tmp)))} )),dim = c(n,K))
z <- apply(probs,1,function(tmp){if(anyNA(tmp)){tmp <- rep(1,K)};return(sample(K,1,prob = tmp))})
# apply(probs,1,function(tmp){return(sample(K,1,prob = tmp))})
results <- vector("list", length=2)
names(results) <- c("w","z")
results[[1]] <- w
results[[2]] <- z
return(results)
}
# using the matrix inversion lemma
update_z2 <- function(w, mu, Lambda, SigmaINV, K, x_data){
n <- dim(x_data)[1]
p <- dim(x_data)[2]
probs <- array(data = 0, dim =c(n,K))
for(k in 1:K){
center_x <- x_data - matrix(mu[k,], nrow = n, ncol = p, byrow=TRUE)
x_var <- t(Lambda[k,,]) %*% SigmaINV %*% Lambda[k,,]
diag(x_var) <- diag(x_var) + 1
x_var <- try(solve(x_var), TRUE)
if(is.numeric(x_var) == TRUE){
x_var <- Lambda[k,,] %*% x_var %*% t(Lambda[k,,])
x_var <- SigmaINV %*% x_var %*% SigmaINV
x_var <- SigmaINV - x_var
probs[,k] <- log(w[k]) -0.5*apply(center_x,1,function(tmp){return( as.numeric(t(tmp) %*% x_var %*% tmp) )}) + 0.5*log(det(x_var))
}
}
probs <- array(t(apply(probs, 1, function(tmp){return(exp(tmp - max(tmp)))} )),dim = c(n,K))
z <- apply(probs,1,function(tmp){if(anyNA(tmp)){tmp <- rep(1,K)};return(sample(K,1,prob = tmp))})
#apply(probs,1,function(tmp){if(anyNA(tmp)){tmp <- rep(1,K)};return(sample(K,1,prob = tmp))})
results <- vector("list", length=2)
names(results) <- c("w","z")
results[[1]] <- w
results[[2]] <- z
return(results)
}
update_z2_Sj <- function(w, mu, Lambda, SigmaINV, K, x_data){
n <- dim(x_data)[1]
p <- dim(x_data)[2]
probs <- array(data = 0, dim =c(n,K))
for(k in 1:K){
center_x <- x_data - matrix(mu[k,], nrow = n, ncol = p, byrow=TRUE)
x_var <- t(Lambda[k,,]) %*% SigmaINV[k,,] %*% Lambda[k,,]
diag(x_var) <- diag(x_var) + 1
x_var <- try(solve(x_var), TRUE)
if(is.numeric(x_var) == TRUE){
x_var <- Lambda[k,,] %*% x_var %*% t(Lambda[k,,])
x_var <- SigmaINV[k,,] %*% x_var %*% SigmaINV[k,,]
x_var <- SigmaINV[k,,] - x_var
probs[,k] <- log(w[k]) -0.5*apply(center_x,1,function(tmp){return( as.numeric(t(tmp) %*% x_var %*% tmp) )}) + 0.5*log(det(x_var))
}
}
probs <- array(t(apply(probs, 1, function(tmp){return(exp(tmp - max(tmp)))} )),dim = c(n,K))
z <- apply(probs,1,function(tmp){if(anyNA(tmp)){tmp <- rep(1,K)};return(sample(K,1,prob = tmp))})
# apply(probs,1,function(tmp){return(sample(K,1,prob = tmp))})
results <- vector("list", length=2)
names(results) <- c("w","z")
results[[1]] <- w
results[[2]] <- z
return(results)
}
complete.log.likelihood <- function(x_data, w, mu, Lambda, SigmaINV, z){
n <- length(z)
p <- dim(Lambda)[2]
q <- dim(Lambda)[3]
probs <- numeric(n)
alive <- as.numeric(names(table(z)))
for(k in alive){
index <- which(z == k)
center_x <- x_data[index,] - matrix(mu[k,], nrow = length(index), ncol = p, byrow=TRUE)
x_var <- Lambda[k,,] %*% t(Lambda[k,,])
diag(x_var) <- diag(x_var) + 1/diag(SigmaINV)
#x_var <- solve(x_var)
x_var <- try(solve(x_var), TRUE)
if(is.numeric(x_var) == TRUE){
probs[index] <- log(w[k]) -0.5*apply(center_x,1,function(tmp){return( as.numeric(t(tmp) %*% x_var %*% tmp) )}) + 0.5*log(det(x_var))
}
}
return(sum(probs))
}
complete.log.likelihood_Sj <- function(x_data, w, mu, Lambda, SigmaINV, z){
n <- length(z)
p <- dim(Lambda)[2]
q <- dim(Lambda)[3]
probs <- numeric(n)
alive <- as.numeric(names(table(z)))
for(k in alive){
index <- which(z == k)
center_x <- x_data[index,] - matrix(mu[k,], nrow = length(index), ncol = p, byrow=TRUE)
x_var <- Lambda[k,,] %*% t(Lambda[k,,])
diag(x_var) <- diag(x_var) + 1/diag(SigmaINV[k,,])
#x_var <- solve(x_var)
x_var <- try(solve(x_var), TRUE)
if(is.numeric(x_var) == TRUE){
probs[index] <- log(w[k]) -0.5*apply(center_x,1,function(tmp){return( as.numeric(t(tmp) %*% x_var %*% tmp) )}) + 0.5*log(det(x_var))
}
}
return(sum(probs))
}
update_SigmaINV_faster <- function(x_data, z, y, Lambda, mu, K, alpha_sigma, beta_sigma){
p <- dim(Lambda)[2]
q <- dim(Lambda)[3]
n <- length(z)
SigmaINV <- array(data = 0, dim = c(p,p))
s <- numeric(p)
alive <- as.numeric(names(table(z)))
for (k in alive){
ind <- which(z == k)
n_k <- length(ind)
tmp <- matrix(mu[k, ], nrow = n_k, ncol = p, byrow = TRUE) + t(
apply
(
array(y[ind, ], dim = c(n_k,q)), 1,
function(tk)
{
return(array(Lambda[ k, , ], dim = c(p,q)) %*% tk)
}
)
)
s <- s + colSums((x_data[ind, ] - tmp)^2)
}
diag(SigmaINV) <- rgamma(p,shape = alpha_sigma + n/2, rate = beta_sigma + s/2)
return(SigmaINV)
}
update_SigmaINV_faster_Sj <- function(x_data, z, y, Lambda, mu, K, alpha_sigma, beta_sigma){
p <- dim(Lambda)[2]
q <- dim(Lambda)[3]
n <- length(z)
SigmaINV <- array(data = 0, dim = c(K,p,p))
for (k in 1:K){
s <- numeric(p)
ind <- which(z == k)
n_k <- length(ind)
if(n_k > 0){
tmp <- matrix(mu[k, ], nrow = n_k, ncol = p, byrow = TRUE) + t(
apply
(
array(y[ind, ], dim = c(n_k,q)), 1,
function(tk)
{
return(array(Lambda[ k, , ], dim = c(p,q)) %*% tk)
}
)
)
s <- colSums((x_data[ind, ] - tmp)^2)
}
diag(SigmaINV[k, , ]) <- rgamma(p,shape = alpha_sigma + n_k/2, rate = beta_sigma + s/2)
}
return(SigmaINV)
}
#new in version 3
update_SigmaINV_xCC <- function(x_data, z, y, Lambda, mu, K, alpha_sigma, beta_sigma){
p <- dim(Lambda)[2]
q <- dim(Lambda)[3]
n <- length(z)
SigmaINV <- array(data = 0, dim = c(p,p)) # this is redundant: SigmaINV = sI_p
s <- 0
alive <- as.numeric(names(table(z)))
for (k in alive){
ind <- which(z == k)
n_k <- length(ind)
tmp <- matrix(mu[k, ], nrow = n_k, ncol = p, byrow = TRUE) + t(
apply
(
array(y[ind, ], dim = c(n_k,q)), 1,
function(tk)
{
return(array(Lambda[ k, , ], dim = c(p,q)) %*% tk)
}
)
)
s <- s + sum((x_data[ind, ] - tmp)^2)
}
diag(SigmaINV) <- rep(rgamma(1,shape = alpha_sigma + n*p/2, rate = beta_sigma + s/2), p)
return(SigmaINV)
}
#new in version 3
update_SigmaINV_xUC <- function(x_data, z, y, Lambda, mu, K, alpha_sigma, beta_sigma){
p <- dim(Lambda)[2]
q <- dim(Lambda)[3]
n <- length(z)
SigmaINV <- array(data = 0, dim = c(K,p,p))
for (k in 1:K){
s <- 0
ind <- which(z == k)
n_k <- length(ind)
if(n_k > 0){
tmp <- matrix(mu[k, ], nrow = n_k, ncol = p, byrow = TRUE) + t(
apply
(
array(y[ind, ], dim = c(n_k,q)), 1,
function(tk)
{
return(array(Lambda[ k, , ], dim = c(p,q)) %*% tk)
}
)
)
s <- sum((x_data[ind, ] - tmp)^2)
}
diag(SigmaINV[k, , ]) <- rep(rgamma(1,shape = alpha_sigma + n_k*p/2, rate = beta_sigma + s/2),p)
}
return(SigmaINV)
}
#update OmegaINV
update_OmegaINV <- function(Lambda, K, g, h){
p <- dim(Lambda)[2]
q <- dim(Lambda)[3]
OmegaINV <- array(data = 0, dim = c(q,q))
betaVector <- numeric(q)
for(k in 1:K){
betaVector <- betaVector + colSums(array(Lambda[k,,]^2,dim = c(p,q)))
}
diag(OmegaINV) <- rgamma(q,shape = g + K*p/2, rate = h + betaVector/2)
return(OmegaINV)
}
#update OmegaINV
update_OmegaINV_Cxx <- function(Lambda, K, g, h){
p <- dim(Lambda)[2]
q <- dim(Lambda)[3]
OmegaINV <- array(data = 0, dim = c(q,q))
betaVector <- colSums(array(Lambda[1,,]^2,dim = c(p,q)))
diag(OmegaINV) <- rgamma(q,shape = g + p/2, rate = h + betaVector/2)
return(OmegaINV)
}
# new in version 4.1
readLambdaValues <- function(myFile,K,p,q){
l <- array(data = NA, dim = c(K,p,q))
kpq <- K*p*q
myline <- as.numeric(read.table(myFile, colClasses = rep('numeric', kpq) ))
if(length(myline) != kpq){stop("dimensions do not match")}
iter <- 0
for(k in 1:K){
for(i in 1:p){
for(j in 1:q){
iter <- iter + 1
l[k,i,j] <- myline[iter]
}
}
}
return(l)
}
#-------------------------------------------------------------------------------------------
### functions for q_0
#-------------------------------------------------------------------------------------------
complete.log.likelihood_q0 <- function(x_data, w, mu, SigmaINV, z){
n <- length(z)
p <- dim(x_data)[2]
probs <- numeric(n)
alive <- as.numeric(names(table(z)))
for(k in alive){
index <- which(z == k)
center_x <- x_data[index,] - matrix(mu[k,], nrow = length(index), ncol = p, byrow=TRUE)
x_var <- SigmaINV[k,,]
probs[index] <- log(w[k]) -0.5*apply(center_x,1,function(tmp){return( as.numeric(t(tmp) %*% x_var %*% tmp) )}) + 0.5*log(det(x_var))
}
return(sum(probs))
}
complete.log.likelihood_q0_sameSigma <- function(x_data, w, mu, SigmaINV, z){
n <- length(z)
p <- dim(x_data)[2]
probs <- numeric(n)
alive <- as.numeric(names(table(z)))
x_var <- SigmaINV
myConstant <- log(det(x_var))
for(k in alive){
index <- which(z == k)
center_x <- x_data[index,] - matrix(mu[k,], nrow = length(index), ncol = p, byrow=TRUE)
probs[index] <- log(w[k]) -0.5*apply(center_x,1,function(tmp){return( as.numeric(t(tmp) %*% x_var %*% tmp) )}) + 0.5*myConstant
}
return(sum(probs))
}
compute_sufficient_statistics_q0 <- function(z, K, x_data){
p <- dim(x_data)[2]
cluster_size <- numeric(K)
sx <- array(data = 0, dim = c(K,p))
sy <- 0
# sxx <- array(data = 0, dim = c(K,p,p))
sxx <- 0
syy <- 0
sxy <- 0
for(k in 1:K){
index <- which(z == k)
cluster_size[k] <- length(index)
if( cluster_size[k] > 0){
sx[k,] <- colSums(array(x_data[index,],dim = c(cluster_size[k],p)))
# for( i in index){
# sxx[k,,] <- sxx[k,,] + x_data[i,] %*% t(x_data[i,])
# }
}
}
results <- vector("list", length=6)
names(results) <- c("cluster_size","sx","sy","sxx","syy","sxy")
results[[1]] <- cluster_size
results[[2]] <- sx
results[[3]] <- sy
results[[4]] <- sxx
results[[5]] <- syy
results[[6]] <- sxy
return(results)
}
compute_A_B_G_D_and_simulate_mu_Lambda_q0 <- function(SigmaINV, suff_statistics, K, priorConst1, T_INV, v_r){
p <- dim(SigmaINV[1,,])[2]
A <- array(data = 0, dim = c(K,p,p))
B <- mu <- array(data = 0, dim = c(K,p))
G <- 0
D <- 0
# (2) mu|Lambda, sufficient statistics, ...
mu <- array(data = 0, dim = c(K,p))
Lambdas <- 0
for(k in 1:K){
diag(A[k,,]) <- 1/( suff_statistics$cluster_size[k]*diag(SigmaINV[k,,]) + diag(T_INV))
B[k,] <- SigmaINV[k,,] %*% (suff_statistics$sx[k,] ) + priorConst1
# this is for simulating mu_k
mu_mean <- A[k,,] %*% B[k,]
mu[k,] <- mvrnorm(n = 1, mu = mu_mean, Sigma = A[k,,])
}
results <- vector("list", length=6)
results[[1]] <- A
results[[2]] <- B
results[[3]] <- G
results[[4]] <- D
results[[5]] <- Lambdas
results[[6]] <- mu
names(results) <- c("A","B","G","D","Lambdas","mu")
return(results)
}
compute_A_B_G_D_and_simulate_mu_Lambda_q0_sameSigma <- function(SigmaINV, suff_statistics, K, priorConst1, T_INV, v_r){
p <- dim(SigmaINV)[2]
A <- array(data = 0, dim = c(K,p,p))
B <- mu <- array(data = 0, dim = c(K,p))
G <- 0
D <- 0
# (2) mu|Lambda, sufficient statistics, ...
mu <- array(data = 0, dim = c(K,p))
Lambdas <- 0
for(k in 1:K){
diag(A[k,,]) <- 1/( suff_statistics$cluster_size[k]*diag(SigmaINV) + diag(T_INV))
B[k,] <- SigmaINV %*% (suff_statistics$sx[k,] ) + priorConst1
# this is for simulating mu_k
mu_mean <- A[k,,] %*% B[k,]
mu[k,] <- mvrnorm(n = 1, mu = mu_mean, Sigma = A[k,,])
}
results <- vector("list", length=6)
results[[1]] <- A
results[[2]] <- B
results[[3]] <- G
results[[4]] <- D
results[[5]] <- Lambdas
results[[6]] <- mu
names(results) <- c("A","B","G","D","Lambdas","mu")
return(results)
}
update_z_q0 <- function(w, mu, SigmaINV, K, x_data){
n <- dim(x_data)[1]
p <- dim(x_data)[2]
probs <- array(data = 0, dim =c(n,K))
for(k in 1:K){
center_x <- x_data - matrix(mu[k,], nrow = n, ncol = p , byrow = TRUE)
probs[,k] <- log(w[k]) -0.5*apply(center_x,1,function(tmp){return( as.numeric(t(tmp) %*% SigmaINV[k,,] %*% tmp) )}) + 0.5*sum(log(diag(SigmaINV[k,,])))
}
probs <- array(t(apply(probs, 1, function(tmp){return(exp(tmp - max(tmp)))} )),dim = c(n,K))
z <- apply(probs,1,function(tmp){return(sample(K,1,prob = tmp))})
results <- vector("list", length=2)
names(results) <- c("w","z")
results[[1]] <- w
results[[2]] <- z
return(results)
}
update_z_q0_sameSigma <- function(w, mu, SigmaINV, K, x_data){
n <- dim(x_data)[1]
p <- dim(x_data)[2]
probs <- array(data = 0, dim =c(n,K))
myConstant <- sum(log(diag(SigmaINV)))
for(k in 1:K){
center_x <- x_data - matrix(mu[k,], nrow = n, ncol = p , byrow = TRUE)
probs[,k] <- log(w[k]) -0.5*apply(center_x,1,function(tmp){return( as.numeric(t(tmp) %*% SigmaINV %*% tmp) )}) + 0.5*myConstant
}
probs <- array(t(apply(probs, 1, function(tmp){return(exp(tmp - max(tmp)))} )),dim = c(n,K))
z <- apply(probs,1,function(tmp){return(sample(K,1,prob = tmp))})
results <- vector("list", length=2)
names(results) <- c("w","z")
results[[1]] <- w
results[[2]] <- z
return(results)
}
update_SigmaINV_faster_q0 <- function(z, mu, K, alpha_sigma, beta_sigma, x_data){
p <- dim(x_data)[2]
SigmaINV <- array(data = 0, dim = c(K,p,p))
alive <- as.numeric(names(table(z)))
for (k in 1:K){
s <- numeric(p)
ind <- which(z == k)
n_k <- length(ind)
if(n_k > 0){
tmp <- matrix(mu[k, ], nrow = n_k, ncol = p, byrow = TRUE)
s <- colSums((x_data[ind, ] - tmp)^2)
}
diag(SigmaINV[k, , ]) <- rgamma(p,shape = alpha_sigma + n_k/2, rate = beta_sigma + s/2)
}
return(SigmaINV)
}
update_SigmaINV_faster_q0_sameSigma <- function(z, mu, K, alpha_sigma, beta_sigma, x_data){
p <- dim(x_data)[2]
n <- dim(x_data)[1]
SigmaINV <- array(data = 0, dim = c(p,p))
alive <- as.numeric(names(table(z)))
s <- numeric(p)
for (k in alive){
ind <- which(z == k)
n_k <- length(ind)
tmp <- matrix(mu[k, ], nrow = n_k, ncol = p, byrow = TRUE)
s <- s + colSums((x_data[ind, ] - tmp)^2)
}
diag(SigmaINV) <- rgamma(p,shape = alpha_sigma + n/2, rate = beta_sigma + s/2)
return(SigmaINV)
}
overfitting_q0 <- function(x_data, originalX, outputDirectory, Kmax, m, thinning, burn, g, h, alpha_prior, alpha_sigma, beta_sigma, start_values, q = 0, zStart, gibbs_z, lowerTriangular=TRUE){
if(missing(originalX)){originalX <- x_data}
if(missing(gibbs_z)){gibbs_z = 0.05}
if(missing(zStart)){zStart = FALSE}
if(missing(x_data)){stop('x_data not provided.')}
p <- dim(x_data)[2]
ledermannBound <- 0
if (q > 0){ stop(paste0('q should be equal to ', ledermannBound)) }
n <- dim(x_data)[1]
v_r <- rep(q, p) #indicates the non-zero values of Lambdas
if(lowerTriangular){
for( r in 1:p ){
v_r[r] <- min(r,q)
}
}
if(missing(Kmax)){Kmax <- 20}
if(missing(m)){m <- 21000}
if(missing(burn)){burn <- 1000}
if(missing(thinning)){thinning <- 10}
if(missing(g)){g <- 2}
if(missing(h)){h <- 1}
if(missing(alpha_prior)){alpha_prior <- 1*rep(1/Kmax,Kmax)}
if(missing(alpha_sigma)){alpha_sigma <- 2}
if(missing(beta_sigma)){beta_sigma <- 1}
if(missing(start_values)){start_values <- FALSE}
if( start_values == F ){
dir.create(outputDirectory)
}
setwd(outputDirectory)
K <- Kmax
# prior parameters
T_INV <- array(data = 0, dim = c(p,p))
diag(T_INV) <- diag(var(x_data))
diag(T_INV) <- 1/diag(T_INV)
ksi <- colSums(x_data)/n
priorConst1 <- T_INV %*% ksi
sigma_y2 <- 1/1
SigmaINV.values <- array(data = 0, dim = c(K,p,p))
mu.values <- array(data = 0, dim = c(K,p))
z <- numeric(n)
w.values <- numeric(K)
# initial values
iter <- 1
if(start_values == FALSE){
for(k in 1:K){
diag(SigmaINV.values[k,,]) <- rgamma(n = p, shape = alpha_sigma, rate = beta_sigma) ## parameterization with mean = g/h and var = g/(h^2)
mu.values[k,] <- rnorm(p,mean = ksi, sd = sqrt( 1/diag(T_INV) ))
}
w.values <- myDirichlet(alpha_prior[1:K])
z <- sample(K,n,replace = TRUE, prob = w.values)
if( outputDirectory == 'alpha_1'){
if(is.numeric(zStart)){
z <- zStart
cluster_size <- numeric(K)
for(k in 1:K){ index <- which(z == k); cluster_size[k] <- length(index)}
w.values <- myDirichlet(alpha_prior[1:K] + cluster_size)
}
}
}else{
# cat(paste0('reading starting values... '))
tmp1 <- read.table("muValues.txt")
tmp2 <- as.matrix(read.table("sigmainvValues.txt"))
for(k in 1:K){
mu.values[k, ] <- as.matrix(tmp1[ , k + Kmax*((1:p)-1)])
diag(SigmaINV.values[k, , ]) <- as.matrix(tmp2[,((k-1)*p + 1):(k*p)])
}
w.values <- as.numeric(read.table("wValues.txt"))
z <- as.numeric(read.table("zValues.txt"))
}
###############################################
trueVar <- array(data = 0, dim = c(K,p,p))
trueVar.values <- array(data = 0, dim = c(K,p,p))
mhAR <- mhAR1 <- 0
mhDeltaAR <- 0
mIter <- 0
# MCMC sampler
cluster_size <- numeric(K)
zOld <- z
kValues <- numeric(m)
kValues[iter] <- length(table(z))
zConnection <- file("zValues.txt",open = "w")
yConnection <- file("yValues.txt",open = "w")
sigmainvConnection <- file("sigmainvValues.txt",open = "w")
omegainvConnection <- file("omegainvValues.txt",open = "w")
muConnection <- file("muValues.txt",open = "w")
wConnection <- file("wValues.txt",open = "w")
logLConnection <- file("k.and.logl.Values.txt",open = "w")
LambdaConnection <- vector('list',length = K)
current_matrix <- vector("list", length = 4)
names(current_matrix) <- c("A","B","G","D")
kavatza <- 0
for (iter in 2:m){
# 2
suf_stat <- compute_sufficient_statistics_q0(z = z, K = K, x_data = x_data)
f2 <- compute_A_B_G_D_and_simulate_mu_Lambda_q0(SigmaINV = SigmaINV.values,
suff_statistics = suf_stat,
K = K, priorConst1 = priorConst1, T_INV = T_INV, v_r = v_r)
mu.values <- f2$mu
# cat(paste0("HERE"), "\n")
# 3
f2 <- update_z_q0(w = w.values, mu = array(mu.values,dim = c(K,p)), SigmaINV = SigmaINV.values, K = K, x_data = x_data)
z <- f2$z
kValues[iter] <- length(table(z))
cluster_size <- numeric(K)
for(k in 1:K){ index <- which(z == k); cluster_size[k] <- length(index)}
w.values <- myDirichlet(alpha_prior[1:K] + cluster_size)
# 5
SigmaINV.values <- update_SigmaINV_faster_q0(x_data = x_data, z = z,
mu = array(mu.values,dim = c(K,p)), K = K, alpha_sigma = alpha_sigma, beta_sigma = beta_sigma)
if(iter %% thinning == 0){
if(iter > burn){
logLValues <- c(kValues[iter], complete.log.likelihood_q0(x_data = x_data, w = w.values, mu = mu.values, SigmaINV = SigmaINV.values, z = z))
cat(logLValues, file = logLConnection, '\n', append = TRUE)
cat(z, file = zConnection, '\n', append = TRUE)
cat(w.values, file = wConnection, '\n', append = TRUE)
cat(mu.values, file = muConnection, '\n', append = TRUE)
for(k in 1:K){
cat(diag(SigmaINV.values[k,,]), " ", file = sigmainvConnection, append = TRUE)
}
cat('\n', file = sigmainvConnection, append = TRUE)
}
}
}
close(zConnection)
close(yConnection)
close(wConnection)
close(muConnection)
close(sigmainvConnection)
close(omegainvConnection)
close(logLConnection)
# for(k in 1:K){
# close(regulonExpressionConnection[[k]])
# close(LambdaConnection[[k]])
# }
setwd("../")
}
overfitting_q0_sameSigma <- function(x_data, originalX, outputDirectory, Kmax, m, thinning, burn, g, h, alpha_prior, alpha_sigma, beta_sigma, start_values, q = 0, zStart, gibbs_z, lowerTriangular=TRUE){
if(missing(originalX)){originalX <- x_data}
if(missing(gibbs_z)){gibbs_z = 0.05}
if(missing(zStart)){zStart = FALSE}
if(missing(x_data)){stop('x_data not provided.')}
p <- dim(x_data)[2]
ledermannBound <- 0
if (q > 0){ stop(paste0('q should be equal to ', ledermannBound)) }
n <- dim(x_data)[1]
v_r <- rep(q, p) #indicates the non-zero values of Lambdas
if(lowerTriangular){
for( r in 1:p ){
v_r[r] <- min(r,q)
}
}
if(missing(Kmax)){Kmax <- 20}
if(missing(m)){m <- 21000}
if(missing(burn)){burn <- 1000}
if(missing(thinning)){thinning <- 10}
if(missing(g)){g <- 0.5}
if(missing(h)){h <- 0.5}
if(missing(alpha_prior)){alpha_prior <- 1*rep(1/Kmax,Kmax)}
if(missing(alpha_sigma)){alpha_sigma <- 0.5}
if(missing(beta_sigma)){beta_sigma <- 0.5}
if(missing(start_values)){start_values <- FALSE}
if( start_values == F ){
dir.create(outputDirectory)
}
setwd(outputDirectory)
K <- Kmax
# prior parameters
T_INV <- array(data = 0, dim = c(p,p))
diag(T_INV) <- diag(var(x_data))
diag(T_INV) <- 1/diag(T_INV)
ksi <- colSums(x_data)/n
priorConst1 <- T_INV %*% ksi
sigma_y2 <- 1/1
SigmaINV.values <- array(data = 0, dim = c(p,p))
mu.values <- array(data = 0, dim = c(K,p))
z <- numeric(n)
w.values <- numeric(K)
# initial values
iter <- 1
if(start_values == FALSE){
diag(SigmaINV.values) <- rgamma(n = p, shape = alpha_sigma, rate = beta_sigma)
for(k in 1:K){
mu.values[k,] <- rnorm(p,mean = ksi, sd = sqrt( 1/diag(T_INV) ))
}
w.values <- myDirichlet(alpha_prior[1:K])
z <- sample(K,n,replace = TRUE, prob = w.values)
if( outputDirectory == 'alpha_1'){
if(is.numeric(zStart)){
z <- zStart
cluster_size <- numeric(K)
for(k in 1:K){ index <- which(z == k); cluster_size[k] <- length(index)}
w.values <- myDirichlet(alpha_prior[1:K] + cluster_size)
}
}
}else{
# cat(paste0('reading starting values... '))
tmp1 <- read.table("muValues.txt")
diag(SigmaINV.values) <- as.numeric(read.table("sigmainvValues.txt"))
for(k in 1:K){
mu.values[k, ] <- as.matrix(tmp1[ , k + Kmax*((1:p)-1)])
}
w.values <- as.numeric(read.table("wValues.txt"))
z <- as.numeric(read.table("zValues.txt"))
}
###############################################
trueVar <- array(data = 0, dim = c(K,p,p))
trueVar.values <- array(data = 0, dim = c(K,p,p))
mhAR <- mhAR1 <- 0
mhDeltaAR <- 0
mIter <- 0
# MCMC sampler
cluster_size <- numeric(K)
zOld <- z
kValues <- numeric(m)
kValues[iter] <- length(table(z))
zConnection <- file("zValues.txt",open = "w")
yConnection <- file("yValues.txt",open = "w")
sigmainvConnection <- file("sigmainvValues.txt",open = "w")
omegainvConnection <- file("omegainvValues.txt",open = "w")
muConnection <- file("muValues.txt",open = "w")
wConnection <- file("wValues.txt",open = "w")
logLConnection <- file("k.and.logl.Values.txt",open = "w")
LambdaConnection <- vector('list',length = K)
current_matrix <- vector("list", length = 4)
names(current_matrix) <- c("A","B","G","D")
kavatza <- 0
for (iter in 2:m){
# 2
suf_stat <- compute_sufficient_statistics_q0(z = z, K = K, x_data = x_data)
f2 <- compute_A_B_G_D_and_simulate_mu_Lambda_q0_sameSigma(SigmaINV = SigmaINV.values,
suff_statistics = suf_stat,
K = K, priorConst1 = priorConst1, T_INV = T_INV, v_r = v_r)
mu.values <- f2$mu
# cat(paste0("here2"),"\n")
# 3
f2 <- update_z_q0_sameSigma(w = w.values, mu = array(mu.values,dim = c(K,p)), SigmaINV = SigmaINV.values, K = K, x_data = x_data)
z <- f2$z
kValues[iter] <- length(table(z))
cluster_size <- numeric(K)
for(k in 1:K){ index <- which(z == k); cluster_size[k] <- length(index)}
w.values <- myDirichlet(alpha_prior[1:K] + cluster_size)
# cat(paste0("here3"),"\n")
# 5
SigmaINV.values <- update_SigmaINV_faster_q0_sameSigma(x_data = x_data, z = z,
mu = array(mu.values,dim = c(K,p)), K = K, alpha_sigma = alpha_sigma, beta_sigma = beta_sigma)
# cat(paste0("here4"),"\n")
if(iter %% thinning == 0){
if(iter > burn){
logLValues <- c(kValues[iter], complete.log.likelihood_q0_sameSigma(x_data = x_data, w = w.values, mu = mu.values,
SigmaINV = SigmaINV.values, z = z))
cat(logLValues, file = logLConnection, '\n', append = TRUE)
cat(z, file = zConnection, '\n', append = TRUE)
cat(w.values, file = wConnection, '\n', append = TRUE)
cat(mu.values, file = muConnection, '\n', append = TRUE)
cat(diag(SigmaINV.values), file = sigmainvConnection, '\n', append = TRUE)
}
}
}
close(zConnection)
close(yConnection)
close(wConnection)
close(muConnection)
close(sigmainvConnection)
close(omegainvConnection)
close(logLConnection)
setwd("../")
}
#-------------------------------------------------------------------------------------------
# end of q0 functions
#-------------------------------------------------------------------------------------------
################################################################################################################
################################################################################################################
overfittingMFA <- function(x_data, originalX, outputDirectory, Kmax, m, thinning, burn, g, h, alpha_prior, alpha_sigma, beta_sigma, start_values, q, zStart, gibbs_z, lowerTriangular = TRUE){
if(missing(originalX)){originalX <- x_data}
if(missing(gibbs_z)){gibbs_z = 0.05}
if(missing(zStart)){zStart = FALSE}
if(missing(x_data)){stop('x_data not provided.')}
if(missing(q)){stop('q not provided.')}
p <- dim(x_data)[2]
ledermannBound <- ( 2*p + 1 - sqrt(8*p + 1) ) / 2
if (q > ledermannBound){ stop(paste0('q should not exceed the Ledermann bound: ', ledermannBound)) }
n <- dim(x_data)[1]
v_r <- rep(q, p) #indicates the non-zero values of Lambdas
if(lowerTriangular){
for( r in 1:p ){
v_r[r] <- min(r,q)
}
}
if(missing(Kmax)){Kmax <- 20}
if(missing(m)){m <- 21000}
if(missing(burn)){burn <- 1000}
if(missing(thinning)){thinning <- 10}
if(missing(g)){g <- 0.5}
if(missing(h)){h <- 0.5}
if(missing(alpha_prior)){alpha_prior <- 1*rep(1/Kmax,Kmax)}
if(missing(alpha_sigma)){alpha_sigma <- 0.5}
if(missing(beta_sigma)){beta_sigma <- 0.5}
if(missing(start_values)){start_values <- FALSE}
if(q == 0){
# redirecting everything to the corresponding function
overfitting_q0_sameSigma(x_data = x_data, originalX = originalX,
outputDirectory = outputDirectory, Kmax = Kmax,
m = m, thinning = thinning, burn = burn, g = g,
h = h, alpha_prior = alpha_prior, alpha_sigma = alpha_sigma,
beta_sigma = beta_sigma, start_values = start_values,
q = 0, zStart = zStart, gibbs_z = gibbs_z, lowerTriangular = lowerTriangular)
return(doNothing <- 0) # exit
}
if( start_values == F ){
dir.create(outputDirectory)
}
setwd(outputDirectory)
# cat(paste0("a = ", alpha_prior[1], ", p = ", p, ", q = ", q, ", n = ",n,", g = ", g, ", h = ", h, ", alpha_sigma = ", alpha_sigma, ", beta_sigma = ", beta_sigma,"\n"))
K <- Kmax
# prior parameters
T_INV <- array(data = 0, dim = c(p,p))
# diag(T_INV) <- (apply(x_data,2,max) - apply(x_data,2,min))^2
diag(T_INV) <- diag(var(x_data))
# diag(T_INV) <- rep(1,p)
# diag(T_INV) <- rep(100,p)
diag(T_INV) <- 1/diag(T_INV)
ksi <- colSums(x_data)/n
priorConst1 <- T_INV %*% ksi
sigma_y2 <- 1/1
# print("here")
#############################################
#
OmegaINV.constant <- array(data = 0, dim = c(q,q));
diag(OmegaINV.constant) <- rep(g/h,q)
# diag(OmegaINV.constant) <- rep(1000,q)
OmegaINV.constantINITIAL <- OmegaINV.constant
SigmaINV.values <- array(data = 0, dim = c(p,p))
Lambda.values <- array(data = 0, dim = c(K,p,q))
mu.values <- array(data = 0, dim = c(K,p))
z <- numeric(n)
y <- array(data = 0, dim = c(n,q))
w.values <- numeric(K)
#############################################
# initial values
iter <- 1
if(start_values == FALSE){
omega <- OmegaINV.constant
diag(omega) <- 1/(diag(omega))
diag(SigmaINV.values) <- rgamma(n = p, shape = alpha_sigma, rate = beta_sigma) ## parameterization with mean = g/h and var = g/(h^2)
#diag(SigmaINV.values) <- rgamma(n = p, shape = 1000, rate = 1)
for(k in 1:K){
mu.values[k,] <- rnorm(p,mean = ksi, sd = sqrt( 1/diag(T_INV) ))
for(r in 1:p){
Lambda.values[k,r,1:v_r[r]] <- mvrnorm(n = 1, mu = rep(0,v_r[r]), Sigma = omega[1:v_r[r],1:v_r[r]])
}
}
for(i in 1:n){
y[i,] <- rnorm(q,mean = 0,sd = 1)
}
w.values <- myDirichlet(alpha_prior[1:K])
z <- sample(K,n,replace = TRUE, prob = w.values)
if( outputDirectory == 'alpha_1'){
if(is.numeric(zStart)){
z <- zStart
cluster_size <- numeric(K)
for(k in 1:K){ index <- which(z == k); cluster_size[k] <- length(index)}
w.values <- myDirichlet(alpha_prior[1:K] + cluster_size)
}
}
}else{
# cat(paste0('reading starting values... '))
diag(OmegaINV.constant) <- as.numeric(read.table('omegainvValues.txt', colClasses = rep('numeric',q))[1,])
omega <- OmegaINV.constant
diag(omega) <- 1/(diag(omega))
tmp1 <- read.table("muValues.txt", colClasses = rep('numeric',Kmax*p))
diag(SigmaINV.values) <- as.numeric(read.table("sigmainvValues.txt"))
for(k in 1:K){
mu.values[k, ] <- as.matrix(tmp1[ , k + Kmax*((1:p)-1)])
# Lambda.values[k, , ] <- matrix(as.matrix(read.table(paste0("LambdaValues",k,".txt"))),nrow = p, ncol = q, byrow=TRUE)
}
Lambda.values <- readLambdaValues(myFile = "LambdaValues.txt", K = K, p = p, q = q)
y <- matrix(as.matrix(read.table('yValues.txt', colClasses = rep('numeric',n*q))), nrow = n , ncol = q)
w.values <- as.numeric(read.table("wValues.txt", colClasses = rep('numeric',Kmax)))
z <- as.numeric(read.table("zValues.txt", colClasses = rep('numeric', n)))
# cat(paste0('done.'),'\n')
}
###############################################
yd <- array(data = 0, dim = c(n,q))
trueVar <- array(data = 0, dim = c(K,p,p))
trueVar.values <- array(data = 0, dim = c(K,p,p))
mhAR <- mhAR1 <- 0
mhDeltaAR <- 0
mIter <- 0
# MCMC sampler
cluster_size <- numeric(K)
zOld <- z
kValues <- numeric(m)
kValues[iter] <- length(table(z))
zConnection <- file("zValues.txt",open = "w")
yConnection <- file("yValues.txt",open = "w")
sigmainvConnection <- file("sigmainvValues.txt",open = "w")
omegainvConnection <- file("omegainvValues.txt",open = "w")
muConnection <- file("muValues.txt",open = "w")
wConnection <- file("wValues.txt",open = "w")
logLConnection <- file("k.and.logl.Values.txt",open = "w")
LambdaConnection <- file("LambdaValues.txt", open = "w")
current_matrix <- vector("list", length = 4)
names(current_matrix) <- c("A","B","G","D")
kavatza <- 0
# u_v <- runif(1)
for (iter in 2:m){
# 1
OmegaINV.constant <- update_OmegaINV(Lambda = array(Lambda.values,dim = c(K,p,q)), K = K, g = g, h = h)
# 2
suf_stat <- compute_sufficient_statistics(y = y, z = z, K = K, x_data = x_data)
f2 <- compute_A_B_G_D_and_simulate_mu_Lambda(SigmaINV = SigmaINV.values,
suff_statistics = suf_stat, OmegaINV = OmegaINV.constant,
K = K, priorConst1 = priorConst1, T_INV = T_INV, v_r = v_r)
mu.values <- f2$mu
Lambda.values <- f2$Lambdas
# 3
u_v <- runif(1)
if(u_v < gibbs_z){
f2 <- update_z2(w = w.values, mu = array(mu.values,dim = c(K,p)), Lambda = array(Lambda.values,dim = c(K,p,q)), SigmaINV = SigmaINV.values, K = K, x_data = x_data)
}else{
f2 <- update_z_b(w = w.values, mu = array(mu.values,dim = c(K,p)), Lambda = array(Lambda.values,dim = c(K,p,q)), y = y,
SigmaINV = SigmaINV.values, K = K, x_data = x_data)
}
z <- f2$z
kValues[iter] <- length(table(z))
cluster_size <- numeric(K)
for(k in 1:K){ index <- which(z == k); cluster_size[k] <- length(index)}
w.values <- myDirichlet(alpha_prior[1:K] + cluster_size)
# 4
y <- array(update_all_y(x_data = x_data, mu = mu.values, SigmaINV = SigmaINV.values, Lambda = array(Lambda.values,dim = c(K,p,q)), z = z)$y, dim = c(n, q))
# 5
SigmaINV.values <- update_SigmaINV_faster(x_data = x_data, z = z, y = y, Lambda = array(Lambda.values,dim = c(K,p,q)),
mu = array(mu.values,dim = c(K,p)), K = K, alpha_sigma = alpha_sigma, beta_sigma = beta_sigma)
if(iter %% thinning == 0){
if(iter > burn){
logLValues <- c(kValues[iter], complete.log.likelihood(x_data = x_data, w = w.values, mu = mu.values, Lambda = Lambda.values, SigmaINV = SigmaINV.values, z = z))
cat(logLValues, file = logLConnection, '\n', append = TRUE)
cat(z, file = zConnection, '\n', append = TRUE)
cat(y, file = yConnection, '\n', append = TRUE)
cat(w.values, file = wConnection, '\n', append = TRUE)
cat(mu.values, file = muConnection, '\n', append = TRUE)
cat(diag(OmegaINV.constant), file = omegainvConnection, '\n', append = TRUE)
cat(diag(SigmaINV.values), file = sigmainvConnection, '\n', append = TRUE)
for(k in 1:K){
for(r in 1:p){
cat(Lambda.values[k, r, ], " ", file = LambdaConnection, append = TRUE)
}
}
cat('\n', file = LambdaConnection, append = TRUE)
}
}
}
close(zConnection)
close(yConnection)
close(wConnection)
close(muConnection)
close(sigmainvConnection)
close(omegainvConnection)
close(logLConnection)
close(LambdaConnection)
setwd("../")
}
overfittingMFA_Sj <- function(x_data, originalX, outputDirectory, Kmax, m, thinning, burn, g, h, alpha_prior, alpha_sigma, beta_sigma, start_values, q, zStart, gibbs_z, lowerTriangular=TRUE){
if(missing(originalX)){originalX <- x_data}
if(missing(gibbs_z)){gibbs_z = 0.05}
if(missing(zStart)){zStart = FALSE}
if(missing(x_data)){stop('x_data not provided.')}
if(missing(q)){stop('q not provided.')}
p <- dim(x_data)[2]
ledermannBound <- ( 2*p + 1 - sqrt(8*p + 1) ) / 2
if (q > ledermannBound){ stop(paste0('q should not exceed the Ledermann bound: ', ledermannBound)) }
n <- dim(x_data)[1]
v_r <- rep(q, p) #indicates the non-zero values of Lambdas
if(lowerTriangular){
for( r in 1:p ){
v_r[r] <- min(r,q)
}
}
if(missing(Kmax)){Kmax <- 20}
if(missing(m)){m <- 21000}
if(missing(burn)){burn <- 1000}
if(missing(thinning)){thinning <- 10}
if(missing(g)){g <- 2}
if(missing(h)){h <- 1}
if(missing(alpha_prior)){alpha_prior <- 1*rep(1/Kmax,Kmax)}
if(missing(alpha_sigma)){alpha_sigma <- 2}
if(missing(beta_sigma)){beta_sigma <- 1}
if(missing(start_values)){start_values <- FALSE}
if(q == 0){
# redirecting everything to the corresponding function
overfitting_q0(x_data = x_data, originalX = originalX,
outputDirectory = outputDirectory, Kmax = Kmax,
m = m, thinning = thinning, burn = burn, g = g,
h = h, alpha_prior = alpha_prior, alpha_sigma = alpha_sigma,
beta_sigma = beta_sigma, start_values = start_values,
q = 0, zStart = zStart, gibbs_z = gibbs_z, lowerTriangular = lowerTriangular)
return(doNothing <- 0) # exit
}
if( start_values == F ){
dir.create(outputDirectory)
}
setwd(outputDirectory)
# cat(paste0("a = ", alpha_prior[1], ", p = ", p, ", q = ", q, ", n = ",n,", g = ", g, ", h = ", h, ", alpha_sigma = ", alpha_sigma, ", beta_sigma = ", beta_sigma,"\n"))
K <- Kmax
# prior parameters
T_INV <- array(data = 0, dim = c(p,p))
# diag(T_INV) <- (apply(x_data,2,max) - apply(x_data,2,min))^2
diag(T_INV) <- diag(var(x_data))
# diag(T_INV) <- rep(1,p)
# diag(T_INV) <- rep(100,p)
diag(T_INV) <- 1/diag(T_INV)
ksi <- colSums(x_data)/n
priorConst1 <- T_INV %*% ksi
sigma_y2 <- 1/1
# print("here")
#############################################
#
OmegaINV.constant <- array(data = 0, dim = c(q,q));
diag(OmegaINV.constant) <- rep(g/h,q)
# diag(OmegaINV.constant) <- rep(1000,q)
OmegaINV.constantINITIAL <- OmegaINV.constant
SigmaINV.values <- array(data = 0, dim = c(K,p,p))
Lambda.values <- array(data = 0, dim = c(K,p,q))
mu.values <- array(data = 0, dim = c(K,p))
z <- numeric(n)
y <- array(data = 0, dim = c(n,q))
w.values <- numeric(K)
#############################################
# initial values
iter <- 1
if(start_values == FALSE){
omega <- OmegaINV.constant
diag(omega) <- 1/(diag(omega))
#diag(SigmaINV.values) <- rgamma(n = p, shape = 1000, rate = 1)
for(k in 1:K){
mu.values[k,] <- rnorm(p,mean = ksi, sd = sqrt( 1/diag(T_INV) ))
for(r in 1:p){
Lambda.values[k,r,1:v_r[r]] <- mvrnorm(n = 1, mu = rep(0,v_r[r]), Sigma = omega[1:v_r[r],1:v_r[r]])
}
diag(SigmaINV.values[k,,]) <- rgamma(n = p, shape = alpha_sigma, rate = beta_sigma) ## parameterization with mean = g/h and var = g/(h^2)
}
for(i in 1:n){
y[i,] <- rnorm(q,mean = 0,sd = 1)
}
w.values <- myDirichlet(alpha_prior[1:K])
z <- sample(K,n,replace = TRUE, prob = w.values)
if( outputDirectory == 'alpha_1'){
if(is.numeric(zStart)){
z <- zStart
cluster_size <- numeric(K)
for(k in 1:K){ index <- which(z == k); cluster_size[k] <- length(index)}
w.values <- myDirichlet(alpha_prior[1:K] + cluster_size)
}
}
}else{
# cat(paste0('reading starting values... '))
diag(OmegaINV.constant) <- as.numeric(read.table('omegainvValues.txt', colClasses = rep('numeric',q))[1,])
omega <- OmegaINV.constant
diag(omega) <- 1/(diag(omega))
tmp1 <- read.table("muValues.txt", colClasses = rep('numeric',Kmax*p))
tmp2 <- as.matrix(read.table("sigmainvValues.txt"))
for(k in 1:K){
mu.values[k, ] <- as.matrix(tmp1[ , k + Kmax*((1:p)-1)])
diag(SigmaINV.values[k, , ]) <- as.matrix(tmp2[,((k-1)*p + 1):(k*p)])
}
Lambda.values <- readLambdaValues(myFile = "LambdaValues.txt", K = K, p = p, q = q)
y <- matrix(as.matrix(read.table('yValues.txt', colClasses = rep('numeric',n*q))), nrow = n , ncol = q)
w.values <- as.numeric(read.table("wValues.txt", colClasses = rep('numeric',Kmax)))
z <- as.numeric(read.table("zValues.txt", colClasses = rep('numeric', n)))
# cat(paste0('done.'),'\n')
}
###############################################
yd <- array(data = 0, dim = c(n,q))
trueVar <- array(data = 0, dim = c(K,p,p))
trueVar.values <- array(data = 0, dim = c(K,p,p))
mhAR <- mhAR1 <- 0
mhDeltaAR <- 0
mIter <- 0
# MCMC sampler
cluster_size <- numeric(K)
zOld <- z
kValues <- numeric(m)
kValues[iter] <- length(table(z))
zConnection <- file("zValues.txt",open = "w")
yConnection <- file("yValues.txt",open = "w")
sigmainvConnection <- file("sigmainvValues.txt",open = "w")
omegainvConnection <- file("omegainvValues.txt",open = "w")
muConnection <- file("muValues.txt",open = "w")
wConnection <- file("wValues.txt",open = "w")
logLConnection <- file("k.and.logl.Values.txt",open = "w")
LambdaConnection <- file("LambdaValues.txt", open = "w")
current_matrix <- vector("list", length = 4)
names(current_matrix) <- c("A","B","G","D")
kavatza <- 0
# u_v <- runif(1)
for (iter in 2:m){
# 1
OmegaINV.constant <- update_OmegaINV(Lambda = array(Lambda.values,dim = c(K,p,q)), K = K, g = g, h = h)
# 2
suf_stat <- compute_sufficient_statistics(y = y, z = z, K = K, x_data = x_data)
f2 <- compute_A_B_G_D_and_simulate_mu_Lambda_Sj(SigmaINV = array(SigmaINV.values,dim = c(K,p,p)),
suff_statistics = suf_stat, OmegaINV = OmegaINV.constant,
K = K, priorConst1 = priorConst1, T_INV = T_INV, v_r = v_r)
mu.values <- f2$mu
Lambda.values <- f2$Lambdas
# 3
u_v <- runif(1)
if(u_v < gibbs_z){
f2 <- update_z2_Sj(w = w.values, mu = array(mu.values,dim = c(K,p)), Lambda = array(Lambda.values,dim = c(K,p,q)), SigmaINV = SigmaINV.values, K = K, x_data = x_data)
}else{
f2 <- update_z_b_Sj(w = w.values, mu = array(mu.values,dim = c(K,p)), Lambda = array(Lambda.values,dim = c(K,p,q)), y = y,
SigmaINV = array(SigmaINV.values,dim = c(K,p,p)), K = K, x_data = x_data)
}
z <- f2$z
kValues[iter] <- length(table(z))
cluster_size <- numeric(K)
for(k in 1:K){ index <- which(z == k); cluster_size[k] <- length(index)}
w.values <- myDirichlet(alpha_prior[1:K] + cluster_size)
# 4
y <- array(update_all_y_Sj(x_data = x_data, mu = mu.values, SigmaINV = SigmaINV.values, Lambda = array(Lambda.values,dim = c(K,p,q)), z = z)$y, dim = c(n, q))
# 5
SigmaINV.values <- update_SigmaINV_faster_Sj(x_data = x_data, z = z, y = y, Lambda = array(Lambda.values,dim = c(K,p,q)),
mu = array(mu.values,dim = c(K,p)), K = K, alpha_sigma = alpha_sigma, beta_sigma = beta_sigma)
if(iter %% thinning == 0){
if(iter > burn){
logLValues <- c(kValues[iter], complete.log.likelihood_Sj(x_data = x_data, w = w.values, mu = mu.values, Lambda = Lambda.values, SigmaINV = SigmaINV.values, z = z))
cat(logLValues, file = logLConnection, '\n', append = TRUE)
cat(z, file = zConnection, '\n', append = TRUE)
cat(y, file = yConnection, '\n', append = TRUE)
cat(w.values, file = wConnection, '\n', append = TRUE)
cat(mu.values, file = muConnection, '\n', append = TRUE)
cat(diag(OmegaINV.constant), file = omegainvConnection, '\n', append = TRUE)
for(k in 1:K){
for(r in 1:p){
cat(Lambda.values[k, r, ], " ", file = LambdaConnection, append = TRUE)
}
cat(diag(SigmaINV.values[k,,]), " ", file = sigmainvConnection, append = TRUE)
}
cat('\n', file = LambdaConnection, append = TRUE)
cat('\n', file = sigmainvConnection, append = TRUE)
}
}
}
close(zConnection)
close(yConnection)
close(wConnection)
close(muConnection)
close(sigmainvConnection)
close(omegainvConnection)
close(logLConnection)
close(LambdaConnection)
setwd("../")
}
#new in version 3 (kapote na ftiakseis ta lambda, edw den xreiazontai ola ta connections.)
overfittingMFA_CCU <- function(x_data, originalX, outputDirectory, Kmax, m, thinning, burn, g, h, alpha_prior, alpha_sigma, beta_sigma, start_values, q, zStart, gibbs_z, lowerTriangular = TRUE){
if(missing(originalX)){originalX <- x_data}
if(missing(gibbs_z)){gibbs_z = 0.05}
if(missing(zStart)){zStart = FALSE}
if(missing(x_data)){stop('x_data not provided.')}
if(missing(q)){stop('q not provided.')}
p <- dim(x_data)[2]
ledermannBound <- ( 2*p + 1 - sqrt(8*p + 1) ) / 2
if (q > ledermannBound){ stop(paste0('q should not exceed the Ledermann bound: ', ledermannBound)) }
n <- dim(x_data)[1]
v_r <- rep(q, p) #indicates the non-zero values of Lambdas
if(lowerTriangular){
for( r in 1:p ){
v_r[r] <- min(r,q)
}
}
if(missing(Kmax)){Kmax <- 20}
if(missing(m)){m <- 21000}
if(missing(burn)){burn <- 1000}
if(missing(thinning)){thinning <- 10}
if(missing(g)){g <- 0.5}
if(missing(h)){h <- 0.5}
if(missing(alpha_prior)){alpha_prior <- 1*rep(1/Kmax,Kmax)}
if(missing(alpha_sigma)){alpha_sigma <- 0.5}
if(missing(beta_sigma)){beta_sigma <- 0.5}
if(missing(start_values)){start_values <- FALSE}
if(q == 0){
# redirecting everything to the corresponding function
cat(paste('q = 0 is not currently supported for CCU model.'),'\n')
#overfitting_q0_sameSigma(x_data = x_data, originalX = originalX,
# outputDirectory = outputDirectory, Kmax = Kmax,
# m = m, thinning = thinning, burn = burn, g = g,
# h = h, alpha_prior = alpha_prior, alpha_sigma = alpha_sigma,
# beta_sigma = beta_sigma, start_values = start_values,
# q = 0, zStart = zStart, gibbs_z = gibbs_z)
return(doNothing <- 0) # exit
}
if( start_values == F ){
dir.create(outputDirectory)
}
setwd(outputDirectory)
# cat(paste0("a = ", alpha_prior[1], ", p = ", p, ", q = ", q, ", n = ",n,", g = ", g, ", h = ", h, ", alpha_sigma = ", alpha_sigma, ", beta_sigma = ", beta_sigma,"\n"))
K <- Kmax
# prior parameters
T_INV <- array(data = 0, dim = c(p,p))
# diag(T_INV) <- (apply(x_data,2,max) - apply(x_data,2,min))^2
diag(T_INV) <- diag(var(x_data))
# diag(T_INV) <- rep(1,p)
# diag(T_INV) <- rep(100,p)
diag(T_INV) <- 1/diag(T_INV)
ksi <- colSums(x_data)/n
priorConst1 <- T_INV %*% ksi
sigma_y2 <- 1/1
# print("here")
#############################################
#
OmegaINV.constant <- array(data = 0, dim = c(q,q));
diag(OmegaINV.constant) <- rep(g/h,q)
# diag(OmegaINV.constant) <- rep(1000,q)
OmegaINV.constantINITIAL <- OmegaINV.constant
SigmaINV.values <- array(data = 0, dim = c(p,p))
Lambda.values <- array(data = 0, dim = c(K,p,q))
mu.values <- array(data = 0, dim = c(K,p))
z <- numeric(n)
y <- array(data = 0, dim = c(n,q))
w.values <- numeric(K)
#############################################
# initial values
iter <- 1
if(start_values == FALSE){
omega <- OmegaINV.constant
diag(omega) <- 1/(diag(omega))
diag(SigmaINV.values) <- rgamma(n = p, shape = alpha_sigma, rate = beta_sigma) ## parameterization with mean = g/h and var = g/(h^2)
#diag(SigmaINV.values) <- rgamma(n = p, shape = 1000, rate = 1)
for(r in 1:p){
Lambda.values[1,r,1:v_r[r]] <- mvrnorm(n = 1, mu = rep(0,v_r[r]), Sigma = omega[1:v_r[r],1:v_r[r]])
}
for(k in 1:K){
mu.values[k,] <- rnorm(p,mean = ksi, sd = sqrt( 1/diag(T_INV) ))
for(r in 1:p){
Lambda.values[k,r,1:v_r[r]] <- Lambda.values[1,r,1:v_r[r]]
}
}
for(i in 1:n){
y[i,] <- rnorm(q,mean = 0,sd = 1)
}
w.values <- myDirichlet(alpha_prior[1:K])
z <- sample(K,n,replace = TRUE, prob = w.values)
if( outputDirectory == 'alpha_1'){
if(is.numeric(zStart)){
z <- zStart
cluster_size <- numeric(K)
for(k in 1:K){ index <- which(z == k); cluster_size[k] <- length(index)}
w.values <- myDirichlet(alpha_prior[1:K] + cluster_size)
}
}
}else{
# cat(paste0('reading starting values... '))
diag(OmegaINV.constant) <- as.numeric(read.table('omegainvValues.txt', colClasses = rep('numeric',q))[1,])
omega <- OmegaINV.constant
diag(omega) <- 1/(diag(omega))
tmp1 <- read.table("muValues.txt", colClasses = rep('numeric',Kmax*p))
diag(SigmaINV.values) <- as.numeric(read.table("sigmainvValues.txt"))
for(k in 1:K){
mu.values[k, ] <- as.matrix(tmp1[ , k + Kmax*((1:p)-1)])
}
Lambda.values <- readLambdaValues(myFile = "LambdaValues.txt", K = K, p = p, q = q)
y <- matrix(as.matrix(read.table('yValues.txt', colClasses = rep('numeric',n*q))), nrow = n , ncol = q)
w.values <- as.numeric(read.table("wValues.txt", colClasses = rep('numeric',Kmax)))
z <- as.numeric(read.table("zValues.txt", colClasses = rep('numeric', n)))
# cat(paste0('done.'),'\n')
}
###############################################
yd <- array(data = 0, dim = c(n,q))
trueVar <- array(data = 0, dim = c(K,p,p))
trueVar.values <- array(data = 0, dim = c(K,p,p))
mhAR <- mhAR1 <- 0
mhDeltaAR <- 0
mIter <- 0
# MCMC sampler
cluster_size <- numeric(K)
zOld <- z
kValues <- numeric(m)
kValues[iter] <- length(table(z))
zConnection <- file("zValues.txt",open = "w")
yConnection <- file("yValues.txt",open = "w")
sigmainvConnection <- file("sigmainvValues.txt",open = "w")
omegainvConnection <- file("omegainvValues.txt",open = "w")
muConnection <- file("muValues.txt",open = "w")
wConnection <- file("wValues.txt",open = "w")
logLConnection <- file("k.and.logl.Values.txt",open = "w")
LambdaConnection <- file("LambdaValues.txt", open = "w")
current_matrix <- vector("list", length = 4)
names(current_matrix) <- c("A","B","G","D")
kavatza <- 0
# u_v <- runif(1)
for (iter in 2:m){
# 1
OmegaINV.constant <- update_OmegaINV_Cxx(Lambda = array(Lambda.values,dim = c(K,p,q)), K = K, g = g, h = h)
# 2
suf_stat <- compute_sufficient_statistics_given_mu(y = y, z = z, K = K, x_data = x_data, mu = mu.values)
f2 <- compute_A_B_G_D_and_simulate_mu_Lambda_CCU(SigmaINV = SigmaINV.values,
suff_statistics = suf_stat, OmegaINV = OmegaINV.constant,
K = K, priorConst1 = priorConst1, T_INV = T_INV, v_r = v_r)
mu.values <- f2$mu
Lambda.values <- f2$Lambdas
# 3
u_v <- runif(1)
if(u_v < gibbs_z){
f2 <- update_z2(w = w.values, mu = array(mu.values,dim = c(K,p)), Lambda = array(Lambda.values,dim = c(K,p,q)), SigmaINV = SigmaINV.values, K = K, x_data = x_data)
}else{
f2 <- update_z_b(w = w.values, mu = array(mu.values,dim = c(K,p)), Lambda = array(Lambda.values,dim = c(K,p,q)), y = y,
SigmaINV = SigmaINV.values, K = K, x_data = x_data)
}
z <- f2$z
kValues[iter] <- length(table(z))
cluster_size <- numeric(K)
for(k in 1:K){ index <- which(z == k); cluster_size[k] <- length(index)}
w.values <- myDirichlet(alpha_prior[1:K] + cluster_size)
# 4
y <- array(update_all_y(x_data = x_data, mu = mu.values, SigmaINV = SigmaINV.values, Lambda = array(Lambda.values,dim = c(K,p,q)), z = z)$y, dim = c(n, q))
# 5
SigmaINV.values <- update_SigmaINV_faster(x_data = x_data, z = z, y = y, Lambda = array(Lambda.values,dim = c(K,p,q)),
mu = array(mu.values,dim = c(K,p)), K = K, alpha_sigma = alpha_sigma, beta_sigma = beta_sigma)
if(iter %% thinning == 0){
if(iter > burn){
logLValues <- c(kValues[iter], complete.log.likelihood(x_data = x_data, w = w.values, mu = mu.values, Lambda = Lambda.values, SigmaINV = SigmaINV.values, z = z))
cat(logLValues, file = logLConnection, '\n', append = TRUE)
cat(z, file = zConnection, '\n', append = TRUE)
cat(y, file = yConnection, '\n', append = TRUE)
cat(w.values, file = wConnection, '\n', append = TRUE)
cat(mu.values, file = muConnection, '\n', append = TRUE)
cat(diag(OmegaINV.constant), file = omegainvConnection, '\n', append = TRUE)
cat(diag(SigmaINV.values), file = sigmainvConnection, '\n', append = TRUE)
for(k in 1:K){
for(r in 1:p){
cat(Lambda.values[k, r, ], " ", file = LambdaConnection, append = TRUE)
}
}
cat('\n', file = LambdaConnection, append = TRUE)
}
}
}
close(zConnection)
close(yConnection)
close(wConnection)
close(muConnection)
close(sigmainvConnection)
close(omegainvConnection)
close(logLConnection)
close(LambdaConnection)
setwd("../")
}
# new in version 3
overfittingMFA_CUU <- function(x_data, originalX, outputDirectory, Kmax, m, thinning, burn, g, h, alpha_prior, alpha_sigma, beta_sigma, start_values, q, zStart, gibbs_z, lowerTriangular = TRUE){
if(missing(originalX)){originalX <- x_data}
if(missing(gibbs_z)){gibbs_z = 0.05}
if(missing(zStart)){zStart = FALSE}
if(missing(x_data)){stop('x_data not provided.')}
if(missing(q)){stop('q not provided.')}
p <- dim(x_data)[2]
ledermannBound <- ( 2*p + 1 - sqrt(8*p + 1) ) / 2
if (q > ledermannBound){ stop(paste0('q should not exceed the Ledermann bound: ', ledermannBound)) }
n <- dim(x_data)[1]
v_r <- rep(q, p) #indicates the non-zero values of Lambdas
if(lowerTriangular){
for( r in 1:p ){
v_r[r] <- min(r,q)
}
}
if(missing(Kmax)){Kmax <- 20}
if(missing(m)){m <- 21000}
if(missing(burn)){burn <- 1000}
if(missing(thinning)){thinning <- 10}
if(missing(g)){g <- 2}
if(missing(h)){h <- 1}
if(missing(alpha_prior)){alpha_prior <- 1*rep(1/Kmax,Kmax)}
if(missing(alpha_sigma)){alpha_sigma <- 2}
if(missing(beta_sigma)){beta_sigma <- 1}
if(missing(start_values)){start_values <- FALSE}
if(q == 0){
# redirecting everything to the corresponding function
cat(paste('q = 0 is not currently supported for CUU model.'),'\n')
#overfitting_q0(x_data = x_data, originalX = originalX,
# outputDirectory = outputDirectory, Kmax = Kmax,
# m = m, thinning = thinning, burn = burn, g = g,
# h = h, alpha_prior = alpha_prior, alpha_sigma = alpha_sigma,
# beta_sigma = beta_sigma, start_values = start_values,
# q = 0, zStart = zStart, gibbs_z = gibbs_z)
return(doNothing <- 0) # exit
}
if( start_values == F ){
dir.create(outputDirectory)
}
setwd(outputDirectory)
# cat(paste0("a = ", alpha_prior[1], ", p = ", p, ", q = ", q, ", n = ",n,", g = ", g, ", h = ", h, ", alpha_sigma = ", alpha_sigma, ", beta_sigma = ", beta_sigma,"\n"))
K <- Kmax
# prior parameters
T_INV <- array(data = 0, dim = c(p,p))
# diag(T_INV) <- (apply(x_data,2,max) - apply(x_data,2,min))^2
diag(T_INV) <- diag(var(x_data))
# diag(T_INV) <- rep(1,p)
# diag(T_INV) <- rep(100,p)
diag(T_INV) <- 1/diag(T_INV)
ksi <- colSums(x_data)/n
priorConst1 <- T_INV %*% ksi
sigma_y2 <- 1/1
# print("here")
#############################################
#
OmegaINV.constant <- array(data = 0, dim = c(q,q));
diag(OmegaINV.constant) <- rep(g/h,q)
# diag(OmegaINV.constant) <- rep(1000,q)
OmegaINV.constantINITIAL <- OmegaINV.constant
SigmaINV.values <- array(data = 0, dim = c(K,p,p))
Lambda.values <- array(data = 0, dim = c(K,p,q))
mu.values <- array(data = 0, dim = c(K,p))
z <- numeric(n)
y <- array(data = 0, dim = c(n,q))
w.values <- numeric(K)
#############################################
# initial values
iter <- 1
if(start_values == FALSE){
omega <- OmegaINV.constant
diag(omega) <- 1/(diag(omega))
#diag(SigmaINV.values) <- rgamma(n = p, shape = 1000, rate = 1)
for(r in 1:p){
Lambda.values[1,r,1:v_r[r]] <- mvrnorm(n = 1, mu = rep(0,v_r[r]), Sigma = omega[1:v_r[r],1:v_r[r]])
}
for(k in 1:K){
mu.values[k,] <- rnorm(p,mean = ksi, sd = sqrt( 1/diag(T_INV) ))
for(r in 1:p){
Lambda.values[k,r,1:v_r[r]] <- Lambda.values[1,r,1:v_r[r]]
}
diag(SigmaINV.values[k,,]) <- rgamma(n = p, shape = alpha_sigma, rate = beta_sigma) ## parameterization with mean = g/h and var = g/(h^2)
}
for(i in 1:n){
y[i,] <- rnorm(q,mean = 0,sd = 1)
}
w.values <- myDirichlet(alpha_prior[1:K])
z <- sample(K,n,replace = TRUE, prob = w.values)
if( outputDirectory == 'alpha_1'){
if(is.numeric(zStart)){
z <- zStart
cluster_size <- numeric(K)
for(k in 1:K){ index <- which(z == k); cluster_size[k] <- length(index)}
w.values <- myDirichlet(alpha_prior[1:K] + cluster_size)
}
}
}else{
# cat(paste0('reading starting values... '))
diag(OmegaINV.constant) <- as.numeric(read.table('omegainvValues.txt', colClasses = rep('numeric',q))[1,])
omega <- OmegaINV.constant
diag(omega) <- 1/(diag(omega))
tmp1 <- read.table("muValues.txt", colClasses = rep('numeric',Kmax*p))
tmp2 <- as.matrix(read.table("sigmainvValues.txt"))
for(k in 1:K){
mu.values[k, ] <- as.matrix(tmp1[ , k + Kmax*((1:p)-1)])
diag(SigmaINV.values[k, , ]) <- as.matrix(tmp2[,((k-1)*p + 1):(k*p)])
}
Lambda.values <- readLambdaValues(myFile = "LambdaValues.txt", K = K, p = p, q = q)
y <- matrix(as.matrix(read.table('yValues.txt', colClasses = rep('numeric',n*q))), nrow = n , ncol = q)
w.values <- as.numeric(read.table("wValues.txt", colClasses = rep('numeric',Kmax)))
z <- as.numeric(read.table("zValues.txt", colClasses = rep('numeric', n)))
# cat(paste0('done.'),'\n')
}
###############################################
yd <- array(data = 0, dim = c(n,q))
trueVar <- array(data = 0, dim = c(K,p,p))
trueVar.values <- array(data = 0, dim = c(K,p,p))
mhAR <- mhAR1 <- 0
mhDeltaAR <- 0
mIter <- 0
# MCMC sampler
cluster_size <- numeric(K)
zOld <- z
kValues <- numeric(m)
kValues[iter] <- length(table(z))
zConnection <- file("zValues.txt",open = "w")
yConnection <- file("yValues.txt",open = "w")
sigmainvConnection <- file("sigmainvValues.txt",open = "w")
omegainvConnection <- file("omegainvValues.txt",open = "w")
muConnection <- file("muValues.txt",open = "w")
wConnection <- file("wValues.txt",open = "w")
logLConnection <- file("k.and.logl.Values.txt",open = "w")
LambdaConnection <- file("LambdaValues.txt", open = "w")
current_matrix <- vector("list", length = 4)
names(current_matrix) <- c("A","B","G","D")
kavatza <- 0
# u_v <- runif(1)
for (iter in 2:m){
# 1
OmegaINV.constant <- update_OmegaINV_Cxx(Lambda = array(Lambda.values,dim = c(K,p,q)), K = K, g = g, h = h)
# 2
suf_stat <- compute_sufficient_statistics_given_mu(y = y, z = z, K = K, x_data = x_data, mu = mu.values)
f2 <- compute_A_B_G_D_and_simulate_mu_Lambda_CUU(SigmaINV = array(SigmaINV.values,dim = c(K,p,p)),
suff_statistics = suf_stat, OmegaINV = OmegaINV.constant,
K = K, priorConst1 = priorConst1, T_INV = T_INV, v_r = v_r)
mu.values <- f2$mu
Lambda.values <- f2$Lambdas
# 3
u_v <- runif(1)
if(u_v < gibbs_z){
f2 <- update_z2_Sj(w = w.values, mu = array(mu.values,dim = c(K,p)), Lambda = array(Lambda.values,dim = c(K,p,q)), SigmaINV = SigmaINV.values, K = K, x_data = x_data)
}else{
f2 <- update_z_b_Sj(w = w.values, mu = array(mu.values,dim = c(K,p)), Lambda = array(Lambda.values,dim = c(K,p,q)), y = y,
SigmaINV = array(SigmaINV.values,dim = c(K,p,p)), K = K, x_data = x_data)
}
z <- f2$z
kValues[iter] <- length(table(z))
cluster_size <- numeric(K)
for(k in 1:K){ index <- which(z == k); cluster_size[k] <- length(index)}
w.values <- myDirichlet(alpha_prior[1:K] + cluster_size)
# 4
y <- array(update_all_y_Sj(x_data = x_data, mu = mu.values, SigmaINV = SigmaINV.values, Lambda = array(Lambda.values,dim = c(K,p,q)), z = z)$y, dim = c(n, q))
# 5
SigmaINV.values <- update_SigmaINV_faster_Sj(x_data = x_data, z = z, y = y, Lambda = array(Lambda.values,dim = c(K,p,q)),
mu = array(mu.values,dim = c(K,p)), K = K, alpha_sigma = alpha_sigma, beta_sigma = beta_sigma)
if(iter %% thinning == 0){
if(iter > burn){
logLValues <- c(kValues[iter], complete.log.likelihood_Sj(x_data = x_data, w = w.values, mu = mu.values, Lambda = Lambda.values, SigmaINV = SigmaINV.values, z = z))
cat(logLValues, file = logLConnection, '\n', append = TRUE)
cat(z, file = zConnection, '\n', append = TRUE)
cat(y, file = yConnection, '\n', append = TRUE)
cat(w.values, file = wConnection, '\n', append = TRUE)
cat(mu.values, file = muConnection, '\n', append = TRUE)
cat(diag(OmegaINV.constant), file = omegainvConnection, '\n', append = TRUE)
for(k in 1:K){
for(r in 1:p){
cat(Lambda.values[k, r, ], " ", file = LambdaConnection, append = TRUE)
}
cat(diag(SigmaINV.values[k,,]), " ", file = sigmainvConnection, append = TRUE)
}
cat('\n', file = LambdaConnection, append = TRUE)
cat('\n', file = sigmainvConnection, append = TRUE)
}
}
}
close(zConnection)
close(yConnection)
close(wConnection)
close(muConnection)
close(sigmainvConnection)
close(omegainvConnection)
close(logLConnection)
close(LambdaConnection)
setwd("../")
}
#new in version 3
overfittingMFA_CCC <- function(x_data, originalX, outputDirectory, Kmax, m, thinning, burn, g, h, alpha_prior, alpha_sigma, beta_sigma, start_values, q, zStart, gibbs_z, lowerTriangular=TRUE){
if(missing(originalX)){originalX <- x_data}
if(missing(gibbs_z)){gibbs_z = 0.05}
if(missing(zStart)){zStart = FALSE}
if(missing(x_data)){stop('x_data not provided.')}
if(missing(q)){stop('q not provided.')}
p <- dim(x_data)[2]
ledermannBound <- ( 2*p + 1 - sqrt(8*p + 1) ) / 2
if (q > ledermannBound){ stop(paste0('q should not exceed the Ledermann bound: ', ledermannBound)) }
n <- dim(x_data)[1]
v_r <- rep(q, p) #indicates the non-zero values of Lambdas
if(lowerTriangular){
for( r in 1:p ){
v_r[r] <- min(r,q)
}
}
if(missing(Kmax)){Kmax <- 20}
if(missing(m)){m <- 21000}
if(missing(burn)){burn <- 1000}
if(missing(thinning)){thinning <- 10}
if(missing(g)){g <- 0.5}
if(missing(h)){h <- 0.5}
if(missing(alpha_prior)){alpha_prior <- 1*rep(1/Kmax,Kmax)}
if(missing(alpha_sigma)){alpha_sigma <- 0.5}
if(missing(beta_sigma)){beta_sigma <- 0.5}
if(missing(start_values)){start_values <- FALSE}
if(q == 0){
# redirecting everything to the corresponding function
cat(paste('q = 0 is not currently supported for CCC model.'),'\n')
#overfitting_q0_sameSigma(x_data = x_data, originalX = originalX,
# outputDirectory = outputDirectory, Kmax = Kmax,
# m = m, thinning = thinning, burn = burn, g = g,
# h = h, alpha_prior = alpha_prior, alpha_sigma = alpha_sigma,
# beta_sigma = beta_sigma, start_values = start_values,
# q = 0, zStart = zStart, gibbs_z = gibbs_z)
return(doNothing <- 0) # exit
}
if( start_values == F ){
dir.create(outputDirectory)
}
setwd(outputDirectory)
# cat(paste0("a = ", alpha_prior[1], ", p = ", p, ", q = ", q, ", n = ",n,", g = ", g, ", h = ", h, ", alpha_sigma = ", alpha_sigma, ", beta_sigma = ", beta_sigma,"\n"))
K <- Kmax
# prior parameters
T_INV <- array(data = 0, dim = c(p,p))
# diag(T_INV) <- (apply(x_data,2,max) - apply(x_data,2,min))^2
diag(T_INV) <- diag(var(x_data))
# diag(T_INV) <- rep(1,p)
# diag(T_INV) <- rep(100,p)
diag(T_INV) <- 1/diag(T_INV)
ksi <- colSums(x_data)/n
priorConst1 <- T_INV %*% ksi
sigma_y2 <- 1/1
# print("here")
#############################################
#
OmegaINV.constant <- array(data = 0, dim = c(q,q));
diag(OmegaINV.constant) <- rep(g/h,q)
# diag(OmegaINV.constant) <- rep(1000,q)
OmegaINV.constantINITIAL <- OmegaINV.constant
SigmaINV.values <- array(data = 0, dim = c(p,p))
Lambda.values <- array(data = 0, dim = c(K,p,q))
mu.values <- array(data = 0, dim = c(K,p))
z <- numeric(n)
y <- array(data = 0, dim = c(n,q))
w.values <- numeric(K)
#############################################
# initial values
iter <- 1
if(start_values == FALSE){
omega <- OmegaINV.constant
diag(omega) <- 1/(diag(omega))
diag(SigmaINV.values) <- rep(rgamma(n = 1, shape = alpha_sigma, rate = beta_sigma), p) ## parameterization with mean = g/h and var = g/(h^2)
#diag(SigmaINV.values) <- rgamma(n = p, shape = 1000, rate = 1)
for(r in 1:p){
Lambda.values[1,r,1:v_r[r]] <- mvrnorm(n = 1, mu = rep(0,v_r[r]), Sigma = omega[1:v_r[r],1:v_r[r]])
}
for(k in 1:K){
mu.values[k,] <- rnorm(p,mean = ksi, sd = sqrt( 1/diag(T_INV) ))
for(r in 1:p){
Lambda.values[k,r,1:v_r[r]] <- Lambda.values[1,r,1:v_r[r]]
}
}
for(i in 1:n){
y[i,] <- rnorm(q,mean = 0,sd = 1)
}
w.values <- myDirichlet(alpha_prior[1:K])
z <- sample(K,n,replace = TRUE, prob = w.values)
if( outputDirectory == 'alpha_1'){
if(is.numeric(zStart)){
z <- zStart
cluster_size <- numeric(K)
for(k in 1:K){ index <- which(z == k); cluster_size[k] <- length(index)}
w.values <- myDirichlet(alpha_prior[1:K] + cluster_size)
}
}
}else{
# cat(paste0('reading starting values... '))
diag(OmegaINV.constant) <- as.numeric(read.table('omegainvValues.txt', colClasses = rep('numeric',q))[1,])
omega <- OmegaINV.constant
diag(omega) <- 1/(diag(omega))
tmp1 <- read.table("muValues.txt", colClasses = rep('numeric',Kmax*p))
diag(SigmaINV.values) <- as.numeric(read.table("sigmainvValues.txt"))
for(k in 1:K){
mu.values[k, ] <- as.matrix(tmp1[ , k + Kmax*((1:p)-1)])
}
Lambda.values <- readLambdaValues(myFile = "LambdaValues.txt", K = K, p = p, q = q)
y <- matrix(as.matrix(read.table('yValues.txt', colClasses = rep('numeric',n*q))), nrow = n , ncol = q)
w.values <- as.numeric(read.table("wValues.txt", colClasses = rep('numeric',Kmax)))
z <- as.numeric(read.table("zValues.txt", colClasses = rep('numeric', n)))
# cat(paste0('done.'),'\n')
}
###############################################
yd <- array(data = 0, dim = c(n,q))
trueVar <- array(data = 0, dim = c(K,p,p))
trueVar.values <- array(data = 0, dim = c(K,p,p))
mhAR <- mhAR1 <- 0
mhDeltaAR <- 0
mIter <- 0
# MCMC sampler
cluster_size <- numeric(K)
zOld <- z
kValues <- numeric(m)
kValues[iter] <- length(table(z))
zConnection <- file("zValues.txt",open = "w")
yConnection <- file("yValues.txt",open = "w")
sigmainvConnection <- file("sigmainvValues.txt",open = "w")
omegainvConnection <- file("omegainvValues.txt",open = "w")
muConnection <- file("muValues.txt",open = "w")
wConnection <- file("wValues.txt",open = "w")
logLConnection <- file("k.and.logl.Values.txt",open = "w")
LambdaConnection <- file("LambdaValues.txt", open = "w")
current_matrix <- vector("list", length = 4)
names(current_matrix) <- c("A","B","G","D")
kavatza <- 0
# u_v <- runif(1)
for (iter in 2:m){
# 1
OmegaINV.constant <- update_OmegaINV_Cxx(Lambda = array(Lambda.values,dim = c(K,p,q)), K = K, g = g, h = h)
# 2
suf_stat <- compute_sufficient_statistics_given_mu(y = y, z = z, K = K, x_data = x_data, mu = mu.values)
f2 <- compute_A_B_G_D_and_simulate_mu_Lambda_CCU(SigmaINV = SigmaINV.values,
suff_statistics = suf_stat, OmegaINV = OmegaINV.constant,
K = K, priorConst1 = priorConst1, T_INV = T_INV, v_r = v_r)
mu.values <- f2$mu
Lambda.values <- f2$Lambdas
# 3
u_v <- runif(1)
if(u_v < gibbs_z){
f2 <- update_z2(w = w.values, mu = array(mu.values,dim = c(K,p)), Lambda = array(Lambda.values,dim = c(K,p,q)), SigmaINV = SigmaINV.values, K = K, x_data = x_data)
}else{
f2 <- update_z_b(w = w.values, mu = array(mu.values,dim = c(K,p)), Lambda = array(Lambda.values,dim = c(K,p,q)), y = y,
SigmaINV = SigmaINV.values, K = K, x_data = x_data)
}
z <- f2$z
kValues[iter] <- length(table(z))
cluster_size <- numeric(K)
for(k in 1:K){ index <- which(z == k); cluster_size[k] <- length(index)}
w.values <- myDirichlet(alpha_prior[1:K] + cluster_size)
# 4
y <- array(update_all_y(x_data = x_data, mu = mu.values, SigmaINV = SigmaINV.values, Lambda = array(Lambda.values,dim = c(K,p,q)), z = z)$y, dim = c(n, q))
# 5
SigmaINV.values <- update_SigmaINV_xCC(x_data = x_data, z = z, y = y, Lambda = array(Lambda.values,dim = c(K,p,q)),
mu = array(mu.values,dim = c(K,p)), K = K, alpha_sigma = alpha_sigma, beta_sigma = beta_sigma)
if(iter %% thinning == 0){
if(iter > burn){
logLValues <- c(kValues[iter], complete.log.likelihood(x_data = x_data, w = w.values, mu = mu.values, Lambda = Lambda.values, SigmaINV = SigmaINV.values, z = z))
cat(logLValues, file = logLConnection, '\n', append = TRUE)
cat(z, file = zConnection, '\n', append = TRUE)
cat(y, file = yConnection, '\n', append = TRUE)
cat(w.values, file = wConnection, '\n', append = TRUE)
cat(mu.values, file = muConnection, '\n', append = TRUE)
cat(diag(OmegaINV.constant), file = omegainvConnection, '\n', append = TRUE)
cat(diag(SigmaINV.values), file = sigmainvConnection, '\n', append = TRUE)
for(k in 1:K){
for(r in 1:p){
cat(Lambda.values[k, r, ], " ", file = LambdaConnection, append = TRUE)
}
}
cat('\n', file = LambdaConnection, append = TRUE)
}
}
}
close(zConnection)
close(yConnection)
close(wConnection)
close(muConnection)
close(sigmainvConnection)
close(omegainvConnection)
close(logLConnection)
close(LambdaConnection)
setwd("../")
}
# new in version 3
overfittingMFA_CUC <- function(x_data, originalX, outputDirectory, Kmax, m, thinning, burn, g, h, alpha_prior, alpha_sigma, beta_sigma, start_values, q, zStart, gibbs_z, lowerTriangular = TRUE){
if(missing(originalX)){originalX <- x_data}
if(missing(gibbs_z)){gibbs_z = 0.05}
if(missing(zStart)){zStart = FALSE}
if(missing(x_data)){stop('x_data not provided.')}
if(missing(q)){stop('q not provided.')}
p <- dim(x_data)[2]
ledermannBound <- ( 2*p + 1 - sqrt(8*p + 1) ) / 2
if (q > ledermannBound){ stop(paste0('q should not exceed the Ledermann bound: ', ledermannBound)) }
n <- dim(x_data)[1]
v_r <- rep(q, p) #indicates the non-zero values of Lambdas
if(lowerTriangular){
for( r in 1:p ){
v_r[r] <- min(r,q)
}
}
if(missing(Kmax)){Kmax <- 20}
if(missing(m)){m <- 21000}
if(missing(burn)){burn <- 1000}
if(missing(thinning)){thinning <- 10}
if(missing(g)){g <- 2}
if(missing(h)){h <- 1}
if(missing(alpha_prior)){alpha_prior <- 1*rep(1/Kmax,Kmax)}
if(missing(alpha_sigma)){alpha_sigma <- 2}
if(missing(beta_sigma)){beta_sigma <- 1}
if(missing(start_values)){start_values <- FALSE}
if(q == 0){
# redirecting everything to the corresponding function
cat(paste('q = 0 is not currently supported for CUC model.'),'\n')
#overfitting_q0(x_data = x_data, originalX = originalX,
# outputDirectory = outputDirectory, Kmax = Kmax,
# m = m, thinning = thinning, burn = burn, g = g,
# h = h, alpha_prior = alpha_prior, alpha_sigma = alpha_sigma,
# beta_sigma = beta_sigma, start_values = start_values,
# q = 0, zStart = zStart, gibbs_z = gibbs_z)
return(doNothing <- 0) # exit
}
if( start_values == F ){
dir.create(outputDirectory)
}
setwd(outputDirectory)
# cat(paste0("a = ", alpha_prior[1], ", p = ", p, ", q = ", q, ", n = ",n,", g = ", g, ", h = ", h, ", alpha_sigma = ", alpha_sigma, ", beta_sigma = ", beta_sigma,"\n"))
K <- Kmax
# prior parameters
T_INV <- array(data = 0, dim = c(p,p))
# diag(T_INV) <- (apply(x_data,2,max) - apply(x_data,2,min))^2
diag(T_INV) <- diag(var(x_data))
# diag(T_INV) <- rep(1,p)
# diag(T_INV) <- rep(100,p)
diag(T_INV) <- 1/diag(T_INV)
ksi <- colSums(x_data)/n
priorConst1 <- T_INV %*% ksi
sigma_y2 <- 1/1
# print("here")
#############################################
#
OmegaINV.constant <- array(data = 0, dim = c(q,q));
diag(OmegaINV.constant) <- rep(g/h,q)
# diag(OmegaINV.constant) <- rep(1000,q)
OmegaINV.constantINITIAL <- OmegaINV.constant
SigmaINV.values <- array(data = 0, dim = c(K,p,p))
Lambda.values <- array(data = 0, dim = c(K,p,q))
mu.values <- array(data = 0, dim = c(K,p))
z <- numeric(n)
y <- array(data = 0, dim = c(n,q))
w.values <- numeric(K)
#############################################
# initial values
iter <- 1
if(start_values == FALSE){
omega <- OmegaINV.constant
diag(omega) <- 1/(diag(omega))
#diag(SigmaINV.values) <- rgamma(n = p, shape = 1000, rate = 1)
for(r in 1:p){
Lambda.values[1,r,1:v_r[r]] <- mvrnorm(n = 1, mu = rep(0,v_r[r]), Sigma = omega[1:v_r[r],1:v_r[r]])
}
for(k in 1:K){
mu.values[k,] <- rnorm(p,mean = ksi, sd = sqrt( 1/diag(T_INV) ))
for(r in 1:p){
Lambda.values[k,r,1:v_r[r]] <- Lambda.values[1,r,1:v_r[r]]
}
diag(SigmaINV.values[k,,]) <- rep(rgamma(n = 1, shape = alpha_sigma, rate = beta_sigma),p) ## parameterization with mean = g/h and var = g/(h^2)
}
for(i in 1:n){
y[i,] <- rnorm(q,mean = 0,sd = 1)
}
w.values <- myDirichlet(alpha_prior[1:K])
z <- sample(K,n,replace = TRUE, prob = w.values)
# if( outputDirectory == 'alpha_1'){
if(is.numeric(zStart)){
z <- zStart
cluster_size <- numeric(K)
for(k in 1:K){ index <- which(z == k); cluster_size[k] <- length(index)}
w.values <- myDirichlet(alpha_prior[1:K] + cluster_size)
}
# }
}else{
# cat(paste0('reading starting values... '))
diag(OmegaINV.constant) <- as.numeric(read.table('omegainvValues.txt', colClasses = rep('numeric',q))[1,])
omega <- OmegaINV.constant
diag(omega) <- 1/(diag(omega))
tmp1 <- read.table("muValues.txt", colClasses = rep('numeric',Kmax*p))
tmp2 <- as.matrix(read.table("sigmainvValues.txt"))
for(k in 1:K){
mu.values[k, ] <- as.matrix(tmp1[ , k + Kmax*((1:p)-1)])
diag(SigmaINV.values[k, , ]) <- as.matrix(tmp2[,((k-1)*p + 1):(k*p)])
}
Lambda.values <- readLambdaValues(myFile = "LambdaValues.txt", K = K, p = p, q = q)
y <- matrix(as.matrix(read.table('yValues.txt', colClasses = rep('numeric',n*q))), nrow = n , ncol = q)
w.values <- as.numeric(read.table("wValues.txt", colClasses = rep('numeric',Kmax)))
z <- as.numeric(read.table("zValues.txt", colClasses = rep('numeric', n)))
# cat(paste0('done.'),'\n')
}
###############################################
yd <- array(data = 0, dim = c(n,q))
trueVar <- array(data = 0, dim = c(K,p,p))
trueVar.values <- array(data = 0, dim = c(K,p,p))
mhAR <- mhAR1 <- 0
mhDeltaAR <- 0
mIter <- 0
# MCMC sampler
cluster_size <- numeric(K)
zOld <- z
kValues <- numeric(m)
kValues[iter] <- length(table(z))
zConnection <- file("zValues.txt",open = "w")
yConnection <- file("yValues.txt",open = "w")
sigmainvConnection <- file("sigmainvValues.txt",open = "w")
omegainvConnection <- file("omegainvValues.txt",open = "w")
muConnection <- file("muValues.txt",open = "w")
wConnection <- file("wValues.txt",open = "w")
logLConnection <- file("k.and.logl.Values.txt",open = "w")
LambdaConnection <- file("LambdaValues.txt", open = "w")
current_matrix <- vector("list", length = 4)
names(current_matrix) <- c("A","B","G","D")
kavatza <- 0
# u_v <- runif(1)
for (iter in 2:m){
# 1
OmegaINV.constant <- update_OmegaINV_Cxx(Lambda = array(Lambda.values,dim = c(K,p,q)), K = K, g = g, h = h)
# 2
suf_stat <- compute_sufficient_statistics_given_mu(y = y, z = z, K = K, x_data = x_data, mu = mu.values)
f2 <- compute_A_B_G_D_and_simulate_mu_Lambda_CUU(SigmaINV = array(SigmaINV.values,dim = c(K,p,p)),
suff_statistics = suf_stat, OmegaINV = OmegaINV.constant,
K = K, priorConst1 = priorConst1, T_INV = T_INV, v_r = v_r)
mu.values <- f2$mu
Lambda.values <- f2$Lambdas
# 3
u_v <- runif(1)
if(u_v < gibbs_z){
f2 <- update_z2_Sj(w = w.values, mu = array(mu.values,dim = c(K,p)), Lambda = array(Lambda.values,dim = c(K,p,q)), SigmaINV = SigmaINV.values, K = K, x_data = x_data)
}else{
f2 <- update_z_b_Sj(w = w.values, mu = array(mu.values,dim = c(K,p)), Lambda = array(Lambda.values,dim = c(K,p,q)), y = y,
SigmaINV = array(SigmaINV.values,dim = c(K,p,p)), K = K, x_data = x_data)
}
z <- f2$z
kValues[iter] <- length(table(z))
cluster_size <- numeric(K)
for(k in 1:K){ index <- which(z == k); cluster_size[k] <- length(index)}
w.values <- myDirichlet(alpha_prior[1:K] + cluster_size)
# 4
y <- array(update_all_y_Sj(x_data = x_data, mu = mu.values, SigmaINV = SigmaINV.values, Lambda = array(Lambda.values,dim = c(K,p,q)), z = z)$y, dim = c(n, q))
# 5
SigmaINV.values <- update_SigmaINV_xUC(x_data = x_data, z = z, y = y, Lambda = array(Lambda.values,dim = c(K,p,q)),
mu = array(mu.values,dim = c(K,p)), K = K, alpha_sigma = alpha_sigma, beta_sigma = beta_sigma)
if(iter %% thinning == 0){
if(iter > burn){
logLValues <- c(kValues[iter], complete.log.likelihood_Sj(x_data = x_data, w = w.values, mu = mu.values, Lambda = Lambda.values, SigmaINV = SigmaINV.values, z = z))
cat(logLValues, file = logLConnection, '\n', append = TRUE)
cat(z, file = zConnection, '\n', append = TRUE)
cat(y, file = yConnection, '\n', append = TRUE)
cat(w.values, file = wConnection, '\n', append = TRUE)
cat(mu.values, file = muConnection, '\n', append = TRUE)
cat(diag(OmegaINV.constant), file = omegainvConnection, '\n', append = TRUE)
for(k in 1:K){
for(r in 1:p){
cat(Lambda.values[k, r, ], " ", file = LambdaConnection, append = TRUE)
}
cat(diag(SigmaINV.values[k,,]), " ", file = sigmainvConnection, append = TRUE)
}
cat('\n', file = LambdaConnection, append = TRUE)
cat('\n', file = sigmainvConnection, append = TRUE)
}
}
}
close(zConnection)
close(yConnection)
close(wConnection)
close(muConnection)
close(sigmainvConnection)
close(omegainvConnection)
close(logLConnection)
close(LambdaConnection)
setwd("../")
}
#new in version 3
overfittingMFA_UCC <- function(x_data, originalX, outputDirectory, Kmax, m, thinning, burn, g, h, alpha_prior, alpha_sigma, beta_sigma, start_values, q, zStart, gibbs_z, lowerTriangular=TRUE){
if(missing(originalX)){originalX <- x_data}
if(missing(gibbs_z)){gibbs_z = 0.05}
if(missing(zStart)){zStart = FALSE}
if(missing(x_data)){stop('x_data not provided.')}
if(missing(q)){stop('q not provided.')}
p <- dim(x_data)[2]
ledermannBound <- ( 2*p + 1 - sqrt(8*p + 1) ) / 2
if (q > ledermannBound){ stop(paste0('q should not exceed the Ledermann bound: ', ledermannBound)) }
n <- dim(x_data)[1]
v_r <- rep(q, p) #indicates the non-zero values of Lambdas
if(lowerTriangular){
for( r in 1:p ){
v_r[r] <- min(r,q)
}
}
if(missing(Kmax)){Kmax <- 20}
if(missing(m)){m <- 21000}
if(missing(burn)){burn <- 1000}
if(missing(thinning)){thinning <- 10}
if(missing(g)){g <- 0.5}
if(missing(h)){h <- 0.5}
if(missing(alpha_prior)){alpha_prior <- 1*rep(1/Kmax,Kmax)}
if(missing(alpha_sigma)){alpha_sigma <- 0.5}
if(missing(beta_sigma)){beta_sigma <- 0.5}
if(missing(start_values)){start_values <- FALSE}
if(q == 0){
# redirecting everything to the corresponding function
#overfitting_q0_sameSigma(x_data = x_data, originalX = originalX,
# outputDirectory = outputDirectory, Kmax = Kmax,
# m = m, thinning = thinning, burn = burn, g = g,
# h = h, alpha_prior = alpha_prior, alpha_sigma = alpha_sigma,
# beta_sigma = beta_sigma, start_values = start_values,
# q = 0, zStart = zStart, gibbs_z = gibbs_z)
return(doNothing <- 0) # exit
}
if( start_values == F ){
dir.create(outputDirectory)
}
setwd(outputDirectory)
# cat(paste0("a = ", alpha_prior[1], ", p = ", p, ", q = ", q, ", n = ",n,", g = ", g, ", h = ", h, ", alpha_sigma = ", alpha_sigma, ", beta_sigma = ", beta_sigma,"\n"))
K <- Kmax
# prior parameters
T_INV <- array(data = 0, dim = c(p,p))
# diag(T_INV) <- (apply(x_data,2,max) - apply(x_data,2,min))^2
diag(T_INV) <- diag(var(x_data))
# diag(T_INV) <- rep(1,p)
# diag(T_INV) <- rep(100,p)
diag(T_INV) <- 1/diag(T_INV)
ksi <- colSums(x_data)/n
priorConst1 <- T_INV %*% ksi
sigma_y2 <- 1/1
# print("here")
#############################################
#
OmegaINV.constant <- array(data = 0, dim = c(q,q));
diag(OmegaINV.constant) <- rep(g/h,q)
# diag(OmegaINV.constant) <- rep(1000,q)
OmegaINV.constantINITIAL <- OmegaINV.constant
SigmaINV.values <- array(data = 0, dim = c(p,p))
Lambda.values <- array(data = 0, dim = c(K,p,q))
mu.values <- array(data = 0, dim = c(K,p))
z <- numeric(n)
y <- array(data = 0, dim = c(n,q))
w.values <- numeric(K)
#############################################
# initial values
iter <- 1
if(start_values == FALSE){
omega <- OmegaINV.constant
diag(omega) <- 1/(diag(omega))
diag(SigmaINV.values) <- rep(rgamma(n = 1, shape = alpha_sigma, rate = beta_sigma),p) ## parameterization with mean = g/h and var = g/(h^2)
#diag(SigmaINV.values) <- rgamma(n = p, shape = 1000, rate = 1)
for(k in 1:K){
mu.values[k,] <- rnorm(p,mean = ksi, sd = sqrt( 1/diag(T_INV) ))
for(r in 1:p){
Lambda.values[k,r,1:v_r[r]] <- mvrnorm(n = 1, mu = rep(0,v_r[r]), Sigma = omega[1:v_r[r],1:v_r[r]])
}
}