#' Diagonal Block Mixture of von Mises-Fisher Dsitributions
#'
#' Provides a simple function to bluid and fit a block mixture of von Mises-Fisher for data co-clustering.
#' @import Matrix
#' @import Bessel
#' @import mclust
#' @param X A sparse or dense data matrix, where rows denote objets lying on a unit-hypersphere and column denote features. Supported format include Matrix, dgCMatrix, dgTMatrix, etc.
#' @param k The number of mixture compoenents (or clusters).
#' @param control A list if additional parameters, see Details.
#' @param ... A list of additional parameters (overriding those specified in control).
#'
#' @details In addition to the parameters described above, the function dbmovMF supports other useful parameters including:
#' * max_iter: the maximum number of iteration
#' * max_st_iter: the maximum number of stochatic iterations. Only useful when performing learning using either SAEMb or CAEMb
#' * n_init: number of time the algorithm will be run with different initializations. The final results correspond to the bust run in terms of log likelihood.
#' * tol: tolerance relatve to the log likelihood to declar convergence.
#' * equal_prop: if true then then all mixture proportion will be consider to be equal, and the mixture propotions parmameters alpha will be ignored.
#' * fit_algo: learning algorithm to use. Supported algorithms include: EMb, CEMb, SEMb, SAEMb and CAEMb.
#' * equal_kappa: if true then all concentration parameters kappa of the differrent mixture components will be equal
#' * kappa_: The value of the concentration parameter. This parameter is only effective when euqal_kappa = TRUE.
#' * row_init: a partition of rows into k cluster to be used for initialization. Could be a vector if n_init = 1 of a matrix of size (n_init x n_rows) if n_init>1. Default = NULL.
#' * col_init: same as row_init but for columns.
#' @md
#' @return returns an object of class dbmovMF including the following attributes:
#' * rowcluster: the partition of rows into k clusters.
#' * colcluster: the partition of columns into k clusters.
#' * kappa_: a vector of concentration parameters.
#' * alpha: a vector of cluster proportions.
#' * alpha: a vector of cluster proportions.
#' * ll: the values of the complete data log likelihood over iterations.
#' * Iter: the total number of iteration needed for convergence.
#'
#' @export
dbmovMF<- function(X,k,control=list(),...){
#Preparing parameters
control <- c(control, list(...))
if(is.null(control$max_iter))
max_iter = 100
else
max_iter = control$max_iter
if(is.null(control$max_st_iter))
max_st_iter = round(0.7*max_iter)
else
max_st_iter = control$max_st_iter
if(is.null(control$n_init))
n_init = 1
else
n_init = control$n_init
if(is.null(control$tol))
tol = 1e-6
else
tol = control$tol
if(is.null(control$equal_prop))
equal_prop = FALSE
else
equal_prop = control$equal_prop
if(is.null(control$fit_algo)){
fit_algo = 'CAEMb'
print('CAEMb is default fitting algortithm for dbmovMF')
}
else
fit_algo = control$fit_algo
if(is.null(control$equal_kappa))
equal_kappa = FALSE
else{
equal_kappa = control$equal_kappa
if(equal_kappa){
if(is.null(control$kappa_)){
kappa_ = 100
print("default value for kappa_ is 100")
}
else
kappa_ = control$kappa_
}
}
if(is.null(control$row_init))
row_init = NULL
else{
row_init = control$row_init
control$row_init = NULL #to save memory space
}
if(is.null(control$col_init))
col_init = NULL
else{
col_init = control$col_init
control$col_init = NULL #to save memory space
}
fit_algorithms <- c("EMb", "CEMb", "SEMb", "SAEMb", "CAEMb")
#Coherence tests
if(k>= min(dim(X))){
stop("more clusters than distinc objects/features (rows/columns)")
}
if(equal_kappa){
if(length(kappa_==1L)){
if(kappa_<=0)
stop('kappa_ must be positive')
}
else
stop('kappa_ must be a positive real')
}
if(is.na(pmatch(tolower(fit_algo), tolower(fit_algorithms))))
stop("Invalid fit_algo, please choose one of: SAEMb, CAEMb, EMb, CEMb or SEMb")
if(n_init>1){
## Coherence tests for row_init
if(!is.null(row_init)){
if(is.matrix(row_init)){
if(dim(row_init)[1]<n_init)
stop("Less row partitions than n_init")
if(dim(row_init)[2]!=nrow(X))
stop("The length of the row partitions is different from the number of objects")
}
else
stop("Error o_O', row_init is not a matrix")
}
## Coherence tests for col_init
if(!is.null(col_init)){
if(is.matrix(col_init)){
if(dim(col_init)[1]<n_init)
stop("Less column partitions than n_init")
if(dim(col_init)[2]!=ncol(X))
stop("The length of the col partitions is different from the number of columns")
}
else
stop("Error o_O', col_init is not a matrix")
}
}
else{
if(!is.null(row_init)){
if(length(row_init) != nrow(X))
stop("The length row_init must be equal to the number of rows of X")
}
if(!is.null(col_init)){
if(length(col_init) != ncol(X))
stop("The length of the col_init must be equal to the number of columns of X")
}
}
#normalize rows to have unit L2 norm and convert X to column spase matrix
X = as(X,"dgCMatrix")
X = X/sqrt(Matrix::rowSums(X*X))
#useful variables
n = nrow(X)
d = ncol(X)
ll = c(rep(0,max_iter)) #(Expected) classification log-likelihood
ll_runs = c(rep(0,n_init))
etp = c(rep(0,max_iter)) # entropy of the latent variable z
nbIter = max_iter
vr1 = matrix(c(rep(1,n)),1,n) # 1 vector of dimension (1*n)
vc1 = matrix(c(rep(1,d)),1,d) # 1 vector of dimension (1*d)
nu = (d/2)-1 #useful when dealing with the normalizing constant of the vMF distribution
#perform one run
do_one <- function(row_c,col_c){
#Compute initial binary row-cluster indicator matrix
Z = matrix(0,n,k)
Z = as(Z,"dgCMatrix")
Z[cbind(seq_along(row_c), row_c)] = 1
#Compute initial binary column-cluster indicator matrix
W = matrix(0,d,k)
W = as(W,"dgCMatrix")
W[cbind(seq_along(col_c), col_c)] = 1
#row centroids MU^z
Mu_h = diag(1/sqrt(table(col_c)))
Mu_h = as(Mu_h,"dgCMatrix")
alpha = (vr1%*%Z)/n #row cluster proportions, matrix of size (1*k)
if(!equal_kappa){
nc = vc1%*%W #column cluster cardinalities, matrix of size (1*k)
#X_w (n*k): compressed X according to column clusters
X_w = X%*%W
#X_z_w (k*k): compressed X according to row and column clusters
X_z_w = Matrix::crossprod(X_w,Z)
r_bar = Matrix::diag(X_z_w)
r_bar = r_bar/((alpha[1,]*n)*sqrt(nc[1,]))
kap = (r_bar*d - r_bar^3)/(1-r_bar^2) #vector containing clusters' kappa parameters
#kap = rep(10,k)
}
else
kap = rep(kappa_,k)
KAP = as(Matrix::diag(kap),"dgCMatrix")
for(iter in 1:max_iter){
#M-step: starting with M-step is necessary when initializing with skmeans. This also allows us to save computation when evaluating the log-likelihood
#update W
Wt = Matrix::crossprod(X,Z)%*%(Mu_h*KAP)
switch (fit_algo,
"SAEMb" = W <- SAEMb.update_w(Wt,iter,max_st_iter),
"CAEMb" = W <- SAEMb.update_w(Wt,iter,max_st_iter), #same as saemb
"EMb" = W <- EMb.update_w(Wt), #same as EMb
"CEMb" = W <- EMb.update_w(Wt), #same as EMb
"SEMb" = W <- SEMb.update_w(Wt),
W <- SAEMb.update_w(Wt,iter,max_st_iter) # default CAEMb
)
col_partition = apply(W,1,which.max)
#update Mu_h
Mu_h = Matrix::diag(1/sqrt(table(col_partition)))
Mu_h = as(Mu_h,"dgCMatrix")
#update alpha
alpha = (vr1%*%Z)/n
#update kappa
if(!equal_kappa){
nc = vc1%*%W #column cluster cardinalities, matrix of size (1*k)
#X_w (n*k): compressed X according to column clusters
X_w = X%*%W
#X_z_w (k*k): compressed X according to row and column clusters
X_z_w = Matrix::crossprod(X_w,Z)
r_bar = Matrix::diag(X_z_w)
r_bar = r_bar/((alpha[1,]*n)*sqrt(nc[1,]))
kap = (r_bar*d - r_bar^3)/(1-r_bar^2) #vector containing clusters' kappa parameters
KAP = as(Matrix::diag(kap),"dgCMatrix")
}
#E-step
tmpz = rep(0,k)
logZt = (X%*%W)%*%(Mu_h*KAP)
if(!equal_kappa)
tmpz = nu*log(kap) - (nu+1)*log(2*pi) - besselI.nuAsym(kap, nu, k.max = 4, log=TRUE)
if(!equal_prop)
tmpz = tmpz + log(alpha[1,])
logZt = sweep(logZt,2,FUN = "+", tmpz)
switch (fit_algo,
"SAEMb" = Z <- SAEMb.E_step(logZt,iter,max_st_iter),
"CAEMb" = Z <- CAEMb.E_step(logZt,iter,max_st_iter),
"EMb" = Z <- EMb.E_step(logZt),
"CEMb" = Z <- CEMb.C_step(logZt),
"SEMb" = Z <- SEMb.S_step(logZt),
Z <- CAEMb.E_step(logZt,iter,max_st_iter) # default CAEMb
)
#evaluate log-likelihood
ll[iter] = sum(Z*logZt)
etp[iter] = -sum(Z@x*log(Z@x))
if(iter > 1){
if(abs(ll[iter] - ll[iter-1]) <tol){
nbIter = iter
break
}
}
}
row_partition = apply(Z,1,which.max)
structure(list(rowcluster = row_partition,colcluster = col_partition,kappa_ = kap,alpha=alpha, ll = ll,iter = nbIter))
#End of do_one
}
#Preparing initial row partition
if(is.null(row_init)){
row_c = as.integer(sample(as.numeric(1:k),n,replace = TRUE))
}
else{
if(is.matrix(row_init)){
row_c = as.integer(row_init[1,])
}
else{
row_c = row_init
}
}
#Preparing initial column partition
if(is.null(col_init)){
col_c = as.integer(sample(as.numeric(1:k),d,replace = TRUE))
}
else{
if(is.matrix(col_init)){
col_c = as.integer(col_init[1,])
}
else{
col_c = col_init
}
}
#Perform run(s)
run <- do_one(row_c,col_c)
best <- run$ll[run$iter]
index_best = 1;
ll_runs[1] = run$ll[run$iter]
#in case of multiple runs
if(n_init >= 2){
#
for(i in 2:n_init) {
#row partition preparation
if(is.null(row_init)){
row_c = as.integer(sample(as.numeric(1:k),n,replace = TRUE))
}
else{
row_c = as.integer(row_init[i,])
}
#column partition preparation
if(is.null(col_init)){
col_c = as.integer(sample(as.numeric(1:k),d,replace = TRUE))
}
else{
col_c = as.integer(col_init[i,])
}
rrun <- do_one(row_c,col_c)
ll_runs[i] = rrun$ll[rrun$iter]
#update best run is necessary
if(!is.na(rrun$ll[rrun$iter])){
if(rrun$ll[rrun$iter] > best){
run <- rrun
best <- run$ll[run$iter]
index_best = i
}
}
}
#end of runs
}
run$index_best = index_best
run$ll_runs = ll_runs
run
#end of dbmovMF
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.