#' Variational Bayesian inference for outcome-guided clustering with Gaussian response
#'
#' Outcome-guided, model-based clustering using variational Bayesian inference, when the response
#' variable y is continuous and assumed to be Gaussian.
#' @param X NxD data matrix
#' @param y Vector of response variables of length N.
#' @param K (Maximum) number of clusters.
#' @param prior Prior hyperparameters (optional).
#' @param ms Boolean flag indicating whether model selection is performed. Default is FALSE.
#' @param vs Boolean flag indicating whether variable selection is performed. Default is FALSE.
#' @param labels_init User-defined initial cluster labels (can be empty).
#' @param kmeans_init Boolean flag, which, if TRUE, initializes the cluster labels with the k-means algorithm. Default is FALSE.
#' @param tol Tolerance for stopping criterion. Default is 10e-20.
#' @param maxiter Maximum number of iterations. Default is 2000.
#' @param verbose Boolean flag which, if TRUE, prints the iteration number. Default is FALSE.
#' @param indep Boolean flag indicating whether the covariates are treated as independent. Default is TRUE.
#' @return A list containing: L, the lower bound at each step of the algorithm; label, a vector containing the cluster
#' labels; model, a list containing the trained model structure; and, if model selection is required, a vector n_comp
#' containing the number of mixture components at every step of the VB algorithm.
#' @references Bishop, C. M. (2006). Pattern Recognition and Machine Learning. Springer.
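#' @examples
#' \dontrun{
#' # Illustrative sketch on simulated toy data; the data, seed and settings below
#' # are assumptions for demonstration only and are not part of the package.
#' set.seed(1)
#' N <- 100; D <- 4
#' z <- rep(1:2, each = N / 2)                                     # true cluster labels
#' X <- matrix(rnorm(N * D, mean = rep(c(0, 3)[z], D)), nrow = N)  # cluster-specific covariates
#' y <- rnorm(N, mean = c(-2, 2)[z])                               # Gaussian response guided by the clusters
#' fit <- ogc_gaussianResponse(X = X, y = y, K = 2, kmeans_init = TRUE, verbose = TRUE)
#' table(fit$label, z)                                             # recovered vs. true labels
#' plot(fit$L, type = "l", ylab = "Lower bound / N")
#' }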
#' @export
#'
ogc_gaussianResponse = function(X, K, y, prior, ms = FALSE, vs = FALSE, labels_init, kmeans_init = FALSE,
tol = 10e-20, maxiter = 2000, verbose = FALSE, indep = TRUE){
if(verbose) message("Outcome-guided clustering. \n y ~ Gaussian, x_i ~ Gaussian")
N = dim(X)[1]
D = dim(X)[2]
L = rep(-Inf,maxiter*2+1)
if(ms) n_comp = rep(1000, maxiter)
Resp = init(X, K, labels_init, kmeans_init, verbose)
if(missing(prior)){ # default prior
prior = list(alpha = 1, beta = 1, m = colMeans(X), v = D + 1, M = rep(1, D), beta_y = 1, m_y = mean(y), v_y = 2, M_y = 1)
}
model = prior; model$Resp = Resp # model initialisation
model = maximizeGauss(X, model, prior, ms, indep) # Maximization step
model = maximizeGaussResp(y, model, prior)
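# VB iterations: alternate the expectation (responsibility) update and the
# maximization (hyperparameter) updates for the covariate and response models,
# tracking the per-observation lower bound after each half-step.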
for (iter in 1:maxiter){
if(verbose) message(sprintf("Iteration number %d.\n", iter))
model = expectGauss_gaussResp(X, y, model, ms, indep) # Expectation step
L[iter*2] = (boundGauss(X, model, prior, indep) + boundGaussResp(y, model, prior))/N # Lower bound
model = maximizeGauss(X, model, prior, ms, indep) # Maximization step
model = maximizeGaussResp(y, model, prior)
L[iter*2+1] = (boundGauss(X, model, prior, indep) + boundGaussResp(y, model, prior))/N # Lower bound
if(ms) n_comp[iter] = sum(model$piw > 10e-3)
if(check_convergence(L, iter, tol, maxiter, verbose)) break
}
output = list(L = L[2:(iter*2+1)], label = apply(model$R, 1, which.max), model = model) # drop the -Inf placeholder in L[1]
if(ms) output$n_comp = n_comp
output
}