MCAPfit <- function(xx, k, projection = 'PCA',
                    true_labels = NULL, centering_per_group = FALSE,
                    parallel = FALSE, verbose = FALSE, ...){
  #' Model based clustering via adaptive (linear) projections
  #'
  #' \code{"MCAPfit"} performs model based clustering using full variance
  #' Gaussian mixtures in a lower dimensional projected space obtained via
  #' adaptive (linear) projections.
  #' Projection variants include PCA-based and random projection.
  #'
  #' @author Bernd Taschler: \email{bernd.taschler@dzne.de}
  #' @author Sach Mukherjee: \email{sach.mukherjee@dzne.de}
  #' @references Taschler, B., Dondelinger, F. and Mukherjee, S. (2019)
  #' Model based clustering via adaptive projections \url{https://arxiv.org/pdf/??.pdf}
  #' @seealso \code{\link{GMMwrapper}},
  #' \code{\link{OptDimClusterStability}},
  #' \code{\link{ClusterStability}}
  #'
  #' @param xx The data matrix (n x p). Coerced with \code{as.matrix} before
  #' any checks are run.
  #' @param k The number of clusters (a single number >= 1).
  #' @param projection Projection method (\code{"PCA"}, \code{"gaussian"},
  #' \code{"achlioptas"} or \code{"li"}). Default: \code{"PCA"}.
  #' Passed to \code{\link{OptDimClusterStability}} as \code{method}.
  #' @param true_labels Vector of true cluster assignments (when provided, it is
  #' used to compute the Rand index). Must have one entry per row of
  #' \code{xx} and exactly \code{k} distinct values.
  #' @param centering_per_group Logical, when true: mean centre input matrix (if true
  #' labels are provided: centre data per group)
  #' @param parallel Logical, when true: perform line search over projection
  #' dimension in parallel. Passed on to \code{\link{OptDimClusterStability}}.
  #' @param verbose Logical, when true: print some progress information.
  #' @param ... Additional options for \code{\link{OptDimClusterStability}} and
  #' \code{\link{GMMwrapper}}. The same \code{...} is handed to both calls.
  #'
  #' @return A list with two elements:
  #' \item{fit_gmm}{ Model fit (GMM output of \code{\link[nethet]{mixglasso}}),
  #' including BIC, MMDL and adj. Rand index (when
  #' \code{true_labels} is provided).}
  #' \item{fit_q_opt}{ Output of \code{\link{OptDimClusterStability}}.}
  #'
  #' @details The input is coerced to a matrix and, if requested, centred via
  #' \code{CentrePerGroup}. \code{\link{OptDimClusterStability}} is then run
  #' with the chosen projection method, and its \code{q_opt} element is used
  #' as the target dimension. The final mixture model is fitted by
  #' \code{\link{GMMwrapper}} on \code{GramPCA(xx, npc = q_opt)$zz}.
  #'
  #' @examples
  #' ## small standard Normal matrix with random labels:
  #' MCAPfit(xx=matrix(rnorm(200),20,10), k=2, projection='PCA',
  #' true_labels = round(runif(20)), parallel=TRUE)
  #'
  #' \dontrun{
  #' ## sparse random projection:
  #' MCAPfit(xx=rbind(matrix(rnorm(5e4),100,500), matrix(rnorm(5e4,mean=1),100,500)),
  #' k=2, projection='li',
  #' true_labels=c(rep(0,100),rep(1,100)), parallel=TRUE)
  #'
  #' ## sparse random projection, removing the mean signal:
  #' MCAPfit(xx=rbind(matrix(rnorm(5e4),100,500), matrix(rnorm(5e4,mean=1),100,500)),
  #' k=2, projection='li', centering_per_group=TRUE,
  #' true_labels=c(rep(0,100),rep(1,100)), parallel=TRUE)
  #' }
  #' @export

  ## preliminaries
  ## Coerce first: for a plain vector nrow() is NULL, which would make the
  ## length comparison below evaluate to logical(0) and pass stopifnot()
  ## without checking anything.
  xx <- as.matrix(xx)

  ## input checks
  stopifnot(is.numeric(k), length(k) == 1L, !is.na(k), k >= 1)
  if (!is.null(true_labels)) {
    stopifnot(length(true_labels) == nrow(xx))
    stopifnot(length(unique(true_labels)) == k)
  }

  if (centering_per_group) {
    xx <- CentrePerGroup(xx, true_labels = true_labels)
  }

  ## determine optimal projection dimension
  ## `parallel` is a named formal of MCAPfit, so it is never part of `...`;
  ## it has to be handed on explicitly or the option is silently ignored.
  ## NOTE(review): assumes OptDimClusterStability declares a `parallel`
  ## argument, as the @param text implies -- confirm against its definition.
  fit_q_opt <- OptDimClusterStability(xx, k = k, method = projection,
                                      true_labels = true_labels,
                                      parallel = parallel,
                                      verbose = verbose, ...)

  ## parameter estimation and GMM clustering with optimised target dimension
  ## NOTE(review): the final projection always goes through GramPCA, whatever
  ## `projection` was used to pick q_opt -- confirm this is intended.
  zz_opt <- GramPCA(xx, npc = fit_q_opt$q_opt)$zz
  fit_gmm <- GMMwrapper(zz_opt, k = k, true_labels = true_labels,
                        verbose = verbose, ...)

  list('fit_gmm' = fit_gmm, 'fit_q_opt' = fit_q_opt)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.