R/flowClust.R
In flowClust: Clustering for Flow Cytometry

Documented in flowClust

#' Robust Model-based Clustering for Flow Cytometry
#' 
#' This function performs automated clustering for identifying cell populations
#' in flow cytometry data.  The approach is based on the tmixture model
#' with the Box-Cox transformation, which provides a unified framework to
#' handle outlier identification and data transformation simultaneously.
#' 
#' Estimation of the unknown parameters (including the Box-Cox parameter) is
#' done via an Expectation-Maximization (EM) algorithm.  At each EM iteration,
#' Brent's algorithm is used to find the optimal value of the Box-Cox
#' transformation parameter.  Conditional on the transformation parameter, all
#' other estimates can be obtained in closed form.  Please refer to Lo et al.
#' (2008) for more details.
#' 
#' The \pkg{flowClust} package makes extensive use of the GSL as well as BLAS.
#' If an optimized BLAS library is provided when compiling the package, the
#' \pkg{flowClust} package will be able to run multi-threaded processes.
#' 
#' Various operations have been defined for the object returned from
#' \code{\link{flowClust}}.  
#' 
#' In addition, to facilitate the integration with the \pkg{flowCore} package
#' for processing flow cytometry data, the \code{flowClust} operation can be
#' done through a method pair (\code{\link{tmixFilter}} and
#' \code{\link[=tmixFilter]{filter}}) such that various methods defined in
#' \pkg{flowCore} can be applied on the object created from the filtering
#' operation.
#' 
#' @name fowClust
#' 
#' @param x A numeric vector, matrix, data frame of observations, or object of
#' class \code{flowFrame}.  Rows correspond to observations and columns
#' correspond to variables.
#' @param expName A character string giving the name of the experiment.
#' @param varNames A character vector specifying the variables (columns) to be
#' included in clustering.  When it is left unspecified, all the variables will
#' be used.
#' @param K An integer vector indicating the numbers of clusters.
#' @param nu The degrees of freedom used for the \eqn{t} distribution.  Default
#' is 4.  If \code{nu=Inf}, Gaussian distribution will be used.
#' @param lambda The initial transformation to be applied to the data.
#' @param trans A numeric indicating whether the Box-Cox transformation
#' parameter is estimated from the data.  May take 0 (no estimation), 1
#' (estimation, default) or 2 (cluster-specific estimation).
#' @param min.count An integer specifying the threshold count for filtering
#' data points from below.  The default is 10, meaning that if 10 or more data
#' points are smaller than or equal to \code{min}, they will be excluded from
#' the analysis.  If \code{min} is \code{NULL}, then the minimum of data as per
#' each variable will be used.  To suppress filtering, set it as -1.
#' @param max.count An integer specifying the threshold count for filtering
#' data points from above.  Interpretation is similar to that of
#' \code{min.count}.
#' @param min The lower boundary set for data filtering.  Note that it is a
#' vector of length equal to the number of variables (columns), implying that a
#' different value can be set as per each variable.
#' @param max The upper boundary set for data filtering.  Interpretation is
#' similar to that of \code{min}.
#' @param randomStart A numeric value indicating how many times a random
#' parition of the data is generated for initialization.  The default is 0,
#' meaning that a deterministic partition based on kmeans clustering is used. A
#' value of 10 means random partitions of the data will be generated, each of
#' which is followed by a short EM run.  The partition leading to the highest
#' likelihood value will be adopted to be the initial partition for the
#' eventual long EM run.
#' @param prior The specification of the prior. Used if usePrior="yes"
#' @param usePrior Argument specifying whether or not the prior will be used.
#' Can be "yes","no","vague". A vague prior will be automatically specified if
#' usePrior="vague"
#' @param criterion A character string stating the criterion used to choose the
#' best model.  May take either \code{"BIC"} or \code{"ICL"}.  This argument is
#' only relevant when \code{length(K)>1}. Default is "BIC".
#' @param ... other arguments: B: The maximum number of EM iterations.Default
#' is 500.
#' 
#' tol: The tolerance used to assess the convergence of the EM. default is
#' 1e-5.
#' 
#' nu.est: A numeric indicating whether \code{nu} is to be estimated or not.
#' May take 0 (no estimation, default), 1 (estimation) or 2 (cluster-specific
#' estimation). Default is 0.
#' 
#' level: A numeric value between 0 and 1 specifying the threshold quantile
#' level used to call a point an outlier.  The default is 0.9, meaning that any
#' point outside the 90\% quantile region will be called an outlier.
#' 
#' u.cutoff: Another criterion used to identify outliers.  If this is
#' \code{NULL}, which is default, then \code{level} will be used.  Otherwise,
#' this specifies the threshold (e.g., 0.5) for \eqn{u}, a quantity used to
#' measure the degree of \dQuote{outlyingness} based on the Mahalanobis
#' distance.  Please refer to Lo et al. (2008) for more details.
#' 
#' z.cutoff: A numeric value between 0 and 1 underlying a criterion which may
#' be used together with \code{level}/\code{u.cutoff} to identify outliers.  A
#' point with the probability of assignment \eqn{z} (i.e., the posterior
#' probability that a data point belongs to the cluster assigned) smaller than
#' \code{z.cutoff} will be called an outlier.  The default is 0, meaning that
#' assignment will be made no matter how small the associated probability is,
#' and outliers will be identified solely based on the rule set by \code{level}
#' or \code{cutoff}.
#' 
#' B.init: The maximum number of EM iterations following each random partition
#' in random initialization. Default is the same as B.
#' 
#' tol.init: The tolerance used as the stopping criterion for the short EM runs
#' in random initialization. Default is 1e-2.
#' 
#' seed: An integer giving the seed number used when
#' \code{randomStart>0}.Default is 1.
#' 
#' control: An argument reserved for internal use.
#' @return If \code{K} is of length 1, the function returns an object of class
#' \code{flowClust} containing the following slots, where \eqn{K} is the number
#' of clusters, \eqn{N} is the number of observations and \eqn{P} is the number
#' of variables: \item{expName}{Content of the \code{expName} argument.}
#' \item{varNames}{Content of the \code{varNames} argument if provided;
#' generated if available otherwise.} \item{K}{An integer showing the number of
#' clusters.} \item{w}{A vector of length \eqn{K}, containing the estimates of
#' the \eqn{K} cluster proportions.} \item{mu}{A matrix of size \eqn{K \times
#' P}{K x P}, containing the estimates of the \eqn{K} mean vectors.}
#' \item{sigma}{An array of dimension \eqn{K \times P \times P}{K x P x P},
#' containing the estimates of the \eqn{K} covariance matrices.}
#' \item{lambda}{The Box-Cox transformation parameter estimate.} \item{nu}{The
#' degrees of freedom for the \eqn{t} distribution.} \item{z}{A matrix of size
#' \eqn{N \times K}{N x K}, containing the posterior probabilities of cluster
#' memberships.  The probabilities in each row sum up to one.} \item{u}{A
#' matrix of size \eqn{N \times K}{N x K}, containing the \dQuote{weights} (the
#' contribution for computing cluster mean and covariance matrix) of each data
#' point in each cluster.  Since this quantity decreases monotonically with the
#' Mahalanobis distance, it can also be interpreted as the level of
#' \dQuote{outlyingness} of a data point.  Note that, when \code{nu=Inf}, this
#' slot is used to store the Mahalanobis distances instead.} \item{label}{A
#' vector of size \eqn{N}, showing the cluster membership according to the
#' initial partition (i.e., hierarchical clustering if \code{randomStart=0} or
#' random partitioning if \code{randomStart>0}).  Filtered observations will be
#' labelled as \code{NA}.  Unassigned observations (which may occur since only
#' 1500 observations at maximum are taken for hierarchical clustering) will be
#' labelled as 0.} \item{uncertainty}{A vector of size \eqn{N}, containing the
#' uncertainty about the cluster assignment.  Uncertainty is defined as 1 minus
#' the posterior probability that a data point belongs to the cluster to which
#' it is assigned.} \item{ruleOutliers}{A numeric vector of size 3, storing the
#' rule used to call outliers.  The first element is 0 if the criterion is set
#' by the \code{level} argument, or 1 if it is set by \code{u.cutoff}.  The
#' second element copies the content of either the \code{level} or
#' \code{u.cutoff} argument.  The third element copies the content of the
#' \code{z.cutoff} argument.  For instance, if points are called outliers when
#' they lie outside the 90\% quantile region or have assignment probabilities
#' less than 0.5, then \code{ruleOutliers} is \code{c(0, 0.9, 0.5)}.  If points
#' are called outliers only if their \dQuote{weights} in the assigned clusters
#' are less than 0.5 regardless of the assignment probabilities, then
#' \code{ruleOutliers} becomes \code{c(1, 0.5, 0)}.} \item{flagOutliers}{A
#' logical vector of size \eqn{N}, showing whether each data point is called an
#' outlier or not based on the rule defined by \code{level}/\code{u.cutoff} and
#' \code{z.cutoff}.} \item{rm.min}{Number of points filtered from below.}
#' \item{rm.max}{Number of points filtered from above.} \item{logLike}{The
#' log-likelihood of the fitted mixture model.} \item{BIC}{The Bayesian
#' Information Criterion for the fitted mixture model.} \item{ICL}{The
#' Integrated Completed Likelihood for the fitted mixture model.} If \code{K}
#' has a length >1, the function returns an object of class
#' \code{flowClustList}.  Its data part is a list with the same length as
#' \code{K}, each element of which is a \code{flowClust} object corresponding
#' to a specific number of clusters.  In addition, the resultant
#' \code{flowClustList} object contains the following slots:\cr
#' 
#' \code{index} An integer giving the index of the list element corresponding
#' to the best model as selected by \code{criterion}.\cr \code{criterion} The
#' criterion used to choose the best model -- either \code{"BIC"} or
#' \code{"ICL"}.\cr
#' 
#' Note that when a \code{flowClustList} object is used in place of a
#' \code{flowClust} object, in most cases the list element corresponding to the
#' best model will be extracted and passed to the method/function call.
#' @author Raphael Gottardo <\email{raph@@stat.ubc.ca}>, Kenneth Lo
#' <\email{c.lo@@stat.ubc.ca}>
#' @seealso \code{\link[=summary.flowClust]{summary}},
#' \code{\link[=plot,flowClust-method]{plot}},
#' \code{\link[=density.flowClust]{density}},
#' \code{\link[=hist.flowClust]{hist}}, \code{\link{Subset}},
#' \code{\link{split}}, \code{\link{ruleOutliers}}, \code{\link{Map}},
#' \code{\link{SimulateMixture}}
#' @references Lo, K., Brinkman, R. R. and Gottardo, R. (2008) Automated Gating
#' of Flow Cytometry Data via Robust Model-based Clustering. \emph{Cytometry A}
#' \bold{73}, 321-332.
#' @keywords cluster models
#' @examples
#' 
#' library(flowCore)
#' data(rituximab)
#' 
#' ### cluster the data using FSC.H and SSC.H
#' res1 <- flowClust(rituximab, varNames=c("FSC.H", "SSC.H"), K=1)
#' 
#' ### remove outliers before proceeding to the second stage
#' # %in% operator returns a logical vector indicating whether each
#' # of the observations lies within the cluster boundary or not
#' rituximab2 <- rituximab[rituximab %in% res1,]
#' # a shorthand for the above line
#' rituximab2 <- rituximab[res1,]
#' # this can also be done using the Subset method
#' rituximab2 <- Subset(rituximab, res1)
#' 
#' ### cluster the data using FL1.H and FL3.H (with 3 clusters)
#' res2 <- flowClust(rituximab2, varNames=c("FL1.H", "FL3.H"), K=3)
#' show(res2)
#' summary(res2)
#' 
#' # to demonstrate the use of the split method
#' split(rituximab2, res2)
#' split(rituximab2, res2, population=list(sc1=c(1,2), sc2=3))
#' 
#' # to show the cluster assignment of observations
#' table(Map(res2))
#' 
#' # to show the cluster centres (i.e., the mean parameter estimates
#' # transformed back to the original scale)
#' getEstimates(res2)$locations
#' 
#' ### demonstrate the use of various plotting methods
#' # a scatterplot
#' plot(res2, data=rituximab2, level=0.8)
#' plot(res2, data=rituximab2, level=0.8, include=c(1,2), grayscale=TRUE,
#'     pch.outliers=2)
#' # a contour / image plot
#' res2.den <- density(res2, data=rituximab2)
#' plot(res2.den)
#' plot(res2.den, scale="sqrt", drawlabels=FALSE)
#' plot(res2.den, type="image", nlevels=100)
#' plot(density(res2, include=c(1,2), from=c(0,0), to=c(400,600)))
#' # a histogram (1-D density) plot
#' hist(res2, data=rituximab2, subset="FL1.H")
#' 
#' ### to demonstrate the use of the ruleOutliers method
#' summary(res2)
#' # change the rule to call outliers
#' ruleOutliers(res2) <- list(level=0.95)
#' # augmented cluster boundaries lead to fewer outliers
#' summary(res2)
#' 
#' # the following line illustrates how to select a subset of data 
#' # to perform cluster analysis through the min and max arguments;
#' # also note the use of level to specify a rule to call outliers
#' # other than the default
#' flowClust(rituximab2, varNames=c("FL1.H", "FL3.H"), K=3, B=100, 
#'     min=c(0,0), max=c(400,800), level=0.95, z.cutoff=0.5)
#' @rdname flowClust
#' @import methods graph flowCore
#' @importFrom parallel mclapply
#' @useDynLib flowClust
#' @export 
flowClust<-function(x, expName="Flow Experiment", varNames=NULL, K
                    , nu=4, lambda=1,trans=1, min.count=10, max.count=10, min=NULL, max=NULL
                    ,  randomStart=0, prior=NULL,usePrior="no", criterion="BIC", ...)
{
	if (is(x, "flowFrame")) {
		if (length(varNames)==0) {
			y <- exprs(x)
			varNames <- colnames(y)
		}
		else {
			y <- as.matrix(exprs(x)[, varNames,drop=FALSE])
		}
	}
	else if (is(x, "matrix")) {
		if (length(varNames)==0) {
			y <- x
			if (length(colnames(x))==0)
				varNames <- "Not Available"
			else varNames <- colnames(x)
		}
		else {
			y <- as.matrix(x[, varNames,drop=FALSE])
		}
	}
	else if (is(x, "data.frame")) {
		if (length(varNames)==0) {
			y <- as.matrix(x)
			varNames <- colnames(x)
		}
		else {
			y <- as.matrix(x[, varNames,drop=FALSE])
		}
	}
	else if (is(x, "vector")) {
		y <- matrix(x)
		if (length(varNames)==0)
			varNames <- "Not Available"
	}
	else {
		stop(paste("Object ", as.character(x), " is not of class flowFrame / matrix / data frame!"))
	}

	# finding filtered observations
	rm.max <- rm.min <- rep(FALSE, nrow(y))
	if (max.count > -1) {
		if (is.null(max)[1])
			max <- apply(y, 2, max)
		for (k in 1:ncol(y))  if (sum(y[,k]>=max[k]) >= max.count)
				rm.max <- rm.max | (y[,k] >= max[k])
	}
	if (min.count > -1) {
		if (is.null(min)[1])
			min <- apply(y, 2, min)
		for (k in 1:ncol(y))  if (sum(y[,k]<=min[k]) >= min.count)
				rm.min <- rm.min | (y[,k] <= min[k])
	}
	if(min.count <= -1 && max.count <= -1)
	  include <- NULL
	else
	  include <- !rm.max & !rm.min

	usePrior=match.arg(as.character(usePrior),c("yes","no"))
	if(usePrior=="yes"){
		if(is.null(prior)){
			stop("You must specify a prior with usePrior=\"yes\"");
		}
		if (length(K)>1)
		{
			stop("You can only use a prior if the number of cluster is fixed!")
		}
		if(randomStart){
			message("randomStart>0 has no effect when using a prior. Labels are initialized from the prior.");
		}
		if(trans==2)
		{
			stop("You are using a prior with cluster specific transformations.\n  This is not recommended.")
		}
                if(any(is.infinite(c(nu,prior$nu0)))){
                    stop("If usePrior='yes', nu or nu0 may not be Inf");
                }
                if(lambda!=1&trans==1){
                    warning("Use of a prior with transformation estimation and lambda!=1 requires the prior means to be on the transformed scale.")
                }
		# TODO Add tests for validity of w0. Set a default. Same for oorder.
		if(!is.null(prior)&length(K)==1){
			#Check that the prior dimensions match the model being fit.
			mp<-ncol(prior$Mu0);
			mk<-nrow(prior$Mu0);
			ld<-dim(prior$Lambda0)
			lomega<-dim(prior$Omega0);
			#message(lomega);
			lnu0<-length(prior$nu0);
			py<-ncol(y);
			warn.o<-options("warn")
			options(warn=-1);
			if(any(is.null(prior$w0)))stop("w0 prior should be defined")
			if(length(prior$w0)!=K|(length(ld)!=3)|((lnu0!=1)&(lnu0!=mk))|(mp!=py)|(mk!=K)|(ld[1]!=K)|(ld[2]!=py)|(ld[3]!=py)|((any(lomega!=c(K,py,py)))&(any(lomega!=c(py,py))))){
				stop("Prior dimensions must match the number of clusters and the number of dimensions being analyzed.")
			}
			if(lnu0==1){
				#Extend nu0 to be a vector of length k. We allow cluster specific weight for covariance matrices.
				prior$nu0<-rep(prior$nu0,mk);
			}
			if(length(prior$w0)==1){
					#Extend w0 to be a vector of length k.
					prior$w0<-rep(prior$w0,mk);
			}
			options(warn=warn.o$warn)
		}
	}
	if (usePrior=="no")
	{
		if(!is.null(prior)){
			message("The prior specification has no effect when usePrior=",usePrior);
		}
		prior<-list(NA);
	}
	mc.cores <- getOption("mc.cores", 2L)
	if(mc.cores < 2||length(K) == 1)
	{
		message("Using the serial version of flowClust")
		# C version
		result<-lapply(as.list(1:length(K)),.flowClustK, y, expName=expName, varNames=varNames, K=K, criterion=criterion
        , nu=nu, lambda=lambda, trans=trans, min.count=min.count, max.count=max.count, min=min, max=max
        , randomStart=randomStart, include=include, rm.max, rm.min, prior,usePrior
        , ...)
	}
	else{
      
		# Split into nClust segReadsList
      # We solely rely on getOption("mc.cores",2L) to determine parallel cores.
      # and don't want to pass mc.cores explicitly because on windows, mclapply does not take mc.cores>1 
		result<-mclapply(as.list(1:length(K)),.flowClustK, y, expName=expName, varNames=varNames, K=K, criterion=criterion
        , nu=nu, lambda=lambda, trans=trans, min.count=min.count, max.count=max.count, min=min, max=max
        , randomStart=randomStart, include=include, rm.max, rm.min, prior,usePrior, mc.preschedule=FALSE
        , ...)
	}
	  
	# Simply return a flowClust object
	if (length(K)==1)
	{
		result[[1]]
	}
	# Create a list flowClustList
	else
	{
		result <- new("flowClustList", result, criterion=criterion)
		result@index <- which.max(criterion(result, criterion))
		result
	}
}

.flowClustK<-function(i, y, expName="Flow Experiment", varNames=NULL, K
                        , nu, lambda, trans, min.count, max.count, min, max, randomStart, include, rm.max,rm.min, prior,usePrior, criterion # default values set in flowClust API
                        , nu.est=0, B=500, tol=1e-5, level=0.9, u.cutoff=NULL, z.cutoff=0, B.init=B, tol.init=1e-2, seed=1, control=NULL
                        , nstart = 100 # passed to kmeans, only has effect on more than 1d clustering
                    )
{
	oorder<-1:K[i]
	.model<-1; #Tells the C code whether to run ECM with non-conjugate priors, or classic flowClust.'
	match.arg(usePrior,c("yes","no"));
	switch(usePrior,
			yes=priorFlag<-1,
			no=priorFlag<-0)
	if(!is.null(include))
	  y <- as.matrix(y[include,,drop=FALSE])
	ly <- nrow(y)
	py <- ncol(y)
	if (min(y)<=0 && lambda<=0)
		stop("lambda must be positive when data contain zero / negative values!")
	else if(usePrior=="yes")
	{
    #If we have a prior lambda.. use it to override the specified lambda.
    if(exists("lambda",prior)){
      if(!is.null(prior$lambda)){
        if(prior$lambda!=0){
          lambda<-prior$lambda
        }
      }
    }
		Mu0<-prior$Mu0
		Lambda0<-prior$Lambda0
		if(length(dim(prior$Omega0))==2){
			Omega0<-array(prior$Omega0,c(py,py,K[i]))
		}else{
			Omega0<-aperm(prior$Omega0,c(2:3,1));
		}

		for(j in 1:K[i]){
			if(!all(as.vector(Omega0[,,j])==0)){
				#message(j)
				#message(Omega0[,,j])
				Omega0[,,j] <- tryCatch(qr.solve(Omega0[,,j]), error = function(e)solve(Omega0[,,j])) 
			}else{
				#message(j);
				#message(Omega0[,,j])
				Omega0[,,j]<-Omega0[,,j];
			}
		}
		nu0<-prior$nu0
		w0<-prior$w0;
		kappa0<-0.0
		Lambda0<-aperm(Lambda0,c(2:3,1))
		initprec<-array(0,c(py,py,K[i]));
		#cat("initprec dim: ",dim(initprec),"\n")
		#cat("Lambda0 dim:",dim(Lambda0),"\n");
		if(!all(as.vector(Lambda0==0))){
			for(j in 1:dim(Lambda0)[3]){
				#cat("Lambda0 value:",(Lambda0[,,j]),"\n")
				#cat("initprec value:",initprec[,,j],"\n")
				cholStatus <- try(u <- chol(Lambda0[,,j]),silent=TRUE);
				cholError <- inherits(cholStatus,"try-error")
				newMat <- Lambda0[,,j]
				#cat("initprec class:",class(initprec[,,]))
				#cat("solve initprec value:",initprec[,,j],"\n")
				iter<-0
				while(cholError){
					##Should do something here to catch the error.
					iter <- iter + 1
					cat("iteration ", iter, "\n")
					
					# replace -ve eigen values with small +ve number
					newEig <- eigen(newMat)
					newEig2 <- ifelse(newEig$values < 0, 0.1, newEig$values)
					
					# create modified matrix eqn 5 from Brissette et al 2007, inv = transp for
					# eig vectors
					newMat <- newEig$vectors %*% diag(newEig2) %*% t(newEig$vectors)
					
					# normalize modified matrix eqn 6 from Brissette et al 2007
					newMat <- newMat/sqrt(diag(newMat) %*% t(diag(newMat)))
					
					# try chol again
					cholStatus <- try(u <- chol(newMat), silent = TRUE)
					cholError <- ifelse(class(cholStatus) == "try-error", TRUE, FALSE)
					if(iter>10){
						break;
					}					
				}
				if(cholError){
					initprec[,,j] <- qr.solve(Lambda0[,,j])
					Lambda0[,,j] <- qr.solve(initprec[,,j])
				}else{
					initprec[,,j] <- chol2inv(u)
					Lambda0[,,j] <- chol2inv(chol(initprec[,,j]))
				}
			}
		}else{
			initprec<-array(var(box(y,lambda))/(K[i]^(2/py)),c(py,py,K[i]));
		}
		priorFlag<-1
		.model<-2;
	}
	else
	{
		# Non informative prior
		Mu0<-matrix(rep(colMeans(y),K[i]),K[i],py,byrow=TRUE)
		nu0<- rep(-py-1,K[i]);
		#Don't need Omega0, we'll use the conjugate prior code.
		Omega0<-rep(0,py*py);
		kappa0<-0;
		initprec<-rep(0,K[i]*py*py);
		#Lambda0, the prior covariance
		Lambda0<-rep(0,K[i]*py*py)
		w0<-rep(0,K[i]);
		.model<-1;
	}

# to determine the rule of calling outliers
	if (nu != Inf) {
		if (is.null(u.cutoff)) {
			if (!nu.est) {
				cc <- py * qf(level, py, nu)
				u.cutoff <- (nu + py) / (nu + cc)
			}
			else {
				u.cutoff <- level     # pass level to .C call to compute u.cutoff
			}
			ruleOutliers <- c(0, level, z.cutoff)     # 0 means quantile
		}
		else {
			ruleOutliers <- c(1, u.cutoff, z.cutoff)     # 1 means cutoff
		}
	}
	else {
		if (level != 1)
			q.cutoff <- qchisq(level, py)
		else
			q.cutoff <- -1     # -1 means no outlier identification
		ruleOutliers <- c(0, level, z.cutoff)
	}


	if (is.null(control$B.lambda)) control$B.lambda <- B    # BSolve=100
	if (is.null(control$B.brent)) control$B.brent <- 10000    # iterSolveMax=50
	if (is.null(control$tol.brent)) control$tol.brent <- 1e-5    # DiffSolve=1e-3
	if (is.null(control$xLow)) control$xLow <- 0.1    # xLow=.1
	if (is.null(control$xUp)) control$xUp <- 10    # xUp=1
	if (is.null(control$nuLow)) control$nuLow <- 2    # nuLow=2
	if (is.null(control$nuUp)) control$nuUp <- 100    # nuUp=30
	if (is.null(control$seed)) control$seed <- TRUE

	ind <- 0
	if (K[i]==1)
	{
		label <- rep(1, ly)
	}
	else if (!randomStart) #kmeans initialization
	{
		if (py==1)
		{
			q <- quantile(y, seq(from=0, to=1, by=1/K[i]))
			label <- rep(0, ly)
			q[1] <- q[1]-1
			for (k in 1:K[i]) label[y>q[k] & y<=q[k+1]] <- k
		}else{
			#label<-try(kmeans(y,Mu0)$cluster,silent=TRUE)
			#if(inherits(label,"try-error"))
			label<-try(kmeans(scale(y),K[i],nstart=nstart,iter.max=100,algorithm="MacQueen")$cluster,silent=TRUE)
		}
	}
	if(priorFlag==0)
	{ # Initialization based on short EMs with random partitions if randomStart=TRUE
		if (control$seed) set.seed(seed)
		if (randomStart==1)
		{
			label <- sample(1:K[i], ly, replace=T)
		}
		else if(randomStart==0)
		{
			M<-0
			if(inherits(label,"try-error")){
				label <- sample(1:K[i], ly, replace=T)
			}
			ind<-0;
		}
		else
		{
			maxLabel <- vector("list",randomStart)
			maxLogLike <- rep(NA,randomStart)
			for (j in 1:randomStart)
			{
				label <- sample(1:K[i], ly, replace=TRUE)
				if (nu != Inf)
				{
					#cat(initprec,"\n");
					#ordering of the priors.. used to reorder the population names;
					obj <- try(.C("flowClust", as.double(t(y)), as.integer(ly),
									as.integer(py), as.integer(K[i]),
									w=rep(0,K[i]), mu=rep(0,K[i]*py),
									precision=initprec,
									lambda=as.double(rep(lambda, length.out=(if (trans>1) K[i] else 1))),
									nu=as.double(rep(nu,K[i])),
									z=rep(0,ly*K[i]), u=rep(0,ly*K[i]),
									as.integer(label), uncertainty=double(ly),
									as.double(rep(u.cutoff,K[i])), as.double(z.cutoff),
									flagOutliers=integer(ly), as.integer(B.init),
									as.double(tol.init), as.integer(trans),
									as.integer(nu.est), logLike=as.double(0),
									as.integer(control$B.lambda), as.integer(control$B.brent),
									as.double(control$tol.brent), as.double(control$xLow),
									as.double(control$xUp), as.double(control$nuLow),
									as.double(control$nuUp),
									mu0=as.double(t(Mu0)),
									as.double(kappa0),
									nu0=as.double(nu0),
									lambda0=as.double(Lambda0),
									omega0=as.double(Omega0),
									w0=as.double(w0),
									as.integer(.model),
									oorder=as.integer(oorder),
									PACKAGE="flowClust"))
									if (class(obj)=="try-error")
									{
										message("flowClust failed")
									}
				}
				else
				{
					obj <- try(.C("flowClustGaussian", as.double(t(y)), as.integer(ly),
									as.integer(py), as.integer(K[i]),
									w=rep(0,K[i]), mu=rep(0,K[i]*py),
									precision=initprec,
									lambda=as.double(rep(lambda, length.out=(if (trans>1) K[i] else 1))),
									z=rep(0, ly*K[i]), u=rep(0,ly*K[i]),
									as.integer(label), uncertainty=double(ly),
									as.double(q.cutoff), as.double(z.cutoff),
									flagOutliers=integer(ly), as.integer(B.init),
									as.double(tol.init), as.integer(trans),
									logLike=as.double(0),
									as.integer(control$B.lambda), as.integer(control$B.brent),
									as.double(control$tol.brent), as.double(control$xLow),
									as.double(control$xUp),
									as.double(t(Mu0)),
									as.double(kappa0),
									as.double(nu0),
									as.double(Lambda0),
									as.double(Omega0),
									as.integer(.model),
									PACKAGE="flowClust"))
				}
				if (class(obj)!="try-error")
				{
					maxLabel[[j]] <- label
					maxLogLike[j] <- obj$logLike
				}
			}
			ind <- order(maxLogLike, decreasing=T, na.last=NA)
		}
	}
	else
	{
		if(usePrior=="yes")
		{
			M<-0
                        #FIXME priors with transformation is currently borked
			#Assuming Mu0 is on transformed scale
			if(lambda!=1){
				ytmp<-box(y,lambda);
			}else{
				ytmp<-y;
			}
      # We use the prior densities to initialize the cluster labels to the
      # mixture component (cluster) that maximizes the prior probability.
      prob <- sapply(seq_len(K), function(k) {
        with(prior, w0[k] * dmvt(x = ytmp, mu = Mu0[k, ], sigma = Lambda0[k,,],
                                 nu = nu0[k], lambda = lambda)$value)
      })
      label <- apply(prob, 1, which.max)
		}
	}


# long EMs
	for (M in ind)
	{
		if (nu != Inf)
		{
			obj <- try(.C("flowClust", as.double(t(y)), as.integer(ly),
							as.integer(py), as.integer(K[i]),
							w=rep(0,K[i]), mu=rep(0,K[i]*py),
							precision=initprec,
							lambda=as.double(rep(lambda, length.out=(if (trans>1) K[i] else 1))),
							nu=as.double(rep(nu,K[i])),
							z=rep(0,ly*K[i]), u=rep(0,ly*K[i]),
							as.integer(if (M==0) label else maxLabel[[M]]), uncertainty=double(ly),
							as.double(rep(u.cutoff,K[i])), as.double(z.cutoff),
							flagOutliers=integer(ly), as.integer(B),
							as.double(tol), as.integer(trans),
							as.integer(nu.est), logLike=as.double(0),
							as.integer(control$B.lambda), as.integer(control$B.brent),
							as.double(control$tol.brent), as.double(control$xLow),
							as.double(control$xUp), as.double(control$nuLow),
							as.double(control$nuUp),
							mu0=as.double(t(Mu0)),
							as.double(kappa0),
							nu0=as.double(nu0),
							lambda0=as.double(Lambda0),
							omega0=as.double(Omega0),
							w0=as.double(w0),
							as.integer(.model),
							oorder=as.integer(oorder),
							PACKAGE="flowClust"))
							if (class(obj)=="try-error"){
								message("flowClust failed")
							}

		}
		else
		{
			obj <- try(.C("flowClustGaussian", as.double(t(y)), as.integer(ly),
							as.integer(py), as.integer(K[i]),
							w=rep(0,K[i]), mu=rep(0,K[i]*py),
							precision=initprec,
							lambda=as.double(rep(lambda, length.out=(if (trans>1) K[i] else 1))),
							z=rep(0,ly*K[i]), u=rep(0,ly*K[i]),
							as.integer(if (M==0) label else maxLabel[[M]]), uncertainty=double(ly),
							as.double(q.cutoff), as.double(z.cutoff),
							flagOutliers=integer(ly), as.integer(B),
							as.double(tol), as.integer(trans),
							logLike=as.double(0),
							as.integer(control$B.lambda), as.integer(control$B.brent),
							as.double(control$tol.brent), as.double(control$xLow),
							as.double(control$xUp),
							as.double(t(Mu0)),
							as.double(kappa0),
							as.double(nu0),
							as.double(Lambda0),
							as.double(Omega0),
							as.integer(.model),
							PACKAGE="flowClust"))
			if (class(obj)!="try-error"){

				obj$nu <- Inf
			}
		}
		if (class(obj)!="try-error")
			break
	}
	if (class(obj)=="try-error")
		stop(geterrmessage())

	if(usePrior=="vague"){
		trans<-1;
	}
# output obj$precision to sigma
	sigma <- array(0, c(K[i], py, py))
	precision <- matrix(obj$precision, K[i], py * py, byrow=TRUE)
	for (k in 1:K[i])
		sigma[k,,] <- matrix(precision[k,], py, py, byrow = TRUE)

# output BIC & ICL
	BIC <- 2*obj$logLike - log(ly) * (K[i]*(py+1)*py/2 + K[i]*py + K[i]-1 + (if (trans>1) K[i] else trans) + (if (nu.est>1) K[i] else abs(nu.est)))
	z <- matrix(obj$z, ly, K[i], byrow = TRUE)
	ICL <- BIC + 2 * sum(z*log(z), na.rm = TRUE)
  if(is.null(include)){
    # output z, u, label, uncertainty, flagOutliers
    z <- u <- matrix(NA, ly, K[i])
    z <- matrix(obj$z, ly, K[i], byrow=TRUE)
    u <- matrix(obj$u, ly, K[i], byrow=TRUE)
    #cat(M);
    tempLabel <- if (M==0) label else maxLabel[[M]]
    label <- uncertainty <- flagOutliers <- rep(NA, ly)
    label <- tempLabel
    uncertainty <- obj$uncertainty
    flagOutliers <- as.logical(obj$flagOutliers)
  }else{
    # output z, u, label, uncertainty, flagOutliers
    z <- u <- matrix(NA, length(include), K[i])
    z[include,] <- matrix(obj$z, ly, K[i], byrow=TRUE)
    u[include,] <- matrix(obj$u, ly, K[i], byrow=TRUE)
    #cat(M);
    tempLabel <- if (M==0) label else maxLabel[[M]]
    label <- uncertainty <- flagOutliers <- rep(NA, length(include))
    label[include] <- tempLabel
    uncertainty[include] <- obj$uncertainty
    flagOutliers[include] <- as.logical(obj$flagOutliers)
    
  }

# output reordered prior
	prior$Mu0<-matrix({if(all(!is.null(obj$mu0))){obj$mu0}else{NA}},K[i],py,byrow=TRUE);
	prior$Lambda0<-aperm(array({if(all(!is.null(obj$lambda0))){obj$lambda0}else{NA}},c(py,py,K[i])),c(3,1:2))
	prior$Omega0<-aperm(array({if(all(!is.null(obj$omega0))){obj$omega0}else{NA}},c(py,py,K[i])),c(3,1:2))
	prior$nu0<-{if(all(!is.null(obj$nu0))){obj$nu0}else{NA}}
	prior$w0<-{if(all(!is.null(obj$w0))){obj$nu0}else{NA}}
#omit
#result<- new("flowClust", expName=expName, varNames=varNames, K=K[i],
#		w=obj$w, mu=matrix(obj$mu, K[i], py, byrow=TRUE), sigma=sigma,
#		lambda=(if (trans>0) obj$lambda else numeric(0)), nu=(if (nu.est>1) obj$nu else obj$nu[1]), z=z,
#		u=u, label=label, uncertainty=uncertainty,
#		ruleOutliers=ruleOutliers, flagOutliers=flagOutliers, rm.min=sum(rm.min),
#		rm.max=sum(rm.max), logLike=obj$logLike, BIC=BIC, ICL=ICL);
class(prior)<-"list";
prior$order<-obj$oorder;
	if(trans==1&obj$lambda==1){
		obj$mu<-rbox(obj$mu,obj$lambda)
	}
	#do nothing in particular if trans>1
        #Not sure if the above does the right thing when trans=1, and the returned lambda=1, and usePrior="no"
	result<- new("flowClust", expName=expName, varNames=varNames, K=K[i],
			w=obj$w, mu=matrix(obj$mu, K[i], py, byrow=TRUE), sigma=sigma,
			lambda= obj$lambda, nu=(if (nu.est>1) obj$nu else obj$nu[1]), z=z,
			u=u, label=label, uncertainty=uncertainty,
			ruleOutliers=ruleOutliers, flagOutliers=flagOutliers, rm.min=sum(rm.min),
			rm.max=sum(rm.max), logLike=obj$logLike, BIC=BIC, ICL=ICL,prior=prior);
	# if(!any(is.na(prior))&usePrior=="yes"&ruleOutliers[1]==0){
		# label<-.fcbMap(result,ruleOutliers[2])
		# result@flagOutliers<-label==0
		# label[result@flagOutliers]<-NA;
		# result@label<-label;
	# }
	result
}