Nothing
######
## VT::17.12.2018
##
##
## roxygen2::roxygenise("C:/users/valen/onedrive/myrepo/R/fsdaR", load_code=roxygen2:::load_installed)
##
#' Performs cluster analysis by calling \code{\link{tclustfsda}} for different
#' number of groups \code{k} and restriction factors \code{c}
#'
#' @description Computes the values of BIC (MIXMIX), ICL (MIXCLA) or CLA (CLACLA),
#' for different values of \code{k} (number of groups) and different values of \code{c}
#' (restriction factor), for a prespecified level of trimming (the last two letters in the name
#' stand for 'Information Criterion'). In order to minimize
#' randomness, given \code{k}, the same subsets are used for each value of \code{c}.
#'
#' @param x An n x p data matrix (n observations and p variables).
#' Rows of x represent observations, and columns represent variables.
#'
#' Missing values (NA's) and infinite values (Inf's) are allowed,
#' since observations (rows) with missing or infinite values will
#' automatically be excluded from the computations.
#'
#' @param kk an integer vector specifying the number of mixture components (clusters) for which the BIC is to be calculated. By default \code{kk=1:5}.
#' @param cc an vector specifying the values of the restriction factor which have to be considered. By default \code{cc=c(1, 2, 4, 8, 16, 32, 64, 128)}.
#' @param whichIC A character value which specifies which information criteria must be computed
#' for each \code{k} (number of groups) and each value of the restriction factor \code{c}. Possible values for \code{whichIC} are:
#' \itemize{
#' \item "MIXMIX": a mixture model is fitted and for computing the information criterion
#' the mixture likelihood is used. This option corresponds to the use of the Bayesian
#' Information criterion (BIC). In output just the matrix \code{MIXMIX} is given.
#' \item "MIXCLA": a mixture model is fitted but to compute the information criterion
#' the classification likelihood is used. This option corresponds to the use of the
#' Integrated Complete Likelihood (ICL). In the output just the matrix \code{MIXCLA} is given.
#' \item "CLACLA": everything is based on the classification likelihood. This information
#' criterion will be called CLA. In the output just the matrix \code{CLACLA} is given.
#' \item "ALL": both classification and mixture likelihood are used. In this case all
#' three information criteria CLA, ICL and BIC are computed. In the output all
#' three matrices \code{MIXMIX}, \code{MIXCLA} and \code{CLACLA} are given.
#' }
#'
#' @param alpha Global trimming level. A scalar between 0 and 0.5 or an integer specifying the number of
#' observations which have to be trimmed. If \code{alpha=0} all observations are considered. By default \code{alpha=0}.
#'
#' More in detail, if \code{0 < alpha < 1} clustering is based on \code{h = fix(n * (1-alpha))}
#' observations, else if alpha is an integer greater than 1 clustering is based on \code{h = n - floor(alpha)}.
#'
#' @param plot If \code{plot=TRUE}, a plot of the BIC (MIXMIX), ICL (MIXCLA) curve
#' and CLACLA is shown on the screen. The plots which are shown depend on
#' the input option \code{whichIC}.
# If \code{plot=FALSE} (default) or \code{plot=0} no plot is produced.
#' @param nsamp If a scalar, it contains the number of subsamples which will be extracted.
#' If \code{nsamp = 0} all subsets will be extracted. Remark - if the number of all possible
#' subset is greater than 300 the default is to extract all subsets, otherwise just 300.
#' If \code{nsamp} is a matrix it contains in the rows the indexes of the subsets which
#' have to be extracted. \code{nsamp} in this case can be conveniently generated by
#' function \code{subsets()}. \code{nsamp} can have \code{k} columns or \code{k * (p + 1)}
#' columns. If \code{nsamp} has \code{k} columns the \code{k} initial centroids each
#' iteration i are given by \code{X[nsamp[i,] ,]} and the covariance matrices are equal
#' to the identity.
#'
#' If \code{nsamp} has \code{k * (p + 1)} columns, the initial centroids and covariance
#' matrices in iteration \code{i} are computed as follows:
#' \itemize{
#' \item X1 <- X[nsamp[i ,] ,]
#' \item mean(X1[1:p + 1, ]) contains the initial centroid for group 1
#' \item cov(X1[1:p + 1, ]) contains the initial cov matrix for group 1
#' \item mean(X1[(p + 2):(2*p + 2), ]) contains the initial centroid for group 2
#' \item cov(X1[(p + 2):(2*p + 2), ]) contains the initial cov matrix for group 2
#' \item ...
#' \item mean(X1[(k-1)*p+1):(k*(p+1), ]) contains the initial centroids for group k
#' \item cov(X1[(k-1)*p+1):(k*(p+1), ]) contains the initial cov matrix for group k.
#' }
#'
#' REMARK: If \code{nsamp} is not a scalar, the option \code{startv1} given below is ignored.
#' More precisely, if \code{nsamp} has \code{k} columns \code{startv1 = 0} else if
#' \code{nsamp} has \code{k*(p+1)} columns option \code{startv1=1}.
#'
#' @param refsteps Number of refining iterations in each subsample. Default is \code{refsteps=15}.
#' \code{refsteps = 0} means "raw-subsampling" without iterations.
#'
#' @param reftol Tolerance of the refining steps. The default value is 1e-14
#'
#' @param equalweights A logical specifying wheather cluster weights in the concentration
#' and assignment steps shall be considered. If \code{equalweights=TRUE} we are (ideally)
#' assuming equally sized groups, else if \code{equalweights = false} (default) we allow for
#' different group weights. Please, check in the given references which functions
#' are maximized in both cases.
#'
#' @param msg Controls whether to display or not messages on the screen If \code{msg==TRUE} (default)
#' messages are displayed on the screen. If \code{msg=2}, detailed messages are displayed,
#' for example the information at iteration level.
#' @param nocheck Check input arguments. If \code{nocheck=TRUE} no check is performed
#' on matrix \code{X}. The default \code{nocheck=FALSE}.
#'
#' @param startv1 How to initialize centroids and covariance matrices. Scalar.
#' If \code{startv1=1} then initial centroids and covariance matrices are based
#' on \code{(p+1)} observations randomly chosen, else each centroid is initialized
#' taking a random row of input data matrix and covariance matrices are initialized
#' with identity matrices. The default value is\code{startv1=1}.
#'
#' Remark 1: in order to start with a routine which is in the required parameter space,
#' eigenvalue restrictions are immediately applied.
#'
#' Remark 2 - option \code{startv1} is used just if \code{nsamp} is a scalar
#' (see for more details the help associated with \code{nsamp}).
#'
#' @param restrtype Type of restriction to be applied on the cluster scatter matrices.
#' Valid values are \code{'eigen'} (default), or \code{'deter'}.
#' \code{"eigen"} implies restriction on the eigenvalues while \code{"deter"}
#' implies restriction on the determinants.
#'
#' @param UnitsSameGroup List of the units which must (whenever possible) have
#' a particular label. For example \code{UnitsSameGroup=c(20, 26)}, means that
#' group which contains unit 20 is always labelled with number 1. Similarly,
#' the group which contains unit 26 is always labelled with number 2, (unless
#' it is found that unit 26 already belongs to group 1).
#' In general, group which contains unit \code{UnitsSameGroup(r)} where \code{r=2, ...length(kk)-1}
#' is labelled with number \code{r} (unless it is found that unit \code{UnitsSameGroup(r)}
#' has already been assigned to groups \code{1, 2, ..., r-1}.
#'
#' @param numpool The number of parallel sessions to open. If numpool is not defined,
#' then it is set equal to the number of physical cores in the computer.
#'
#' @param cleanpool Logical, indicating if the open pool must be closed or not.
#' It is useful to leave it open if there are subsequent parallel sessions to execute,
#' so that to save the time required to open a new pool.
#'
#' @param trace Whether to print intermediate results. Default is \code{trace=FALSE}.
#'
#' @param ... potential further arguments passed to lower level functions.
#' @return An S3 object of class \code{\link{tclustic.object}}
#' @references
#' Cerioli, A., Garcia-Escudero, L.A., Mayo-Iscar, A. and Riani M. (2017).
#' Finding the Number of Groups in Model-Based Clustering via Constrained Likelihoods,
#' \emph{Journal of Computational and Graphical Statistics}, pp. 404-416,
#' https://doi.org/10.1080/10618600.2017.1390469.
#'
#'
#' @seealso \code{\link{tclustfsda}}, \code{\link{tclustICplot}}, \code{\link{tclustICsol}}, \code{\link{carbikeplot}}
#' @examples
#' \dontrun{
#' data(geyser2)
#' (out <- tclustIC(geyser2, whichIC="MIXMIX", plot=FALSE, alpha=0.1))
#' summary(out)
#' }
#'
#' \dontrun{
#' data(flea)
#' Y <- as.matrix(flea[, 1:(ncol(flea)-1)]) # select only the numeric variables
#' rownames(Y) <- 1:nrow(Y)
#' head(Y)
#'
#' (out <- tclustIC(Y, whichIC="CLACLA", plot=FALSE, alpha=0.1, nsamp=100, numpool=1))
#' summary(out)
#' }
#'
#' @export
#' @author FSDA team, \email{valentin.todorov@@chello.at}
tclustIC <- function(x, kk=1:5, cc=c(1, 2, 4, 8, 16, 32, 64, 128), alpha=0,
whichIC=c("ALL", "MIXMIX", "MIXCLA", "CLACLA"),
nsamp, refsteps=15, reftol=1e-14, equalweights=FALSE, msg=TRUE, nocheck=FALSE,
plot=FALSE,
startv1=1, restrtype= c("eigen", "deter"),
UnitsSameGroup, numpool, cleanpool,
trace=FALSE, ...)
{
whichIC <- match.arg(whichIC)
if(is.data.frame(x))
x <- data.matrix(x)
else if(!is.matrix(x))
x <- matrix(x, length(x), 1,
dimnames = list(names(x), deparse(substitute(x))))
if(!is.numeric(x)) stop("x is not a numeric")
dx <- dim(x)
xn <- (dnx <- dimnames(x))[[2]]
xn <- if (!is.null(xn))
xn
else if (dx[2] > 1)
paste("X", 1:dx[2], sep = "")
else if(dx[2])
"X"
dimnames(x) <- list(dnx[[1]], xn)
n <- nrow(x)
p <- ncol(x)
outclass <- "tclustic"
control <- list()
xplots <- 0
if(is.logical(plot))
xplots <- ifelse(plot, 1, 0)
else if(is.numeric(plot) && plot >= 0 && plot <= 1)
xplots <- plot
else
stop("Invalid parameter 'plot'. Should be TRUE/FALSE or 0, 1.")
control$plots <- xplots
control$kk <- kk
control$cc <- cc
control$alpha <- alpha
control$whichIC <- whichIC
if(!missing(nsamp))
control$nsamp <- nsamp
control$refsteps <- refsteps
control$reftol <- reftol
control$equalweights <- equalweights
if(!missing(UnitsSameGroup))
control$UnitsSameGroup <- UnitsSameGroup
if(!missing(numpool))
control$numpool <- numpool
if(!missing(cleanpool))
control$cleanpool <- ifelse(cleanpool, 1, 0)
xmsg <- 0
if(is.logical(msg))
xmsg <- ifelse(msg, 1, 0)
else if(is.numeric(msg) && msg >= 0 && msg <= 2)
xmsg <- msg
else
stop("Invalid parameter 'msg'. Should be TRUE/FALSE or 0, 1, 2.")
control$msg <- xmsg
xtemp <- 0
if(is.logical(nocheck))
xtemp <- ifelse(nocheck, 1, 0)
else if(is.numeric(nocheck) && nocheck >= 0 && nocheck <= 1)
xtemp <- nocheck
else
stop("Invalid parameter 'nocheck'. Should be TRUE/FALSE or 0, 1.")
control$nocheck <- xtemp
control$startv1 <- startv1
control$restrtype <- match.arg(restrtype)
## ES 27.06.2018: parameters that are mandatory to the MATLAB function
## cannot be put into the MATLAB function because they have to be supplied
## to the function individually and not in (name, value) pairs.
## Mandatory parameters to the MATLAB FSDA function
parlist = c(.jarray(x, dispatch=TRUE))
if(trace)
print(control)
paramNames = names(control)
if(length(paramNames) > 0)
{
for (i in 1:length(paramNames)) {
paramName = paramNames[i]
paramValue = control[[i]]
matlabValue = rType2MatlabType(paramName, paramValue)
parlist = c(parlist, .jnew("java/lang/String", paramName), matlabValue)
}
}
out <- callFsdaFunction("tclustIC", "[Ljava/lang/Object;", 1, parlist)
if(is.null(out))
return(NULL)
arr1 = .jcast(out[[1]], "com/mathworks/toolbox/javabuilder/MWStructArray")
arr = .jnew("org/jrc/ipsc/globesec/sitaf/fsda/FsdaMWStructArray", arr1)
if(trace)
{
cat("\nReturning from MATLAB tclustIC(). Fields returned by MATLAB: \n")
print(arr$fieldNames())
}
kk_ret <- as.vector(as.matrix(.jevalArray(arr$get("kk", as.integer(1)), "[[D", simplify = TRUE)))
cc_ret <- as.vector(as.matrix(.jevalArray(arr$get("cc", as.integer(1)), "[[D", simplify = TRUE)))
alpha_ret = as.vector(as.matrix(.jevalArray(arr$get("alpha", as.integer(1)), "[[D", simplify = TRUE)))[1]
CLACLA <- if(as.integer(arr$hasField("CLACLA", as.integer(1))) != 1) NULL
else as.matrix(.jevalArray(arr$get("CLACLA", as.integer(1)), "[[D", simplify = TRUE))
IDXCLA <- if(as.integer(arr$hasField("IDXCLA", as.integer(1))) != 1) NULL
else unwrapComplexNumericCellArray(as.matrix(.jevalArray(arr$get("IDXCLA", as.integer(1)))))
MIXMIX <- if(as.integer(arr$hasField("MIXMIX", as.integer(1))) != 1) NULL
else as.matrix(.jevalArray(arr$get("MIXMIX", as.integer(1)), "[[D", simplify = TRUE))
MIXCLA <- if(as.integer(arr$hasField("MIXCLA", as.integer(1))) != 1) NULL
else as.matrix(.jevalArray(arr$get("MIXCLA", as.integer(1)), "[[D", simplify = TRUE))
IDXMIX <- if(as.integer(arr$hasField("IDXMIX", as.integer(1))) != 1) NULL
else unwrapComplexNumericCellArray(as.matrix(.jevalArray(arr$get("IDXMIX", as.integer(1)))))
xkk <- paste0("k=", kk_ret)
xcc <- paste0("c=", cc_ret)
if(!is.null(MIXMIX))
dimnames(MIXMIX) <- list(xkk, xcc)
if(!is.null(MIXCLA))
dimnames(MIXCLA) <- list(xkk, xcc)
if(!is.null(CLACLA))
dimnames(CLACLA) <- list(xkk, xcc)
if(!is.null(IDXMIX))
dimnames(IDXMIX) <- list(xkk, xcc)
if(!is.null(IDXCLA))
dimnames(IDXCLA) <- list(xkk, xcc)
Y <- if(as.integer(arr$hasField("Y", as.integer(1))) != 1) NULL
else as.matrix(.jevalArray(arr$get("Y", as.integer(1)), "[[D", simplify = TRUE))
ans = list(call=match.call(), CLACLA=CLACLA, IDXCLA=IDXCLA, MIXMIX=MIXMIX, MIXCLA=MIXCLA, IDXMIX=IDXMIX,
kk=kk_ret, cc=cc_ret, alpha=alpha_ret, whichIC=whichIC, X=Y)
freeMatlabResources(out)
## Remove any NULL elements (this happens if whichIC != ALL)
if(length(del <- which(unlist(lapply(ans, FUN=is.null)))) > 0)
ans <- ans[-del]
if(trace)
{
cat("\ntclustIC(): object 'out' after removing the NULL objects:")
print(names(ans))
}
class(ans) <- outclass
return (ans)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.