R/ClusterDiagGaussian.R

Defines functions clusterDiagGaussian .dGauss

Documented in clusterDiagGaussian

#-----------------------------------------------------------------------
#     Copyright (C) 2012-2016  Serge Iovleff, University Lille 1, Inria
#
#    This program is free software; you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as
#    published by the Free Software Foundation; either version 2 of the
#    License, or (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public
#    License along with this program; if not, write to the
#    Free Software Foundation, Inc.,
#    59 Temple Place,
#    Suite 330,
#    Boston, MA 02111-1307
#    USA
#
#    Contact : S..._Dot_I..._At_stkpp_Dot_org (see copyright for ...)
#
#-----------------------------------------------------------------------
#' @include global.R ClusterModelNames.R IClusterModel.R
NULL

#-----------------------------------------------------------------------
#' Create an instance of the [\code{\linkS4class{ClusterDiagGaussian}}] class
#'
#' This function computes the optimal diagonal Gaussian mixture model according
#' to the \code{criterion} among the list of model given in \code{models}
#' and the number of clusters given in \code{nbCluster}, using the strategy
#' specified in \code{strategy}.
#'
#' @param data frame or matrix containing the data. Rows correspond to observations
#' and columns correspond to variables. If the data set contains NA values, they
#' will be estimated during the estimation process.
#' @param nbCluster  [\code{\link{vector}}] listing the number of clusters to test.
#' @param models [\code{\link{vector}}] of model names to run. By default all diagonal
#' Gaussian models are estimated. All the model names are given by the method
#' [\code{\link{clusterDiagGaussianNames}}].
#' @param strategy a [\code{\linkS4class{ClusterStrategy}}] object containing
#' the strategy to run. [\code{\link{clusterStrategy}}]() method by default.
#' @param criterion character defining the criterion to select the best model.
#' The best model is the one with the lowest criterion value.
#' Possible values: "BIC", "AIC", "ICL", "ML". Default is "ICL".
#' @param nbCore integer defining the number of processors to use (default is 1, 0 for all).
#'
#' @examples
#' ## A quantitative example with the famous geyser data set
#' data(geyser)
#' ## add 10 missing values as random
#' x = as.matrix(geyser); n <- nrow(x); p <- ncol(x);
#' indexes <- matrix(c(round(runif(5,1,n)), round(runif(5,1,p))), ncol=2);
#' x[indexes] <- NA;
#' ## estimate model (using fast strategy, results may be misleading)
#' model <- clusterDiagGaussian( data=x, nbCluster=2:3
#'                             , models=c( "gaussian_pk_sjk")
#'                             , strategy = clusterFastStrategy()
#'                             )
#'
#' ## use graphics functions
#' \dontrun{
#' plot(model)
#' }
#'
#' ## get summary
#' summary(model)
#' ## print model
#' \dontrun{
#' print(model)
#' }
#' ## get estimated missing values
#' missingValues(model)
#'
#' @return An instance of the [\code{\linkS4class{ClusterDiagGaussian}}] class.
#' @author Serge Iovleff
#'
#'
clusterDiagGaussian <- function( data, nbCluster=2
                               , models=clusterDiagGaussianNames()
                               , strategy=clusterStrategy()
                               , criterion="ICL"
                               , nbCore = 1)
{
  # check nbCluster
  nbClusterModel = length(nbCluster);
  nbClusterMin = min(nbCluster);
  nbClusterMax = max(nbCluster);
  if (nbClusterMin < 1) { stop("The number of clusters must be greater or equal to 1")}

  # check criterion
  if(sum(criterion %in% c("BIC","AIC", "ICL", "ML")) != 1)
  { stop("criterion is not valid. See ?clusterDiagGaussian for the list of valid criterion")}

  # check data
  data = as.matrix(data)
  if (nrow(data) <= 3*nbClusterMax) {stop("There is not enough individuals (rows) in the data set")}
  if (ncol(data) < 1) {stop("Error: empty data set")}

  # check models
  if (!clusterValidDiagGaussianNames(models))
  { stop("models is not valid. See ?clusterDiagGaussianNames for the list of valid model names")}

  # check strategy
  if(class(strategy)[1] != "ClusterStrategy")
  {stop("strategy is not a Cluster Strategy class (must be an instance of the class ClusterStrategy).")}
  validObject(strategy);

  # Create model
  model = new("ClusterDiagGaussian", data)
  model@strategy = strategy;
  model@criterionName = criterion

  # start estimation of the models
  resFlag = .Call("clusterMixture", model, nbCluster, models, nbCore, PACKAGE="MixAll");
  # set names
  if (resFlag != TRUE ) {cat("WARNING: An error occur during the clustering process");}
  colnames(model@component@mean)  <- colnames(model@component@data);
  colnames(model@component@sigma) <- colnames(model@component@data);
  model
}

#-----------------------------------------------------------------------
#' Definition of the [\code{\linkS4class{ClusterDiagGaussianComponent}}] class
#'
#' This class defines a diagonal Gaussian component of a mixture model. It inherits
#' from [\code{\linkS4class{IClusterComponent}}].
#'
#' @slot mean  Matrix with the mean of the jth variable in the kth cluster.
#' @slot sigma  Matrix with the standard deviation of the jth variable in the kth cluster.
#'
#' @seealso [\code{\linkS4class{IClusterComponent}}] class
#'
#' @examples
#' getSlots("ClusterDiagGaussianComponent")
#'
#' @author Serge Iovleff
#'
#' @name ClusterDiagGaussianComponent
#' @rdname ClusterDiagGaussianComponent-class
#' @aliases ClusterDiagGaussianComponent-class
#'
setClass(
  Class = "ClusterDiagGaussianComponent",
  representation( mean = "matrix", sigma = "matrix"),
  contains=c("IClusterComponent"),
  validity=function(object)
  {
    if (ncol(object@mean)!=ncol(object@data))
    {stop("mean must have nbVariable columns.")}
    if (ncol(object@sigma)!=ncol(object@data))
    {stop("sigma must have nbVariable columns.")}
    if (!clusterValidDiagGaussianNames(object@modelName))
    {stop("Invalid Gaussian model name.")}
    return(TRUE)
  }
)
#' Initialize an instance of a MixAll S4 class.
#'
#' Initialization method of the [\code{\linkS4class{ClusterDiagGaussianComponent}}] class.
#' Used internally in the 'MixAll' package.
#'
#' @rdname initialize-methods
#' @keywords internal
setMethod(
    f="initialize",
    signature=c("ClusterDiagGaussianComponent"),
    definition=function(.Object, data=matrix(nrow=0, ncol=0), nbCluster=2, modelName="gaussian_pk_sjk")
    {
      # for data
      if(missing(data)) {stop("data is mandatory in ClusterDiagGaussianComponent.")}
      # check model name
      if (!clusterValidDiagGaussianNames(modelName)) { stop("modelName is invalid");}
      # call base class initialize
      .Object <- callNextMethod(.Object, data, modelName)
      # create slots
      nbVariable = ncol(.Object@data);
      .Object@mean  = matrix(0., nrow=nbCluster, ncol=nbVariable);
      .Object@sigma = matrix(1., nrow=nbCluster, ncol=nbVariable);
      # validate
      validObject(.Object)
      return(.Object)
    }
)

#' @rdname extract-methods
#' @aliases [,ClusterDiagGaussianComponent-method
setMethod(
    f="[",
    signature(x = "ClusterDiagGaussianComponent"),
    definition=function(x, i, j, drop)
    {
      if ( missing(j) )
      {
        switch(EXPR=i,
            "mean" ={return(x@mean)},
            "sigma"={return(x@sigma)},
            stop("This attribute doesn't exist !")
        )
      }
      else
      {
        if (!is.numeric(j)) {stop("j must be an integer.")}
        if (round(j)!=j)    {stop("j must be an integer.")}
        switch(EXPR=i,
            "mean" ={return(x@mean[j,])},
            "sigma"={return(x@sigma[j,])},
            stop("This attribute doesn't exist !")
        )
      }
    }
)

#' @rdname print-methods
#' @aliases print print,ClusterDiagGaussianComponent-method
#'
setMethod(
  signature=c("ClusterDiagGaussianComponent"),
  f="print",
  function(x,k,...)
  {
    if(missing(k))
    {
      callNextMethod()
    }
    else
    {
      cat("* Means      = ", format(x@mean[k,]), "\n")
      cat("* S.D.       = ", format(x@sigma[k,]), "\n")
    }
  }
)

#' @rdname show-methods
#' @aliases show-ClusterDiagGaussianComponent,ClusterDiagGaussianComponent,ClusterDiagGaussianComponent-method
setMethod(
  f="show",
  signature=c("ClusterDiagGaussianComponent"),
  function(object)
  {
    callNextMethod()
  }
)

#-----------------------------------------------------------------------
#' Definition of the [\code{\linkS4class{ClusterDiagGaussian}}] class
#'
#' This class defines a diagonal Gaussian mixture Model.
#'
#' This class inherits from the [\code{\linkS4class{IClusterModel}}] class.
#' A diagonal gaussian model is a mixture model of the form:
#' \deqn{
#'   f({x}|\boldsymbol{\theta})
#'   =\sum_{k=1}^K p_k \prod_{j=1}^d \phi(x_j;\mu_{jk},\sigma^2_{jk})
#'    \quad x \in {R}^d.
#' }
#' Some constraints can be added to the variances in order to reduce the number
#' of parameters.
#'
#' @slot component  A [\code{\linkS4class{ClusterDiagGaussianComponent}}] with the
#' mean and standard deviation of the diagonal mixture model.
#' @seealso [\code{\linkS4class{IClusterModel}}] class
#'
#' @examples
#' getSlots("ClusterDiagGaussian")
#' data(geyser)
#' new("ClusterDiagGaussian", data=geyser)
#'
#' @author Serge Iovleff
#'
#' @name ClusterDiagGaussian
#' @rdname ClusterDiagGaussian-class
#' @aliases ClusterDiagGaussian-class
#'
setClass(
  Class = "ClusterDiagGaussian",
  representation( component = "ClusterDiagGaussianComponent"),
  contains=c("IClusterModel"),
  validity=function(object)
  {
    if (nrow(object@component@mean)!=object@nbCluster)
    {stop("mean must have nbCluster rows.")}
    if (ncol(object@component@mean)!=ncol(object@component@data))
    {stop("mean must have nbVariable columns.")}
    if (nrow(object@component@sigma)!=object@nbCluster)
    {stop("sigma must have nbCluster rows.")}
    if (ncol(object@component@sigma)!=ncol(object@component@data))
    {stop("sigma must have nbVariable columns.")}
    if (!clusterValidDiagGaussianNames(object@component@modelName))
    {stop("Invalid Gaussian model name.")}
    return(TRUE)
  }
)

#' Initialize an instance of a MixAll S4 class.
#'
#' Initialization method of the [\code{\linkS4class{ClusterDiagGaussian}}] class.
#' Used internally in the 'MixAll' package.
#'
#' @rdname initialize-methods
#' @keywords internal
setMethod(
    f="initialize",
    signature=c("ClusterDiagGaussian"),
    definition=function(.Object, data=matrix(nrow=0, ncol=0), nbCluster=2, modelName="gaussian_pk_sjk")
    {
      # for data
      if(missing(data)) {stop("data is mandatory in ClusterDiagGaussian.")}
      # initialize component
      .Object@component = new("ClusterDiagGaussianComponent", data, nbCluster, modelName);
      .Object <- callNextMethod(.Object, nrow(.Object@component@data), nbCluster);
      # validate
      validObject(.Object)
      return(.Object)
    }
)

#' @rdname print-methods
#' @aliases print print,ClusterDiagGaussian-method
#'
setMethod(
  f="print",
  signature=c("ClusterDiagGaussian"),
  function(x,...){
    cat("****************************************\n")
    print(x@component)
    callNextMethod();
    cat("****************************************\n")
    for(k in 1:length(x@pk))
    {
      cat("*** Cluster: ",k,"\n")
      cat("* Proportion = ", format(x@pk[k]), "\n")
      print(x@component, k);
      cat("****************************************\n")
    }
  }
)

#' @rdname show-methods
#' @aliases show-ClusterDiagGaussian,ClusterDiagGaussian,ClusterDiagGaussian-method
setMethod(
  f="show",
  signature=c("ClusterDiagGaussian"),
  function(object)
  {
    cat("****************************************\n")
    show(object@component)
    callNextMethod();
    cat("****************************************\n")
    for(k in 1:length(object@pk))
    {
      cat("*** Cluster: ",k,"\n")
      cat("* Proportion = ", format(object@pk[k]), "\n")
      print(object@component, k);
      cat("****************************************\n")
    }
  }
)

#' @rdname summary-methods
#' @aliases summary summary,ClusterDiagGaussian-method
#'
setMethod(
  f="summary",
  signature=c("ClusterDiagGaussian"),
  function(object, ...)
  {
    cat("****************************************\n")
    summary(object@component)
    callNextMethod()
    cat("****************************************\n")
  }
)

#' Plotting of a class [\code{\linkS4class{ClusterDiagGaussian}}]
#'
#' Plotting data from a [\code{\linkS4class{ClusterDiagGaussian}}] object
#' using the estimated parameters and partition.
#'
#' @param x an object of class [\code{\linkS4class{ClusterDiagGaussian}}]
#' @param y a list of variables to plot (subset). Variables names or indices.
#' If missing all the variables are represented.
#' @param ... further arguments passed to or from other methods
#'
#' @aliases plot-ClusterDiagGaussian
#' @docType methods
#' @rdname plot-ClusterDiagGaussian-method
#'
#'
#' @seealso \code{\link{plot}}
#' @examples
#'   ## the famous iris data set
#' \dontrun{
#'   data(iris)
#'   model <- clusterDiagGaussian(iris[1:4], 3, strategy = clusterFastStrategy())
#'   plot(model)
#'   plot(model, c(1,3))
#'   plot(model, c("Sepal.Length","Sepal.Width"))
#'   }
#'
setMethod(
    f="plot",
    signature=c("ClusterDiagGaussian"),
    function(x, y, ...)
    { # use generic plot
      .clusterPlot(x, y, .dGauss,...);
    }
)

# wrapper of dnorm
# x a vector with the point
.dGauss <- function(x, j, k, model)
{ dnorm(x, (model@component@mean)[k, j] , (model@component@sigma)[k, j])}

Try the MixAll package in your browser

Any scripts or data that you put into this service are public.

MixAll documentation built on Sept. 12, 2019, 5:05 p.m.