# R/ModalEM.R


#
# Modal EM algorithm for Gaussian Mixtures
#
# Author:  Luca Scrucca
# Version: 15 June 2020
#

GaussianMixtureMEM <- function(data, pro, mu, sigma,
                               control = list(eps = 1e-5, 
                                              maxiter = 1e3, 
                                              stepsize = function(t) 1-exp(-0.1*t),
                                              denoise = TRUE,
                                              alpha = 0.01,
                                              keep.path = FALSE),
                               ...)
{
# Modal EM for Gaussian Mixtures
# data    = (n x d) data matrix
# pro     = (G x 1) vector of mixing proportions
# mu      = (d x G) matrix of component means
# sigma   = (d x d x G) array of component covariance matrices
# control = a list of control parameters

  data <- as.matrix(data)
  n <- nrow(data)
  d <- ncol(data)
  ctrl <- eval(formals(GaussianMixtureMEM)$control)
  ctrl[names(control)] <- control[names(control)]
  
  varnames <- colnames(data)
  pro   <- as.vector(pro)
  pro   <- pro/sum(pro)
  G     <- length(pro)
  mu    <- array(mu, dim = c(d, G))
  sigma <- array(sigma, dim = c(d,d,G))
  stopifnot(nrow(mu) == d & all(dim(sigma)[1:2] == d))
  stopifnot(ncol(mu) == G & dim(sigma)[3] == G)
  
  par <- list(pro = pro, mean = mu,
              variance = mclustVariance(if(d > 1) "VVV" else "V", 
                                        d = d, G = G))
  if(d > 1)
  { 
    invsigma <- cholsigma <- array(as.double(NA), dim(sigma))
    for(k in 1:G)
    {
      cholsigma[,,k] <- chol(sigma[,,k])
      invsigma[,,k]  <- chol2inv(cholsigma[,,k])
    }
    par$variance$cholsigma <- cholsigma
    par$variance$sigma     <- sigma
  } else
  {
    invsigma <- 1/sigma
    par$variance$sigmasq <- drop(sigma)
    par$variance[c("cholsigma", "sigma")] <- NULL
  }
  
  Estep <- function(x) 
  {
    z <- cdens(modelName = if(d > 1) "VVV" else "V",
               data = x,
               parameters = par, logarithm = TRUE)
    # z <- cbind(z) # drop redundant attributes
    # z <- sweep(z, MARGIN = 2, FUN = "+", STATS = log(pro))
    # z <- sweep(z, MARGIN = 1, FUN = "-", 
    #            STATS = apply(z, 1, mclust:::logsumexp))
    # z <- exp(z)
    z <- softmax(z, log(pro))
    return(z)
  }
  
  Mstep <- function(z) 
  {
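    # One fixed-point Modal EM step: for each observation i, solve
    #   ( sum_k z[i,k]*invsigma[,,k] ) %*% xnew[i,] = sum_k z[i,k]*invsigma[,,k] %*% mu[,k]
    # The n (d x d) blocks are stacked row-wise, so a single loop over k
    # accumulates both sides of the system for all observations at once.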
    num <- matrix(0.0, nrow = n*d, ncol = 1)
    den <- matrix(0.0, nrow = n*d, ncol = d)
    for(k in 1:G)
    {
      invsigmak <- invsigma[,,k]
      if(d == 1) invsigmak <- matrix(invsigmak, nrow = d, ncol = d)      
      num <- num + z[,k] %x% invsigmak %*% mu[,k]
      den <- den + z[,k] %x% invsigmak
    }
    xnew <- matrix(0.0, nrow = n, ncol = d)
    # xdet <- rep(as.double(NA), n)
    for(i in 1:n) 
    {
      ii <- seq((i-1)*d+1, i*d)
      # xdet[i]  <- det(den[ii,])
      xnew[i,] <- solve(den[ii,], num[ii,])
    }
    return(xnew)
    # return(list(xnew = xnew, xdet = xdet))
  }
  
  # EM maximisation algorithm ----
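  # Each observation is moved towards a mode of the mixture density by damped
  # fixed-point updates, x <- (1-stepsize)*x + stepsize*xnew, with a step size
  # that grows towards 1 over the iterations (default 1-exp(-0.1*iter)).
  # Iterations stop when every coordinate changes by less than eps in relative
  # terms, or when maxiter is reached.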
  x <- data
  xpath <- if(ctrl$keep.path) list(x) else NULL
  x0 <- x+1
  iter <- 0

  while( any(abs(x-x0) > ctrl$eps*(1+abs(x0))) &
         iter < ctrl$maxiter )
  {
    iter <- iter + 1
    x0 <- x
    # E-step
    z <- Estep(x)
    # M-step
    xnew <- Mstep(z)
    stepsize <- ctrl$stepsize(iter)
    x <- (1-stepsize)*x + stepsize*xnew
    # mstep <- Mstep(z)
    # stepsize <- ctrl$stepsize(mstep$xdet/max(mstep$xdet))
    # x <- (1-stepsize)*x + stepsize*mstep$xnew
    #
    if(ctrl$keep.path)
      xpath <- append(xpath, list(x))
  }
  
  # Denoising ----
  # If denoise = TRUE, discard all modes whose density is negligible and run
  # additional Modal EM steps to reassign the observations from those modes.
  logvol <- NA
  if(ctrl$denoise & d > 1)
  {
    converge <- FALSE
    margpar  <- gmm2margParams(pro, mu, sigma)
    logvol   <- hypvolGaussian(margpar$variance, 
                               alpha = ctrl$alpha, 
                               logarithm = TRUE)
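    # An observation is flagged as noise when the mixture log-density at its
    # current position falls below -logvol, i.e. below the density of a
    # uniform distribution over the central (1-alpha)100% region of the
    # marginal Gaussian approximation of the mixture.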
    inc <- rep(TRUE, length = n)
    while(!converge)
    {
      logdens <- dens(modelName = if(d > 1) "VVV" else "V",
                      data = x[inc,,drop=FALSE],
                      parameters = par, logarithm = TRUE)
      inc[inc] <- (logdens > -logvol)
      if(all(logdens > -logvol)) converge <- TRUE
    }
    noise <- (!inc)
    if(sum(noise) > 0)
    {
      x[noise,] <- data[noise,,drop=FALSE]
      cl <- map(z)
      tab <- tabulate(cl,G)
      rmv <- which(tab == 0)
      rmv <- unique(c(rmv, cl[noise]))
      pro <- pro[-rmv]; pro <- pro/sum(pro)
      G <- length(pro)
      mu <- mu[,-rmv,drop=FALSE]
      sigma <- sigma[,,-rmv,drop=FALSE]
      invsigma <- invsigma[,,-rmv,drop=FALSE]
      par$pro <- pro
      par$mean <- mu
      par$variance$G <- G
      par$variance$sigma <- sigma
      par$variance$cholsigma <- array(as.double(NA), dim(sigma))
      for(k in 1:G)
       par$variance$cholsigma[,,k] <- chol(sigma[,,k])
      # re-run ModalEM 
      x0 <- x+1
      while(any(abs(x-x0) > ctrl$eps*(1+abs(x0))) &
            iter < ctrl$maxiter )
      {
        iter <- iter + 1
        x0 <- x
        # E-step
        z <- Estep(x)
        # M-step
        x <- Mstep(z)
      }
    }  
  }

  # Connected components of tight-cluster modes ----
  # Merge numerically-close limit points into modes and assign observations to
  # the corresponding clusters. The merging tolerance bw is a bandwidth-type
  # quantity; alternative choices that were considered:
  #   max(apply(x, 2, bw.nrd))
  #   apply(x, 2, function(x) diff(range(x))/nclass.Sturges(x))
  #   apply(x, 2, function(x) diff(range(x))/nclass.scott(x))
  #   bw.nrd(dist(x))^(1/d)
  #   3.729*sd(dist(x))*n^(-1/3)
  #   geometric mean of Scott's oversmoothed bin width (2009, p. 78):
  #     exp(mean(log(apply(data, 2, function(x) 3.729*sd(x)*n^(-1/3)))))
  # The value used below is a more robust, IQR-based version of the last one.
  bw <- exp(mean(log(apply(data, 2, function(x) 2.603*IQR(x)*n^(-1/3)))))
  concomp <- connectedComponents(x, eps = bw)
  colnames(concomp$components) <- varnames
  cl <- concomp$clusters

  if(ctrl$keep.path) 
  { # create a list of paths for each obs
    path <- vector(mode = "list", length = n)
    for(i in seq(n))
      path[[i]] <- t(sapply(xpath,"[",i,) )
  } else path <- xpath
  
  out <- list(n = n, d = d, 
              parameters = par, 
              iter = iter,
              nmodes = nrow(concomp$components),
              modes = concomp$components,
              path = path,
              logdens = dens(modelName = if(d > 1) "VVV" else "V",
                             data = concomp$components, 
                             parameters = par, logarithm = TRUE),
              logvol = logvol,
              classification = cl)
  return(out)
}
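
# Example of direct use (a minimal sketch, not run; assumes the mclust package
# is attached and uses the base R 'faithful' data):
#   mod <- Mclust(faithful, G = 3, modelNames = "VVV")
#   mem <- GaussianMixtureMEM(faithful,
#                             pro = mod$parameters$pro,
#                             mu = mod$parameters$mean,
#                             sigma = mod$parameters$variance$sigma,
#                             control = list(keep.path = TRUE))
#   mem$nmodes                  # number of modes of the fitted density
#   table(mem$classification)   # modal clustering of the observations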


MclustMEM <- function(mclustObject, data = NULL, ...)
{
# Modal EM for Gaussian Mixtures fitted via Mclust() or densityMclust()

  stopifnot(inherits(mclustObject, "Mclust") | 
            inherits(mclustObject, "densityMclust"))
  if(is.null(data)) 
    data <- mclustObject$data
  if(is.null(colnames(data)) & (mclustObject$d == 1))
    colnames(data) <- deparse(mclustObject$call$data)
    
  pro   <- mclustObject$parameters$pro[seq(mclustObject$G)]
  mu    <- mclustObject$parameters$mean
  sigma <- mclustObject$parameters$variance$sigma
  
  obj <- GaussianMixtureMEM(data, pro = pro, mu = mu, sigma = sigma, ...)
  obj <- append(obj, list(call = match.call()), after = 0)
  obj <- append(obj, list(data = data), after = 1)
  obj <- append(obj, list(modelName = mclustObject$modelName, 
                          G = mclustObject$G), after = 5)
  class(obj) <- "MclustMEM"
  return(obj)
}
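
# Typical workflow (a sketch, not run; requires mclust):
#   dens <- densityMclust(faithful)
#   mem  <- MclustMEM(dens)
#   summary(mem)
#   plot(mem)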

print.MclustMEM <- function(x, digits = getOption("digits"), ...)
{
  if(!is.null(cl <- x$call))
  { 
    cat("Call:\n")
    dput(cl, control = NULL)
  }
  cat("\n'MclustMEM' object containing:","\n")
  print(names(x)[-1])
  invisible()
}


summary.MclustMEM <- function(object, ...)
{
  out <- list(title = "Modal EM for GMMs",
              n = object$n, d = object$d,
              model = paste(object$modelName, object$G, sep = ","),
              iter = object$iter, logvol = object$logvol,
              modes = object$modes,
              tabClassification = table(factor(object$classification)))
  class(out) <- "summary.MclustMEM"
  return(out)
}

print.summary.MclustMEM <- function(x, digits = getOption("digits"), ...)
{
  
  if(!requireNamespace("cli", quietly = TRUE) ||
     !requireNamespace("crayon", quietly = TRUE))
  {    
    cat(paste0("-- ", x$title, " "))
    cat(rep("-", 40 - nchar(x$title) - 4), "\n", sep = "")
  } else 
  {
    cat(cli::rule(left = crayon::bold(x$title), 
                  width = min(getOption("width"),40)), "\n")
  }
  #
  cat("\n")
  cat(paste("Data dimensions =", x$n, "x", x$d, "\n"))
  cat(paste("Mclust model    =", x$model, "\n"))
  cat(paste("MEM iterations  =", x$iter, "\n"))
  cat(paste("Number of modes =", nrow(x$modes), "\n"))
  # if(!is.na(x$logvol))
  #   cat(paste("Denoise logvol  =", signif(x$logvol, digits = digits), "\n"))
  cat("\nModes:\n")
  print(x$modes, digits = digits)
  cat("\nModal clustering:")
  print(x$tabClassification)
  #
  invisible()  
}

plot.MclustMEM <- function(x, dimens = NULL, 
                           addDensity = TRUE, addPoints = TRUE, 
                           symbols = NULL, colors = NULL, cex = NULL, 
                           labels = NULL, cex.labels = NULL, 
                           gap = 0.2, ...) 
{
  
  object <- x  # use 'object' internally
  if(!inherits(object, "MclustMEM")) 
    stop("object not of class 'MclustMEM'")
  
  data <- object$data
  d <- object$d
  dimens <- if(is.null(dimens)) seq(d) else dimens[dimens <= d]
  data <- data[,dimens,drop=FALSE]
  d <- ncol(data)
  if(is.null(labels))
    labels <- dimnames(data)[[2]]
  classification <- object$classification
  if(!is.factor(classification)) 
    classification <- as.factor(classification)
  l <- length(levels(classification))
  if(is.null(symbols)) 
  { 
    if(l == 1) 
      symbols <- "."
    if(l <= length(mclust.options("classPlotSymbols")))
      symbols <- mclust.options("classPlotSymbols")
    else if(l <= 9) 
      symbols <- as.character(1:9)
    else if(l <= 26) 
      symbols <- LETTERS[1:l]
    else 
      symbols <- rep(16, l)
  }
  if(length(symbols) == 1) symbols <- rep(symbols, l)
  if(length(symbols) < l) 
  { 
    symbols <- rep(16, l)
    warning("more symbols needed")
  }
  if(is.null(colors)) 
  { 
    if(l <= length(mclust.options("classPlotColors"))) 
      colors <- mclust.options("classPlotColors")[1:l]
  }
  if(length(colors) == 1) colors <- rep(colors, l)
  if(length(colors) < l) 
  { 
    colors <- rep("black", l)
    warning("more colors needed")
  }
  if(is.null(cex)) 
    cex <- rep(1, l)
  if(is.null(cex.labels)) 
    cex.labels <- if(d > 2) 1.5 else 1
  
  oldpar <- par(no.readonly = TRUE)
  on.exit(par(oldpar))
  
  if(d == 1)
  { 
    pars <- object$parameters
    pars$mean <- pars$mean[dimens,,drop=FALSE]
    if(pars$variance$d > 1)
    {
      pars$variance$sigma <- pars$variance$sigma[dimens,dimens,,drop=FALSE]
      pars$variance$d <- length(dimens)
    }
    xrange <- extendrange(data[,dimens,drop=FALSE], f = 0.1)
    eval.points <- seq(from = xrange[1], to = xrange[2], length = 1000)
    f <- dens(data = eval.points,
              modelName = ifelse(length(pars$variance$sigma) > 1, "V", "E"),
              parameters = pars)
    h <- hist(data[,dimens,drop=FALSE], breaks = "Sturges", plot = FALSE)
    plot(h, freq = FALSE, col = "lightgrey", border = "white", main = "",
         xlim = range(h$breaks, eval.points),
         ylim =  range(0, h$density, exp(object$logdens)),
         xlab = labels[1])
    box()    
    if(addDensity)
    {
      lines(eval.points, f)
    }
    if(addPoints)
    { 
      for(k in 1:pars$variance$G)
        rug(data[classification==k,dimens], 
            side = 1, col = colors[k])
    }
    points(object$modes[, dimens], exp(object$logdens),
           col = "black", pch = 3, lwd = 2, cex = cex*1.2)
  }
  if(d == 2) 
  { 
    pars <- object$parameters
    pars$mean <- pars$mean[dimens,,drop=FALSE]
    pars$variance$d <- length(dimens)
    pars$variance$sigma <- pars$variance$sigma[dimens,dimens,,drop=FALSE]
    plot(data, type = "n", 
         xlab = labels[1],
         ylab = labels[2],
         cex.lab = cex.labels)
    if(addPoints)
      points(data, cex = cex[object$classification], 
             pch = symbols[classification], 
             col = colors[classification])
    if(addDensity)
      surfacePlot(data = data, parameters = pars,
                  what = "density", add = TRUE, 
                  col = "grey30", drawlabels = FALSE)
    points(object$modes[, dimens, drop = FALSE], pch = 3, lwd = 2, cex = cex*1.2)
  }
  if(d > 2)
  { 
    par(mfrow = c(d, d), 
        mar = rep(0.2/2,4), 
        oma = rep(3,4))
    for(i in seq(d))
    {
      for(j in seq(d))
      {
        if(i == j)
        {
          plot(data[, c(j, i)],
               type = "n", xlab = "", ylab = "", axes = FALSE)
          text(mean(par("usr")[1:2]), mean(par("usr")[3:4]),
               labels = colnames(data)[i],
               cex = 1.5, adj = 0.5)
          box()
        } else 
        { 
          pars <- object$parameters
          pars$mean <- pars$mean[dimens[c(i,j)],,drop=FALSE]
          pars$variance$d <- 2
          pars$variance$sigma <- pars$variance$sigma[dimens[c(i,j)],dimens[c(i,j)],,drop=FALSE]
          pars$variance$cholsigma <- pars$variance$sigma
          for(k in seq(pars$variance$G))
             pars$variance$cholsigma[,,k] <- chol(pars$variance$sigma[,,k])
          plot(data[,c(i,j)], type = "n", 
               xlab = labels[i], ylab = labels[j], 
               xaxt = "n", yaxt = "n")
          if(addPoints)
            points(data[,c(i,j)], cex = cex[object$classification], 
                   pch = symbols[classification], 
                   col = colors[classification])
          if(addDensity)
            surfacePlot(data = data[,c(i,j)], parameters = pars,
                        what = "density", add = TRUE,
                        col = "grey30", drawlabels = FALSE)
          points(object$modes[, dimens[c(i,j)]], pch = 3, lwd = 2, cex = cex*1.2)
        }
        if(i == 1 && (!(j%%2))) axis(3)
        if(i == d && (j%%2))    axis(1)
        if(j == 1 && (!(i%%2))) axis(2)
        if(j == d && (i%%2))    axis(4)
      }
    }
  }
  invisible()
}


connectedComponents <- function(data, eps = 1e-3)
{
# Efficient connected-components algorithm for "tight clusters"
# 
# Carreira-Perpiñán M. Á. (2016) Clustering Methods Based on Kernel Density 
#   Estimators: Mean-Shift Algorithms. In Hennig et al (eds) Handbook of 
#   Cluster Analysis
  
  data <- as.matrix(data)
	n <- nrow(data)
	# initialize components matrix
	C <- data
	# initialize components vector
	clusters <- vector(mode="integer", length=n)
	# euclidean distance function
	distance <- function(x, y) sqrt(sum((x-y)^2))
	# start
	K <- 1 
	clusters[1] <- 1
	C[1,] <- data[1,,drop=FALSE]
	if(n > 1)
	{
	  # loop over remaining data points
	  for(i in 2:n)
	  {
	    assigned <- FALSE
	    for(k in 1:K)
	    {
	      d <- distance(data[i,], C[k,])
	      if(d < eps)
	      {
	        clusters[i] <- k
	        assigned <- TRUE
	        break
	      }
	    }
	    if(!assigned)
	    {
	      K <- K + 1
	      clusters[i] <- K
	      C[K,] <- data[i,]
	    }
	  }
	}
	C <- C[1:K,,drop=FALSE]
	dimnames(C) <- list(paste0("mode", 1:K), colnames(data))
	out <- list(components = C, clusters = clusters)
	return(out)
}
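
# Small illustration (a sketch): points closer than eps to an existing
# component representative are merged into that component.
#   connectedComponents(rbind(c(0, 0), c(0, 1e-4), c(1, 1)), eps = 1e-3)
#   # -> 2 components, clusters 1 1 2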

# hypvol <- function(data, logarithm = TRUE, ...)
# {
# # Compute simple estimates of the hypervolume of a dataset: 
# # - volume of the hyperbox from variable bounds
# # - volume of the hyperbox from variable bounds of the principal components
# # - volume of ellipsoid hull
#  
#   data <- as.matrix(data)
#   sumlogdifcol <- function(x) 
#     sum(log(apply(x, 2, function(xc) diff(range(xc)))))
#   #  
#   boxvol <- sumlogdifcol(data)
#   if(!logarithm) boxvol <- exp(boxvol)
#   #
#   pcavol <- sumlogdifcol(princomp(data)$scores)
#   if(!logarithm) pcavol <- exp(pcavol)
#   #
#   elhvol <- NULL
#   if(ncol(data) > 1)
#     elhvol <- cluster::volume(cluster::ellipsoidhull(data), log = logarithm)
#   #
#   out <- c(boxvol, pcavol, elhvol)
#   return(out)  
# }

hypvolGaussian <- function(sigma, alpha = 0.05, logarithm = TRUE, ...)
{
# hypervolume of central (1-alpha)100% region of a multivariate Gaussian
# (see Maitra notes on MVN and https://online.stat.psu.edu/stat505/lesson/4/4.6)
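# The region is the ellipsoid { x : t(x-m) %*% solve(sigma) %*% (x-m) <= q },
# with q = qchisq(1-alpha, d), whose volume is
#   2 * pi^(d/2) / (d * gamma(d/2)) * q^(d/2) * sqrt(det(sigma))
# computed below on the log scale.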
  sigma <- as.matrix(sigma)
  stopifnot(isSymmetric(sigma, tol = sqrt(.Machine$double.eps)))
  alpha <- pmin(pmax(1e-5, as.numeric(alpha)), 1-1e-5)
  d <- dim(sigma)[1]
  vol <- log(2) + d/2*log(pi) - (log(d) + lgamma(d/2)) + 
         d/2*log(qchisq(1-alpha, df = d)) + 0.5*log(det(sigma))
  if(!logarithm) vol <- exp(vol)
  return(vol)
}
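
# For example, for a bivariate standard Gaussian the 95% region has volume
# pi * qchisq(0.95, 2), approximately 18.8:
#   hypvolGaussian(diag(2), alpha = 0.05, logarithm = FALSE)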

smallClusters <- function(n, d) 
{
# Compute threshold on cluster size as suggested by 
# Chen Genovese Wasserman (2016) A comprehensive approach to mode 
# clustering, Electronic Journal of Statistics, 10:210-241

  (n*log(n)/20)^(d/(d+6))
}
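
# For example, with n = 1000 and d = 2 the threshold is
# (1000*log(1000)/20)^(2/8), approximately 4.3, so modal clusters with fewer
# than about 4 observations would be regarded as small.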

gmm2margParams <- function(pro, mu, sigma, ...)
{
# Compute marginal parameters from Gaussian mixture parameters
# Source: Frühwirth-Schnatter (2006) Finite Mixture and Markov 
#         Switching Models, Sec. 6.1.1
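# The marginal moments of the mixture are
#   mean = sum_g pro[g] * mu[,g]
#   var  = sum_g pro[g] * ( sigma[,,g] + (mu[,g] - mean)(mu[,g] - mean)' )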
  pro   <- as.vector(pro)
  G     <- length(pro)  
  d     <- length(mu)/G
  mu    <- array(mu, dim = c(d, G))
  sigma <- array(sigma, dim = c(d,d,G))
  mean  <- matrix(apply(mu, 1, function(m) sum(pro*m)), 1, d)
  var   <- matrix(as.double(0), d, d)
  for(g in seq(G))
    var <- var + pro[g]*sigma[,,g] + pro[g]*crossprod(mu[,g] - mean)
  out <- list(mean = mean, variance = var)
  return(out)
}
