In michaelfop/mixggm: Mixtures of Gaussian Graphical Models

library(knitr)
# Default chunk options that control figure layout and the graphics device.
opts_chunk$set(
  fig.align = "center",
  out.width = "90%",
  fig.width = 6,
  fig.height = 5.5,
  dev.args = list(pointsize = 10)
)

# Default chunk options that control the hook below and the printed output.
opts_chunk$set(
  par = TRUE,       # needed for setting hook
  collapse = TRUE,  # collapse input & output code in chunks
  cache = FALSE,
  warning = FALSE,
  message = FALSE
)

# Chunk hook registered under the name `par`: when called before a chunk whose
# `fig.show` option is not "none", set the margins, axis-label placement and
# tick length used by the plots.
knit_hooks$set(par = function(before, options, envir) {
  if (before && options$fig.show != "none") {
    par(mar = c(4.1, 4.1, 1.1, 1.1), mgp = c(3, 1, 0), tcl = -0.5)
  }
})

Introduction

The mixggm package implements mixtures of Gaussian graphical models for model-based clustering with sparse covariance and concentration matrices. A mixture of Gaussian covariance or concentration graph models is estimated using a structural-EM algorithm, and the mixture model selected is the one that is optimal according to BIC.

This document gives a quick tour of the functionalities of mixggm (version `r packageVersion("mixggm")`). It was written in R Markdown, using the knitr package for production. See `help(package="mixggm")` for further details, and `citation("mixggm")` for the references.

# Attach the package demonstrated in this document.
library(mixggm)
# Print the package startup message. The helper is reached through `:::`
# (presumably because it is not exported -- confirm); sep = "" concatenates
# the returned pieces without a separator.
cat(mixggm:::mixggmStartupMessage(), sep="")

Fit a mixture of concentration graph models

Iris data

# Load the iris data and fit a mixture of concentration graph models to
# every column except the fifth one.
data(iris)
mod <- mixGGM(iris[,-5], model = "concentration")
# Default summary first, then a second summary with the `graphs`, `clusters`
# and `parameters` options switched on.
summary(mod)
summary(mod, graphs = TRUE, clusters = TRUE, parameters = TRUE)

# One plot of the fitted model per display requested through `what`.
plot(mod, what = "graph")

plot(mod, what = "adjacency")

plot(mod, what = "classification")

plot(mod, what = "common")

A complex simulated data example

# Simulate N observations on V variables from a two-component mixture with
# mixing proportions `tau`. The seed makes the draws reproducible.
set.seed(20190105)
N <- 500
V <- 20
tau <- c(0.3, 0.7)
# Component sizes: N single-trial multinomial draws, counted per component.
Nk <- rowSums(rmultinom(N, 1, tau))
# True labels, ordered by component to match the row order of `x` below.
class <- rep(seq_along(tau), Nk)

# Component 1: zero-mean multivariate normal whose covariance matrix is a
# single draw from a Wishart distribution.
sigma1 <- rWishart(1, V + 1, diag(V))[, , 1]
mu1 <- rep(0, V)
mu2 <- rnorm(V, 0.5, 2)
x1 <- MASS::mvrnorm(Nk[1], mu1, sigma1)

# Component 2: the first variable is standard normal; every later variable
# is the previous one plus Gaussian noise with mean mu2[j] and sd 0.5.
x2 <- matrix(NA_real_, Nk[2], V)
x2[, 1] <- rnorm(Nk[2])
# seq_len(V)[-1] is 2, ..., V and is empty when V is 1, whereas 2:V would
# count down to c(2, 1).
for (j in seq_len(V)[-1]) {
  x2[, j] <- x2[, j - 1] + rnorm(Nk[2], mu2[j], sd = 0.5)
}

# Stack the two components and name the columns x1, ..., xV.
x <- rbind(x1, x2)
colnames(x) <- paste0("x", seq_len(ncol(x)))

# Fit concentration graph mixtures to the simulated data for K = 1:4
# (presumably the candidate numbers of mixture components -- confirm) with
# the "ebic" penalty and beta = 0.5, then store the fitted object on disk.
# NOTE(review): the file is written under "vignettes/" but read back as
# "simcomplex.Rdata"; presumably the fit is run once from the package root
# while the document itself is built inside vignettes/ -- confirm.
mod <- mixGGM(x, K = 1:4, model = "concentration",
              penalty = "ebic", beta = 0.5)
save(mod, file = "vignettes/simcomplex.Rdata")

# Restore the objects stored in simcomplex.Rdata (expected to contain the
# fitted `mod` -- confirm) and summarise the fit.
load(file = "simcomplex.Rdata")
summary(mod)

# Classification plot with `dimens` set to 1, 5, 10, 15 and 20
# (presumably the indices of the variables to display -- confirm).
plot(mod, what = "classification", dimens = c(1,5,10,15,20))

plot(mod, what = "graph")

plot(mod, what = "adjacency")
# Cross-tabulate the simulated labels against the classification stored in
# the fitted object.
table(class, mod$classification)

Fit a mixture of covariance graph models

Wine data

# Load the wine data set shipped with the gclus package.
data(wine, package = "gclus")

# Fit covariance graph mixtures to every column except the first one
# (presumably the class label -- confirm) for K = 1:4 with the "erdos"
# penalty and beta = 0.01, then store the fitted object on disk.
# NOTE(review): the file is written under "vignettes/" but read back as
# "wine.Rdata"; presumably the fit is run once from the package root while
# the document itself is built inside vignettes/ -- confirm.
mod <- mixGGM(wine[,-1], K = 1:4, model = "covariance",
              penalty = "erdos", beta = 0.01)
save(mod, file = "vignettes/wine.Rdata")

# Restore the objects stored in wine.Rdata (expected to contain the fitted
# `mod` -- confirm) and summarise the fit with the `graphs` option on.
load(file = "wine.Rdata")
summary(mod, graphs = TRUE)

plot(mod, what = "graph")

plot(mod, what = "adjacency")

# Classification plot with `dimens` set to 1:4 (presumably the indices of
# the variables to display -- confirm).
plot(mod, what = "classification", dimens = 1:4)
# Compare the `Class` column of the data with the estimated classification:
# first as a cross-tabulation, then with the adjusted Rand index from mclust.
table(wine$Class, mod$classification)
mclust::adjustedRandIndex(wine$Class, mod$classification)

A simple simulated data example

# Simulate N bivariate observations from a two-component Gaussian mixture
# with mixing proportions `tau`. The seed makes the draws reproducible.
set.seed(20190105)
N <- 200
tau <- c(0.3, 0.7)
# Component sizes: N single-trial multinomial draws, counted per component.
Nk <- rowSums(rmultinom(N, 1, tau))
# True labels, ordered by component to match the row order of `x` below.
class <- rep(seq_along(tau), Nk)

sigma1 <- diag(2)                               # independent variables
sigma2 <- matrix(c(1, 0.9, 0.9, 1), 2, 2)       # correlated variables
mu1 <- c(0, 0)
mu2 <- c(2, 3)

# Draw each component and stack them, first component on top.
x <- rbind(
  MASS::mvrnorm(Nk[1], mu1, sigma1),
  MASS::mvrnorm(Nk[2], mu2, sigma2)
)
# seq_len() stays correct for a zero-column matrix, where 1:ncol(x) would
# yield c(1, 0).
colnames(x) <- paste0("x", seq_len(ncol(x)))
# Fit a mixture of covariance graph models to the simulated data, leaving
# every other mixGGM argument at its default, and summarise the fit with
# the `graphs` and `parameters` options switched on.
mod <- mixGGM(x, model = "covariance")
summary(mod, graphs = TRUE, parameters = TRUE)

# One plot of the fitted model per display requested through `what`.
plot(mod, what = "graph")

plot(mod, what = "adjacency")

plot(mod, what = "classification")

References

Fop, M., Murphy, T.B., and Scrucca, L. (2019) Model-based clustering with sparse covariance matrices. Statistics and Computing, 29:4, 791–819. URL: https://doi.org/10.1007/s11222-018-9838-y