inst/doc/clustvarsel.R

## ----setup, include=FALSE-----------------------------------------------------
library(knitr)
# Global defaults applied to every code chunk of this document.
opts_chunk$set(
  # figure placement and size
  fig.align = "center",
  out.width = "90%",
  fig.width = 6,
  fig.height = 5.5,
  dev.args = list(pointsize = 10),
  # custom option: must be set for the `par` chunk hook to run
  par = TRUE,
  # collapse input & output code in chunks
  collapse = TRUE,
  cache = FALSE,
  # keep warnings and messages out of the rendered output
  warning = FALSE,
  message = FALSE
)
# Chunk hook registered under the name `par`: just before a chunk is
# evaluated (and unless its figures are suppressed with fig.show = "none"),
# set the plot margins, axis-label positions and tick length.
knit_hooks$set(par = function(before, options, envir) {
  # Nothing to do after the chunk has run, or when no figure will be shown.
  if (!before || options$fig.show == "none") {
    return(invisible(NULL))
  }
  par(mar = c(4.1, 4.1, 1.1, 1.1), mgp = c(3, 1, 0), tcl = -0.5)
})

## -----------------------------------------------------------------------------
library(mclust)
library(clustvarsel)

## -----------------------------------------------------------------------------
# Simulated data with two Gaussian clusters that differ in X1 and X2 only.
# X3 is X1 plus standard normal noise; X4 and X5 are pure noise variables.
n <- 200    # number of observations
pro <- 0.5  # probability of belonging to the first cluster

# Cluster-specific mean vectors and covariance matrices for (X1, X2)
mu1 <- c(0, 0)
mu2 <- c(3, 3)
sigma1 <- matrix(c(1, 0.5, 0.5, 1), nrow = 2, ncol = 2)
sigma2 <- matrix(c(1.5, -0.7, -0.7, 1.5), nrow = 2, ncol = 2)

# n x 5 matrix of zeros with columns named X1, ..., X5, filled in below
X <- matrix(0, nrow = n, ncol = 5, dimnames = list(NULL, paste0("X", 1:5)))

set.seed(1234)  # fixed seed so the example can be replicated
u <- runif(n)
Class <- ifelse(u < pro, 1, 2)  # cluster label of each observation

# Columns 1-2: bivariate normal draws from the cluster each row belongs to
X[Class == 1, 1:2] <- MASS::mvrnorm(sum(Class == 1), mu = mu1, Sigma = sigma1)
X[Class == 2, 1:2] <- MASS::mvrnorm(sum(Class == 2), mu = mu2, Sigma = sigma2)

# Columns 3-5: a noisy copy of X1 followed by two independent normals
X[, 3] <- X[, 1] + rnorm(n)
X[, 4] <- rnorm(n, mean = 1.5, sd = 2)
X[, 5] <- rnorm(n, mean = 2, sd = 1)

# Pairwise scatterplots of the five variables, marked by true cluster label
clPairs(X, Class)

## ----eval=FALSE---------------------------------------------------------------
#  out <- clustvarsel(X)
#  ## iter 1
#  ## + adding step
#  ##   Var  BICdiff Step Decision
#  ## 1  X2 11.17931  Add Accepted
#  ## iter 2
#  ## + adding step
#  ##   Var BICdiff Step Decision
#  ## 2  X1 85.1953  Add Accepted
#  ## iter 3
#  ## + adding step
#  ## - removing step
#  ##   Var   BICdiff   Step Decision
#  ## 3  X3 -14.91130    Add Rejected
#  ## 4  X1  85.19104 Remove Rejected
#  ## final iter
#  ## * fitting model on selected subset
#  
#  out
#  ## ------------------------------------------------------
#  ## Variable selection for Gaussian model-based clustering
#  ## Stepwise (forward/backward) greedy search
#  ## ------------------------------------------------------
#  ##
#  ##  Variable proposed Type of step   BICclust Model G   BICdiff Decision
#  ##                 X2          Add  -822.6398     E 2  11.17931 Accepted
#  ##                 X1          Add -1482.8408   VEV 2  85.19530 Accepted
#  ##                 X3          Add -2047.7064   EEV 2 -14.91130 Rejected
#  ##                 X1       Remove  -822.6355     E 2  85.19104 Rejected
#  ##
#  ## Selected subset: X2, X1
#  
#  summary(out$model)
#  ## ----------------------------------------------------
#  ## Gaussian finite mixture model fitted by EM algorithm
#  ## ----------------------------------------------------
#  ##
#  ## Mclust VEV (ellipsoidal, equal shape) model with 2 components:
#  ##
#  ##  log.likelihood   n df       BIC       ICL
#  ##       -714.9288 200 10 -1482.841 -1493.898
#  ##
#  ## Clustering table:
#  ##   1   2
#  ##  99 101

## ----eval=FALSE---------------------------------------------------------------
#  out <- clustvarsel(X, direction = "backward", fit = FALSE)
#  ## iter 1
#  ## - removing step
#  ##   Var   BICdiff   Step Decision
#  ## 1  X3 -38.09195 Remove Accepted
#  ## iter 2
#  ## - removing step
#  ##   Var   BICdiff   Step Decision
#  ## 2  X4 -23.50212 Remove Accepted
#  ## iter 3
#  ## - removing step
#  ## + adding step
#  ##   Var   BICdiff   Step Decision
#  ## 3  X5 -16.25831 Remove Accepted
#  ## 4  X3 -14.91130    Add Rejected
#  ## iter 4
#  ## - removing step
#  ## + adding step
#  ##   Var   BICdiff   Step Decision
#  ## 5  X1  95.55735 Remove Rejected
#  ## 6  X3 -14.91130    Add Rejected
#  
#  out
#  ## ------------------------------------------------------
#  ## Variable selection for Gaussian model-based clustering
#  ## Stepwise (backward/forward) greedy search
#  ## ------------------------------------------------------
#  ##
#  ##  Variable proposed Type of step   BICclust Model G   BICdiff Decision
#  ##                 X3       Remove -2925.4851   EVE 2 -38.09195 Accepted
#  ##                 X4       Remove -2067.0668   EVE 2 -23.50212 Accepted
#  ##                 X5       Remove -1482.8408   VEV 2 -16.25831 Accepted
#  ##                 X3          Add -2047.7064   EEV 2 -14.91130 Rejected
#  ##                 X1       Remove  -833.0019     V 2  95.55735 Rejected
#  ##                 X3          Add -2047.7064   EEV 2 -14.91130 Rejected
#  ##
#  ## Selected subset: X1, X2

## ----eval=FALSE---------------------------------------------------------------
#  out <- clustvarsel(X, search = "headlong")
#  ## iter 1
#  ## + adding step
#  ##   Var  BICdiff Step Decision
#  ## 1  X2 11.17931  Add Accepted
#  ## iter 2
#  ## + adding step
#  ##   Var BICdiff Step Decision
#  ## 2  X1 85.1953  Add Accepted
#  ## iter 3
#  ## + adding step
#  ## - removing step
#  ##   Var  BICdiff   Step Decision
#  ## 3  X3 -14.9113    Add Rejected
#  ## 4  X1  85.1953 Remove Rejected
#  ## final iter
#  ## * fitting model on selected subset
#  
#  out
#  ## ------------------------------------------------------
#  ## Variable selection for Gaussian model-based clustering
#  ## Headlong (forward/backward) search
#  ## ------------------------------------------------------
#  ##
#  ##  Variable proposed Type of step   BICclust Model G   BICdiff Decision
#  ##                 X2          Add  -822.6398     E 2  11.17931 Accepted
#  ##                 X1          Add -1482.8408   VEV 2  85.19530 Accepted
#  ##                 X3          Add -2047.7064   EEV 2 -14.91130 Rejected
#  ##                 X1       Remove  -822.6398     E 2  85.19530 Rejected
#  ##
#  ## Selected subset: X2, X1

## -----------------------------------------------------------------------------
# Simulated data with no cluster structure: n draws from a single p-variate
# normal with zero mean and a block-diagonal covariance matrix.
n <- 200  # number of observations
p <- 10   # number of variables
mu <- rep(0, p)

# The first four variables form two correlated pairs ...
sigma1 <- matrix(c(1, 0.5, 0.5, 1), nrow = 2, ncol = 2)
sigma2 <- matrix(c(1.5, -0.7, -0.7, 1.5), nrow = 2, ncol = 2)
# ... and the remaining ones are independent with unit variance. The size of
# the identity block is derived from p (rather than hard-coded) so that the
# covariance matrix stays p x p if p is changed.
n_indep <- p - nrow(sigma1) - nrow(sigma2)
sigma <- Matrix::bdiag(sigma1, sigma2, diag(n_indep))

set.seed(12345)  # fixed seed so the example can be replicated
X <- MASS::mvrnorm(n, mu, sigma)
colnames(X) <- paste0("X", seq_len(p))

# Pairwise scatterplots of all p variables
clPairs(X)

## ----eval=FALSE---------------------------------------------------------------
#  mod <- Mclust(X)
#  summary(mod$BIC)
#  ## Best BIC values:
#  ##              EII,2       VII,2       EII,3
#  ## BIC      -5899.073 -5901.88582 -5922.53452
#  ## BIC diff     0.000    -2.81261   -23.46131
#  summary(mod)
#  ## ----------------------------------------------------
#  ## Gaussian finite mixture model fitted by EM algorithm
#  ## ----------------------------------------------------
#  ##
#  ## Mclust EII (spherical, equal volume) model with 2 components:
#  ##
#  ##  log.likelihood   n df       BIC       ICL
#  ##       -2891.255 200 22 -5899.073 -5953.953
#  ##
#  ## Clustering table:
#  ##   1   2
#  ##  90 110

## ----eval=FALSE---------------------------------------------------------------
#  (out1 <- clustvarsel(X, verbose = FALSE))
#  ## ------------------------------------------------------
#  ## Variable selection for Gaussian model-based clustering
#  ## Stepwise (forward/backward) greedy search
#  ## ------------------------------------------------------
#  ##
#  ##  Variable proposed Type of step   BICclust Model G    BICdiff Decision
#  ##                 X3          Add  -666.9232     E 2 -4.8472740 Accepted
#  ##                 X7          Add -1242.8998   EII 2  1.5678544 Accepted
#  ##                X10          Add -1814.8848   EII 2  1.5172709 Accepted
#  ##                X10       Remove -1242.8998   EII 2  1.5172709 Rejected
#  ##                 X6          Add -2390.3224   EII 2  0.6539224 Accepted
#  ##                 X6       Remove -1814.8848   EII 2  0.6539224 Rejected
#  ##                 X2          Add -2942.2744   EII 2  0.1214452 Accepted
#  ##                 X2       Remove -2390.3224   EII 2  0.1214452 Rejected
#  ##                 X8          Add -3495.9758   EII 2  0.1104619 Accepted
#  ##                 X8       Remove -2942.2744   EII 2  0.1104619 Rejected
#  ##                 X9          Add -4081.9212   EII 2 -2.0296117 Rejected
#  ##                 X8       Remove -2942.2744   EII 2  0.1104619 Rejected
#  ##
#  ## Selected subset: X3, X7, X10, X6, X2, X8
#  
#  summary(out1$model)
#  # or
#  # mod1 <- Mclust(X[,out1$subset])
#  # summary(mod1)
#  ## ----------------------------------------------------
#  ## Gaussian finite mixture model fitted by EM algorithm
#  ## ----------------------------------------------------
#  ##
#  ## Mclust XII (spherical multivariate normal) model with 1 component:
#  ##
#  ##  log.likelihood   n df       BIC       ICL
#  ##       -1727.188 200  7 -3491.465 -3491.465
#  ##
#  ## Clustering table:
#  ##   1
#  ## 200

## ----eval=FALSE---------------------------------------------------------------
#  (out2 <- clustvarsel(X, direction = "backward", verbose = FALSE))
#  ## ------------------------------------------------------
#  ## Variable selection for Gaussian model-based clustering
#  ## Stepwise (backward/forward) greedy search
#  ## ------------------------------------------------------
#  ##
#  ##  Variable proposed Type of step  BICclust Model G     BICdiff Decision
#  ##                 X2       Remove -5346.204   EII 2 -48.0289667 Accepted
#  ##                 X4       Remove -4696.525   EII 2  -7.1909995 Accepted
#  ##                 X3       Remove -4032.114   EII 2  -2.3352781 Accepted
#  ##                 X3          Add -4696.525   EII 2  -2.3352781 Rejected
#  ##                 X7       Remove -3453.786   EII 2  -1.1967844 Accepted
#  ##                 X7          Add -4032.114   EII 2  -1.1967844 Rejected
#  ##                 X8       Remove -2899.545   EII 2  -0.4288443 Accepted
#  ##                 X8          Add -3453.786   EII 2  -0.4288443 Rejected
#  ##                 X5       Remove -2308.785   EII 2   1.0712786 Rejected
#  ##                 X8          Add -3453.786   EII 2  -0.4288443 Rejected
#  ##
#  ## Selected subset: X1, X5, X6, X9, X10
#  
#  summary(out2$model)
#  ## ----------------------------------------------------
#  ## Gaussian finite mixture model fitted by EM algorithm
#  ## ----------------------------------------------------
#  ##
#  ## Mclust XII (spherical multivariate normal) model with 1 component:
#  ##
#  ##  log.likelihood   n df       BIC       ICL
#  ##       -1424.072 200  6 -2879.934 -2879.934
#  ##
#  ## Clustering table:
#  ##   1
#  ## 200

## -----------------------------------------------------------------------------
# Print the R version, platform and attached/loaded packages used to run
# this script, for reproducibility.
sessionInfo()

Try the clustvarsel package in your browser

Any scripts or data that you put into this service are public.

clustvarsel documentation built on Jan. 8, 2021, 2:06 a.m.