Nothing
## ----setup, include=FALSE-----------------------------------------------------
library(knitr)
opts_chunk$set(fig.align = "center",
out.width = "90%",
fig.width = 6,
fig.height = 5.5,
dev.args = list(pointsize=10),
par = TRUE, # needed for setting hook
collapse = TRUE, # collapse input & ouput code in chunks
cache = FALSE,
warning = FALSE, message = FALSE)
knit_hooks$set(par = function(before, options, envir)
{ if(before && options$fig.show != "none")
par(mar=c(4.1,4.1,1.1,1.1), mgp=c(3,1,0), tcl=-0.5)
})
## -----------------------------------------------------------------------------
library(mclust)
library(clustvarsel)
## -----------------------------------------------------------------------------
n <- 200 # sample size
pro <- 0.5 # mixing proportion
mu1 <- c(0,0) # mean vector for the first cluster
mu2 <- c(3,3) # mean vector for the second cluster
sigma1 <- matrix(c(1,0.5,0.5,1),2,2) # covar matrix for the first cluster
sigma2 <- matrix(c(1.5,-0.7,-0.7,1.5),2,2) # covar matrix for the second cluster
X <- matrix(0, n, 5, dimnames = list(NULL, paste0("X", 1:5)))
set.seed(1234) # for replication
u <- runif(n)
Class <- ifelse(u < pro, 1, 2)
X[u < pro, 1:2] <- MASS::mvrnorm(sum(u < pro), mu = mu1, Sigma = sigma1)
X[u >= pro, 1:2] <- MASS::mvrnorm(sum(u >= pro), mu = mu2, Sigma = sigma2)
X[, 3] <- X[, 1] + rnorm(n)
X[, 4] <- rnorm(n, mean = 1.5, sd = 2)
X[, 5] <- rnorm(n, mean = 2, sd = 1)
clPairs(X, Class)
## ----eval=FALSE---------------------------------------------------------------
# out <- clustvarsel(X)
# ## iter 1
# ## + adding step
# ## Var BICdiff Step Decision
# ## 1 X2 11.17931 Add Accepted
# ## iter 2
# ## + adding step
# ## Var BICdiff Step Decision
# ## 2 X1 85.1953 Add Accepted
# ## iter 3
# ## + adding step
# ## - removing step
# ## Var BICdiff Step Decision
# ## 3 X3 -14.91130 Add Rejected
# ## 4 X1 85.19104 Remove Rejected
# ## final iter
# ## * fitting model on selected subset
#
# out
# ## ------------------------------------------------------
# ## Variable selection for Gaussian model-based clustering
# ## Stepwise (forward/backward) greedy search
# ## ------------------------------------------------------
# ##
# ## Variable proposed Type of step BICclust Model G BICdiff Decision
# ## X2 Add -822.6398 E 2 11.17931 Accepted
# ## X1 Add -1482.8408 VEV 2 85.19530 Accepted
# ## X3 Add -2047.7064 EEV 2 -14.91130 Rejected
# ## X1 Remove -822.6355 E 2 85.19104 Rejected
# ##
# ## Selected subset: X2, X1
#
# summary(out$model)
# ## ----------------------------------------------------
# ## Gaussian finite mixture model fitted by EM algorithm
# ## ----------------------------------------------------
# ##
# ## Mclust VEV (ellipsoidal, equal shape) model with 2 components:
# ##
# ## log.likelihood n df BIC ICL
# ## -714.9288 200 10 -1482.841 -1493.898
# ##
# ## Clustering table:
# ## 1 2
# ## 99 101
## ----eval=FALSE---------------------------------------------------------------
# out <- clustvarsel(X, direction = "backward", fit = FALSE)
# ## iter 1
# ## - removing step
# ## Var BICdiff Step Decision
# ## 1 X3 -38.09195 Remove Accepted
# ## iter 2
# ## - removing step
# ## Var BICdiff Step Decision
# ## 2 X4 -23.50212 Remove Accepted
# ## iter 3
# ## - removing step
# ## + adding step
# ## Var BICdiff Step Decision
# ## 3 X5 -16.25831 Remove Accepted
# ## 4 X3 -14.91130 Add Rejected
# ## iter 4
# ## - removing step
# ## + adding step
# ## Var BICdiff Step Decision
# ## 5 X1 95.55735 Remove Rejected
# ## 6 X3 -14.91130 Add Rejected
#
# out
# ## ------------------------------------------------------
# ## Variable selection for Gaussian model-based clustering
# ## Stepwise (backward/forward) greedy search
# ## ------------------------------------------------------
# ##
# ## Variable proposed Type of step BICclust Model G BICdiff Decision
# ## X3 Remove -2925.4851 EVE 2 -38.09195 Accepted
# ## X4 Remove -2067.0668 EVE 2 -23.50212 Accepted
# ## X5 Remove -1482.8408 VEV 2 -16.25831 Accepted
# ## X3 Add -2047.7064 EEV 2 -14.91130 Rejected
# ## X1 Remove -833.0019 V 2 95.55735 Rejected
# ## X3 Add -2047.7064 EEV 2 -14.91130 Rejected
# ##
# ## Selected subset: X1, X2
## ----eval=FALSE---------------------------------------------------------------
# out <- clustvarsel(X, search = "headlong")
# ## iter 1
# ## + adding step
# ## Var BICdiff Step Decision
# ## 1 X2 11.17931 Add Accepted
# ## iter 2
# ## + adding step
# ## Var BICdiff Step Decision
# ## 2 X1 85.1953 Add Accepted
# ## iter 3
# ## + adding step
# ## - removing step
# ## Var BICdiff Step Decision
# ## 3 X3 -14.9113 Add Rejected
# ## 4 X1 85.1953 Remove Rejected
# ## final iter
# ## * fitting model on selected subset
#
# out
# ## ------------------------------------------------------
# ## Variable selection for Gaussian model-based clustering
# ## Headlong (forward/backward) search
# ## ------------------------------------------------------
# ##
# ## Variable proposed Type of step BICclust Model G BICdiff Decision
# ## X2 Add -822.6398 E 2 11.17931 Accepted
# ## X1 Add -1482.8408 VEV 2 85.19530 Accepted
# ## X3 Add -2047.7064 EEV 2 -14.91130 Rejected
# ## X1 Remove -822.6398 E 2 85.19530 Rejected
# ##
# ## Selected subset: X2, X1
## -----------------------------------------------------------------------------
n <- 200
p <- 10
mu <- rep(0,p)
sigma1 <- matrix(c(1,0.5,0.5,1),2,2)
sigma2 <- matrix(c(1.5,-0.7,-0.7,1.5),2,2)
sigma <- Matrix::bdiag(sigma1, sigma2, diag(6))
set.seed(12345)
X <- MASS::mvrnorm(n, mu, sigma)
colnames(X) <- paste0("X", 1:p)
clPairs(X)
## ----eval=FALSE---------------------------------------------------------------
# mod <- Mclust(X)
# summary(mod$BIC)
# ## Best BIC values:
# ## EII,2 VII,2 EII,3
# ## BIC -5899.073 -5901.88582 -5922.53452
# ## BIC diff 0.000 -2.81261 -23.46131
# summary(mod)
# ## ----------------------------------------------------
# ## Gaussian finite mixture model fitted by EM algorithm
# ## ----------------------------------------------------
# ##
# ## Mclust EII (spherical, equal volume) model with 2 components:
# ##
# ## log.likelihood n df BIC ICL
# ## -2891.255 200 22 -5899.073 -5953.953
# ##
# ## Clustering table:
# ## 1 2
# ## 90 110
## ----eval=FALSE---------------------------------------------------------------
# (out1 <- clustvarsel(X, verbose = FALSE))
# ## ------------------------------------------------------
# ## Variable selection for Gaussian model-based clustering
# ## Stepwise (forward/backward) greedy search
# ## ------------------------------------------------------
# ##
# ## Variable proposed Type of step BICclust Model G BICdiff Decision
# ## X3 Add -666.9232 E 2 -4.8472740 Accepted
# ## X7 Add -1242.8998 EII 2 1.5678544 Accepted
# ## X10 Add -1814.8848 EII 2 1.5172709 Accepted
# ## X10 Remove -1242.8998 EII 2 1.5172709 Rejected
# ## X6 Add -2390.3224 EII 2 0.6539224 Accepted
# ## X6 Remove -1814.8848 EII 2 0.6539224 Rejected
# ## X2 Add -2942.2744 EII 2 0.1214452 Accepted
# ## X2 Remove -2390.3224 EII 2 0.1214452 Rejected
# ## X8 Add -3495.9758 EII 2 0.1104619 Accepted
# ## X8 Remove -2942.2744 EII 2 0.1104619 Rejected
# ## X9 Add -4081.9212 EII 2 -2.0296117 Rejected
# ## X8 Remove -2942.2744 EII 2 0.1104619 Rejected
# ##
# ## Selected subset: X3, X7, X10, X6, X2, X8
#
# summary(out1$model)
# # or
# # mod1 <- Mclust(X[,out1$subset])
# # summary(mod1)
# ## ----------------------------------------------------
# ## Gaussian finite mixture model fitted by EM algorithm
# ## ----------------------------------------------------
# ##
# ## Mclust XII (spherical multivariate normal) model with 1 component:
# ##
# ## log.likelihood n df BIC ICL
# ## -1727.188 200 7 -3491.465 -3491.465
# ##
# ## Clustering table:
# ## 1
# ## 200
## ----eval=FALSE---------------------------------------------------------------
# (out2 <- clustvarsel(X, direction = "backward", verbose = FALSE))
# ## ------------------------------------------------------
# ## Variable selection for Gaussian model-based clustering
# ## Stepwise (backward/forward) greedy search
# ## ------------------------------------------------------
# ##
# ## Variable proposed Type of step BICclust Model G BICdiff Decision
# ## X2 Remove -5346.204 EII 2 -48.0289667 Accepted
# ## X4 Remove -4696.525 EII 2 -7.1909995 Accepted
# ## X3 Remove -4032.114 EII 2 -2.3352781 Accepted
# ## X3 Add -4696.525 EII 2 -2.3352781 Rejected
# ## X7 Remove -3453.786 EII 2 -1.1967844 Accepted
# ## X7 Add -4032.114 EII 2 -1.1967844 Rejected
# ## X8 Remove -2899.545 EII 2 -0.4288443 Accepted
# ## X8 Add -3453.786 EII 2 -0.4288443 Rejected
# ## X5 Remove -2308.785 EII 2 1.0712786 Rejected
# ## X8 Add -3453.786 EII 2 -0.4288443 Rejected
# ##
# ## Selected subset: X1, X5, X6, X9, X10
#
# summary(out2$model)
# ## ----------------------------------------------------
# ## Gaussian finite mixture model fitted by EM algorithm
# ## ----------------------------------------------------
# ##
# ## Mclust XII (spherical multivariate normal) model with 1 component:
# ##
# ## log.likelihood n df BIC ICL
# ## -1424.072 200 6 -2879.934 -2879.934
# ##
# ## Clustering table:
# ## 1
# ## 200
## -----------------------------------------------------------------------------
sessionInfo()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.