inst/doc/HDclust.R

## ----setup, include = FALSE----------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ---- fig.show='hold'----------------------------------------------------
library(HDclust)

## ------------------------------------------------------------------------
# variable structure with one variable block of 3 variables and two mixture components, which corresponds to GMM model
Vb<- vb(1, dim=3, numst=2)
show(Vb)

## ------------------------------------------------------------------------
# variable structure with two blocks. Dimensionality of data is 40. First block contains 10 variable with 3 mixture components. Second block has 30 variables with 5 mixture components. Variable order is natural.
Vb <- vb(2, dim=40, bdim=c(10,30), numst=c(3,5), varorder=list(c(1:10),c(11:40)))
show(Vb)

## ---- results='hide'-----------------------------------------------------
# by default number of states in each block is varied from 1 to 9
data("faithful")
Vb <- vb(1, dim=2, numst=1)
set.seed(12345)
modelBIC <- hmmvbBIC(faithful, VbStructure=Vb)

## ------------------------------------------------------------------------
show(modelBIC)

## ----fig1, fig.height = 5, fig.width = 5---------------------------------
plot(modelBIC, xlab='# mixture components per block', ylab='BIC')

## ---- results='hide'-----------------------------------------------------
# user-provided configurations for the number of states in each block
data("sim3")
Vb <- vb(2, dim=40, bdim=c(10,30), numst=c(1,1), varorder=list(c(1:10),c(11:40)))
set.seed(12345)
configs = list(c(1,2), c(3,5))
modelBIC <- hmmvbBIC(sim3[,1:40], VbStructure=Vb, configList=configs)

## ------------------------------------------------------------------------
show(modelBIC)

## ------------------------------------------------------------------------
# we just illustrate the loglikelihood of the first 10 data points beacause the data set is huge
getLoglikehd(modelBIC)[1:10]

## ------------------------------------------------------------------------
data("sim3")
set.seed(12345)
Vb <- vb(2, dim=40, bdim=c(10,30), numst=c(3,5), varorder=list(c(1:10),c(11:40)))
hmmvb <- hmmvbTrain(sim3[1:40], VbStructure=Vb)
show(hmmvb)

## ------------------------------------------------------------------------
Vb <- getVb(hmmvb) # variable block structure
hmmChain <- getHmmChain(hmmvb) # list with HMM models
diagCov <- getDiagCov(hmmvb) # indicator whether covariance matrices are diagonal 
bic <- getBIC(hmmvb) # BIC value

# below we show HMM-VB parameters for the first variable block
numst <- getNumst(hmmChain[[1]]) # number of mixture components in variable block
prenumst <- getPrenumst(hmmChain[[1]]) # number of mixture components in the previous variable block
hmmParam <- getHmmParam(hmmChain[[1]]) # list with priors, transition probabilities, means, covariance matrices and other parameters of all states of HMM


## ------------------------------------------------------------------------
data("sim2")
set.seed(12345)
hmmvb <- hmmvbTrain(sim2[1:5], searchControl=vbSearchControl(nperm=5), nthread=4)
show(hmmvb)

## ---- results='hide'-----------------------------------------------------
modelBIC <- hmmvbBIC(sim2[1:5], VbStructure=getVb(hmmvb), numst=1:15, nthread=4)

## ------------------------------------------------------------------------
show(modelBIC)

## ---- fig.show='hold', results='hide'------------------------------------
data("faithful")
VbStructure <- vb(nb=1, dim=2, numst=1)
set.seed(12345)
modelBIC <- hmmvbBIC(faithful, VbStructure)

## ------------------------------------------------------------------------
clust <- hmmvbClust(faithful, bicObj=modelBIC)
show(clust)

## ----fig2, fig.height = 5, fig.width = 5---------------------------------
plot(faithful[,1], faithful[,2], xlab='eruptions', ylab='waiting', col=getClsid(clust))

## ------------------------------------------------------------------------
clustParam <- getClustParam(clust)
mode <- clustParam$mode # a matrix with cluster modes
vseq <- clustParam$vseq # A list with integer vectors representing distinct Viterbi sequences for the dataset

## ---- fig.show='hold', results='hide'------------------------------------
# If variable block structure is unknown
data("sim2")
set.seed(12345)

# find variable block structure
hmmvb <- hmmvbTrain(sim2[,1:5], searchControl=vbSearchControl(nperm=5), nthread=4)

# refine number of states in variable block structure by model selection
modelBIC <- hmmvbBIC(sim2[,1:5], VbStructure=getVb(hmmvb), numst=1:15, nthread=4)

## ------------------------------------------------------------------------
clust <- hmmvbClust(data=sim2[,1:5], bicObj=modelBIC)
show(clust)

## ----fig3, fig.height = 5, fig.width = 5---------------------------------
palette(c(palette(), "purple", "brown")) # extend palette to show all 10 clusters
plot(clust)

## ----fig4, fig.height = 5, fig.width = 5---------------------------------
plot(clust, method='PCA')

## ---- fig.show='hold'----------------------------------------------------
data("sim3")

# split data set in two halves
X1 = sim3[1:500,]
X2 = sim3[501:1000,]

set.seed(12345)
Vb <- vb(2, 40, c(10,30), c(3,5), list(c(1:10),c(11:40)))

# train HMM-VB on dataset X1 and cluster data
hmmvb <- hmmvbTrain(X1[1:40], VbStructure=Vb)
clust1 <- hmmvbClust(X1[1:40], model=hmmvb)
show(clust1)

# cluster data set X2 using HMM-VB and cluster parameters for dataset X1
clust2 <- hmmvbClust(X2[1:40], model=hmmvb, rfsClust=getClustParam(clust1))

show(clust2)

## ---- fig.show='hold'----------------------------------------------------
clust2 <- hmmvbClust(X2[1:40], model=hmmvb, rfsClust=getClustParam(clust1), control=clustControl(getlikelh = TRUE))
# we just illustrate the loglikelihood of the first 10 data points beacause the data set is huge
getLoglikehd(clust2)[1:10]

## ---- fig.show='hold'----------------------------------------------------
data("sim3")

set.seed(12345)
Vb <- vb(2, 40, c(10,30), c(3,5), list(c(1:10),c(11:40)))

# train HMM-VB
hmmvb <- hmmvbTrain(sim3[1:40], VbStructure=Vb)

# find all density modes
modes <- hmmvbFindModes(sim3[1:40], model=hmmvb)
show(modes)

# cluster density modes
mergedModes <- clustModes(modes, cutree.args = list(h=1.0))

show(mergedModes)

## ----fig5, fig.height = 5, fig.width = 5---------------------------------
plot(mergedModes)

Try the HDclust package in your browser

Any scripts or data that you put into this service are public.

HDclust documentation built on May 2, 2019, 9:20 a.m.