# Set the knitr chunk option `echo = TRUE` as the default for all chunks.
knitr::opts_chunk$set(echo = TRUE)

Initialization

Loading the required packages and doing the initial setup:

# Packages attached for the rest of this document.
# jDirichletMixtureModels: source of the dmm.* functions and of the datasets
# loaded with data(..., package = "jDirichletMixtureModels") below.
library(jDirichletMixtureModels)
# NOTE(review): data.table is not referenced in the visible code - confirm it
# is still needed.
library(data.table)
# ggplot2: used for the clusters-vs-iterations chart at the end.
library(ggplot2)
# tikzDevice: provides the tikz() graphics device used to write .tex figures.
library(tikzDevice)

# Package setup call, run once before any other dmm.* function is used here.
# NOTE(review): presumably initializes the package's backend - confirm against
# the jDirichletMixtureModels documentation.
dmm.setup()

Synthetic Gaussian Data

Dataset 1

Using a conjugate multivariate normal likelihood model with parameters generated using the data:

# Bring the bundled example dataset into the workspace.
data(syntheticGaussian1, package = "jDirichletMixtureModels")

# Build the model from the dataset itself.
model <- dmm.BaseModel(data = syntheticGaussian1)

# Cluster the dataset with the model built above.
states <- dmm.cluster(
  model,
  syntheticGaussian1,
  alpha = 10.0,
  iters = 1500,
  burnin = 1000,
  shuffled = TRUE
)

# Inspect the first returned state: summary first, then an on-screen plot.
astate <- states[[1]]
dmm.summarize(astate$clusters)

dmm.plot(astate$data)

# Draw the same plot again into a tikz device so it is saved as vector
# graphics in synGauss1.tex.
tikz(file = "synGauss1.tex", width = 2.5, height = 2.5)
dmm.plot(astate$data)
dev.off()

Dataset 2

Using a conjugate multivariate normal likelihood model with parameters generated using the data:

# Bring the bundled example dataset into the workspace.
data(syntheticGaussian2, package = "jDirichletMixtureModels")

# Build the model from the dataset itself.
model <- dmm.BaseModel(data = syntheticGaussian2)

# Cluster the dataset with the model built above.
# NOTE(review): alpha is 10000 here but 10 in every other example in this
# document - confirm this is intentional.
states2 <- dmm.cluster(
  model,
  syntheticGaussian2,
  alpha = 10000.0,
  iters = 1500,
  burnin = 1000,
  shuffled = TRUE
)

# Inspect the first returned state: summary first, then an on-screen plot.
astate <- states2[[1]]
dmm.summarize(astate$clusters)

dmm.plot(astate$data)

# Draw the same plot again into a tikz device so it is saved as vector
# graphics in synGauss2.tex.
tikz(file = "synGauss2.tex", width = 2.5, height = 2.5)
dmm.plot(astate$data)
dev.off()

Aggregation Dataset

Using a conjugate multivariate normal likelihood model with parameters generated using the data:

# Bring the bundled example dataset into the workspace.
data(aggregationData, package = "jDirichletMixtureModels")

# Build the model from the dataset itself.
model <- dmm.BaseModel(data = aggregationData)

# Cluster the dataset with the model built above.
states3 <- dmm.cluster(
  model,
  aggregationData,
  alpha = 10.0,
  iters = 1500,
  burnin = 1000,
  shuffled = TRUE
)

# Inspect the first returned state: summary first, then an on-screen plot.
astate <- states3[[1]]
dmm.summarize(astate$clusters)

dmm.plot(astate$data)

# Draw the same plot again into a tikz device so it is saved as vector
# graphics in aggre.tex.
tikz(file = "aggre.tex", width = 2.5, height = 2.5)
dmm.plot(astate$data)
dev.off()

Birch Dataset

Using a conjugate multivariate normal likelihood model with parameters generated using the data:

# Load the dataset
data(birch3Data, package = "jDirichletMixtureModels")

# Work on a random subsample of at most 5000 rows. The sample size is capped
# at the number of rows so sample() cannot fail on a smaller dataset, and
# drop = FALSE keeps the two-dimensional structure of the subset.
inds <- sample(nrow(birch3Data), min(5000L, nrow(birch3Data)))
Xdata <- birch3Data[inds, , drop = FALSE]

# Make the model from the subsample (previously the subsample was computed
# but the full dataset was passed here and to dmm.cluster).
model <- dmm.BaseModel(data = Xdata)

# Run cluster code on the same subsample the model was built from
states4 <- dmm.cluster(model, Xdata, alpha = 10.0, iters = 1500,
                       burnin = 1000, shuffled = TRUE)

# Display result of a single state
astate <- states4[[1]]
dmm.summarize(astate$clusters)

dmm.plot(astate$data)

# Create a .tex file that will contain the plot as vectors
tikz(file = "birch.tex", width = 2.5, height = 2.5)
dmm.plot(astate$data)
dev.off()

Mouse Data

# Bring the bundled example dataset into the workspace and strip its names.
data(mouse, package = "jDirichletMixtureModels")
names(mouse) <- NULL

# Convert once to a matrix; the same matrix feeds both the model and the
# clustering call.
mouseMatrix <- as.matrix(mouse)

# Build the model from the matrix form of the data.
model <- dmm.BaseModel(data = mouseMatrix)

# Cluster the data with the model built above.
states5 <- dmm.cluster(
  model,
  mouseMatrix,
  alpha = 10.0,
  iters = 1500,
  burnin = 1000,
  shuffled = TRUE
)

# Inspect the first returned state: summary first, then an on-screen plot.
astate <- states5[[1]]
dmm.summarize(astate$clusters)

dmm.plot(astate$data)

# Draw the same plot again into a tikz device so it is saved as vector
# graphics in mouse.tex.
tikz(file = "mouse.tex", width = 2.5, height = 2.5)
dmm.plot(astate$data)
dev.off()

Number of Clusters vs Iterations

# Load the dataset
data(syntheticGaussian2, package = "jDirichletMixtureModels")

# Make the model
model <- dmm.BaseModel(data = syntheticGaussian2)

# Run cluster code with burnin = 0 and shuffled = FALSE.
# NOTE(review): presumably this makes states[[i]] the state after iteration i,
# which is how it is indexed below - confirm against the dmm.cluster docs.
its <- 500
states <- dmm.cluster(model, syntheticGaussian2, alpha = 10.0, iters = its,
                      burnin = 0, shuffled = FALSE)

# Display result of a single state
astate <- states[[1]]
dmm.summarize(astate$clusters)

# Number of clusters in each of the first `its` states. Built in one pass
# instead of growing a data frame with rbind() inside a loop; seq_len() also
# behaves correctly when `its` is 0.
clusterIt <- data.frame(
  Iterations = seq_len(its),
  NumberofClusters = vapply(
    seq_len(its),
    function(i) length(states[[i]]$clusters),
    integer(1)
  )
)

# Plot only the first 30 iterations. head() avoids the NA rows that
# clusterIt[1:30, ] would produce if fewer than 30 rows exist. The former
# scale_color_brewer() call was dropped: both colours are fixed constants, so
# no colour aesthetic is mapped and the scale had no effect.
chart <- ggplot(head(clusterIt, 30), aes(x = Iterations, y = NumberofClusters)) +
  geom_line(color = "turquoise") +
  geom_point(color = "turquoise4") +
  xlab("Iterations") +
  ylab("Number of Clusters")

# Write the chart to a .tex file as vector graphics. print() is explicit so
# the plot is also drawn when this code is run via source(), where top-level
# values are not auto-printed.
tikz(file = "ClusterIts.tex", width = 3, height = 2)
print(chart)
dev.off()


nsdumont/jDirichletMixtureModels documentation built on May 23, 2019, 2:51 p.m.