Nothing
## ---- echo=FALSE, message=FALSE------------------------------------------
# Load dependencies
library(plyr)
library(magrittr)
library(ggplot2)
library(clusternomics)
## ------------------------------------------------------------------------
set.seed(1)
# Number of elements in each cluster, follows the table given above
groupCounts <- c(50, 10, 40, 60)
# Centers of clusters
means <- c(-1.5,1.5)
# Helper function to generate test data
testData <- generateTestData_2D(groupCounts, means)
datasets <- testData$data
## ---- fig.width=6--------------------------------------------------------
qplot(datasets[[1]][,1], datasets[[1]][,2], col=factor(testData$groups)) +
geom_point(size=3) +
ggtitle("Context 1") + xlab("x") + ylab("y") +
scale_color_discrete(name="Cluster")
## ---- fig.width=6--------------------------------------------------------
qplot(datasets[[2]][,1], datasets[[2]][,2], col=factor(testData$groups)) +
geom_point(size=3) +
ggtitle("Context 2") + xlab("x") + ylab("y") +
scale_color_discrete(name="Cluster")
## ------------------------------------------------------------------------
# Setup of the algorithm
dataDistributions <- 'diagNormal'
# Pre-specify number of clusters
clusterCounts <- list(global=10, context=c(3,3))
# Set number of iterations
# The following is ONLY FOR SIMULATION PURPOSES
# Use larger number of iterations for real-life data
maxIter <- 300
burnin <- 200
lag <- 2 # Thinning of samples
## ----runSampling, message=F----------------------------------------------
# Run context-dependent clustering
results <- contextCluster(datasets, clusterCounts,
maxIter = maxIter, burnin = burnin, lag = lag,
dataDistributions = 'diagNormal',
verbose = F)
# Extract resulting cluster assignments
samples <- results$samples
# Extract global cluster assignments for each MCMC sample
clusters <-
laply(1:length(samples), function(i) samples[[i]]$Global)
## ---- fig.width=6--------------------------------------------------------
logliks <- results$logliks
qplot(1:maxIter, logliks) + geom_line() +
xlab("MCMC iterations") +
ylab("Log likelihood")
## ------------------------------------------------------------------------
wrongClusterCounts <- list(global=2, context=c(2,1))
worseResults <-
contextCluster(datasets, wrongClusterCounts,
maxIter = maxIter, burnin = burnin, lag = lag,
dataDistributions = 'diagNormal',
verbose = F)
print(paste('Original model has lower (better) DIC:', results$DIC))
print(paste('Worse model has higher (worse) DIC:', worseResults$DIC))
## ---- fig.width=6--------------------------------------------------------
cc <- numberOfClusters(clusters)
qplot(seq(from=burnin, to = maxIter, by=lag), cc) +
geom_line() + xlab("MCMC iterations") + ylab("Number of clusters")
## ---- fig.width=6--------------------------------------------------------
clusterLabels <- unique(clusters %>% as.vector)
sizes <- matrix(nrow=nrow(clusters), ncol=length(clusterLabels))
for (ci in 1:length(clusterLabels)) {
sizes[,ci] <- rowSums(clusters == clusterLabels[ci])
}
sizes <- sizes %>% as.data.frame
colnames(sizes) <- clusterLabels
boxplot(sizes,xlab="Global combined clusters", ylab="Cluster size")
## ------------------------------------------------------------------------
clusteringResult <- samples[[length(samples)]]
## ---- message=F, fig.width=5, fig.height=5-------------------------------
# Compute the co-clustering matrix from global cluster assignments
coclust <- coclusteringMatrix(clusters)
# Plot the co-clustering matrix as a heatmap
require(gplots)
mypalette <- colorRampPalette(rev(c('#d7191c','#fdae61','#ffffbf','#abd9e9','#4395d2')),
space = "Lab")(100)
h <- heatmap.2(
coclust,
col=mypalette, trace='none',
dendrogram='row', labRow='', labCol='', key = TRUE,
keysize = 1.5, density.info=c("none"),
main="MCMC co-clustering matrix",
scale = "none")
## ------------------------------------------------------------------------
diag(coclust) <- 1
fit <- hclust(as.dist(1 - coclust))
hardAssignments <- cutree(fit, k=4)
## ---- message=FALSE, fig.width=6-----------------------------------------
aris <- laply(1:nrow(clusters),
function(i) mclust::adjustedRandIndex(clusters[i,], testData$groups)) %>%
as.data.frame
colnames(aris) <- "ARI"
aris$Iteration <- seq(from=burnin, to=maxIter, by=lag)
coclustAri <- mclust::adjustedRandIndex(hardAssignments, testData$groups)
aris$Coclust <- coclustAri
ggplot(aris, aes(x=Iteration, y=ARI, colour="MCMC iterations")) +
geom_point() +
ylim(0,1) +
geom_smooth(size=1) +
theme_bw() +
geom_line(aes(x=Iteration, y=Coclust, colour="Co-clustering matrix"), size=1) +
scale_colour_discrete(name="Cluster assignments")
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.