#' Dirichlet Process K-Means Clustering
#'
#' Use the Bayesian Dirichlet process to perform K Means Clustering.
#'
#' @param data a data frame or matrix of numeric variables
#' @param disp.param The dispersion parameter. Set to higher values to get fewer clusters. The dispersion
#' parameter is the expected average width of the clusters. Defaults to 2, but you should try different values.
#' @param iter number of iterations. Defaults to 4000.
#' @param tolerance defaults to .0001
#' @export
#' @examples
#' dp_means(iris[,1:4])
#'
dp_means <- function(data, disp.param = 2, iter = 4000, tolerance = .001)
{
max.iter = iter
orig.data = as.data.frame(data)
n <- nrow( orig.data )
data <- as.matrix(sapply(orig.data,as.numeric))
pick.clusters <- rep(1,n)
k <- 1
mu <- matrix(apply(data,2,mean), nrow=1, ncol=ncol(data) )
is.converged <- FALSE
iteration <- 0
ss.old <- Inf
ss.curr <- Inf
while (iteration < max.iter ) { # Iterate until convergence
iteration <- iteration + 1
for( i in 1:n ) { # Iterate over each observation and measure the distance each observation' from its mean center's squared distance from its mean
distances <- rep(NA, k)
for( j in 1:k ){
distances[j] <- sum((data[i, ] - mu[j, ])^2) # Distance formula.
}
if( min(distances) > disp.param ) { # If the dispersion parameter is still less than the minimum distance then create a new cluster
k <- k + 1
pick.clusters[i] <- k
mu <- rbind(mu, data[i, ])
} else {
pick.clusters[i] <- which(distances == min(distances))
}
}
##
# Calculate new cluster means
##
for( j in 1:k ) {
if( length(pick.clusters == j) > 0 ) {
mu[j, ] <- apply(subset(data,pick.clusters == j), 2, mean)
}
}
##
# Test for convergence
##
ss.curr <- 0
for( i in 1:n ) {
ss.curr <- ss.curr +
sum( (data[i, ] - mu[pick.clusters[i], ])^2 )
}
ss.diff <- ss.old - ss.curr
ss.old <- ss.curr
if( !is.nan( ss.diff ) & ss.diff < tolerance ) {
is.converged <- TRUE
}
}
centers <- data.frame(mu)
ret.val <- list("centers" = centers, "cluster" = factor(pick.clusters),
"k" = k, "iterations" = iteration)
return(ret.val)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.