#' k-Plane Clustering Algorithm
#'
#' It is one of the first methods proposed in 2000 for fitting \eqn{(p-1)}-dimensional planes for data in \eqn{R^p} by EM-like algorithm
#' that updates parametrization of a plane and assigning data points to the nearest plane in an alternating manner.
#' Empirically, its performance is not competitive against newer methods but we include this as a test benchmark
#' as well as it is simple and fast enough.
#'
#' @param X an \eqn{(n\times p) data matrix.}
#' @param K the number of clusters.
#' @param iter the number of EM-type updating.
#' @param print.progress a logical; \code{TRUE} to show completion of iterations by 10, \code{FALSE} otherwise.
#'
#' @return a list whose elements are also lists of following elements: \describe{
#' \item{cluster}{length-\code{n} vector of cluster label.}
#' \item{w}{an \eqn{(p\times K)} matrix of orthonormal columns.}
#' \item{gamma}{a length-\eqn{K} vector of projection values.}
#' \item{name}{name of the algorithm.}
#' }
#'
#' @examples
#' ## generate a toy example of two plane components
#' set.seed(18)
#' tester = simLP(n=100, nl=0, np=2)
#' data = tester$data
#' label = tester$class
#'
#' ## do PCA for data reduction
#' proj = base::eigen(stats::cov(data))$vectors[,1:2]
#' dat2 = data%*%proj
#'
#' ## run k-plane algorithm with K=2, 3, and 4
#' kplane2 = kplane(data, K=2)
#' kplane3 = kplane(data, K=3)
#' kplane4 = kplane(data, K=4)
#'
#' ## extract clustering
#' finc2 = kplane2$cluster
#' finc3 = kplane3$cluster
#' finc4 = kplane4$cluster
#'
#' ## visualize
#' opar <- par(mfrow=c(3,4), no.readonly=TRUE)
#' plot(dat2[,1],dat2[,2],pch=19,cex=0.3,col=finc2+1,main="K=2:PCA")
#' plot(data[,1],data[,2],pch=19,cex=0.3,col=finc2+1,main="K=2:Axis(1,2)")
#' plot(data[,1],data[,3],pch=19,cex=0.3,col=finc2+1,main="K=2:Axis(1,3)")
#' plot(data[,2],data[,3],pch=19,cex=0.3,col=finc2+1,main="K=2:Axis(2,3)")
#'
#' plot(dat2[,1],dat2[,2],pch=19,cex=0.3,col=finc3+1,main="K=3:PCA")
#' plot(data[,1],data[,2],pch=19,cex=0.3,col=finc3+1,main="K=3:Axis(1,2)")
#' plot(data[,1],data[,3],pch=19,cex=0.3,col=finc3+1,main="K=3:Axis(1,3)")
#' plot(data[,2],data[,3],pch=19,cex=0.3,col=finc3+1,main="K=3:Axis(2,3)")
#'
#' plot(dat2[,1],dat2[,2],pch=19,cex=0.3,col=finc4+1,main="K=4:PCA")
#' plot(data[,1],data[,2],pch=19,cex=0.3,col=finc4+1,main="K=4:Axis(1,2)")
#' plot(data[,1],data[,3],pch=19,cex=0.3,col=finc4+1,main="K=4:Axis(1,3)")
#' plot(data[,2],data[,3],pch=19,cex=0.3,col=finc4+1,main="K=4:Axis(2,3)")
#' par(opar)
#'
#' @references
#' \insertRef{bradley_k-plane_2000}{Rsubclust}
#'
#' @export
kplane <- function(X, K=2, iter=496, print.progress=TRUE){
#########################################################
# Initialization : use the notation from the paper
m = nrow(X)
n = ncol(X)
K = round(K)
kmeans.init = stats::kmeans(X, center=K)$cluster
kmeans.update = kplane.update(X, K, kmeans.init)
w.old = kmeans.update$w
gamma.old = kmeans.update$gamma
clust.old = kmeans.init
#########################################################
# Iterate
for (it in 1:iter){
# (a) Cluster Assignment
clust.tmp = kplane.assign.naive(X, K, w.old, gamma.old)
clust.new = clust.tmp$cluster
if (length(unique(clust.new))<K){
clust.new = kplane.assign.probabilistic(clust.tmp$distmat)
}
if ((length(clust.new)==1)&&(clust.new==FALSE)){
if (print.progress){
print(paste('* kplane : iteration ', it,'/',iter,' terminated due to collapse at ', Sys.time(),sep=""))
}
break
}
# (b) Cluster Update
update.list = kplane.update(X, K, clust.new)
w.new = update.list$w
gamma.new = update.list$gamma
# (c) Update information
inc.clust = as.double(mclustcomp::mclustcomp(clust.old, clust.new, type="jaccard")[2])
clust.old = clust.new
w.old = w.new
gamma.old = gamma.new
if (it >= 5){
if (inc.clust > 0.99){
if (print.progress){
print(paste('* kplane : iteration ', it,'/',iter,' terminated at ', Sys.time(),sep=""))
}
break
}
}
if (print.progress){
if(it%%10==0){
print(paste('* kplane : iteration ', it,'/',iter,' complete at ', Sys.time(),sep=""))
}
}
}
#########################################################
# Return output
output = list()
output$cluster = clust.old
output$w = w.old
output$gamma = gamma.old
output$name = "Rsubclust:kplane"
return(output)
}
#' @keywords internal
kplane.update <- function(X, K, cluster){
# cluster information
n = ncol(X)
list.clust = list()
for (k in 1:K){
list.clust[[k]] = which(cluster==k)
}
# prepare
w.mat = array(0,c(n,K))
gamma.vec = rep(0,K)
count.group = rep(0,K)
# compute w.mat
for (k in 1:K){
idk = list.clust[[k]]
ml = length(idk)
count.group[k] = ml
Al = X[idk,]
if (length(idk)==1){
Al = matrix(Al, nrow = 1)
}
Bl = t(Al)%*%(diag(ml)-outer(rep(1,ml),rep(1,ml))/ml)%*%Al
wl = as.vector(eigen(Bl)$vectors[,nrow(Bl)])
w.mat[,k] = wl
gamma.vec[k] = sum(as.vector(array(1,c(1,ml))%*%Al)*wl)/ml
}
# return
output = list()
output$w = w.mat
output$gamma = gamma.vec
return(output)
}
#' @keywords internal
kplane.assign.naive <- function(X, K, w.mat, gamma.vec){
m = nrow(X)
distmat = array(0,c(m,K))
for (k in 1:K){
distmat[,k] = as.vector(X%*%w.mat[,k])-gamma.vec[k]
}
output = list()
output$cluster = (apply(distmat, 1, which.min))
output$distmat = distmat
return(output)
}
#' @keywords internal
kplane.assign.probabilistic <- function(distmat){
wi = exp(-distmat)
wi = wi/rowSums(wi)
wi[is.na(wi)] = 1
return(kplane.mygibbs.step.c(wi))
}
#' @keywords internal
kplane.mygibbs.step.c <- function(wi){
n = nrow(wi)
k = ncol(wi)
cflag = TRUE
iter = 1
while (cflag){
iter = iter + 1
labels = rep(0,n)
for (j in 1:n){
labels[j] = sample(1:k, 1, prob = as.vector(wi[j,]))
}
tbl.lab = table(labels)
if (length(tbl.lab)==k){
if (all(tbl.lab>1)){
cflag = FALSE
}
}
if (iter == 100){
print("* kplane : updating the cluster assignment collapses. Return the last successful update.")
return(FALSE)
}
}
return(labels)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.