Nothing
#' k nearest neighbor algorithm for multi-variate data
#'
#' @param X data matrix, i.e. observations X dimensions
#' @param K number of clusters to use
#' @param init list of p and mu used for initialization
#' @param Ninit number of samples used per cluster if no init argument is given
#' @param verbose allows print out of progress information; in verbose mode the cluster memberships are added to the output
#' @param tol smaller changes than tol in the objective function indicate convergence, if missing chosen automatically to be 1/5 of the smallest sample variance per dimension
#' @param Niter.max maximum number of admissible iterations
#'
#' @keywords internal
knn <- function(X, K=2, init, Ninit=50, verbose=FALSE, tol, Niter.max=500) {
## in case X is no matrix, interpret it as uni-variate case
if(!is.matrix(X))
X <- matrix(X,ncol=1)
if(missing(tol)) {
## if tol is missing, then set it well below the minimial
## length-scale in the data set
sdSq <- colVars(X)
## use k means clustering with K=Nc as init
tol <- min(sdSq)/5
}
N <- dim(X)[1]
Nd <- dim(X)[2]
if(missing(init)) {
## initialize randomly
pEst <- runif(K)/K
pEst <- pEst/sum(pEst)
muEst <- matrix(0, K, Nd)
## sample for each component from the base data
for(i in seq(K))
muEst[i,] <- colMeans(X[sample.int(N,min(Ninit, N),replace=FALSE),,drop=FALSE])
} else {
pEst <- init$p
muEst <- init$mu
}
## init 1-of-K coding matrix indicating cluster membership
Kresp <- matrix(1:K,nrow=N,ncol=K,byrow=TRUE)
## distance matrix which gets updated in each iteration
DM <- matrix(0, N, K)
iter <- 1
J <- Inf
if(verbose) {
message("K nearest neighbors clustering with K =", K, ":\n")
}
while(iter < Niter.max) {
Jprev <- J
## "E" step, i.e. find for each data point the cluster with the
## smallest euclidean distance
for(i in seq(K))
DM[,i] <- rowSums(scale(X, muEst[i,], FALSE)^2)
##resp <- 1*(Kresp == apply(DM, 1, which.min))
resp <- 1*(1 == matrixStats::rowRanks(DM, ties.method="first"))
respM <- matrixStats::colSums2(resp)
if(any(respM == 0)) {
warning("Some components are assigned the empty set! Try reducing K.")
respM[respM==0] <- 1
}
## "M" step, i.e. given cluster membership, calculate new means
##muEst <- sweep(t(resp) %*% X, 1, respM, "/")
muEst <- sweep(crossprod(resp, X), 1, respM, "/", FALSE)
## functional to be minimized
J <- sum( (X - resp %*% muEst)^2 )
delta <- Jprev - J
if(verbose)
message("Iteration", iter, ": J =", J, "; delta =", delta, "\n")
if(delta < tol) {
break
}
iter <- iter + 1
}
if(iter == Niter.max)
warning("Maximum number of iterations reached.")
res <- list(center=muEst, p=colMeans(resp), J=J, delta=delta, niter=iter)
##res$cluster <- apply(resp==1, 1, which)
## 10x faster
res$cluster <- ((which(t(resp==1)) - 1) %% K) + 1
invisible(res)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.