#' Hierarchical k-means clustering
#'
#' @description
#' The final k-means clustering solution is very sensitive to the initial random selection
#' of cluster centers. This function provides a solution using a hybrid approach by combining
#' the hierarchical clustering and the k-means methods. The procedure is explained in "Details" section. Read more:
#' \href{http://www.sthda.com/english/wiki/hybrid-hierarchical-k-means-clustering-for-optimizing-clustering-outputs-unsupervised-machine-learning}{Hybrid hierarchical k-means clustering for optimizing clustering outputs}.
#' \itemize{
#' \item hkmeans(): compute hierarchical k-means clustering
#' \item print.hkmeans(): prints the result of hkmeans
#' \item hkmeans_tree(): plots the initial dendrogram
#' }
#'
#' @param x a numeric matrix, data frame or vector
#' @param k the number of clusters to be generated
#' @param hc.metric the distance measure to be used. Possible values are "euclidean", "maximum", "manhattan",
#' "canberra", "binary" or "minkowski" (see ?dist).
#' @param hc.method the agglomeration method to be used. Possible values include "ward.D", "ward.D2", "single",
#' "complete", "average", "mcquitty", "median" or "centroid" (see ?hclust).
#' @param iter.max the maximum number of iterations allowed for k-means.
#' @param km.algorithm the algorithm to be used for kmeans (see ?kmeans).
#' @details
#' The procedure is as follow:
#'
#' 1. Compute hierarchical clustering
#'
#' 2. Cut the tree in k-clusters
#'
#' 3. Compute the center (i.e the mean) of each cluster
#'
#' 4. Do k-means by using the set of cluster centers (defined in step 3) as the initial cluster centers. Optimize the clustering.
#'
#' This means that the final optimized partitioning obtained at step 4 might be different from the initial partitioning obtained at step 2.
#' Consider mainly the result displayed by \code{fviz_cluster()}.
#'
#' @return hkmeans returns an object of class "hkmeans" containing the following components:
#' \itemize{
#' \item The elements returned by the standard function kmeans() (see ?kmeans)
#' \item data: the data used for the analysis
#' \item hclust: an object of class "hclust" generated by the function hclust()
#' }
#' @examples
#' \donttest{
#' # Load data
#' data(USArrests)
#' # Scale the data
#' df <- scale(USArrests)
#'
#' # Compute hierarchical k-means clustering
#' res.hk <- hkmeans(df, 4)
#'
#' # Elements returned by hkmeans()
#' names(res.hk)
#'
#' # Print the results
#' res.hk
#'
#' # Visualize the tree
#' hkmeans_tree(res.hk, cex = 0.6)
#' # or use this
#' fviz_dend(res.hk, cex = 0.6)
#'
#'
#' # Visualize the hkmeans final clusters
#' fviz_cluster(res.hk, frame.type = "norm", frame.level = 0.68)
#' }
#' @name hkmeans
#' @rdname hkmeans
#' @export
hkmeans <- function(x, k, hc.metric = "euclidean", hc.method = "ward.D2",
iter.max = 10, km.algorithm = "Hartigan-Wong"){
# Step 1: hierarchical clustering of x using the requested distance metric
# and agglomeration method.
res.hc <- stats::hclust(stats::dist(x, method = hc.metric), method = hc.method)
# Step 2: cut the dendrogram into k groups.
grp <- stats::cutree(res.hc, k = k)
# Step 3: per-group column means become deterministic starting centers
# (drop the first column, which holds the group labels from aggregate()).
clus.centers <- stats::aggregate(x, list(grp), mean)[, -1]
# Step 4: refine the partition with k-means seeded at those centers.
# stats:: qualification matches every other stats call in this function.
res.km <- stats::kmeans(x, centers = clus.centers,
iter.max = iter.max, algorithm = km.algorithm)
# Return the kmeans result augmented with the input data and the
# hclust tree, reclassed so print.hkmeans()/hkmeans_tree() dispatch.
class(res.km) <- "hkmeans"
res.km$data <- x
res.km$hclust <- res.hc
res.km
}
#' @rdname hkmeans
#' @export
# S3 print method for "hkmeans" objects; mirrors stats:::print.kmeans but
# labels the output as hierarchical k-means. Returns x invisibly.
print.hkmeans <- function (x, ...)
{
# Header: number of clusters and their sizes.
cat("Hierarchical K-means clustering with ", length(x$size), " clusters of sizes ",
paste(x$size, collapse = ", "), "\n", sep = "")
# Cluster centers (one row per cluster).
cat("\nCluster means:\n")
print(x$centers, ...)
# Cluster assignment for each observation.
cat("\nClustering vector:\n")
print(x$cluster, ...)
cat("\nWithin cluster sum of squares by cluster:\n")
print(x$withinss, ...)
# Format the between/total SS ratio, then swap the "." for the user's
# configured decimal separator (OutDec option) before printing.
ratio <- sprintf(" (between_SS / total_SS = %5.1f %%)\n",
100 * x$betweenss/x$totss)
cat(sub(".", getOption("OutDec"), ratio, fixed = TRUE), "Available components:\n",
sep = "\n")
print(names(x))
# kmeans() sets ifault == 2L when iter.max was reached without convergence.
if (!is.null(x$ifault) && x$ifault == 2L)
cat("Warning: did *not* converge in specified number of iterations\n")
invisible(x)
}
#' @rdname hkmeans
#' @param hkmeans an object of class hkmeans (returned by the function hkmeans())
#' @param rect.col Vector with border colors for the rectangles around clusters in dendrogram
#' @param ... others arguments to be passed to the function plot.hclust(); (see ? plot.hclust)
#' @export
# Plot the dendrogram underlying an hkmeans fit and draw a rectangle
# around each of the k final clusters. rect.col defaults to one border
# color per cluster label; extra arguments go to plot.hclust().
hkmeans_tree <- function(hkmeans, rect.col = NULL, ...)
{
cluster_ids <- unique(hkmeans$cluster)
if (is.null(rect.col)) {
rect.col <- cluster_ids
}
plot(hkmeans$hclust, hang = -1, sub = "", xlab = "", ...)
stats::rect.hclust(hkmeans$hclust, k = length(cluster_ids), border = rect.col)
}