#' Plot the results of k-means clustering
#'
#' \code{plot} method for class \code{"kMeans"}. The result is a 2D-scatterplot
#' containing the data points and the centroids coloured by their cluster
#' allocation. If the dimension is higher than two, the user is asked
#' interactively to provide the names of the two variables that should be
#' plotted; the first one is drawn on the x-axis, the second one on the y-axis.
#'
#' @section Required: \code{ggplot2}
#'
#' @param x an object of class \code{"kMeans"}. The method reads its
#'   components \code{centroids}, \code{data} and \code{cluster}.
#' @param ... further arguments passed to or from other methods (not used by
#'   this method).
#'
#' @return A \code{ggplot} object. An error is raised if \code{ggplot2} is not
#'   installed, if the data has fewer than two columns, if more than two
#'   columns are present in a non-interactive session, or if the variable
#'   names entered at the prompt are invalid or identical.
#'
#' @export
#' @examples
#' # create example data set
#' X <- rbind(matrix(rnorm(50, sd = 0.5), ncol = 2),
#' matrix(rnorm(50, mean = 1, sd = 0.5), ncol = 2))
#'
#' # perform k-means algorithm
#' result <- kMeansLloyd(x = X, centroids = 2, nStart = 2)
#'
#' # plot result
#' library(ggplot2)
#' plot(result)
plot.kMeans <- function(x, ...) {
  if (!requireNamespace("ggplot2", quietly = TRUE)) {
    stop("Package \"ggplot2\" required for this function. Please install it.",
         call. = FALSE)
  }

  k <- nrow(x$centroids)
  n <- length(x$cluster)
  p <- ncol(x$data)
  if (is.null(p) || p < 2L) {
    stop("At least two variables are required for a 2D-scatterplot.",
         call. = FALSE)
  }

  # Centroids come first so that their rows line up with the cluster labels
  # 1..k and the "centroid" type markers added below.
  df <- as.data.frame(rbind(x$centroids, x$data))
  varNames <- colnames(df)[seq_len(p)]
  df <- cbind(
    df,
    cluster = as.factor(c(seq_len(k), x$cluster)),
    # Fixed level order keeps the shape scale (labels/values) aligned.
    type = factor(c(rep("centroid", k), rep("data point", n)),
                  levels = c("centroid", "data point"))
  )

  if (p == 2L) {
    name1 <- varNames[1]
    name2 <- varNames[2]
  } else {
    # readline() returns "" immediately outside interactive sessions, which
    # would only surface as a confusing "invalid input" error.
    if (!interactive()) {
      stop("Provided data has more than two dimensions; choosing the two ",
           "variables to plot requires an interactive session.",
           call. = FALSE)
    }
    message("Provided data has more than two dimensions. Choose two variables:")
    cat("Possible input: ", varNames, sep = "\n")
    name1 <- readline("Please provide name of the first variable: ")
    if (!(name1 %in% varNames)) stop("invalid input")
    name2 <- readline("Please provide name of the second variable: ")
    if (!(name2 %in% varNames)) stop("invalid input")
    if (name1 == name2) stop("Distinct variable names required")
  }

  # The .data pronoun looks columns up by name inside the plot data, so it
  # also works for non-syntactic column names and avoids `df$...` in aes().
  ggplot2::ggplot(
    df,
    ggplot2::aes(x = .data[[name1]], y = .data[[name2]])
  ) +
    ggplot2::geom_point(
      ggplot2::aes(shape = .data[["type"]], color = .data[["cluster"]])
    ) +
    ggplot2::scale_shape_manual(name = "",
                                labels = c("centroid", "observation"),
                                values = c(8, 16)) +
    # Axis labels must follow the aesthetic mapping: name1 on x, name2 on y.
    ggplot2::labs(x = name1, y = name2, color = "Cluster Allocation",
                  title = paste("2D-scatterplot of", name1, "/", name2))
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.