R/plot.R

Defines functions plot.kMeans

Documented in plot.kMeans

#' Plot the results of k-means clustering
#'
#' \code{plot} method for class \code{"kMeans"}. The result is a 2D-scatterplot
#' containing the data points and the centroids coloured by their cluster
#' allocation. If the dimension is higher than two, the user is asked to provide
#' the names of the two variables that should be plotted.
#'
#' @section Required: \code{ggplot2}
#'
#' @param x an object of class \code{"kMeans"}. The components
#'   \code{centroids}, \code{cluster} and \code{data} are used for plotting.
#' @param ... further arguments passed to or from other methods.
#'
#' @return A \code{ggplot} object showing the first chosen variable on the
#'   x-axis and the second chosen variable on the y-axis.
#'
#' @export
#' @examples
#' # create example data set
#' X <- rbind(matrix(rnorm(50, sd = 0.5), ncol = 2),
#'            matrix(rnorm(50, mean = 1, sd = 0.5), ncol = 2))
#'
#' # perform k-means algorithm
#' result <- kMeansLloyd(x = X, centroids = 2, nStart = 2)
#'
#' # plot result
#' library(ggplot2)
#' plot(result)
plot.kMeans <- function(x, ...) {
  if (!requireNamespace("ggplot2", quietly = TRUE)) {
    stop("Package \"ggplot2\" required for this function. Please install it.",
         call. = FALSE)
  }

  k <- nrow(x$centroids)
  n <- length(x$cluster)
  p <- ncol(x$data)

  if (p < 2) {
    stop("At least two variables are required for a 2D-scatterplot",
         call. = FALSE)
  }

  # Centroids are stacked on top of the observations so that both can be drawn
  # from a single data frame; centroid i is labelled with cluster i.
  df <- as.data.frame(rbind(x$centroids, x$data))
  df <- cbind(
    df,
    cluster = as.factor(c(seq_len(k), x$cluster)),
    # Explicit levels keep the order in sync with the labels/values passed to
    # scale_shape_manual() below.
    type = factor(rep(c("centroid", "data point"), times = c(k, n)),
                  levels = c("centroid", "data point"))
  )
  varNames <- colnames(df)[seq_len(p)]

  if (p == 2) {
    name1 <- varNames[1]
    name2 <- varNames[2]
  } else {
    # readline() returns "" in non-interactive sessions, so the prompts below
    # could never succeed there; fail with an explicit reason instead.
    if (!interactive()) {
      stop("Provided data has more than two dimensions; choosing the ",
           "variables to plot requires an interactive session",
           call. = FALSE)
    }
    message("Provided data has more than two dimensions. Choose two variables:")
    cat("Possible input: ", varNames, sep = "\n")
    name1 <- readline("Please provide name of the first variable:  ")
    if (!(name1 %in% varNames)) stop("invalid input")
    name2 <- readline("Please provide name of the second variable:  ")
    if (!(name2 %in% varNames)) stop("invalid input")
    if (name1 == name2) stop("Distinct variable names required")
  }

  ggplot2::ggplot(df, ggplot2::aes_string(x = name1, y = name2)) +
    # Refer to the columns by name rather than via df$..., so the aesthetics
    # are resolved against the plot's data.
    ggplot2::geom_point(ggplot2::aes_string(shape = "type",
                                            color = "cluster")) +
    ggplot2::scale_shape_manual(name = "",
                                labels = c("centroid", "observation"),
                                values = c(8, 16)) +
    # Axis labels must follow the aesthetic mapping: name1 on x, name2 on y.
    ggplot2::labs(x = name1, y = name2, color = "Cluster Allocation",
                  title = paste("2D-scatterplot of", name1, "/", name2))
}
heiligerl/kMeans_Rpackage documentation built on Aug. 16, 2020, 4:04 p.m.