# R/show_clusters.R

# Defines functions: show_clusters

# Documented in: show_clusters

library(FactoMineR)
library(forcats)
library(ggplot2)
library(dplyr)

# author: Hazel Jiang
# date: 2021-03-06
#
# Module to plot clusters by colour
#

#' Plot clusters on the first two principal components
#'
#' This function reduces a data set to 2 dimensions using principal component
#' analysis (PCA) and colours clusters of points. The points and the centroids
#' are stacked and projected together (FactoMineR::PCA with scale.unit = TRUE),
#' so both are drawn in the same coordinate system.
#'
#' Centroids are labelled 1, 2, ..., k in row order. Their colours therefore
#' line up with the points only when `clusters` uses the same 1-based labels.
#'
#' The error messages refer to `centers` and `labels`; these correspond to the
#' `centroids` and `clusters` arguments respectively.
#'
#' @param X array Data points of dimension (n,d). Must have two dimensions
#'   (e.g. a matrix or a data frame).
#' @param clusters array Cluster assignments for each point in X,
#'   dimension (n,1)
#' @param centroids array Coordinates of cluster centers, dimension (k,d).
#'   Must have two dimensions.
#'
#' @return plot A 2d principal components scatter plot coloured by cluster
#'   (a ggplot object). Points are drawn small and semi-transparent,
#'   centroids as larger star-shaped markers.
#' @export
#'
#' @examples
#' X <- rbind(c(0, 0), c(1, 0), c(10, 0), c(11, 0))
#' centers <- rbind(c(0, 0), c(10, 10))
#' labels <- c(1, 1, 2, 2)
#' show_clusters(X, labels, centers)
show_clusters <- function(X, clusters, centroids) {

  # dim() is NULL for plain vectors, which would make the comparisons below
  # fail with an obscure "argument is of length zero" error.
  if (length(dim(X)) != 2L || length(dim(centroids)) != 2L) {
    stop("`X` and `centroids` must each have two dimensions", call. = FALSE)
  }

  # Throw error if X and centroids have different widths
  if (ncol(X) != ncol(centroids)) {
    stop("`X` and `centers` must have the same width", call. = FALSE)
  }

  # Throw error if X and clusters have different lengths
  if (nrow(X) != length(clusters)) {
    stop("There must the same number of labels as points", call. = FALSE)
  }

  n_points <- nrow(X)
  n_centroids <- nrow(centroids)

  # Fit the PCA on points and centroids together so that both end up in the
  # same projected coordinate system.
  combined <- rbind(X, centroids)
  pca <- FactoMineR::PCA(combined, scale.unit = TRUE, ncp = 2, graph = FALSE)
  projected <- as.data.frame(pca$ind$coord)

  # Split the projection back into points (first n rows) and centroids (last
  # k rows). seq_len() keeps the split correct when either count is zero,
  # where 1:0 would count backwards and pick up the wrong rows.
  projected_points <- projected[seq_len(n_points), , drop = FALSE]
  projected_centroids <- dplyr::slice(
    projected,
    n_points + seq_len(n_centroids)
  )

  projected_points$clusters <- as.factor(clusters)
  projected_centroids$clusters <- as.factor(seq_len(n_centroids))

  cluster_plot <- ggplot2::ggplot() +
    ggplot2::geom_point(
      data = projected_points,
      ggplot2::aes(`Dim.1`, `Dim.2`, colour = clusters),
      size = 1, alpha = 0.7
    ) +
    ggplot2::geom_point(
      data = projected_centroids,
      ggplot2::aes(`Dim.1`, `Dim.2`, colour = clusters),
      size = 4, shape = 8
    ) +
    ggplot2::ggtitle("PCA Plot") +
    ggplot2::theme_bw()

  cluster_plot
}
# UBC-MDS/kmeaningfulR documentation built on March 29, 2021, 7:55 a.m.