# doc/lloyd-s-k-means-algorithm-from-scratch.R

## ----setup, include = FALSE----------------------------------------------
# Default knitr chunk options for every chunk that follows: collapsed
# source/output blocks, "#>" as the output prefix, and 7 x 4 figures.
knitr::opts_chunk$set(
  comment = "#>",
  collapse = TRUE,
  fig.height = 4,
  fig.width = 7
)

## ---- echo = FALSE-------------------------------------------------------
# Print the definition of `distances` from the kMeans namespace.
utils::getFromNamespace(x = "distances", ns = "kMeans")

## ---- echo=FALSE---------------------------------------------------------
# Print the definition of `lloyd` from the kMeans namespace.
utils::getFromNamespace(x = "lloyd", ns = "kMeans")

## ------------------------------------------------------------------------
# Attach the package and print the definition of `kMeansLloyd`.
library(kMeans)
kMeansLloyd

## ---- echo = FALSE-------------------------------------------------------
# Print method for objects of class kMeans.
utils::getFromNamespace(x = "print.kMeans", ns = "kMeans")

## ---- echo = FALSE-------------------------------------------------------
# Summary method for objects of class kMeans.
utils::getFromNamespace(x = "summary.kMeans", ns = "kMeans")

## ---- echo = FALSE-------------------------------------------------------
# Print method for objects of class kMeansSummary.
utils::getFromNamespace(x = "print.kMeansSummary", ns = "kMeans")

## ---- echo = FALSE-------------------------------------------------------
# Fitted method for objects of class kMeans.
utils::getFromNamespace(x = "fitted.kMeans", ns = "kMeans")

## ---- echo = FALSE-------------------------------------------------------
# Plot method for objects of class kMeans.
utils::getFromNamespace(x = "plot.kMeans", ns = "kMeans")

## ------------------------------------------------------------------------
# standardise the iris data; column 5 is dropped before scaling
scIris <- scale(iris[, -5])

## ------------------------------------------------------------------------
# Example 1: integer case (starting centroids have to be drawn randomly)
set.seed(2)  # fix randomness
ex1 <- kMeansLloyd(x = scIris, centroids = 3, maxIter = 8, nStart = 5)
set.seed(2)  # reset to the same RNG state before the reference run
ref1 <- kmeans(x = scIris, centers = 3, iter.max = 8, nstart = 5,
               algorithm = "Lloyd")

# check equivalence, example 1
# all.equal() returns a character description of the differences (not FALSE)
# when two objects differ. Unwrapped, that would coerce the whole vector to
# character and make all() stop with an error, so every comparison is passed
# through isTRUE() to obtain a plain logical.
check1 <- c(cluster    = isTRUE(all.equal(ex1$cluster, ref1$cluster)),
            centroids  = isTRUE(all.equal(ex1$centroids, ref1$centers)),
            iterations = isTRUE(all.equal(ex1$iterations, ref1$iter)),
            withinSS   = isTRUE(all.equal(ex1$withinSS, ref1$withinss)),
            withinTot  = isTRUE(all.equal(ex1$withinTot, ref1$tot.withinss)),
            groupSizes = isTRUE(all.equal(ex1$groupSizes, ref1$size)))

# Example 2: matrix case
# define starting centroids by hand
# (12 values filled column-wise with ncol = 4, i.e. a 3 x 4 matrix)
y <- matrix(c(-1, .1, .9, .8, -.6, -.2, -1.3, .3, 1, -1.25, .15, 1), ncol = 4)
ex2 <- kMeansLloyd(x = scIris, centroids = y, maxIter = 10)
ref2 <- kmeans(x = scIris, centers = y, iter.max = 10, algorithm = "Lloyd")

# check equivalence, example 2 (isTRUE() for the same reason as in example 1)
check2 <- c(cluster    = isTRUE(all.equal(ex2$cluster, ref2$cluster)),
            centroids  = isTRUE(all.equal(ex2$centroids, ref2$centers)),
            iterations = isTRUE(all.equal(ex2$iterations, ref2$iter)),
            withinSS   = isTRUE(all.equal(ex2$withinSS, ref2$withinss)),
            withinTot  = isTRUE(all.equal(ex2$withinTot, ref2$tot.withinss)),
            groupSizes = isTRUE(all.equal(ex2$groupSizes, ref2$size)))

# print object of class kMeans
print(ex1)
# result check 1
print(all(check1))
# result check 2
print(all(check2))

## ------------------------------------------------------------------------
sumEx1 <- summary(ex1)

# check equivalence of the two added elements
# all.equal() yields a character description instead of FALSE on a mismatch;
# isTRUE() keeps the vector logical so that all() below cannot fail on it.
check3 <- c(totalSS   = isTRUE(all.equal(sumEx1$totalSS, ref1$totss)),
            betweenSS = isTRUE(all.equal(sumEx1$betweenSS, ref1$betweenss)))

print(sumEx1)
print(all(check3))

## ------------------------------------------------------------------------
fitScIris <- fitted(ex1)
# the result can be used to compute the residuals residScIris
residScIris <- scIris - fitScIris

## ------------------------------------------------------------------------
# call plot(ex1) in the console to see the interactive functioning in the multidimensional case

# reduced example (since Markdown cannot handle the interactive character):
# only columns 3 and 4 of iris are used, so the data is two-dimensional
scIris2 <- scale(iris[, 3:4])
# centroids = 3 is the integer case, i.e. the starting centroids are drawn
# randomly; fix the seed (as in example 1) so the figure is reproducible
set.seed(2)
ex3 <- kMeansLloyd(x = scIris2, centroids = 3, nStart = 5)
plot(ex3)


## ------------------------------------------------------------------------
library(microbenchmark)
# hand-picked starting centroids, written one centroid per row (2 x 4 matrix)
u <- rbind(c(-1, .8, -1.3, -1.25),
           c(.1, -.6, .3, .15))
# timing of the package implementation ...
microbenchmark(kMeansLloyd(scIris, u), unit = "ms")
# vs.
# ... timing of stats::kmeans with the same data and starting centroids
microbenchmark(kmeans(scIris, u, algorithm = "Lloyd"), unit = "ms")
# heiligerl/kMeans_Rpackage documentation built on Aug. 16, 2020, 4:04 p.m.