library(learnr) knitr::opts_chunk$set(echo = TRUE, exercise = FALSE)
We load the libraries and the data.
library(plotly)
df <- moxier::earthquakes
p <- plot_ly(df, x = ~lat, y = ~long, z = ~depth) %>% add_markers() %>% layout(scene = list(xaxis = list(title = 'Latitude'), yaxis = list(title = 'Longitude'), zaxis = list(title = 'Depth'))) p
We rescale the data.
rescale <- function(x, na.rm = FALSE) { (x - min(x, na.rm = na.rm)) / (max(x, na.rm = na.rm) - min(x, na.rm = na.rm)) }
df_r <- as.data.frame(apply(X = df, MARGIN = 2, FUN = rescale, na.rm = FALSE))
and we plot it.
p <- plot_ly(df_r, x = ~lat, y = ~long, z = ~depth) %>% add_markers() %>% layout(scene = list(xaxis = list(title = 'Latitude'), yaxis = list(title = 'Longitude'), zaxis = list(title = 'Depth'))) p
within_ss <- matrix(data = NA, nrow = 1000, ncol = 10) between_ss <- matrix(data = NA, nrow = 1000, ncol = 10) for (k in seq.int(from = 1, to = 10)) { for (iter in seq.int(from = 1, to = 1000)) { clust_iter <- kmeans(x = df_r, centers = k) within_ss[iter, k] <- clust_iter$tot.withinss between_ss[iter, k] <- clust_iter$betweenss } }
Let us check the result.
boxplot(within_ss, main = "Within SS")
boxplot(between_ss, main = "Between SS")
clust_k <- kmeans(x = df_r, centers = 4, iter.max = 50L, nstart = 100L)
We plot the results.
p <- plot_ly(df_r, x = ~lat, y = ~long, z = ~depth, color = ~as.factor(clust_k$cluster)) %>% add_markers() %>% layout(scene = list(xaxis = list(title = 'Latitude'), yaxis = list(title = 'Longitude'), zaxis = list(title = 'Depth'))) p
We also inspect what we have obtained.
clust_k$iter clust_k$cluster clust_k$size clust_k$centers clust_k$tot.withinss clust_k$betweenss clust_k$totss
We now cluster considering with a different technique.
First of all, we compute the distance matrix.
d <- dist(df_r, method = "euclidean")
image(as.matrix(d), asp = 1)
We perform different clusterings and plot them.
clustc <- hclust(d, method='complete') clusts <- hclust(d, method='single') clusta <- hclust(d, method='average') clustw <- hclust(d, method='ward.D')
par(mfrow=c(2,2)) plot(clustc, hang=0, labels=FALSE, main='Complete', xlab='', sub='') plot(clusts, hang=0, labels=FALSE, main='Single', xlab='', sub='') plot(clusta, hang=0, labels=FALSE, main='Average', xlab='', sub='') plot(clustw, hang=0, labels=FALSE, main='Ward', xlab='', sub='')
For each method, we shall display the results.
clusterc <- cutree(hclust(d, method='complete'), 2) p <- plot_ly(df_r, x = ~lat, y = ~long, z = ~depth, color = ~as.factor(clusterc)) %>% add_markers() %>% layout(scene = list(xaxis = list(title = 'Latitude'), yaxis = list(title = 'Longitude'), zaxis = list(title = 'Depth'))) p
clusters <- cutree(hclust(d, method='single'), 4) p <- plot_ly(df_r, x = ~lat, y = ~long, z = ~depth, color = ~as.factor(clusters)) %>% add_markers() %>% layout(scene = list(xaxis = list(title = 'Latitude'), yaxis = list(title = 'Longitude'), zaxis = list(title = 'Depth'))) p
clustera <- cutree(hclust(d, method='average'), 2) p <- plot_ly(df_r, x = ~lat, y = ~long, z = ~depth, color = ~as.factor(clustera)) %>% add_markers() %>% layout(scene = list(xaxis = list(title = 'Latitude'), yaxis = list(title = 'Longitude'), zaxis = list(title = 'Depth'))) p
clusterw <- cutree(hclust(d, method='ward.D'), 3) p <- plot_ly(df_r, x = ~lat, y = ~long, z = ~depth, color = ~as.factor(clusterw)) %>% add_markers() %>% layout(scene = list(xaxis = list(title = 'Latitude'), yaxis = list(title = 'Longitude'), zaxis = list(title = 'Depth'))) p
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.