Nothing
## ----eval = T-----------------------------------------------------------------
library(ClusterR)
data(dietary_survey_IBS)
dim(dietary_survey_IBS)
X = dietary_survey_IBS[, -ncol(dietary_survey_IBS)] # data (excluding the response variable)
y = dietary_survey_IBS[, ncol(dietary_survey_IBS)] # the response variable
dat = center_scale(X, mean_center = T, sd_scale = T) # centering and scaling the data
## ----eval = F-----------------------------------------------------------------
# gmm = GMM(dat, 2, dist_mode = "maha_dist", seed_mode = "random_subset", km_iter = 10,
# em_iter = 10, verbose = F)
#
# # predict centroids, covariance matrix and weights
# pr = predict(gmm, newdata = dat)
#
## ----eval = F-----------------------------------------------------------------
#
# opt_gmm = Optimal_Clusters_GMM(dat, max_clusters = 10, criterion = "BIC",
#
# dist_mode = "maha_dist", seed_mode = "random_subset",
#
# km_iter = 10, em_iter = 10, var_floor = 1e-10,
#
# plot_data = T)
#
## ----eval = F-----------------------------------------------------------------
#
# res = external_validation(dietary_survey_IBS$class, pr$cluster_labels,
#
# method = "adjusted_rand_index", summary_stats = T)
#
# res
#
# ##
# ## ----------------------------------------
# ## purity : 1
# ## entropy : 0
# ## normalized mutual information : 1
# ## variation of information : 0
# ## ----------------------------------------
# ## specificity : 1
# ## sensitivity : 1
# ## precision : 1
# ## recall : 1
# ## F-measure : 1
# ## ----------------------------------------
# ## accuracy OR rand-index : 1
# ## adjusted-rand-index : 1
# ## jaccard-index : 1
# ## fowlkes-mallows-index : 1
# ## mirkin-metric : 0
# ## ----------------------------------------
#
## ----eval = F-----------------------------------------------------------------
#
# pca_dat = stats::princomp(dat)$scores[, 1:2]
#
# km = KMeans_arma(pca_dat, clusters = 2, n_iter = 10, seed_mode = "random_subset",
#
# verbose = T, CENTROIDS = NULL)
#
# pr = predict_KMeans(pca_dat, km)
#
# table(dietary_survey_IBS$class, pr)
#
# class(km) = 'matrix'
#
# plot_2d(data = pca_dat, clusters = as.vector(pr), centroids_medoids = as.matrix(km))
## ----fig.width = 3.0, fig.height = 3.0, echo = T, eval = T--------------------
library(OpenImageR)
im = readImage('elephant.jpg')
# first resize the image to reduce the dimensions
im = resizeImage(im, 75, 75, method = 'bilinear')
imageShow(im) # plot the original image
im2 = apply(im, 3, as.vector) # vectorize RGB
## ----eval = T-----------------------------------------------------------------
# perform KMeans_rcpp clustering
km_rc = KMeans_rcpp(im2, clusters = 5, num_init = 5, max_iters = 100,
initializer = 'optimal_init', verbose = F)
km_rc$between.SS_DIV_total.SS
pr = predict(km_rc, newdata = im2)
## ----fig.width = 3.0, fig.height = 3.0, echo = T, eval = T--------------------
getcent = km_rc$centroids
getclust = km_rc$clusters
new_im = getcent[getclust, ] # each observation is associated with the nearby centroid
dim(new_im) = c(nrow(im), ncol(im), 3) # back-convertion to a 3-dimensional image
imageShow(new_im)
## ----fig.width = 5.0, fig.height = 5.0, echo = T, eval = T--------------------
opt = Optimal_Clusters_KMeans(im2, max_clusters = 10, plot_clusters = T,
criterion = 'distortion_fK', fK_threshold = 0.85,
initializer = 'optimal_init', tol_optimal_init = 0.2)
## ----fig.width = 3.0, fig.height = 3.0, echo = T, eval = T--------------------
im_d = readImage('dog.jpg')
# first resize the image to reduce the dimensions
im_d = resizeImage(im_d, 350, 350, method = 'bilinear')
imageShow(im_d) # plot the original image
im3 = apply(im_d, 3, as.vector) # vectorize RGB
dim(im3) # initial dimensions of the data
## ----fig.width = 3.0, fig.height = 3.0, echo = T, eval = T--------------------
start = Sys.time()
km_init = KMeans_rcpp(im3, clusters = 5, num_init = 5, max_iters = 100,
initializer = 'kmeans++', verbose = F)
end = Sys.time()
t = end - start
cat('time to complete :', t, attributes(t)$units, '\n')
getcent_init = km_init$centroids
getclust_init = km_init$clusters
new_im_init = getcent_init[getclust_init, ] # each observation is associated with the nearby centroid
dim(new_im_init) = c(nrow(im_d), ncol(im_d), 3) # back-convertion to a 3-dimensional image
imageShow(new_im_init)
## ----fig.width = 3.0, fig.height = 3.0, echo = T, eval = T--------------------
start = Sys.time()
km_mb = MiniBatchKmeans(im3, clusters = 5, batch_size = 20, num_init = 5, max_iters = 100,
init_fraction = 0.2, initializer = 'kmeans++', early_stop_iter = 10,
verbose = F)
pr_mb = predict(object = km_mb, newdata = im3)
end = Sys.time()
t = end - start
cat('time to complete :', t, attributes(t)$units, '\n')
getcent_mb = km_mb$centroids
new_im_mb = getcent_mb[pr_mb, ] # each observation is associated with the nearby centroid
dim(new_im_mb) = c(nrow(im_d), ncol(im_d), 3) # back-convertion to a 3-dimensional image
imageShow(new_im_mb)
## ----eval = T-----------------------------------------------------------------
data(mushroom)
X = mushroom[, -1]
y = as.numeric(mushroom[, 1]) # convert the labels to numeric
gwd = FD::gowdis(X) # calculate the 'gower' distance for the factor variables
gwd_mat = as.matrix(gwd) # convert the distances to a matrix
cm = Cluster_Medoids(gwd_mat, clusters = 2, swap_phase = TRUE, verbose = F)
## ----eval = T, echo = F-------------------------------------------------------
knitr::kable(data.frame(adusted_rand_index = external_validation(y, cm$clusters, method = "adjusted_rand_index", summary_stats = F), avg_silhouette_width = mean(cm$silhouette_matrix[, 'silhouette_widths'])), caption = "Non-Weigthed-K-medoids", align = 'l')
## ----eval = T, echo = F-------------------------------------------------------
knitr::kable(data.frame(predictors = c("cap_shape", "cap_surface", "cap_color", "bruises", "odor",
"gill_attachment", "gill_spacing", "gill_size", "gill_color",
"stalk_shape", "stalk_root", "stalk_surface_above_ring", "stalk_surface_below_ring",
"stalk_color_above_ring", "stalk_color_below_ring", "veil_type",
"veil_color", "ring_number", "ring_type", "spore_print_color",
"population", "habitat"), weights = c(4.626, 38.323, 55.899, 34.028, 169.608, 6.643, 42.08, 57.366,
37.938, 33.081, 65.105, 18.718, 76.165, 27.596, 26.238, 0.0, 1.507,
37.314, 32.685, 127.87, 64.019, 44.519)), align = 'l')
## ----eval = T-----------------------------------------------------------------
weights = c(4.626, 38.323, 55.899, 34.028, 169.608, 6.643, 42.08, 57.366, 37.938,
33.081, 65.105, 18.718, 76.165, 27.596, 26.238, 0.0, 1.507, 37.314,
32.685, 127.87, 64.019, 44.519)
gwd_w = FD::gowdis(X, w = weights) # 'gower' distance using weights
gwd_mat_w = as.matrix(gwd_w) # convert the distances to a matrix
cm_w = Cluster_Medoids(gwd_mat_w, clusters = 2, swap_phase = TRUE, verbose = F)
## ----eval = T, echo = F-------------------------------------------------------
knitr::kable(data.frame(adusted_rand_index = external_validation(y, cm_w$clusters, method = "adjusted_rand_index", summary_stats = F), avg_silhouette_width = mean(cm_w$silhouette_matrix[, 'silhouette_widths'])), caption = "Weigthed-K-medoids", align = 'l')
## ----eval = T-----------------------------------------------------------------
cl_X = X # copy initial data
# the Clara_Medoids function allows only numeric attributes
# so first convert to numeric
for (i in 1:ncol(cl_X)) { cl_X[, i] = as.numeric(cl_X[, i]) }
start = Sys.time()
cl_f = Clara_Medoids(cl_X, clusters = 2, distance_metric = 'hamming', samples = 5,
sample_size = 0.2, swap_phase = TRUE, verbose = F, threads = 1)
end = Sys.time()
t = end - start
cat('time to complete :', t, attributes(t)$units, '\n')
## ----eval = T, echo = F-------------------------------------------------------
knitr::kable(data.frame(adusted_rand_index = external_validation(y, cl_f$clusters, method = "adjusted_rand_index", summary_stats = F), avg_silhouette_width = mean(cl_f$silhouette_matrix[, 'silhouette_widths'])), caption = "hamming-Clara-Medoids", align = 'l')
## ----eval = T-----------------------------------------------------------------
start = Sys.time()
cl_e = Cluster_Medoids(cl_X, clusters = 2, distance_metric = 'hamming', swap_phase = TRUE,
verbose = F, threads = 1)
end = Sys.time()
t = end - start
cat('time to complete :', t, attributes(t)$units, '\n')
## ----eval = T, echo = F-------------------------------------------------------
knitr::kable(data.frame(adusted_rand_index = external_validation(y, cl_e$clusters, method = "adjusted_rand_index", summary_stats = F), avg_silhouette_width = mean(cl_e$silhouette_matrix[, 'silhouette_widths'])), caption = "hamming-Cluster-Medoids", align = 'l')
## ----eval = T-----------------------------------------------------------------
# Silhouette Plot for the "Clara_Medoids" object
Silhouette_Dissimilarity_Plot(cl_f, silhouette = TRUE)
## ----eval = T-----------------------------------------------------------------
# Silhouette Plot for the "Cluster_Medoids" object
Silhouette_Dissimilarity_Plot(cl_e, silhouette = TRUE)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.