################## FOR OPERATION ########################################################################################
#######################kluster main function FOR OPERATION ######################################################################
kluster <- function(data,
iter_klust, #number of iterations for clustering with sample_n size x
smpl, #size of the sample_n to be taken with replacement out of data
algorithm = "Default" #select analysis algorithm from BIC, PAMK, CAL, and AP
) {
size <- nrow(data)
if (algorithm == 'BIC') {
###kluster procedure
##bic
kbics <- list()
tBIC_kluster <- list()
for (i in 1:iter_klust) {
dat2 <- as.matrix(data[sample(nrow(data), smpl, replace=TRUE), ])
kbics[[i]] <- dim(Mclust(dat2, G=1:15)$z)[2]
rm(dat2)
}
m_BIC_k <- round(mean(as.numeric(kbics)),0)
f_BIC_k <- as.numeric(names(which.max(table(unlist(kbics)))))
method <- c("BIC_kluster")
k_mean <- c(m_BIC_k)
k_freq <- c(f_BIC_k)
sim <- data.frame(method,k_mean,k_freq)
return(
list("sim"=sim,
"m_BIC_k"=m_BIC_k,
"f_BIC_k"=f_BIC_k,
"BICsimk"=kbics)
)
#
# } else
# if (algorithm == "PAMK") {
#
# # tic()
# # pamk.best <- pamk(data)$nc
# # tpamk <- toc()
# # tpamk <- as.numeric(tpamk$toc - tpamk$tic)
#
# ###pam method
# kpamsum <- list()
# kpam2 <- list()
# t2 <- list()
# tpam_kluster <- list()
# for (j in 1:iter_sim) {
# for (i in 1:iter_klust) {
# tic()
# dat2 <- data[sample(nrow(data), smpl, replace=TRUE), ]
# kpam2[[i]] <- pamk(dat2,krange=1:15)$nc
# rm(dat2)
# t <- toc()
# t2[[i]] <- t$toc - t$tic
# }
# kpamsum[[j]] <- mean(as.numeric(kpam2))
# }
# tpam_kluster <- mean(unlist(t2))
# m_pam_k <- round(mean(as.numeric(kpam2)),0)
# f_pam_k <- as.numeric(names(which.max(table(unlist(kpam2)))))
#
# method <- c("pam_kluster")
# ptime <- c(tpam_kluster/iter_klust)
# k_mean <- c(m_pam_k)
# k_freq <- c(f_pam_k)
#
#
# sim <- data.frame(method,k_mean,k_freq,ptime)
# sim$k_orig <- clusters
# sim$e_mean <- k_mean - clusters
# sim$e_freq <- k_freq - clusters
#
# sim$n <- size
#
#
#
# ####cluster and visualize the performance
#
# # km2 <- hkmeans(data, k = pamk.best,iter.max = 300)
# # km6 <- hkmeans(data, k = mean_pam_kluster,iter.max = 300)
# #
# #
# #
# # par(mfrow=c(1,2))
# #
# # plot(data, col = km2$cluster, pch = 19, frame = FALSE,
# # main = paste0("HK-means with k = ",pamk.best,""))
# # points(km2$centers, col = 1:pamk.best, pch = 8, cex = 3)
#
# # plot(data, col = km6$cluster, pch = 19, frame = FALSE,
# # main = paste0("HK-means with kluster PAM process, k = ",mean_pam_kluster,""))
# # points(km6$centers, col = 1:mean_pam_kluster, pch = 8, cex = 3)
# #
#
#
# # par(mfrow=c(1,1))
#
# # boxplot(round(as.numeric(kpamsum),0),
# # main = paste0("kluster optimum cluster number from PAM w/ resampling.
# # Mean resampling estimate = ",mean_pam_kluster,"
# # ",
# # " and ordinary PAM suggested ",pamk.best," clusters."))
# #
# #
# #
# return(
# list("sim"=sim,
# "m_pam_k"=m_pam_k,
# "f_pam_k"=f_pam_k,
# "PAMsimk"=kpam2)
# )
#
#
# } else
# if (algorithm == "CAL") {
# # tic()
# # calinski.best <- as.numeric(which.max(cascadeKM(data, 1, 15, iter = 1000)$results[2,]))
# # tcalinski <- toc()
# # tcalinski <- as.numeric(tcalinski$toc - tcalinski$tic)
#
#
# kcalsum <- list()
# kcal2 <- list()
# t2 <- list()
# tcal_kluster <- list()
# for (j in 1:iter_sim) {
# for (i in 1:iter_klust) {
# tic()
# dat2 <- data[sample(nrow(data), smpl, replace=TRUE), ]
# kcal2[[i]] <- as.numeric(which.max(cascadeKM(dat2, 1, 15, iter = 1000)$results[2,]))
# rm(dat2)
# t <- toc()
# t2[[i]] <- t$toc - t$tic
# }
# kcalsum[[j]] <- mean(as.numeric(kcal2))
# }
# tcal_kluster <- mean(unlist(t2))
# m_cal_kluster <- round(mean(as.numeric(kcal2)),0)
# f_cal_k <- as.numeric(names(which.max(table(unlist(kcal2)))))
#
# method <- c("cal_kluster")
# ptime <- c(tcal_kluster/iter_klust)
# k_mean <- c(m_cal_k)
# k_freq <- c(f_cal_k)
#
# sim <- data.frame(method,k_mean,k_freq,ptime)
# sim$k_orig <- clusters
# sim$e_mean <- k_mean - clusters
# sim$e_freq <- k_freq - clusters
#
# sim$n <- size
# ####cluster and visualize the performance
# # km3 <- hkmeans(data, k = calinski.best,iter.max = 300)
# # km7 <- hkmeans(data, k = mean_cal_kluster,iter.max = 300)
#
#
#
# # par(mfrow=c(1,2))
# #
# # plot(data, col = km3$cluster, pch = 19, frame = FALSE,
# # main = paste0("HK-means with k = ",calinski.best,""))
# # points(km3$centers, col = 1:calinski.best, pch = 8, cex = 3)
# #
#
# # plot(data, col = km7$cluster, pch = 19, frame = FALSE,
# # main = paste0("HK-means with kluster Calinski process, k = ",mean_cal_kluster,""))
# # points(km7$centers, col = 1:mean_cal_kluster, pch = 8, cex = 3)
# #
#
#
# # par(mfrow=c(1,1))
#
# # boxplot(round(as.numeric(kcalsum),0),
# # main = paste0("kluster optimum cluster number from Calinski w/ resampling.
# # Mean resampling estimate = ",mean_cal_kluster,"
# # and ordinary Calinski suggested ",calinski.best," clusters."))
#
#
# return(
# list("sim"=sim,
# "m_cal_k"=m_cal_k,
# "f_cal_k"=f_cal_k,
# "CALsimk"=kcal2)
# )
#
#
# } else
#
# if (algorithm == "AP") {
#
# # tic()
# # apclus.best <- length(apcluster(negDistMat(r=2), data)@clusters)
# # tap <- toc()
# # tap <- as.numeric(tap$toc - tap$tic)
#
#
# ###AP
# kapsum <- list()
# kap2 <- list()
# t2 <- list()
# tap_kluster <- list()
# for (j in 1:iter_sim) {
# for (i in 1:iter_klust) {
# tic()
# dat2 <- data[sample(nrow(data), smpl, replace=TRUE), ]
# kap2[[i]] <- length(apcluster(negDistMat(r=2), dat2)@clusters)
# rm(dat2)
# t <- toc()
# t2[[i]] <- t$toc - t$tic
# }
# kapsum[[j]] <- mean(as.numeric(kap2))
# }
# tap_kluster <- mean(unlist(t2))
# m_ap_k <- round(mean(as.numeric(kap2)),0)
# f_ap_k <- as.numeric(names(which.max(table(unlist(kap2)))))
#
#
# method <- c("ap_kluster")
# ptime <- c(tap_kluster/iter_klust)
# k_mean <- c(m_ap_k)
# k_freq <- c(f_ap_k)
#
#
#
# sim <- data.frame(method,k_mean,k_freq,ptime)
# sim$k_orig <- clusters
# sim$e_mean <- k_mean - clusters
# sim$e_freq <- k_freq - clusters
#
# sim$n <- size
#
#
# ####cluster and visualize the performance
#
# # km4 <- hkmeans(data, k = apclus.best,iter.max = 300)
#
# # km8 <- hkmeans(data, k = mean_ap_kluster,iter.max = 300)
# #
# #
# #
# # par(mfrow=c(1,2))
# #
# # plot(data, col = km4$cluster, pch = 19, frame = FALSE,
# # main = paste0("HK-means with k = ",apclus.best,""))
# # points(km4$centers, col = 1:apclus.best, pch = 8, cex = 3)
# #
# # plot(data, col = km8$cluster, pch = 19, frame = FALSE,
# # main = paste0("HK-means with kluster AP process, k = ",mean_ap_kluster,""))
# # points(km8$centers, col = 1:mean_ap_kluster, pch = 8, cex = 3)
#
#
# # par(mfrow=c(1,1))
#
# # boxplot(round(as.numeric(kapsum),0),
# # main = paste0("kluster optimum cluster number from AP w/ resampling.
# # Mean resampling estimate = ",mean_ap_kluster,"
# # and ordinary AP suggested ",apclus.best," clusters."))
#
#
# return(
# list("sim"=sim,
# "m_ap_k"=m_ap_k,
# "f_ap_k"=f_ap_k,
# "APsimk"=kap2)
# )
#
#
# } else
# if (algorithm == "Default") {
#
#
# ###kluster procedure
# ##bic
# kbicssum <- list()
# kbics2 <- list()
# t1 <- list()
# tBIC_kluster <- list()
# kpamsum <- list()
# kpam2 <- list()
# t2 <- list()
# tpam_kluster <- list()
# # kcalsum <- list()
# # kcal2 <- list()
# # t3 <- list()
# # tcal_kluster <- list()
# kapsum <- list()
# kap2 <- list()
# t4 <- list()
# tap_kluster <- list()
# for (j in 1:iter_sim) {
# for (i in 1:iter_klust) {
# dat2 <- data[sample(nrow(data), smpl, replace=TRUE), ]
# tic()
# kbics2[[i]] <- dim(Mclust(as.matrix(dat2), G=3:15)$z)[2]
# t1i <- toc()
# t1[[i]] <- t1i$toc - t1i$tic
#
# tic()
# kpam2[[i]] <- pamk(dat2,krange=1:15)$nc
# t2i <- toc()
# t2[[i]] <- t2i$toc - t2i$tic
#
# # tic()
# # kcal2[[i]] <- as.numeric(which.max(cascadeKM(dat2, 1, 15, iter = 1000)$results[2,]))
# # t3i <- toc()
# # t3[[i]] <- t3i$toc - t3i$tic
#
# tic()
# kap2[[i]] <- length(apcluster(negDistMat(r=2), dat2)@clusters)
# t4i <- toc()
# t4[[i]] <- t4i$toc - t4i$tic
#
# rm(dat2)
#
# }
# kbicssum[[j]] <- mean(as.numeric(kbics2))
# kpamsum[[j]] <- mean(as.numeric(kpam2))
# # kcalsum[[j]] <- mean(as.numeric(kcal2))
# kapsum[[j]] <- mean(as.numeric(kap2))
#
# }
# tBIC_kluster <- mean(unlist(t1))
# m_BIC_k <- round(mean(as.numeric(kbics2)),0)
# tpam_kluster <- mean(unlist(t2))
# m_pam_k <- round(mean(as.numeric(kpam2)),0)
# # tcal_kluster <- mean(unlist(t3))
# # m_cal_kluster <- round(mean(as.numeric(kcal2)),0)
# tap_kluster <- mean(unlist(t4))
# m_ap_k <- round(mean(as.numeric(kap2)),0)
#
# f_BIC_k <- as.numeric(names(which.max(table(unlist(kbics2)))))
# f_pam_k <- as.numeric(names(which.max(table(unlist(kpam2)))))
# f_ap_k <- as.numeric(names(which.max(table(unlist(kap2)))))
#
# method <- c("BIC_kluster","pam_kluster","ap_kluster")
# ptime <- c(tBIC_kluster/iter_klust,tpam_kluster/iter_klust,tap_kluster/iter_klust)
# k_mean <- c(m_BIC_k,m_pam_k,m_ap_k)
# k_freq <- c(f_BIC_k,f_pam_k,f_ap_k)
#
#
#
# sim <- data.frame(method,k_mean,k_freq,ptime)
# sim$k_orig <- clusters
# sim$e_mean <- k_mean - clusters
# sim$e_freq <- k_freq - clusters
#
# sim$n <- size
#
# #
# #
# #
# # ####cluster and visualize the performance
# # km5 <- hkmeans(data, k = mean_BIC_kluster,iter.max = 300)
# # km6 <- hkmeans(data, k = mean_pam_kluster,iter.max = 300)
# # km7 <- hkmeans(data, k = mean_cal_kluster,iter.max = 300)
# # km8 <- hkmeans(data, k = mean_ap_kluster,iter.max = 300)
# #
# #
# #
# #
# #
# # par(mfrow=c(2,2))
# # # plot(data, col = km1$cluster, pch = 19, frame = FALSE,
# # # main = paste0("HK-means with k = ",BIC.best,""))
# # #
# # plot(data, col = km5$cluster, pch = 19, frame = FALSE,
# # main = paste0("HK-means with kluster BIC process, k = ",mean_BIC_kluster,""))
# #
# # plot(data, col = km6$cluster, pch = 19, frame = FALSE,
# # main = paste0("HK-means with kluster PAM process, k = ",mean_pam_kluster,""))
# # points(km6$centers, col = 1:mean_pam_kluster, pch = 8, cex = 3)
# #
# # plot(data, col = km7$cluster, pch = 19, frame = FALSE,
# # main = paste0("HK-means with kluster Calinski process, k = ",mean_cal_kluster,""))
# # points(km7$centers, col = 1:mean_cal_kluster, pch = 8, cex = 3)
# #
# # plot(data, col = km8$cluster, pch = 19, frame = FALSE,
# # main = paste0("HK-means with kluster AP process, k = ",mean_ap_kluster,""))
# # points(km8$centers, col = 1:mean_ap_kluster, pch = 8, cex = 3)
# #
# #
# #
# #
# # boxplot(round(as.numeric(kbicssum),0),
# # main = paste0("kluster optimum cluster number from BIC w/ resampling.
# # Mean resampling estimate = ",mean_BIC_kluster,"
# # ",
# # " and ordinary BIC suggested ",BIC.best," clusters."))
# # boxplot(round(as.numeric(kpamsum),0),
# # main = paste0("kluster optimum cluster number from PAM w/ resampling.
# # Mean resampling estimate = ",mean_pam_kluster,"
# # ",
# # " and ordinary PAM suggested ",pamk.best," clusters."))
# # boxplot(round(as.numeric(kcalsum),0),
# # main = paste0("kluster optimum cluster number from Calinski w/ resampling.
# # Mean resampling estimate = ",mean_cal_kluster,"
# # and ordinary Calinski suggested ",calinski.best," clusters."))
# # boxplot(round(as.numeric(kapsum),0),
# # main = paste0("kluster optimum cluster number from AP w/ resampling.
# # Mean resampling estimate = ",mean_ap_kluster,"
# # and ordinary AP suggested ",apclus.best," clusters."))
#
#
# return(
# list("sim"=sim,
# "m_BIC_k"=m_BIC_k,
# "m_pam_k"=m_pam_k,
# "m_ap_k"=m_ap_k,
# "f_BIC_k"=f_BIC_k,
# "f_pam_k"=f_pam_k,
# "f_ap_k"=f_ap_k,
# "BICsimk"=kbics2,
# "PAMsimk"=kpam2,
# "APsimk"=kap2)
# )
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.