inst/doc/kmedoid.R

## -----------------------------------------------------------------------------
library(kmed)
iris[1:3,]

## -----------------------------------------------------------------------------
num <- as.matrix(iris[,1:4])
rownames(num) <- rownames(iris)
#calculate the Manhattan weighted by range distance of all iris objects
mrwdist <- distNumeric(num, num)
#show the distance among objects 1 to 3
mrwdist[1:3,1:3]

## -----------------------------------------------------------------------------
#extract the range of each variable
apply(num, 2, function(x) max(x)-min(x))

## -----------------------------------------------------------------------------
#the distance between objects 1 and 2
abs(5.1-4.9)/3.6 + abs(3.5 - 3.0)/2.4 + abs(1.4-1.4)/5.9 + abs(0.2-0.2)/2.4

## ---- echo=FALSE--------------------------------------------------------------
#data object
num[1:2,]

## -----------------------------------------------------------------------------
#calculate the squared Euclidean weighthed by range distance of all iris objects
serdist <- distNumeric(num, num, method = "ser")
#show the distance among objects 1 to 3
serdist[1:3,1:3]

## -----------------------------------------------------------------------------
#the distance between objects 1 and 2
(5.1-4.9)^2/3.6 + (3.5 - 3.0)^2/2.4 + (1.4-1.4)^2/5.9 + (0.2-0.2)^2/2.4

## -----------------------------------------------------------------------------
#calculate the squared Euclidean weighthed by squared range distance of 
#all iris objects
ser.2dist <- distNumeric(num, num, method = "ser.2")
#show the distance among objects 1 to 3
ser.2dist[1:3,1:3]

## -----------------------------------------------------------------------------
(5.1-4.9)^2/3.6^2 + (3.5 - 3.0)^2/2.4^2 + (1.4-1.4)^2/5.9^2 + (0.2-0.2)^2/2.4^2

## ---- echo=FALSE--------------------------------------------------------------
#data object
num[1:2,]

## -----------------------------------------------------------------------------
#calculate the squared Euclidean weighthed by variance distance of 
#all iris objects
sevdist <- distNumeric(num, num, method = "sev")
#show the distance among objects 1 to 3
sevdist[1:3,1:3]

## -----------------------------------------------------------------------------
#calculate the range of each variable
apply(num[,1:4], 2, function(x) var(x))

## -----------------------------------------------------------------------------
(5.1-4.9)^2/0.6856935 + (3.5 - 3.0)^2/0.1899794 + (1.4-1.4)^2/3.1162779 +
  (0.2-0.2)^2/0.5810063

## -----------------------------------------------------------------------------
#calculate the squared Euclidean distance of all iris objects
sedist <- distNumeric(num, num, method = "se")
#show the distance among objects 1 to 3
sedist[1:3,1:3]

## -----------------------------------------------------------------------------
(5.1-4.9)^2 + (3.5 - 3.0)^2 + (1.4-1.4)^2 + (0.2-0.2)^2

## -----------------------------------------------------------------------------
set.seed(1)
bin <- matrix(sample(1:2, 4*2, replace = TRUE), 4, 2)
rownames(bin) <- 1:nrow(bin)
colnames(bin) <- c("x", "y")

## -----------------------------------------------------------------------------
bin
#calculate simple matching distance
matching(bin, bin)

## -----------------------------------------------------------------------------
((1 == 1) + (1 == 2))/ 2

## -----------------------------------------------------------------------------
#calculate co-occurrence distance
cooccur(bin)

## -----------------------------------------------------------------------------
bin

## -----------------------------------------------------------------------------
#cross tabulation to define score in the y variable
(tab.y <- table(bin[,'x'], bin[,'y']))
#cross tabulation to define score in the x variable
(tab.x <- table(bin[,'y'], bin[,'x']))

## -----------------------------------------------------------------------------
#proportion in the y variable
(prop.y <- apply(tab.y, 2, function(x) x/sum(x)))
#proportion in the x variable
(prop.x <- apply(tab.x, 2, function(x) x/sum(x)))

## -----------------------------------------------------------------------------
#maximum proportion in the y variable
(max.y <- apply(prop.y, 2, function(x) max(x)))
#maximum proportion in the x variable
(max.x <- apply(prop.x, 2, function(x) max(x)))

## -----------------------------------------------------------------------------
#score mis-match in the y variable
(sum(max.y) - 1)/1
#score mis-match in the x variable
(sum(max.x) - 1)/1

## -----------------------------------------------------------------------------
cat <- matrix(c(1, 3, 2, 1, 3, 1, 2, 2), 4, 2)
mixdata <- cbind(iris[c(1:2, 51:52),3:4], bin, cat)
rownames(mixdata) <- 1:nrow(mixdata)
colnames(mixdata) <- c(paste(c("num"), 1:2, sep = ""), 
                       paste(c("bin"), 1:2, sep = ""), 
                       paste(c("cat"), 1:2, sep = ""))

## -----------------------------------------------------------------------------
mixdata

## -----------------------------------------------------------------------------
#calculate the Gower distance
distmix(mixdata, method = "gower", idnum = 1:2, idbin = 3:4, idcat = 5:6)

## -----------------------------------------------------------------------------
#extract the range of each numerical variable
apply(mixdata[,1:2], 2, function(x) max(x)-min(x))

## -----------------------------------------------------------------------------
#the Gower similarity
(gowsim <- ((1-abs(4.7-4.5)/3.3) + (1-abs(1.4-1.5)/1.3) + 1 + 1 + 0 + 1)/ 6 ) 

## -----------------------------------------------------------------------------
#the Gower distance
1 - gowsim

## -----------------------------------------------------------------------------
#calculate the Wishart distance
distmix(mixdata, method = "wishart", idnum = 1:2, idbin = 3:4, idcat = 5:6)

## -----------------------------------------------------------------------------
#extract the variance of each numerical variable
apply(mixdata[,1:2], 2, function(x) var(x))

## -----------------------------------------------------------------------------
wish <- (((4.7-4.5)^2/3.42) + ((1.4-1.5)^2/0.5225) + 0 + 0 + 1 + 0)/ 6 
#the Wishart distance
sqrt(wish)

## -----------------------------------------------------------------------------
#calculate Podani distance
distmix(mixdata, method = "podani", idnum = 1:2, idbin = 3:4, idcat = 5:6)

## -----------------------------------------------------------------------------
poda <- ((4.7-4.5)^2/3.3^2) + ((1.4-1.5)^2/1.3^2) + 0 + 0 + 1 + 0 
#the Podani distance
sqrt(poda)

## ---- echo=FALSE--------------------------------------------------------------
#data object
mixdata[3:4,]

## -----------------------------------------------------------------------------
#calculate the Huang distance
distmix(mixdata, method = "huang", idnum = 1:2, idbin = 3:4, idcat = 5:6)

## -----------------------------------------------------------------------------
#find the average standard deviation of the numerical variables
mean(apply(mixdata[,1:2], 2, function(x) sd(x)))

## -----------------------------------------------------------------------------
(4.7-4.5)^2 + (1.4-1.5)^2 + 1.286083*(0 + 0) + 1.286083*(1 + 0)

## -----------------------------------------------------------------------------
#calculate Harikumar-PV distance
distmix(mixdata, method = "harikumar", idnum = 1:2, idbin = 3:4, idcat = 5:6)

## -----------------------------------------------------------------------------
cooccur(mixdata[,5:6])

## -----------------------------------------------------------------------------
abs(4.7-4.5) + abs(1.4-1.5) + (0 + 0) + (0.5)

## ---- echo=FALSE--------------------------------------------------------------
#data object
mixdata[3:4,]

## -----------------------------------------------------------------------------
#calculate Ahmad-Dey distance
distmix(mixdata, method = "ahmad", idnum = 1:2, idbin = 3:4, idcat = 5:6)

## -----------------------------------------------------------------------------
cooccur(mixdata[,3:6])

## -----------------------------------------------------------------------------
(1.4-4.7)^2 + (0.2-1.4)^2 + (2)^2

## ---- echo=FALSE--------------------------------------------------------------
#data object
mixdata[2:3,]

## -----------------------------------------------------------------------------
#run the sfkm algorihtm on iris data set with mrw distance
(sfkm <- fastkmed(mrwdist, ncluster = 3, iterate = 50))

## -----------------------------------------------------------------------------
(sfkmtable <- table(sfkm$cluster, iris[,5]))

## -----------------------------------------------------------------------------
(3+11)/sum(sfkmtable)

## -----------------------------------------------------------------------------
#set the initial medoids
set.seed(1)
(kminit <- sample(1:nrow(iris), 3))
#run the km algorihtm on iris data set with mrw distance
(km <- fastkmed(mrwdist, ncluster = 3, iterate = 50, init = kminit))

## -----------------------------------------------------------------------------
(kmtable <- table(km$cluster, iris[,5]))

## -----------------------------------------------------------------------------
(3+9)/sum(kmtable)

## -----------------------------------------------------------------------------
#run the rkm algorihtm on iris data set with mrw distance and m = 10
(rkm <- rankkmed(mrwdist, ncluster = 3, m = 10, iterate = 50))

## -----------------------------------------------------------------------------
(rkmtable <- table(rkm$cluster, iris[,5]))

## -----------------------------------------------------------------------------
(3+3)/sum(rkmtable)

## -----------------------------------------------------------------------------
#run the inckm algorihtm on iris data set with mrw distance and alpha = 1.2
(inckm <- inckmed(mrwdist, ncluster = 3, alpha = 1.1, iterate = 50))

## -----------------------------------------------------------------------------
(inckmtable <- table(inckm$cluster, iris[,5]))

## -----------------------------------------------------------------------------
(9+3)/sum(inckmtable)

## -----------------------------------------------------------------------------
#run the sfkm algorihtm on iris data set with mrw distance
(simplekm <- skm(mrwdist, ncluster = 3, seeding = 50))

## -----------------------------------------------------------------------------
(simpletable <- table(simplekm$cluster, iris[,5]))

## -----------------------------------------------------------------------------
(4+11)/sum(simpletable)

## -----------------------------------------------------------------------------
#calculate silhouette of the RKM result of iris data set 
siliris <- sil(mrwdist, rkm$medoid, rkm$cluster, 
                     title = "Silhouette plot of Iris data set via RKM")

## -----------------------------------------------------------------------------
#silhouette indices of objects 49 to 52
siliris$result[c(49:52),]

## ---- fig.width=7, fig.asp=0.8------------------------------------------------
siliris$plot

## -----------------------------------------------------------------------------
#calculate centroid-base shadow value of the RKM result of iris data set 
csviris <- csv(mrwdist, rkm$medoid, rkm$cluster, 
                     title = "CSV plot of Iris data set via RKM")

## -----------------------------------------------------------------------------
#shadow values of objects 49 to 52
csviris$result[c(49:52),]

## ---- fig.width=7, fig.asp=0.8------------------------------------------------
csviris$plot

## -----------------------------------------------------------------------------
#calculate medoid-based shadow value of the RKM result of iris data set 
msviris <- msv(mrwdist, rkm$medoid, rkm$cluster, 
                     title = "MSV plot of Iris data set via RKM")

## -----------------------------------------------------------------------------
#Medoid-based shadow values of objects 49 to 52
msviris$result[c(49:52),]

## ---- fig.width=7, fig.asp=0.8------------------------------------------------
msviris$plot

## -----------------------------------------------------------------------------
#The RKM function for an argument input
rkmfunc <- function(x, nclust) {
  res <- rankkmed(x, nclust, m = 10, iterate = 50)
  return(res$cluster)
}

## -----------------------------------------------------------------------------
#The RKM algorthim evaluation by inputing the rkmfunc function
#in the algorithm argument
rkmbootstrap <- clustboot(mrwdist, nclust=3, nboot=50, algorithm = rkmfunc)

## -----------------------------------------------------------------------------
rkmbootstrap[1:4,1:5]

## -----------------------------------------------------------------------------
#The ward function to order the objects in the consensus matrix
wardorder <- function(x, nclust) {
  res <- hclust(as.dist(x), method = "ward.D2")
  member <- cutree(res, nclust)
  return(member)
}
consensusrkm <- consensusmatrix(rkmbootstrap, nclust = 3, wardorder)

## -----------------------------------------------------------------------------
consensusrkm[c(1:4),c(1:4)]

## ---- fig.width=7, fig.asp=0.8------------------------------------------------
clustheatmap(consensusrkm, "Iris data evaluated by the RKM, ordered by Ward linkage")

## ---- fig.width=7, fig.asp=0.8------------------------------------------------
#convert the data set into principle component object
pcadat <- prcomp(iris[,1:4], scale. = TRUE)
#plot the pca with the corresponding RKM clustering result 
pcabiplot(pcadat, colobj = rkm$cluster, o.size = 2)

## ---- fig.height=3, fig.width=7-----------------------------------------------
pcabiplot(pcadat, y = "PC3",colobj = rkm$cluster, o.size = 1.5)

## ---- fig.width=7, fig.asp=0.8------------------------------------------------
barplotnum(iris[,1:4], rkm$cluster, alpha = 0.05)

## ---- fig.width=7, fig.asp=0.8------------------------------------------------
barplotnum(iris[,1:4], rkm$cluster, nc = 2, alpha = 0.01)

Try the kmed package in your browser

Any scripts or data that you put into this service are public.

kmed documentation built on Aug. 29, 2022, 9:06 a.m.