Description Usage Arguments Details Value Note Author(s) References See Also Examples
k-nearest neighbour classification for a test set from a training set, using the Mahalanobis distance. This function allows you to measure the distance between vectors by the Mahalanobis distance. K threshold value checking and the handling of tied votes (the "same K_i" problem) are also considered.
1 |
TrnX |
matrix or data frame of training set cases. |
OrigTrnG |
matrix or data frame of true classifications of training set. |
TstX |
matrix or data frame of test set cases. A vector will be interpreted as a row vector for a single case. |
K |
number of top K nearest neighbours considered. |
ShowObs |
logical; when it is TRUE, the function will output the information of the training set cases.
The knnMCN function determines which class an undetermined case should belong to by the following steps. First, calculate the Mahalanobis distance between the undetermined case and all the cases in the training dataset. Then, select the top K cases with the nearest distances. Finally, these selected cases represent their classes and vote for the undetermined case under the principle of majority rule.
When calculating the Mahalanobis Distance, we use samples' covariance matrix (CM) and the Mahalanobis Distance is defined as follows:
MD=sqrt((X-Y)*inverse(CM)*transpose(X-Y)) , where X,Y are 1*n vectors and CM is n*n matrix.
Sometimes a case may get the same number of "ballots" from class A and class B (or even C, D, ...); in this situation a weighted voting process is activated. The weight is based on the actual distances calculated between the test case and the K neighbouring cases in classes A and B. The test case is assigned to the class with the smaller total distance.
Also, to avoid unfair voting for undetermined case, K Threshold Value is stipulated to be less than the minimum size of the class in training dataset, or a warning will be shown.
The classification results for the test set will be returned. (When TstX is NULL, the function will automatically assume the user is trying to test the knn algorithm. Hence, a test result table and an accuracy report will be shown on the R console.)
Sometimes a singular covariance matrix may appear, depending on the input data and the size of the classes. In this case the function will return a warning and hint the user to try knnVCN to retry the classification.
Boxian Wei
Venables, W. N. and Ripley, B. D. (2002) Modern Applied Statistics with S. Fourth edition. Springer.
library(knnGarden)
## Load the iris example data set
data(iris)

## Training set: the four feature columns plus the species label (column 5)
TrnX <- iris[c(1:20, 80:100, 140:150), 1:4]
OrigTrnG <- iris[c(1:20, 80:100, 140:150), 5]

## Test set: either an explicit set of cases ...
TstX <- iris[c(1:20, 50:70, 120:140), 1:4]
## ... or NULL, which makes knnMCN self-test on the training set
TstX <- NULL

## Call function
knnMCN(TrnX = TrnX, OrigTrnG = OrigTrnG, TstX = TstX, ShowObs = FALSE, K = 5)
## The function is currently defined as
knnMCN <- function (TrnX, OrigTrnG, TstX = NULL, K = 1, ShowObs = FALSE)
{
    ## k-nearest-neighbour classification using per-class Mahalanobis
    ## distances.
    ##
    ## Args:
    ##   TrnX     : matrix or data frame of training cases.
    ##   OrigTrnG : true classifications of the training cases.
    ##   TstX     : matrix or data frame of test cases; when NULL, the
    ##              training set is classified against itself and a
    ##              confusion table plus accuracy report is printed.
    ##   K        : number of nearest neighbours that vote.
    ##   ShowObs  : when TRUE, the returned data frame also contains the
    ##              classified cases themselves.
    ## Returns: a data frame with a TstXIBelong column of predicted labels
    ##   (plus true labels and/or cases depending on TstX / ShowObs).
    OrigTrnG <- as.factor(OrigTrnG)
    TrnG <- as.numeric(OrigTrnG)               # numeric class codes 1..G
    CodeMeaning <- data.frame(TrnG, OrigTrnG)  # code -> original label map

    ## K threshold check: K must not exceed the smallest class size,
    ## otherwise the vote would be unfair to that class.
    TK <- sort(as.matrix(table(TrnG)), decreasing = FALSE)
    if (K > TK[1]) {
        stop(c("\nNOTES: \nsorry, the value of K ", "(K=", K,
            ") ", "you have selected is bigger than the capacity of one class in your training data set",
            "(", "the capacity is ", TK[1], ")", ",", "please choose a less value for K"))
    }

    ## A NULL test set means "self-test on the training data".
    if (is.null(TstX)) {
        IsTst <- 1
        TstX <- as.matrix(TrnX)
    } else {
        IsTst <- 0
    }
    if (!is.matrix(TstX)) {
        TstX <- as.matrix(TstX)
    }
    TrnX <- as.matrix(TrnX)

    ElmTrnG <- union(TrnG, TrnG)   # distinct class codes, in order of appearance
    LevTrnG <- length(ElmTrnG)
    TrnTotal <- cbind(TrnG, TrnX)

    ## One inverse covariance matrix per class; a (near-)singular
    ## covariance matrix makes the Mahalanobis distance undefined.
    ## (FIX: dropped solve()'s LINPACK argument, defunct in modern R.)
    MaDisList <- vector("list", LevTrnG)
    for (g in seq_len(LevTrnG)) {
        ClassCov <- cov(TrnX[TrnTotal[, 1] == ElmTrnG[g], , drop = FALSE])
        if (abs(det(ClassCov)) < 1e-07) {
            stop("\nWarnings:\nsample variance-covariance matrix is singular,\nand larger class sample capacity is required ,\nor you can try other methods in knnWXM of this package")
        }
        MaDisList[[g]] <- solve(ClassCov)
    }

    VoteResultList <- NULL
    for (i in seq_len(nrow(TstX))) {
        ## Distance of test case i to every training case: column 1 holds
        ## the class code, column 2 the Mahalanobis distance. Each class is
        ## measured with its own metric, as in the original implementation.
        RankBoardIJ <- NULL
        for (j in seq_len(LevTrnG)) {
            TempTrnXI <- TrnX[TrnTotal[, 1] == ElmTrnG[j], , drop = FALSE]
            Diff <- TempTrnXI - matrix(TstX[i, ], nrow(TempTrnXI),
                                       ncol(TempTrnXI), byrow = TRUE)
            ## sqrt(diag(D %*% S^-1 %*% t(D))) computed via rowSums,
            ## avoiding the full n x n cross-product.
            MD <- sqrt(rowSums((Diff %*% MaDisList[[j]]) * Diff))
            RankBoardIJ <- rbind(RankBoardIJ,
                                 cbind(rep(ElmTrnG[j], nrow(TempTrnXI)), MD))
        }

        ## BUG FIX: the original indexed the nearest neighbours with the
        ## stale inner-loop variable 'k' (the row count of the LAST class)
        ## instead of the K parameter, so the vote did not use K neighbours.
        TopK <- order(RankBoardIJ[, 2])[1:K]
        VoteAndWeight <- RankBoardIJ[TopK, 1:2, drop = FALSE]
        TempVote4TstXI <- RankBoardIJ[TopK, 1]

        ## Count votes per class, most votes first.
        CountVote <- as.matrix(sort(table(TempVote4TstXI), decreasing = TRUE))
        TempWinner <- as.numeric(rownames(CountVote))
        if (length(CountVote) == 1 || K == 1) {
            Winner <- TempWinner[1]
        } else {
            ## Tie break: among the classes tied for the most votes, the
            ## one with the smallest total distance to the test case wins.
            TiedClass <- TempWinner[1]
            TiedWeight <- sum(VoteAndWeight[VoteAndWeight[, 1] == TempWinner[1], 2])
            j <- 1
            while (j < length(CountVote) && CountVote[j] == CountVote[j + 1]) {
                TiedClass <- c(TiedClass, TempWinner[j + 1])
                TiedWeight <- c(TiedWeight,
                                sum(VoteAndWeight[VoteAndWeight[, 1] == TempWinner[j + 1], 2]))
                j <- j + 1
            }
            Winner <- TiedClass[which.min(TiedWeight)]
        }

        ## Translate the winning class code back to the original label
        ## (character, matching the original union()-based behaviour).
        TstXIBelong <- as.character(
            unique(CodeMeaning$OrigTrnG[CodeMeaning$TrnG == Winner]))
        VoteResultList <- rbind(VoteResultList, data.frame(TstXIBelong))
    }

    if (IsTst == 1) {
        ## Self-test mode: print a confusion table and the accuracy.
        CheckT <- as.matrix(table(data.frame(VoteResultList, OrigTrnG)))
        AccuStat <- 1 - sum(CheckT - diag(diag(CheckT)))/length(TrnG)
        cat("test results", "\n")
        print(CheckT)
        cat("the classification accuracy of this algorithm on this training dataset is: ",
            AccuStat * 100, "%", "\n\n\n")
    }

    ## Assemble the result: predictions, optionally with the true labels
    ## (self-test) and/or the classified cases (ShowObs).
    if (IsTst == 1 && !ShowObs) {
        result <- data.frame(VoteResultList, OrigTrnG)
    } else if (IsTst == 1 && ShowObs) {
        result <- data.frame(TstX, VoteResultList, OrigTrnG)
    } else if (!ShowObs) {
        result <- data.frame(VoteResultList)
    } else {
        result <- data.frame(TstX, VoteResultList)
    }
    result
}
|
Loading required package: cluster
test results
OrigTrnG
TstXIBelong setosa versicolor virginica
setosa 20 0 0
versicolor 0 21 0
virginica 0 0 11
the classification accuracy of this algorithm on this training dataset is: 100 %
TstXIBelong OrigTrnG
1 setosa setosa
2 setosa setosa
3 setosa setosa
4 setosa setosa
5 setosa setosa
6 setosa setosa
7 setosa setosa
8 setosa setosa
9 setosa setosa
10 setosa setosa
11 setosa setosa
12 setosa setosa
13 setosa setosa
14 setosa setosa
15 setosa setosa
16 setosa setosa
17 setosa setosa
18 setosa setosa
19 setosa setosa
20 setosa setosa
21 versicolor versicolor
22 versicolor versicolor
23 versicolor versicolor
24 versicolor versicolor
25 versicolor versicolor
26 versicolor versicolor
27 versicolor versicolor
28 versicolor versicolor
29 versicolor versicolor
30 versicolor versicolor
31 versicolor versicolor
32 versicolor versicolor
33 versicolor versicolor
34 versicolor versicolor
35 versicolor versicolor
36 versicolor versicolor
37 versicolor versicolor
38 versicolor versicolor
39 versicolor versicolor
40 versicolor versicolor
41 versicolor versicolor
42 virginica virginica
43 virginica virginica
44 virginica virginica
45 virginica virginica
46 virginica virginica
47 virginica virginica
48 virginica virginica
49 virginica virginica
50 virginica virginica
51 virginica virginica
52 virginica virginica
## Duplicate listing of the knnMCN source (k-NN classification using a
## per-class Mahalanobis distance metric).
## NOTE(review): the "\\n" sequences in the string literals below are a
## doubled-backslash artifact of this page; the first listing of the same
## function above uses plain "\n" escapes -- verify against the package.
function (TrnX, OrigTrnG, TstX = NULL, K = 1, ShowObs = F)
{
## Encode the class labels as numeric codes and keep a code -> label map.
OrigTrnG = as.factor(OrigTrnG)
TrnG = as.numeric(OrigTrnG)
CodeMeaning = data.frame(TrnG, OrigTrnG)
## K threshold check: K must not exceed the smallest class size.
TK = sort(as.matrix(table(TrnG)), decreasing = F)
if (K > TK[1]) {
stop(c("\\nNOTES: \\nsorry, the value of K ", "(K=", K,
") ", "you have selected is bigger than the capacity of one class in your training data set",
"(", "the capacity is ", TK[1], ")", ",", "please choose a less value for K"))
}
## A NULL test set means: classify the training set against itself and
## report accuracy afterwards (IsTst flags this self-test mode).
if (is.null(TstX) == T) {
IsTst = 1
TstX <- as.matrix(TrnX)
}
else {
IsTst = 0
}
if (is.matrix(TstX) == F) {
TstX <- as.matrix(TstX)
}
TrnX <- as.matrix(TrnX)
## Distinct class codes in order of first appearance.
ElmTrnG = union(TrnG, TrnG)
LevTrnG = length(ElmTrnG)
TrnTotal = cbind(TrnG, TrnX)
## Inverse covariance matrix of the first class; a (near-)singular
## covariance matrix makes the Mahalanobis distance undefined, so abort.
## NOTE(review): solve()'s LINPACK argument is defunct in modern R.
if (abs(det(cov(TrnX[which(TrnTotal[, 1] == ElmTrnG[1]),
]))) < 1e-07) {
stop("\\nWarnings:\\nsample variance-covariance matrix is singular,\\nand larger class sample capacity is required ,\\nor you can try other methods in knnWXM of this package")
}
else {
MaDisList = list(solve(cov(TrnX[which(TrnTotal[, 1] ==
ElmTrnG[1]), ]), LINPACK = T))
}
## Same computation for every remaining class; the inverse covariance
## matrices accumulate in MaDisList (one entry per class).
if (LevTrnG > 1) {
for (i in (1 + 1):LevTrnG) {
if (abs(det(cov(TrnX[which(TrnTotal[, 1] == ElmTrnG[i]),
]))) < 1e-07) {
stop("\\nWarnings:\\nsample variance-covariance matrix is singular,\\nand larger class sample capacity is required ,\\nor you can try other methods in knnWXM of this package")
}
else {
MaDisNode = list(solve(cov(TrnX[which(TrnTotal[,
1] == ElmTrnG[i]), ]), LINPACK = T))
MaDisList = c(MaDisList, MaDisNode)
}
}
}
NTstX = nrow(TstX)
NTrnTotal = nrow(TrnTotal)
VoteResult = NULL
VoteResultList = NULL
## Classify each test case in turn.
for (i in 1:nrow(TstX)) {
RankBoardI <- NULL
RankBoardIJ <- NULL
## Distance of test case i to every training case; column 1 holds the
## class code, column 2 the Mahalanobis distance (each class measured
## with its own inverse covariance matrix).
for (j in 1:LevTrnG) {
TempTrnXI = TrnX[which(TrnTotal[, 1] == ElmTrnG[j]),
]
TempCovJ = as.matrix(MaDisList[[j]])
TempTstXI = NULL
## Replicate the test case once per training row of class j.
for (k in 1:nrow(TempTrnXI)) {
TempTstXI = rbind(TempTstXI, TstX[i, ])
}
TempMadisBoardI <- sqrt(diag((TempTstXI - TempTrnXI) %*%
TempCovJ %*% t(TempTstXI - TempTrnXI)))
MadisBoardI <- as.matrix(TempMadisBoardI)
GBoardI <- as.matrix(rep(ElmTrnG[j], nrow(TempTrnXI)))
RankBoardI <- cbind(GBoardI, MadisBoardI)
RankBoardIJ <- rbind(RankBoardIJ, RankBoardI)
}
## NOTE(review): "1:k" below uses the leftover inner-loop index k (the
## row count of the LAST class), not the K parameter -- so the number
## of voting neighbours is not K. This looks like a bug.
VoteAndWeight = RankBoardIJ[sort(RankBoardIJ[, 2], index.return = T)$ix[1:k],
1:2]
TempVote4TstXI = RankBoardIJ[sort(RankBoardIJ[, 2], index.return = T)$ix[1:k],
1]
ElmVote = union(TempVote4TstXI, TempVote4TstXI)
## Count the votes per class, most votes first.
CountVote = as.matrix(sort(table(TempVote4TstXI), decreasing = T))
TempWinner = as.numeric(rownames(CountVote))
## Unambiguous majority (or K == 1): the top-voted class wins.
if (length(CountVote) == 1 | K == 1) {
Winner = TempWinner[1]
TstXIBelong = union(CodeMeaning$OrigTrnG[which(CodeMeaning$TrnG ==
Winner)], CodeMeaning$OrigTrnG[which(CodeMeaning$TrnG ==
Winner)])
VoteResultNode = data.frame(TstXIBelong)
VoteResultList = rbind(VoteResultList, VoteResultNode)
}
else {
## Tie break: among the classes tied for the most votes, the one
## with the smallest total distance to the test case wins.
NumOfTie = CountVote[1]
FinalList = NULL
j = 1
TempWeight = sum(VoteAndWeight[which(VoteAndWeight[,
1] == TempWinner[j]), 2])
FinalList = data.frame(TempWinner[j], TempWeight)
while (CountVote[j] == CountVote[j + 1] & j < length(CountVote)) {
TempWeight = sum(VoteAndWeight[which(VoteAndWeight[,
1] == TempWinner[j + 1]), 2])
FinalListNode = c(TempWinner[j + 1], TempWeight)
FinalList = rbind(FinalList, FinalListNode)
j = j + 1
}
FinalList = FinalList[sort(FinalList$TempWeight,
index.return = T)$ix[1], ]
## Translate the winning class code back to the original label.
TstXIBelong = union(CodeMeaning$OrigTrnG[which(CodeMeaning$TrnG ==
FinalList[1, 1])], CodeMeaning$OrigTrnG[which(CodeMeaning$TrnG ==
FinalList[1, 1])])
VoteResultNode = data.frame(TstXIBelong)
VoteResultList = rbind(VoteResultList, VoteResultNode)
}
}
## Self-test mode: print the confusion table and the accuracy.
if (IsTst == 1) {
CheckT = as.matrix(table(data.frame(VoteResultList, OrigTrnG)))
AccuStat = 1 - sum(CheckT - diag(diag(CheckT)))/length(TrnG)
cat("test results", "\\n")
print(CheckT)
cat("the classification accuracy of this algorithm on this training dataset is: ",
AccuStat * 100, "%", "\\n\\n\\n")
}
## Assemble the returned data frame: predictions, optionally together
## with the true labels (self-test) and/or the test cases (ShowObs).
if (IsTst == 1 & ShowObs == F) {
result = data.frame(VoteResultList, OrigTrnG)
}
else {
if (IsTst == 1 & ShowObs == T) {
result = data.frame(TstX, VoteResultList, OrigTrnG)
}
else {
if (ShowObs == F) {
result = data.frame(TstX, VoteResultList)
}
else {
result = data.frame(TstX, VoteResultList)
}
}
}
return(result)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.