Description Usage Format Examples
This is a simulated two-class mixture example with 200 training instances, 100 in each class.
The format is a list of 8 components:
- x: 200 x 2 matrix of training predictors.
- y: 200 class labels, 0 == green, 1 == red.
- xnew: 6831 x 2 matrix of lattice points in predictor space (columns "x1", "x2").
- prob: vector of 6831 probabilities (of class red) at each lattice point.
- marginal: vector of 6831 marginal probabilities at each lattice point.
- px1: 69 lattice coordinates for x1.
- px2: 99 lattice coordinates for x2 (69 * 99 = 6831).
- means: 20 x 2 matrix of the mixture centers, the first ten for one class, the next ten for the other.
str(mixture.example)
# Figure 2.1: linear regression of the 0/1 class indicator on the two
# predictors; the linear decision boundary is the set {x: fitted == 0.5}.
if (interactive()) par(ask = TRUE)
x <- mixture.example$x
g <- mixture.example$y
x.mod <- lm(g ~ x)
plot(x, col = ifelse(g == 1, "red", "green"), xlab = "x1", ylab = "x2")
beta <- coef(x.mod)
beta
# Boundary beta0 + beta1*x1 + beta2*x2 = 0.5, solved for x2 = a + b*x1
abline(a = (0.5 - beta[1]) / beta[3], b = -beta[2] / beta[3])
# Classify by thresholding the fitted values at 0.5
ghat <- as.numeric(fitted(x.mod) > 0.5)
length(ghat)
sum(ghat == g)
1 - mean(ghat == g)
#[1] 0.27
# Training misclassification rate
# Figure 2.2: 15-nearest-neighbour classification over the lattice of
# 6831 points (xnew) covering the predictor space.
xnew <- mixture.example$xnew
dim(xnew)
colnames(xnew)
library(class)
mod15 <- knn(x, xnew, g, k = 15, prob = TRUE)
summary(mod15)
plot(x, col = ifelse(g == 1, "red", "green"), xlab = "x1", ylab = "x2")
str(mod15)
# knn() stores the vote fraction of the *winning* class in attr "prob";
# flip it so `prob` is always the vote fraction for class "1" (red).
prob <- attr(mod15, "prob")
prob <- ifelse(mod15 == "1", prob, 1 - prob)
px1 <- mixture.example$px1
px2 <- mixture.example$px2
prob15 <- matrix(prob, length(px1), length(px2))
contour(px1, px2, prob15, levels = 0.5, labels = "",
        xlab = "x1", ylab = "x2", main = "15-nearest neighbour")
# Overlay the training points on the decision boundary
points(x, col = ifelse(g == 1, "red", "green"))
# Training error of 15-nn (note: knn breaks ties at random)
ghat15 <- as.numeric(knn(x, x, k = 15, cl = g) == "1")
sum(ghat15 == g)
# [1] 169
1 - sum(ghat15 == g) / length(g)
# [1] 0.155
# Misclassification rate for knn(, k=15)
# Figure 2.3: the 1-nearest-neighbour decision boundary on the same lattice.
mod1 <- knn(x, xnew, k = 1, cl = g, prob = TRUE)
# attr "prob" is the winning-class vote fraction; convert it to the
# fraction voting for class "1" (red) so contouring at 0.5 is meaningful.
prob <- attr(mod1, "prob")
prob <- ifelse(mod1 == "1", prob, 1 - prob)
prob1 <- matrix(prob, length(px1), length(px2))
# Fixed: the original passed `level=0.5`, which only worked through silent
# partial argument matching; contour()'s argument is `levels` (cf. Fig 2.2).
contour(px1, px2, prob1, levels = 0.5, labels = "",
        xlab = "x1", ylab = "x2", main = "1-nearest neighbour")
# Adding the points to the plot:
points(x, col = ifelse(g == 1, "red", "green"))
# Data for Figure 2.4 (page 17 of the book): the dataset contains no test
# sample, so simulate one using the description of the generating oracle.
# The 20 mixture centers are in mixture.example$means, green (0) first,
# then red (1).  For a test sample of size 10000 we draw 5000 observations
# per class: each picks one of its class's 10 centers uniformly at random,
# then adds N(0, 0.2 * I) noise.
library(MASS)
set.seed(123)
centers <- c(sample(1:10, 5000, replace = TRUE),
             sample(11:20, 5000, replace = TRUE))
means <- mixture.example$means
means <- means[centers, ]
mix.test <- mvrnorm(10000, c(0, 0), 0.2 * diag(2))
mix.test <- mix.test + means
cltest <- c(rep(0, 5000), rep(1, 5000))
# Neighbourhood sizes to try
ks <- c(1, 3, 5, 7, 9, 11, 15, 17, 23, 25, 35, 45, 55, 83, 101, 151)
nks <- length(ks)
misclass.train <- numeric(length = nks)
misclass.test <- numeric(length = nks)
names(misclass.train) <- names(misclass.test) <- ks
# Fixed: seq_along() instead of seq(along=); actual sample sizes instead
# of the hard-coded 200 / 10000 denominators.
for (i in seq_along(ks)) {
  mod.train <- knn(x, x, k = ks[i], cl = g)
  mod.test <- knn(x, mix.test, k = ks[i], cl = g)
  misclass.train[i] <- 1 - sum(mod.train == factor(g)) / length(g)
  misclass.test[i] <- 1 - sum(mod.test == factor(cltest)) / length(cltest)
}
print(cbind(misclass.train, misclass.test))
# Using package mclust02 # Note that this package is no longer on CRAN,
# but must be searched in the archives.
## Not run:
# Conditional demo: require() inside if() is the legitimate use of require —
# the block is skipped silently when the archived package is unavailable.
if(require(mclust02)){
# Re-extract the pieces so this block is self-contained when run alone.
x <- mixture.example$x
g <- mixture.example$y
xnew <- mixture.example$xnew
px1 <- mixture.example$px1
px2 <- mixture.example$px2
# Discriminant analysis via Gaussian mixtures, trying 1 to 6 components
# per class; xnew supplies the lattice to classify.
mix.mclust <- mclustDA(x, g, xnew, G=1:6, verbose=TRUE)
mix.mclust
} # end require (mclust02)
## End(Not run) # end \dontrun
# Figure 2.4: train/test misclassification rate versus neighbourhood size.
# Fixed: the y-range must span BOTH curves — the original let plot() scale
# the axis to misclass.train only, which can clip the (typically larger)
# test errors; the y-axis label also wrongly said "Test error" for a plot
# showing both curves.
plot(misclass.train, xlab = "Number of NN", ylab = "Misclassification error",
     type = "n", xaxt = "n",
     ylim = range(misclass.train, misclass.test))
axis(1, seq_along(ks), as.character(ks))
lines(misclass.test, type = "b", col = "blue", pch = 20)
lines(misclass.train, type = "b", col = "red", pch = 20)
legend("bottomright", lty = 1, col = c("red", "blue"),
       legend = c("train ", "test "))
# Figure 2.5: the Bayes decision boundary — the true P(red | x) supplied
# with the dataset, contoured where it crosses 1/2 on the lattice.
prob <- mixture.example$prob
prob.bayes <- matrix(prob, length(px1), length(px2))
contour(px1, px2, prob.bayes, levels = 0.5, labels = "",
        xlab = "x1", ylab = "x2", main = "Bayes decision boundary")
points(x, col = ifelse(g == 1, "red", "green"))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.