packrat/lib-R/MASS/scripts/ch09.R

#-*- R -*-

## Script from Fourth Edition of `Modern Applied Statistics with S'

# Chapter 9   Tree-based Methods

library(MASS)
pdf(file="ch09.pdf", width=8, height=6, pointsize=9)
options(digits=5)

library(rpart)

# Figure 9.3
shuttle.rp <- rpart(use ~ ., data=shuttle, minbucket=0, xval = 0,
                    maxsurrogate = 0, cp = 0, subset = 1:253)
post(shuttle.rp, horizontal = FALSE, height=10, width=8, title = "",
     pointsize = 8, pretty = 0)


# 9.3   Implementation in rpart

set.seed(123)
cpus.rp <- rpart(log10(perf) ~ ., cpus[ , 2:8], cp = 1e-3)
cpus.rp
print(cpus.rp, cp = 0.01) # default pruning

plot(cpus.rp, uniform = TRUE)
text(cpus.rp, digits = 3)

printcp(cpus.rp)
plotcp(cpus.rp)

cpus.rp1 <- prune(cpus.rp, cp = 0.006)
print(cpus.rp1, digits = 3)
plot(cpus.rp1, branch = 0.4, uniform = TRUE)
text(cpus.rp1, digits = 3)

# for figure 9.2
cpus.rp2 <- prune(cpus.rp, cp = 0.03)
post(cpus.rp2, horizontal = FALSE, title = "", digits=4, pointsize=18)


set.seed(123)
fgl.rp <- rpart(type ~ ., fgl, cp = 0.001)
plotcp(fgl.rp)
printcp(fgl.rp)

fgl.rp2 <- prune(fgl.rp, cp = 0.02)
plot(fgl.rp2, uniform = TRUE)
text(fgl.rp2, use.n = TRUE)
fgl.rp2

summary(fgl.rp2)

set.seed(123)
fgl.rp3 <- rpart(type ~ ., fgl, cp = 0.001,
                 parms = list(split="information"))
plotcp(fgl.rp3)
printcp(fgl.rp3)

fgl.rp4 <- prune(fgl.rp3, cp = 0.03)
plot(fgl.rp4, uniform = TRUE); text(fgl.rp4, use.n = TRUE)

plot(cpus.rp, branch = 0.6, compress = TRUE, uniform = TRUE)
text(cpus.rp, digits = 3, all = TRUE, use.n = TRUE)


# 9.3   Implementation in tree

library(tree)
## the stopping criteria differ slightly between R and S-PLUS
cpus.ltr <- tree(log10(perf) ~ ., data = cpus[, 2:8], mindev = 0.005)
summary(cpus.ltr)

cpus.ltr
plot(cpus.ltr, type="u");  text(cpus.ltr)

par(mfrow = c(1, 2), pty = "s")
set.seed(321)
plot(cv.tree(cpus.ltr, , prune.tree))
cpus.ltr1 <- prune.tree(cpus.ltr, best = 10)
plot(cpus.ltr1, type = "u")
text(cpus.ltr1, digits = 3)
par(mfrow = c(1, 1), pty = "m")

fgl.tr <- tree(type ~ ., fgl)
summary(fgl.tr)
plot(fgl.tr)
text(fgl.tr, all = TRUE, cex = 0.5)

par(mfrow = c(1, 2), pty = "s")
set.seed(123)
fgl.cv <- cv.tree(fgl.tr,, prune.misclass)
for(i in 2:5)  fgl.cv$dev <- fgl.cv$dev +
    cv.tree(fgl.tr,, prune.misclass)$dev
fgl.cv$dev <- fgl.cv$dev/5
fgl.cv
plot(fgl.cv)

fgl.tr1 <- prune.misclass(fgl.tr, best = 9)
plot(fgl.tr1, type = "u")
text(fgl.tr1, all = TRUE)

# End of ch09
UBC-MDS/Karl documentation built on May 22, 2019, 1:53 p.m.