# (stray web-scrape artifact "Nothing" commented out -- not part of the script)
#-*- R -*-
## Script from Fourth Edition of `Modern Applied Statistics with S'
# Chapter 6 Linear Statistical Models
library(MASS)
library(lattice)
# Wide output and 5 significant digits so printed results match the book's layout.
options(width=65, digits=5, height=9999)
# All figures for this chapter go to a single multi-page PDF.
pdf(file="ch06.pdf", width=8, height=6, pointsize=9)
# Helmert contrasts for unordered factors, as used in the book's text.
options(contrasts = c("contr.helmert", "contr.poly"))
# 6.1 A linear regression example
# Gas consumption against external temperature, one panel per
# insulation status, with a least-squares line added to each panel.
xyplot(Gas ~ Temp | Insul, whiteside, panel =
function(x, y, ...) {
panel.xyplot(x, y, ...)
panel.lmline(x, y, ...)
}, xlab = "Average external temperature (deg. C)",
ylab = "Gas consumption (1000 cubic feet)", aspect = "xy",
strip = function(...) strip.default(..., style = 1))
# Separate simple regressions before and after insulation.
gasB <- lm(Gas ~ Temp, data = whiteside, subset = Insul=="Before")
gasA <- update(gasB, subset = Insul=="After")
summary(gasB)
summary(gasA)
# Residual variance computed directly from the deviance.  The component is
# spelled out in full: the original `gasB$df.resid' only worked via `$'
# partial matching of `df.residual', which is fragile style.
varB <- deviance(gasB)/gasB$df.residual # direct calculation
varB <- summary(gasB)$sigma^2 # alternative
# Single model with separate intercepts and slopes per insulation group.
gasBA <- lm(Gas ~ Insul/Temp - 1, data = whiteside)
summary(gasBA)
# Add per-group quadratic terms in temperature.
gasQ <- lm(Gas ~ Insul/(Temp + I(Temp^2)) - 1, data = whiteside)
summary(gasQ)$coef
# R: options(contrasts = c("contr.helmert", "contr.poly"))
# Parallel-slopes model; F test against the separate-slopes fit.
gasPR <- lm(Gas ~ Insul + Temp, data = whiteside)
anova(gasPR, gasBA)
# Temporarily switch to treatment contrasts; restored via options(oldcon).
oldcon <- options(contrasts = c("contr.treatment", "contr.poly"))
gasBA1 <- lm(Gas ~ Insul*Temp, data = whiteside)
summary(gasBA1)$coef
options(oldcon)
# 6.2 Model formulae and model matrices
# Toy one-way layout: factor a with 3 levels and cell means near 2, 3, 4
# (rep(2:4, 3) pairs mean k+1 with level k).
dat <- data.frame(a = factor(rep(1:3, 3)),
y = rnorm(9, rep(2:4, 3), 0.1))
obj <- lm(y ~ a, dat)
(alf.star <- coef(obj))
Ca <- contrasts(dat$a) # contrast matrix for `a'
# Recover the per-level effects from the contrast coefficients.
drop(Ca %*% alf.star[-1])
dummy.coef(obj)
# A factor with numeric levels 0, 1, 2, 4; compare unordered vs ordered
# default contrasts.
N <- factor(Nlevs <- c(0,1,2,4))
contrasts(N)
contrasts(ordered(N))
N2 <- N
# Degree-2 orthogonal-polynomial contrasts, assigned two equivalent ways.
contrasts(N2, 2) <- poly(Nlevs, 2)
N2 <- C(N, poly(Nlevs, 2), 2) # alternative
contrasts(N2)
# Generalized inverses display the implied comparisons as exact fractions.
fractions(ginv(contr.helmert(n = 4)))
# Successive-difference contrast matrix: each row is (level k+1) - (level k).
Cp <- diag(-1, 4, 5); Cp[row(Cp) == col(Cp) - 1] <- 1
Cp
fractions(ginv(Cp))
# 6.3 Regression diagnostics
(hills.lm <- lm(time ~ dist + climb, data = hills))
# Studentized residuals vs fitted values, plus a normal QQ plot.
frame()
par(fig = c(0, 0.6, 0, 0.55))
plot(fitted(hills.lm), studres(hills.lm))
abline(h = 0, lty = 2)
# identify(fitted(hills.lm), studres(hills.lm), row.names(hills))
par(fig = c(0.6, 1, 0, 0.55), pty = "s")
qqnorm(studres(hills.lm))
qqline(studres(hills.lm))
par(pty = "m")
# Leverages; print cases whose leverage exceeds the mean leverage p/n = 3/35.
hills.hat <- lm.influence(hills.lm)$hat
cbind(hills, lev = hills.hat)[hills.hat > 3/35, ]
cbind(hills, pred = predict(hills.lm))["Knock Hill", ]
# Refit omitting row 18 (Knock Hill), then also omitting row 7.
(hills1.lm <- update(hills.lm, subset = -18))
update(hills.lm, subset = -c(7, 18))
summary(hills1.lm)
# Weighted fit: variance taken proportional to dist^2.
summary(update(hills1.lm, weights = 1/dist^2))
# Weighted regression through the origin, variance proportional to dist^2.
# Argument name spelled out in full: the original `weight =' only worked
# through partial matching of lm()'s `weights' formal.
lm(time ~ -1 + dist + climb, hills[-18, ], weights = 1/dist^2)
# hills <- hills # make a local copy (needed in S-PLUS)
# Reparametrize: inverse speed (time per unit distance) against gradient.
hills$ispeed <- hills$time/hills$dist
hills$grad <- hills$climb/hills$dist
(hills2.lm <- lm(ispeed ~ grad, data = hills[-18, ]))
# Same pair of diagnostic plots for the reparametrized fit.
frame()
par(fig = c(0, 0.6, 0, 0.55))
plot(hills$grad[-18], studres(hills2.lm), xlab = "grad")
abline(h = 0, lty = 2)
# identify(hills$grad[-18], studres(hills2.lm), row.names(hills)[-18])
par(fig = c(0.6, 1, 0, 0.55), pty = "s")
qqnorm(studres(hills2.lm))
qqline(studres(hills2.lm))
par(pty = "m")
# Flag cases above 1.8 times the mean leverage p/n = 2/34.
hills2.hat <- lm.influence(hills2.lm)$hat
cbind(hills[-18,], lev = hills2.hat)[hills2.hat > 1.8*2/34, ]
# 6.4 Safe prediction
# Two parametrizations of the same quadratic: raw powers vs poly() basis.
quad1 <- lm(Weight ~ Days + I(Days^2), data = wtloss)
quad2 <- lm(Weight ~ poly(Days, 2), data = wtloss)
new.x <- data.frame(Days = seq(250, 300, 10),
row.names = seq(250, 300, 10))
# In R, predict() reuses the original poly() basis, so both agree.
predict(quad1, newdata = new.x)
predict(quad2, newdata = new.x)
# predict.gam(quad2, newdata = new.x) # S-PLUS only
# 6.5 Robust and resistant regression
# library(lqs)
# Least-squares baseline on the phones data; robust alternatives follow
# because least squares is sensitive to outliers in this series.
phones.lm <- lm(calls ~ year, data = phones)
# Scatterplot of the raw data.  with() replaces the original
# attach()/detach() pair: same evaluation of year and calls in the data
# frame, same "year"/"calls" axis labels, but no search-path side effect.
with(phones, plot(year, calls))
abline(phones.lm$coef)
# Overlay a robust M-estimate (rlm) and a resistant LTS fit (lqs).
abline(rlm(calls ~ year, phones, maxit=50), lty = 2, col = 2)
abline(lqs(calls ~ year, phones), lty =3, col = 3)
# legend(locator(1), lty = 1:3, col = 1:3,
# legend = c("least squares", "M-estimate", "LTS"))
## cor = FALSE is the default in R
summary(lm(calls ~ year, data = phones))
# M-estimation with different scale estimates and psi functions.
summary(rlm(calls ~ year, maxit = 50, data = phones))
summary(rlm(calls ~ year, scale.est = "proposal 2", data = phones))
summary(rlm(calls ~ year, data = phones, psi = psi.bisquare))
# Resistant fits: LTS (the lqs default), LMS, and S-estimation.
lqs(calls ~ year, data = phones)
lqs(calls ~ year, data = phones, method = "lms")
lqs(calls ~ year, data = phones, method = "S")
summary(rlm(calls ~ year, data = phones, method = "MM"))
# library(robust) # S-PLUS only
# phones.lmr <- lmRob(calls ~ year, data = phones)
# summary(phones.lmr)
# plot(phones.lmr)
# Robust and resistant refits of the hill-races models for comparison.
hills.lm
hills1.lm # omitting Knock Hill
rlm(time ~ dist + climb, data = hills)
summary(rlm(time ~ dist + climb, data = hills,
weights = 1/dist^2, method = "MM"))
lqs(time ~ dist + climb, data = hills, nsamp = "exact")
summary(hills2.lm) # omitting Knock Hill
summary(rlm(ispeed ~ grad, data = hills))
summary(rlm(ispeed ~ grad, data = hills, method="MM"))
# summary(lmRob(ispeed ~ grad, data = hills))
lqs(ispeed ~ grad, data = hills)
# 6.6 Bootstrapping linear models
library(boot)
# Model-based (residual) bootstrap of the least-squares fit.
fit <- lm(calls ~ year, data = phones)
ph <- data.frame(phones, res = resid(fit), fitted = fitted(fit))
# Statistic: refit after adding resampled residuals back onto the fitted
# values.  NOTE(review): ph.fun reads the global `fit', which is
# deliberately rebound below so the same function bootstraps the rlm fit.
ph.fun <- function(data, i) {
d <- data
d$calls <- d$fitted + d$res[i]
coef(update(fit, data=d))
}
(ph.lm.boot <- boot(ph, ph.fun, R = 999))
# Repeat for a robust MM fit; ph.fun now updates this rlm object.
fit <- rlm(calls ~ year, method = "MM", data = phones)
ph <- data.frame(phones, res = resid(fit), fitted = fitted(fit))
(ph.rlm.boot <- boot(ph, ph.fun, R = 999))
# 6.7 Factorial designs and designed experiments
options(contrasts=c("contr.helmert", "contr.poly"))
# Full factorial N*P*K within blocks; alias() shows which effects are
# aliased in this blocked design.
(npk.aov <- aov(yield ~ block + N*P*K, data = npk))
summary(npk.aov)
alias(npk.aov)
coef(npk.aov)
options(contrasts=c("contr.treatment", "contr.poly"))
# Reduced model with main effects of N and K only.
npk.aov1 <- aov(yield ~ block + N + K, data = npk)
summary.lm(npk.aov1)
se.contrast(npk.aov1, list(N == "0", N == "1"), data = npk)
model.tables(npk.aov1, type = "means", se = TRUE)
# Construction of (fractional) factorial designs.
mp <- c("-", "+")
(NPK <- expand.grid(N = mp, P = mp, K = mp))
if(FALSE) { ## fac.design is part of S-PLUS.
blocks13 <- fac.design(levels = c(2, 2, 2),
factor= list(N=mp, P=mp, K=mp), rep = 3, fraction = 1/2)
blocks46 <- fac.design(levels = c(2, 2, 2),
factor = list(N=mp, P=mp, K=mp), rep = 3, fraction = ~ -N:P:K)
NPK <- design(block = factor(rep(1:6, each = 4)),
rbind(blocks13, blocks46))
i <- order(runif(6)[NPK$block], runif(24))
NPK <- NPK[i,] # Randomized
lev <- rep(2, 7)
factors <- list(S=mp, D=mp, H=mp, G=mp, R=mp, B=mp, P=mp)
(Bike <- fac.design(lev, factors,
fraction = ~ S:D:G + S:H:R + D:H:B + S:D:H:P))
replications(~ .^2, data=Bike)
}
# R equivalent via the optional FrF2 package (skipped when not installed).
if(require("FrF2")) {
NPK <- FrF2(8, factor.names = c("N", "P", "K"), default.levels = 0:1,
blocks = 2, replications = 3)
print(NPK)
print(as.data.frame(NPK))
print(Bike <- FrF2(factor.names = c("S", "D", "H", "G", "R", "B", "P"),
default.levels = c("+", "-"), resolution = 3))
print(replications(~ .^2, data=Bike))
}
# 6.8 An unbalanced four-way layout
# Cell means, variances and SDs of Days across the Eth x Sex x Age x Lrn
# layout.
attach(quine)
table(Lrn, Age, Sex, Eth)
Means <- tapply(Days, list(Eth, Sex, Age, Lrn), mean)
Vars <- tapply(Days, list(Eth, Sex, Age, Lrn), var)
SD <- sqrt(Vars)
par(mfrow = c(1, 2), pty="s")
# Variance/SD against mean: increasing spread motivates a transformation.
plot(Means, Vars, xlab = "Cell Means", ylab = "Cell Variances")
plot(Means, SD, xlab = "Cell Means", ylab = "Cell Std Devn.")
detach()
## singular.ok = TRUE is the default in R
# Box-Cox profile for Days + 1, then profile over the shift alpha.
boxcox(Days+1 ~ Eth*Sex*Age*Lrn, data = quine, singular.ok = TRUE,
lambda = seq(-0.05, 0.45, len = 20))
logtrans(Days ~ Age*Sex*Eth*Lrn, data = quine,
alpha = seq(0.75, 6.5, len = 20), singular.ok = TRUE)
# Model selection on log(Days + 2.5), starting from the full four-way model.
quine.hi <- aov(log(Days + 2.5) ~ .^4, quine)
quine.nxt <- update(quine.hi, . ~ . - Eth:Sex:Age:Lrn)
dropterm(quine.nxt, test = "F")
quine.lo <- aov(log(Days+2.5) ~ 1, quine)
addterm(quine.lo, quine.hi, test = "F")
# Stepwise AIC search between the null and full interaction models.
quine.stp <- stepAIC(quine.nxt,
scope = list(upper = ~Eth*Sex*Age*Lrn, lower = ~1),
trace = FALSE)
quine.stp$anova
dropterm(quine.stp, test = "F")
# Further manual simplification guided by the F tests.
quine.3 <- update(quine.stp, . ~ . - Eth:Age:Lrn)
dropterm(quine.3, test = "F")
quine.4 <- update(quine.3, . ~ . - Eth:Age)
quine.5 <- update(quine.4, . ~ . - Age:Lrn)
dropterm(quine.5, test = "F")
# 6.9 Predicting computer performance
par(mfrow = c(1, 2), pty = "s")
# Box-Cox analysis of perf on the six numeric predictors.
boxcox(perf ~ syct + mmin + mmax + cach + chmin + chmax,
data = cpus, lambda = seq(0, 1, 0.1))
# Working copy whose predictor columns will be discretized below.
cpus1 <- cpus
# Discretize each of the six predictors into (up to) quartile bins;
# unique() guards against duplicated quantiles producing invalid breaks.
# The original wrapped this loop in attach(cpus)/detach(), but the loop
# indexes cpus[[v]] explicitly, so the attached frame was never used --
# the search-path side effect is dropped with no change in results.
for(v in names(cpus)[2:7])
    cpus1[[v]] <- cut(cpus[[v]], unique(quantile(cpus[[v]])),
                      include.lowest = TRUE)
# Box-Cox analysis for the binned-predictor model.
boxcox(perf ~ syct + mmin + mmax + cach + chmin + chmax,
data = cpus1, lambda = seq(-0.25, 1, 0.1))
par(mfrow = c(1, 1), pty = "m")
set.seed(123)
cpus2 <- cpus[, 2:8] # excludes names, authors' predictions
# Log-scale the first three (cycle time and memory) columns.
cpus2[, 1:3] <- log10(cpus2[, 1:3])
#cpus.samp <- sample(1:209, 100)
# Fixed 100-row training sample (hard-coded so results are reproducible).
cpus.samp <-
c(3, 5, 6, 7, 8, 10, 11, 16, 20, 21, 22, 23, 24, 25, 29, 33, 39, 41, 44, 45,
46, 49, 57, 58, 62, 63, 65, 66, 68, 69, 73, 74, 75, 76, 78, 83, 86,
88, 98, 99, 100, 103, 107, 110, 112, 113, 115, 118, 119, 120, 122,
124, 125, 126, 127, 132, 136, 141, 144, 146, 147, 148, 149, 150, 151,
152, 154, 156, 157, 158, 159, 160, 161, 163, 166, 167, 169, 170, 173,
174, 175, 176, 177, 183, 184, 187, 188, 189, 194, 195, 196, 197, 198,
199, 202, 204, 205, 206, 208, 209)
# Fit on the training rows; assess by RMS error over the 109 held-out rows.
cpus.lm <- lm(log10(perf) ~ ., data = cpus2[cpus.samp, ])
test.cpus <- function(fit)
sqrt(sum((log10(cpus2[-cpus.samp, "perf"]) -
predict(fit, cpus2[-cpus.samp,]))^2)/109)
test.cpus(cpus.lm)
# AIC-based simplification; compare held-out error with the full model.
cpus.lm2 <- stepAIC(cpus.lm, trace=FALSE)
cpus.lm2$anova
test.cpus(cpus.lm2)
# 6.10 Multiple comparisons
# Mean of the two yield measurements, varieties adjusted for location.
immer.aov <- aov((Y1 + Y2)/2 ~ Var + Loc, data = immer)
summary(immer.aov)
model.tables(immer.aov, type = "means", se = TRUE, cterms = "Var")
if(FALSE) {
# multicomp() is S-PLUS only; R equivalents follow below.
multicomp(immer.aov, plot = TRUE)
oats1 <- aov(Y ~ N + V + B, data = oats)
summary(oats1)
multicomp(oats1, focus = "V")
multicomp(oats1, focus = "N", comparisons = "mcc", control = 1)
lmat <- matrix(c(0,-1,1,rep(0, 11), 0,0,-1,1, rep(0,10),
0,0,0,-1,1,rep(0,9)),,3,
dimnames = list(NULL,
c("0.2cwt-0.0cwt", "0.4cwt-0.2cwt", "0.6cwt-0.4cwt")))
multicomp(oats1, lmat = lmat, bounds = "lower", comparisons = "none")
}
# Tukey honest significant differences, with interval plots.
(tk <- TukeyHSD(immer.aov, which = "Var"))
plot(tk)
oats1 <- aov(Y ~ N + V + B, data = oats)
(tk <- TukeyHSD(oats1, which = "V"))
plot(tk)
## An alternative under R is to use package multcomp (which requires mvtnorm)
## This code is for multcomp >= 0.991-1
library(multcomp)
## next is slow:
(tk <- confint(glht(immer.aov, linfct = mcp(Var = "Tukey"))))
plot(tk)
confint(glht(oats1, linfct = mcp(V = "Tukey")))
# Successive-difference contrasts among the four nitrogen levels.
lmat <- matrix(c(0,-1,1,rep(0, 11), 0,0,-1,1, rep(0,10),
0,0,0,-1,1,rep(0,9)),,3,
dimnames = list(NULL,
c("0.2cwt-0.0cwt", "0.4cwt-0.2cwt", "0.6cwt-0.4cwt")))
# One-sided (lower-bound) simultaneous intervals for those contrasts.
confint(glht(oats1, linfct = mcp(N = t(lmat[2:5, ])), alternative = "greater"))
# NOTE(review): `tk' was last assigned from the immer glht above, so this
# replots that object, not the oats nitrogen contrasts just computed --
# confirm whether the confint() result above was meant to be assigned.
plot(tk)
# End of ch06
# (Trailing web-hosting boilerplate removed: "Any scripts or data that you
# put into this service are public." / "Add the following code to your
# website." / "For more information on customizing the embed code, read
# Embedding Snippets." were scrape artifacts, not part of the ch06 script.)