Description Author(s) References See Also Examples
R code for Chapter 3 exercise solutions.
Simon Wood <simon@r-project.org>
Maintainer: Simon Wood <simon@r-project.org>
Wood, S.N. (2017) Generalized Additive Models: An Introduction with R, CRC
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 | library(gamair); library(mgcv)
## Q.2 Residuals
n <- 100; m <- 10
x <- runif(n)
lp <- 3*x-1
mu <- binomial()$linkinv(lp)
y <- rbinom(1:n,m,mu)
par(mfrow=c(2,2))
plot(glm(y/m ~ x,family=binomial,weights=rep(m,n)))
## example glm fit...
b <- glm(y/m ~ x,family=binomial,weights=rep(m,n))
reps <- 200;mu <- fitted(b)
rsd <- matrix(0,reps,n) # array for simulated resids
runs <- rep(0,reps) # array for simulated run counts
for (i in 1:reps) { # simulation loop
ys <- rbinom(1:n,m,mu) # simulate from fitted model
## refit model to simulated data
br <- glm(ys/m ~ x,family=binomial,weights=rep(m,n))
rs <- residuals(br) # simulated resids (meet assumptions)
rsd[i,] <- sort(rs) # store sorted residuals
fv.sort <- sort(fitted(br),index.return=TRUE)
rs <- rs[fv.sort$ix] # order resids by sorted fit values
rs <- rs > 0 # check runs of +ve, -ve resids
runs[i] <- sum(rs[1:(n-1)]!=rs[2:n])
}
# plot original ordered residuals, and simulation envelope
for (i in 1:n) rsd[,i] <- sort(rsd[,i])
par(mfrow=c(1,1))
plot(sort(residuals(b)),(1:n-.5)/n) # original
## plot 95% envelope ....
lines(rsd[5,],(1:n-.5)/n);lines(rsd[reps-5,],(1:n-.5)/n)
# compare original runs to distribution under independence
rs <- residuals(b)
fv.sort <- sort(fitted(b),index.return=TRUE)
rs <- rs[fv.sort$ix]
rs <- rs > 0
obs.runs <- sum(rs[1:(n-1)]!=rs[2:n])
sum(runs>obs.runs)
## Q.3 Death penalty
## read in data...
count <- c(53,414,11,37,0,16,4,139)
death <- factor(c(1,0,1,0,1,0,1,0))
defendant <- factor(c(0,0,1,1,0,0,1,1))
victim <- factor(c(0,0,0,0,1,1,1,1))
levels(death) <- c("no","yes")
levels(defendant) <- c("white","black")
levels(victim) <- c("white","black")
## a)
sum(count[death=="yes"&defendant=="black"])/
sum(count[defendant=="black"])
sum(count[death=="yes"&defendant=="white"])/
sum(count[defendant=="white"])
## b)
dm <- glm(count~death*victim+death*defendant+
victim*defendant,family=poisson(link=log))
summary(dm)
dm0 <- glm(count~death*victim+victim*defendant,
family=poisson(link=log))
anova(dm0,dm,test="Chisq")
## Q.7 IRLS
y <- c(12,14,33,50,67,74,123,141,165,204,253,246,240)
t <-1:13
X <- cbind(rep(1,13),t,t^2) # model matrix
mu <- y;eta <- log(mu) # initial values
ok <- TRUE
while (ok) {
## evaluate pseudodata and weights
z <- (y-mu)/mu + eta
w <- as.numeric(mu)
## fit weighted working linear model
z <- sqrt(w)*z; WX <- sqrt(w)*X
beta <- coef(lm(z~WX-1))
## evaluate new eta and mu
eta.old <- eta
eta <- X%*%beta
mu <- exp(eta)
## test for convergence...
if (max(abs(eta-eta.old))<1e-7*max(abs(eta))) ok <- FALSE
}
plot(t,y);lines(t,mu) # plot fit
## Q.8
## b)
data(harrier)
m <- 1
b <- glm(Consumption.Rate~I(1/Grouse.Density^m),
family=quasi(link=inverse,variance=mu),data=harrier)
## c)
plot(harrier$Grouse.Density,residuals(b))
## clear pattern if $m=1$, and the parameter estimates lead to a rather odd curve.
## d)
## search leads to...
m <- 3.25
b <- glm(Consumption.Rate~I(1/Grouse.Density^m),
family=quasi(link=inverse,variance=mu),data=harrier)
## e)
pd <- data.frame(Grouse.Density = seq(0,130,length=200))
pr <- predict(b,newdata=pd,se=TRUE)
with(harrier,plot(Grouse.Density,Consumption.Rate))
lines(pd$Grouse.Density,1/pr$fit,col=2)
lines(pd$Grouse.Density,1/(pr$fit-pr$se*2),col=3)
lines(pd$Grouse.Density,1/(pr$fit+pr$se*2),col=3)
## f)
ll <- function(b,cr,d)
## evalates -ve quasi-log likelihood of model
## b is parameters, cr is consumption, d is density
{ ## get expected consumption...
dm <- d^b[3]
Ec <- exp(b[1])*dm/(1+exp(b[1])*exp(b[2])*dm)
## appropriate quasi-likelihood...
ind <- cr>0 ## have to deal with cr==0 case
ql <- cr - Ec
ql[ind] <- ql[ind] + cr[ind]*log(Ec[ind]/cr[ind])
-sum(ql)
}
## Now fit model ...
fit <- optim(c(log(.4),log(10),3),ll,method="L-BFGS-B",
hessian=TRUE,cr=harrier$Consumption.Rate,
d=harrier$Grouse.Density)
## and plot results ...
b <- fit$par
d <- seq(0,130,length=200); dm <- d^b[3]
Ec <- exp(b[1])*dm/(1+exp(b[1])*exp(b[2])*dm)
with(harrier,plot(Grouse.Density,Consumption.Rate))
lines(d,Ec,col=2)
## Q.9
death <- as.numeric(ldeaths)
month <- rep(1:12,6)
time <- 1:72
ldm <- glm(death ~ sin(month/12*2*pi)+cos(month/12*2*pi),
family=poisson(link=identity))
plot(time,death,type="l");lines(time,fitted(ldm),col=2)
summary(ldm)
plot(ldm)
## Q.10
y <- c(12,14,33,50,67,74,123,141,165,204,253,246,240)
t <- 1:13
b <- glm(y ~ t + I(t^2),family=poisson)
log.lik <- b1 <- seq(.4,.7,length=100)
for (i in 1:100)
{ log.lik[i] <- logLik(glm(y~offset(b1[i]*t)+I(t^2),
family=poisson))
}
plot(b1,log.lik,type="l")
points(coef(b)[2],logLik(b),pch=19)
abline(logLik(b)[1]-qchisq(.95,df=1),0,lty=2)
## Q.11 Soybean
## a)
library(nlme)
attach(Soybean)
lmc <- lmeControl(niterEM=300) ## needed for convergence
m1<-lme(weight~Variety*Time+Variety*I(Time^2)+
Variety*I(Time^3),Soybean,~Time|Plot,control=lmc)
plot(m1) ## clear increasing variance with mean
## b)
library(MASS)
m2<-glmmPQL(weight~Variety*Time+Variety*I(Time^2)+
Variety*I(Time^3),data=Soybean,random=~Time|Plot,
family=Gamma(link=log),control=lmc)
plot(m2) ## much better
## c)
m0<-glmmPQL(weight~Variety*Time+Variety*I(Time^2)+
Variety*I(Time^3),data=Soybean,random=~1|Plot,
family=Gamma(link=log),control=lmc) # simpler r.e.'s
m3<-glmmPQL(weight~Variety*Time+Variety*I(Time^2)+
Variety*I(Time^3),data=Soybean,random=~Time+
I(Time^2)|Plot,family=Gamma(link=log),control=lmc)
## ... m3 has more complex r.e. structure
## Following not strictly valid, but gives a rough
## guide. Suggests m2 is best...
AIC(m0,m2,m3)
summary(m2) ## drop Variety:Time
m4<-glmmPQL(weight~Variety+Time+Variety*I(Time^2)+
Variety*I(Time^3),data=Soybean,random=~Time|Plot,
family=Gamma(link=log),control=lmc)
summary(m4) ## perhaps drop Variety:I(Time^3)?
m5<-glmmPQL(weight~Variety+Time+Variety*I(Time^2)+
I(Time^3),data=Soybean,random=~Time|Plot,
family=Gamma(link=log),control=lmc)
summary(m5) ## don't drop any more
AIC(m2,m4,m5) ## supports m4
intervals(m5,which="fixed")
## So m4 or m5 are probably the best models to use, and
## both suggest that variety P has a higher weight on average.
|
Loading required package: nlme
This is mgcv 1.8-25. For overview type 'help("mgcv-package")'.
[1] 161
[1] 0.07853403
[1] 0.1097308
Call:
glm(formula = count ~ death * victim + death * defendant + victim *
defendant, family = poisson(link = log))
Deviance Residuals:
1 2 3 4 5 6 7 8
0.02505 -0.00895 -0.05463 0.03000 -0.60362 0.04572 0.09251 -0.01545
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 6.02631 0.04913 122.669 < 2e-16 ***
deathyes -2.05946 0.14585 -14.121 < 2e-16 ***
victimblack -3.26517 0.25478 -12.816 < 2e-16 ***
defendantblack -2.42032 0.17155 -14.109 < 2e-16 ***
deathyes:victimblack -2.40444 0.60061 -4.003 6.25e-05 ***
deathyes:defendantblack 0.86780 0.36707 2.364 0.0181 *
victimblack:defendantblack 4.59497 0.31353 14.656 < 2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for poisson family taken to be 1)
Null deviance: 1225.07955 on 7 degrees of freedom
Residual deviance: 0.37984 on 1 degrees of freedom
AIC: 52.42
Number of Fisher Scoring iterations: 3
Analysis of Deviance Table
Model 1: count ~ death * victim + victim * defendant
Model 2: count ~ death * victim + death * defendant + victim * defendant
Resid. Df Resid. Dev Df Deviance Pr(>Chi)
1 2 5.3940
2 1 0.3798 1 5.0142 0.02514 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Call:
glm(formula = death ~ sin(month/12 * 2 * pi) + cos(month/12 *
2 * pi), family = poisson(link = identity))
Deviance Residuals:
Min 1Q Median 3Q Max
-13.0248 -3.5368 0.1624 2.9021 19.5843
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 2056.625 5.345 384.81 <2e-16 ***
sin(month/12 * 2 * pi) 614.992 7.431 82.76 <2e-16 ***
cos(month/12 * 2 * pi) 409.015 7.431 55.04 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for poisson family taken to be 1)
Null deviance: 12329.6 on 71 degrees of freedom
Residual deviance: 2226.2 on 69 degrees of freedom
AIC: 2910.9
Number of Fisher Scoring iterations: 4
iteration 1
iteration 2
iteration 3
iteration 4
iteration 1
iteration 2
iteration 3
iteration 4
iteration 1
iteration 2
iteration 3
iteration 4
df AIC
m0 10 NA
m2 12 NA
m3 15 NA
Linear mixed-effects model fit by maximum likelihood
Data: Soybean
AIC BIC logLik
NA NA NA
Random effects:
Formula: ~Time | Plot
Structure: General positive-definite, Log-Cholesky parametrization
StdDev Corr
(Intercept) 0.372420536 (Intr)
Time 0.002915643 -0.994
Residual 0.188632406
Variance function:
Structure: fixed weights
Formula: ~invwt
Fixed effects: weight ~ Variety * Time + Variety * I(Time^2) + Variety * I(Time^3)
Value Std.Error DF t-value p-value
(Intercept) -5.364971 0.15662819 358 -34.25291 0.0000
VarietyP 0.397564 0.22107160 46 1.79835 0.0787
Time 0.218397 0.01090543 358 20.02648 0.0000
I(Time^2) -0.001627 0.00025381 358 -6.40927 0.0000
I(Time^3) 0.000002 0.00000177 358 1.12806 0.2601
VarietyP:Time 0.006519 0.01536237 358 0.42435 0.6716
VarietyP:I(Time^2) -0.000287 0.00035706 358 -0.80307 0.4225
VarietyP:I(Time^3) 0.000002 0.00000249 358 0.93665 0.3496
Correlation:
(Intr) VartyP Time I(T^2) I(T^3) VrtP:T VP:I(T^2
VarietyP -0.708
Time -0.873 0.618
I(Time^2) 0.807 -0.571 -0.984
I(Time^3) -0.765 0.542 0.956 -0.992
VarietyP:Time 0.620 -0.872 -0.710 0.699 -0.678
VarietyP:I(Time^2) -0.573 0.806 0.700 -0.711 0.705 -0.984
VarietyP:I(Time^3) 0.544 -0.765 -0.680 0.705 -0.711 0.955 -0.992
Standardized Within-Group Residuals:
Min Q1 Med Q3 Max
-2.6981180 -0.6552450 -0.0616646 0.5768139 3.6926753
Number of Observations: 412
Number of Groups: 48
iteration 1
iteration 2
iteration 3
iteration 4
Linear mixed-effects model fit by maximum likelihood
Data: Soybean
AIC BIC logLik
NA NA NA
Random effects:
Formula: ~Time | Plot
Structure: General positive-definite, Log-Cholesky parametrization
StdDev Corr
(Intercept) 0.371809198 (Intr)
Time 0.002901003 -0.994
Residual 0.188638481
Variance function:
Structure: fixed weights
Formula: ~invwt
Fixed effects: weight ~ Variety + Time + Variety * I(Time^2) + Variety * I(Time^3)
Value Std.Error DF t-value p-value
(Intercept) -5.406957 0.12272546 359 -44.05733 0.0000
VarietyP 0.479437 0.10778930 46 4.44791 0.0001
Time 0.221753 0.00767198 359 28.90429 0.0000
I(Time^2) -0.001704 0.00018136 359 -9.39481 0.0000
I(Time^3) 0.000003 0.00000130 359 1.94091 0.0531
VarietyP:I(Time^2) -0.000137 0.00006302 359 -2.18150 0.0298
VarietyP:I(Time^3) 0.000001 0.00000074 359 1.79830 0.0730
Correlation:
(Intr) VartyP Time I(T^2) I(T^3) VP:I(T^2
VarietyP -0.437
Time -0.784 -0.003
I(Time^2) 0.666 0.109 -0.969
I(Time^3) -0.599 -0.138 0.916 -0.985
VarietyP:I(Time^2) 0.263 -0.610 0.008 -0.183 0.286
VarietyP:I(Time^3) -0.206 0.478 -0.007 0.178 -0.291 -0.980
Standardized Within-Group Residuals:
Min Q1 Med Q3 Max
-2.68996385 -0.65631664 -0.05972778 0.58812592 3.72020038
Number of Observations: 412
Number of Groups: 48
iteration 1
iteration 2
iteration 3
iteration 4
Linear mixed-effects model fit by maximum likelihood
Data: Soybean
AIC BIC logLik
NA NA NA
Random effects:
Formula: ~Time | Plot
Structure: General positive-definite, Log-Cholesky parametrization
StdDev Corr
(Intercept) 0.371510720 (Intr)
Time 0.002872102 -0.991
Residual 0.189367267
Variance function:
Structure: fixed weights
Formula: ~invwt
Fixed effects: weight ~ Variety + Time + Variety * I(Time^2) + I(Time^3)
Value Std.Error DF t-value p-value
(Intercept) -5.364724 0.12028256 360 -44.60101 0.0000
VarietyP 0.388361 0.09475109 46 4.09875 0.0002
Time 0.222096 0.00769357 360 28.86777 0.0000
I(Time^2) -0.001769 0.00017899 360 -9.88285 0.0000
I(Time^3) 0.000003 0.00000125 360 2.61307 0.0093
VarietyP:I(Time^2) -0.000027 0.00001255 360 -2.13175 0.0337
Correlation:
(Intr) VartyP Time I(T^2) I(T^3)
VarietyP -0.394
Time -0.803 0.001
I(Time^2) 0.731 0.027 -0.984
I(Time^3) -0.704 0.001 0.955 -0.991
VarietyP:I(Time^2) 0.312 -0.800 0.004 -0.040 0.004
Standardized Within-Group Residuals:
Min Q1 Med Q3 Max
-2.67130867 -0.66143822 -0.05416341 0.57914166 3.65293634
Number of Observations: 412
Number of Groups: 48
df AIC
m2 12 NA
m4 11 NA
m5 10 NA
Approximate 95% confidence intervals
Fixed effects:
lower est. upper
(Intercept) -5.599540e+00 -5.364724e+00 -5.129908e+00
VarietyP 1.990307e-01 3.883609e-01 5.776911e-01
Time 2.070768e-01 2.220963e-01 2.371157e-01
I(Time^2) -2.118323e-03 -1.768904e-03 -1.419484e-03
I(Time^3) 8.258938e-07 3.265598e-06 5.705303e-06
VarietyP:I(Time^2) -5.127377e-05 -2.676397e-05 -2.254168e-06
attr(,"label")
[1] "Fixed effects:"
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.