packrat/lib-R/stats/demo/lm.glm.R

### Examples from: "An Introduction to Statistical Modelling"
###			By Annette Dobson
###
### == with some additions ==

#  Copyright (C) 1997-2015 The R Core Team

require(stats); require(graphics)

## Plant Weight Data (Page 9)
ctl <- c(4.17,5.58,5.18,6.11,4.50,4.61,5.17,4.53,5.33,5.14)
trt <- c(4.81,4.17,4.41,3.59,5.87,3.83,6.03,4.89,4.32,4.69)
group <- gl(2,10, labels=c("Ctl","Trt"))
weight <- c(ctl,trt)
anova  (lm(weight~group))
summary(lm(weight~group -1))


## Birth Weight Data (Page 14)
age <- c(40, 38, 40, 35, 36, 37, 41, 40, 37, 38, 40, 38,
	 40, 36, 40, 38, 42, 39, 40, 37, 36, 38, 39, 40)
birthw <- c(2968, 2795, 3163, 2925, 2625, 2847, 3292, 3473, 2628, 3176,
	    3421, 2975, 3317, 2729, 2935, 2754, 3210, 2817, 3126, 2539,
	    2412, 2991, 2875, 3231)
sex <- gl(2,12, labels=c("M","F"))
plot(age, birthw, col=as.numeric(sex), pch=3*as.numeric(sex),
     main="Dobson's Birth Weight Data")
lines(lowess(age[sex=='M'], birthw[sex=='M']), col=1)
lines(lowess(age[sex=='F'], birthw[sex=='F']), col=2)
legend("topleft", levels(sex), col=1:2, pch=3*(1:2), lty=1, bty="n")

summary(l1 <- lm(birthw ~ sex + age),    correlation=TRUE)
summary(l0 <- lm(birthw ~ sex + age -1), correlation=TRUE)
anova(l1,l0)
summary(li <- lm(birthw ~ sex + sex:age -1), correlation=TRUE)
anova(li,l0)

summary(zi <- glm(birthw ~ sex + age, family=gaussian()))
summary(z0 <- glm(birthw ~ sex + age - 1, family=gaussian()))
anova(zi, z0)

summary(z.o4 <- update(z0, subset = -4))
summary(zz <- update(z0, birthw ~ sex+age-1 + sex:age))
anova(z0,zz)

## Poisson Regression Data (Page 42)
x <- c(-1,-1,0,0,0,0,1,1,1)
y <- c(2,3,6,7,8,9,10,12,15)
summary(glm(y~x, family=poisson(link="identity")))


## Calorie Data (Page 45)
calorie <- data.frame(
    carb = c(33,40,37,27,30,43,34,48,30,38,
	     50,51,30,36,41,42,46,24,35,37),
    age	 = c(33,47,49,35,46,52,62,23,32,42,
	     31,61,63,40,50,64,56,61,48,28),
    wgt	 = c(100, 92,135,144,140,101, 95,101, 98,105,
	     108, 85,130,127,109,107,117,100,118,102),
    prot = c(14,15,18,12,15,15,14,17,15,14,
	     17,19,19,20,15,16,18,13,18,14))
summary(lmcal <- lm(carb~age+wgt+prot, data= calorie))


## Extended Plant Data (Page 59)
ctl <-	c(4.17,5.58,5.18,6.11,4.50,4.61,5.17,4.53,5.33,5.14)
trtA <- c(4.81,4.17,4.41,3.59,5.87,3.83,6.03,4.89,4.32,4.69)
trtB <- c(6.31,5.12,5.54,5.50,5.37,5.29,4.92,6.15,5.80,5.26)
group <- gl(3, length(ctl), labels=c("Ctl","A","B"))
weight <- c(ctl,trtA,trtB)
anova(lmwg <- lm(weight~group))
summary(lmwg)
coef(lmwg)
coef(summary(lmwg))#- incl.  std.err,  t- and P- values.


## Fictitious Anova Data (Page 64)
y <- c(6.8,6.6,5.3,6.1,7.5,7.4,7.2,6.5,7.8,9.1,8.8,9.1)
a <- gl(3,4)
b <- gl(2,2, length(a))
anova(z <- lm(y~a*b))


## Achievement Scores (Page 70)
y <- c(6,4,5,3,4,3,6, 8,9,7,9,8,5,7, 6,7,7,7,8,5,7)
x <- c(3,1,3,1,2,1,4, 4,5,5,4,3,1,2, 3,2,2,3,4,1,4)
m <- gl(3,7)
anova(z <- lm(y~x+m))


## Beetle Data (Page 78)
dose <- c(1.6907, 1.7242, 1.7552, 1.7842, 1.8113, 1.8369, 1.861, 1.8839)
x <- c( 6, 13, 18, 28, 52, 53, 61, 60)
n <- c(59, 60, 62, 56, 63, 59, 62, 60)
dead <- cbind(x, n-x)
summary(     glm(dead ~ dose, family=binomial(link=logit)))
summary(     glm(dead ~ dose, family=binomial(link=probit)))
summary(z <- glm(dead ~ dose, family=binomial(link=cloglog)))
anova(z, update(z, dead ~ dose -1))


## Anther Data (Page 84)
## Note that the proportions below are not exactly
## in accord with the sample sizes quoted below.
## In particular, the last value, 5/9, should have been 0.556 instead of 0.555:
n <- c(102,  99,   108,	 76,   81,   90)
p <- c(0.539,0.525,0.528,0.724,0.617,0.555)
x <- round(n*p)
## x <- n*p
y <- cbind(x,n-x)
f <- rep(c(40,150,350),2)
(g <- gl(2,3))
summary(glm(y ~ g*f, family=binomial(link="logit")))
summary(glm(y ~ g + f, family=binomial(link="logit")))
## The "final model"
summary(glm.p84 <- glm(y~g,  family=binomial(link="logit")))
op <- par(mfrow = c(2,2), oma = c(0,0,1,0))
plot(glm.p84) # well ?
par(op)

## Tumour Data (Page 92)
counts <- c(22,2,10,16,54,115,19,33,73,11,17,28)
type <- gl(4,3,12,labels=c("freckle","superficial","nodular","indeterminate"))
site <- gl(3,1,12,labels=c("head/neck","trunk","extremities"))
data.frame(counts,type,site)
summary(z <- glm(counts ~ type + site, family=poisson()))

## Randomized Controlled Trial (Page 93)
counts <- c(18,17,15, 20,10,20, 25,13,12)
outcome   <- gl(3, 1, length(counts))
treatment <- gl(3, 3)
summary(z <- glm(counts ~ outcome + treatment, family=poisson()))

## Peptic Ulcers and Blood Groups
counts <- c(579, 4219, 911, 4578, 246, 3775, 361, 4532, 291, 5261, 396, 6598)
group <- gl(2, 1, 12, labels=c("cases","controls"))
blood <- gl(2, 2, 12, labels=c("A","O"))
city  <- gl(3, 4, 12, labels=c("London","Manchester","Newcastle"))
cbind(group, blood, city, counts) # gives internal codes for the factors

summary(z1 <- glm(counts ~ group*(city + blood), family=poisson()))
summary(z2 <- glm(counts ~ group*city + blood, family=poisson()),
        correlation = TRUE)
anova(z2, z1, test = "Chisq")
UBC-MDS/Karl documentation built on May 22, 2019, 1:53 p.m.