Nothing
#-*- R -*-
##########################################################
### ###
### Script tratti da `Laboratorio di statistica con R' ###
### ###
### Stefano M. Iacus & Guido Masaratto ###
### ###
### CAPITOLO 3 ###
##########################################################
require(labstatR)
### Sez 3.1 ANALISI DI DIPENDENZA: LA CONNESSIONE
x<- c("O","O","S","B","S","O","B","B","S",
"B","O","B","B","O","S")
y<- c("O","B","B","B","S","S","O","O","B",
"B","O","S","B","S","B")
x <- ordered(x, levels=c("S","B","O"))
y <- ordered(y, levels=c("S","B","O"))
table(x,y)
tab <- matrix(c(1,1,2,3,3,1,0,2,2),3,3)
tab
rownames(tab)
rownames(tab) <- c("S","B","O")
tab
colnames(tab) <- c("S","B","O")
tab
table(x,y) -> tabella
tabella
# condizionate di Y ad X
tabella[1,] # Y | X=S
tabella[2,] # Y | X=B
tabella[3,] # Y | X=O
# condizionate di X ad Y
tabella[,1] # X | Y=S
tabella[,2] # X | Y=B
tabella[,3] # X | Y=O
tabella
margin.table(tabella,1)
margin.table(tabella,2)
tabella[3,]/sum(tabella[3,])
tabella[,1]/sum(tabella[,1])
tab2 <- tabella
tab2[1,] <- tab2[1,]/sum(tab2[1,])
tab2[2,] <- tab2[2,]/sum(tab2[2,])
tab2[3,] <- tab2[3,]/sum(tab2[3,])
print(tab2,digits=2)
tab3 <- tabella
tab3[,1] <- tab3[,1]/sum(tab3[,1])
tab3[,2] <- tab3[,2]/sum(tab3[,2])
tab3[,3] <- tab3[,3]/sum(tab3[,3])
print(tab3,digits=2)
# distribuzione doppia relativa
prop.table(tabella)
# marginali relative di Y condizionate ad X
prop.table(tabella,1)
# marginali relative di X condizionate ad Y
prop.table(tabella,2)
summary(tabella)
str(summary(tabella))
chi2(x,y)
### Sez 3.1.1 RAPPRESENTAZIONI GRAFICHE DI TABELLE
x <- c("O","O","S","B","S","O","B","B","S",
"B","O","B","B","O","S")
y <- c("O","B","B","B","S","S","O","O","B",
"B","O","S","B","S","B")
x <- ordered(x, levels=c("S","B","O"))
y <- ordered(y, levels=c("S","B","O"))
table(x,y)
bubbleplot(table(x,y),main="Musica versus Pittura")
table(x,y) -> mytab
mytab
str(mytab)
dimnames(mytab)
names(dimnames(mytab))
load("dati1.rda")
bubbleplot(table(dati$Z,dati$X), joint=FALSE,
main="Z dato X")
bubbleplot(table(dati$X,dati$Z), joint=FALSE,
main="X dato Z")
bubbleplot(table(dati$Z,dati$X), main = "Z versus X")
### Sez 3.1.2 IL CASO DEL TITANIC
data(Titanic)
str(Titanic)
Titanic
apply(Titanic,c(2,3),sum)
# Dipendenza dal sesso
as.table(apply(Titanic,c(2,4),sum)) -> tabsex
tabsex
summary(tabsex)$statistic/2201
# Dipendenza dall'eta'
as.table(apply(Titanic,c(3,4),sum)) -> tabage
tabage
summary(tabage)$statistic/2201
# Dipendenza dalla classe di imbarco
as.table(apply(Titanic,c(1,4),sum)) -> tabclass
tabclass
summary(tabclass)$statistic/2201
# Effetto della classe di imbarco senza l'equipaggio
apply(Titanic,c(1,4),sum) -> tabclass
tabclass <- as.table(tabclass[1:3,])
tabclass
summary(tabclass)$statistic/sum(tabclass)
as.table(apply(Titanic,c(1,4),sum)) -> tabclass
t(tabclass)
bubbleplot(tabclass, main="Distribuzione dei sopravvissuti per classe")
### Sez 3.1.3 IL PARADOSSO DI SIMPSON (I)
x <- c( rep(TRUE,160), rep(FALSE,40), rep(TRUE,170),
rep(FALSE,30), rep(TRUE,15), rep(FALSE,85),
rep(TRUE,100), rep(FALSE,300))
y <- c( rep("A",200), rep("B",200), rep("A",100),
rep("B",400))
z <- c( rep(1,400), rep(2,500) )
simpson <- data.frame( trattamento = y, decesso = x, ospedale = z )
table(simpson)
table(simpson) -> tab
osp1 <- tab[,,1]
osp1
osp2 <- tab[,,2]
osp2
osp1[1,] <- osp1[1,]/sum(osp1[1,])
osp1[2,] <- osp1[2,]/sum(osp1[2,])
osp1 # tabella delle condizionate
osp2[1,] <- osp2[1,]/sum(osp2[1,])
osp2[2,] <- osp2[2,]/sum(osp2[2,])
osp2 # tabella delle condizionate
table(simpson) -> tab
apply(tab,c(1,2),sum)
apply(table(simpson),c(1,2),sum) -> ritab
prop.table(ritab,1)
### Sez 3.2 DIPENDENZA IN MEDIA
scricciolo <- c(19.85, 20.05, 20.25, 20.85, 20.85, 20.85,
21.05, 21.05, 21.05, 21.25, 21.45, 22.05,
22.05, 22.05, 22.25)
pettirosso <- c(21.05, 21.85, 22.05, 22.05, 22.05, 22.25,
22.45, 22.45, 22.65, 23.05, 23.05, 23.05,
23.05, 23.05, 23.25, 23.85)
boxplot(scricciolo,pettirosso, names=c("scricciolo", "pettirosso"))
summary(scricciolo)
summary(pettirosso)
sqrt(sigma2(scricciolo))
sqrt(sigma2(pettirosso))
lunghezza <- c(scricciolo, pettirosso)
plot(rep(1,length(scricciolo)),scricciolo,xaxt="n",
xlim=c(0,3),ylim=c(18,25),xlab="",ylab="lunghezza")
axis(1,c(1,2),c("scricciolo","pettirosso"))
points(rep(2,length(pettirosso)),pettirosso)
abline(h=mean(lunghezza))
points(1,mean(scricciolo),pch=4, cex=4, lwd=1.5)
points(2,mean(pettirosso),pch=4, cex=4, lwd=1.5)
ospite <- c(rep(1,length(scricciolo)), rep(2,length(pettirosso)))
eta(ospite,lunghezza)
x <- c(rep(1,10),rep(0,23), rep(2,15))
y <- c(rnorm(10,mean=7),rnorm(23,mean=19),
rnorm(15,mean=17))
eta(x,y)
y <- c(rnorm(10,mean=8),rnorm(23,mean=7),
rnorm(15,mean=6.5))
eta(x,y)
t(as.table(apply(Titanic,c(1,4),sum))) -> tabclass
tabclass
tabclass[1,]/(tabclass[1,]+tabclass[2,]) -> reg
reg
plot(reg,axes=FALSE,type="b")
abline(h=1490/2201, lty=2)
axis(1,1:length(reg),names(reg))
axis(2)
box()
n <- sum(tabclass)
md <- 1490/n
sy <- md*(1-md)
nx <- apply(tabclass,2,sum)
sm <- sum( (reg-md)^2 * nx) / n
sm/sy
eta(dati$X,dati$W)
eta(dati$Y,dati$W)
eta(dati$Z,dati$W)
### Sez 3.3.1 I GRAFICI DI DISPERSIONE E LA COVARIANZA
x <- c(2,3,4,2,5,4,5,3,4,1)
y <- c(5,4,3,6,2,5,3,5,3,3)
plot(x, y, axes=FALSE)
axis(1,c(mean(x),0:6),
c(expression(bar(x)),0:6))
axis(2,c(mean(y),0,1,2,3,5,6),
c(expression(bar(y)),0,1,2,3,5,6))
box()
lines(c(2,2,0), c(0,5,5), lty=2)
points(2,5, pch = 3, cex = 3, col = "red", lty=2)
lines(c(3.3,3.3,0), c(0,3.9,3.9), lty=3)
text(3.6, 3.9, expression((list(bar(x)[n],bar(y)[n]))))
points(mean(x), mean(y), pch = 4, cex = 3, col = "red")
COV(x,y)
cov(x,y)
x <- c(-2, -1, 0, 0, 1, 2)
y <- c(4, 1, 0, 0, 1, 4)
plot(x,y, main="parabola")
cor(x,y)
table(x,y)
summary(table(x,y))
### Sez 3.3.2 LA RETTA DI REGRESSIONE
x <- c(11,8,28,17,9,4,28,5,12,23,6,24,18,21,6,22,
27,17,27,6,29,9,3,12,9,23,5,27,20,13)
y <- c(28,21,63,42,28,2,80,19,33,60,14,58,54,67,
18,64,65,68,77, 17,95,12,1,30,34,67,20,75,59,55)
plot(x,y)
cor(x,y)
lm(y~x)
lm(y~x) -> model
plot(x,y)
abline(model, col="red", lwd=2)
text(10, 80, expression(y[i]==0.349 + 2.805*x[i]))
### Sez 3.3.3 PREVISIONI
predict(model, data.frame(x=50))
predict(model, data.frame(x=70))
predict(model, data.frame(x=c(50,70)))
predict(model, data.frame(x))
### Sez 3.3.4 BONTA' DI ADATTAMENTO
predict(model,data.frame(x)) -> yy
sum((yy-y)^2)/length(y)
var(y)*(length(y)-1)/length(y)
summary(model)
### Sez 3.3.5 EFFETTO DEGLI OUTLIER SULLA RETTA DI REGRESSIONE
x <- c(1,1,2,2)
y <- c(4,3,3,2)
cor(x,y)
lm(y~x) -> model
model
abline(model)
summary(model)
# aggiunta di un outlier
x <- c(x,8)
y <- c(y,8)
cor(x,y)
lm(y~x) -> model
model
abline(model)
summary(model)
### Sez CAMBIAMENTI DI SCALA
x <- c(75,76,77,78,79,80,81)
y <- c(21,15.5,11.7,10.7,9.2,8.9,8)
cor(x,y)
plot(x,y,xlab="anni",ylab="incidenza")
lm(y~x) -> model
abline(model)
model
plot(model)
cor(x,log(y))
plot(x,log(y))
lm(log(y)~x) -> model2
abline(model2)
model2
plot(model2)
predict(model,data.frame(x=85))
predict(model2,data.frame(x=85)) -> z
z
exp(z)
x <- c(33,49,65,33,79,49,93)
y <- c(5.3,14.5,21.21,6.5,38.45,11.23,50.42)
cor(x,y)
plot(x,y)
lm(y~x) -> model
abline(model)
model
cor(x,sqrt(y))
plot(x,sqrt(y))
lm(sqrt(y)~x) -> model2
abline(model2)
model2
### Sez 3.4 DALLA REGRESSIONE LINEARE A QUELLA NON PARAMETRICA
data(cars)
attach(cars)
plot(speed, dist)
lines(ksmooth(speed, dist, "normal", bandwidth=2))
lines(ksmooth(speed, dist, "normal", bandwidth=5),lty=3)
lines(ksmooth(speed, dist, "normal", bandwidth=10))
detach()
data(cars)
attach(cars)
plot(cars)
lines(lowess(cars))
lines(lowess(cars, f=.2), lty = 3)
legend(5, 120, c(paste("f = ", c("2/3", ".2"))), lty = c(1:3))
detach()
# EOF Cap3.R
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.