Nothing
#-*- R -*-
##########################################################
### ###
### Script tratti da `Laboratorio di statistica con R' ###
### ###
### Stefano M. Iacus & Guido Masaratto ###
### ###
### CAPITOLO 5 ###
##########################################################
require(labstatR)
### Sez 5.1.1 LA LEGGE DEI GRANDI NUMERI
n <- seq(10,10000,length=40)
par(mfrow=c(2,2))
for(k in 1:4){
mn <- numeric(40)
for(i in 1:40)
mn[i] <- mean(rnorm(n,mean=10,sd=2))
plot(n,mn,type="l",ylim=c(8,12),xaxt="n")
abline(h=10,lty=2)
axis(1,c(100,5000,10000))
}
par(mfrow=c(1,1))
### Sez 5.1.2 TEOREMA DEL LIMITE CENTRALE
n <- c(10,50,100,1000)
p <- 0.5
par(mfrow=c(2,2))
for(k in n){
mn <- numeric(500)
for(i in 1:500){
x <- rbinom(k,1,p)
mn[i] <- mean(x)
}
z <- (mn-p)/sqrt(p*(1-p)/k)
hist(z,freq=FALSE,ylim=c(0,pnorm(0)),
xlim=c(-4,4),col="red",main=paste("n = ",k))
curve(dnorm(x),-4,4,add=TRUE)
}
par(mfrow=c(1,1))
### Sez 5.2.1 INTERVALLO DI CONFIDENZA PER LA MEDIA
x <- c(0.39, 0.68, 0.82, 1.35, 1.38, 1.62, 1.70, 1.71, 1.85, 2.14, 2.89, 3.69)
s2 <- var(x)
mx <- mean(x)
n <- length(x)
l.inf <- mx - qt(0.975,df=n-1) * sqrt(s2/n)
l.sup <- mx + qt(0.975,df=n-1) * sqrt(s2/n)
cat("(",l.inf,":",l.sup,")\n")
t.test(x)
t.test(x, conf.lev=0.99)
### Sez 5.2.2 INTERVALLO DI CONFIDENZA PER LE PROPORZIONI
curve(x*(1-x),0,1)
abline(v=0.5,lty=2)
pn <- 0.51
n <- 2500
l.inf <- pn - qnorm(0.975) * sqrt(pn*(1-pn)/n)
l.sup <- pn + qnorm(0.975) * sqrt(pn*(1-pn)/n)
cat("(",l.inf,":",l.sup,")\n")
l.inf <- pn - qnorm(0.975) * 0.5/sqrt(n)
l.sup <- pn + qnorm(0.975) * 0.5/sqrt(n)
cat("(",l.inf,":",l.sup,")\n")
prop.test(x=1275,n=2500,corr=FALSE)
prop.test(1275, 2500)
binom.test(1275,2500)
pnorm(0.02*sqrt(2500))
pnorm(0.02*sqrt(1000))
pnorm(0.02*sqrt(500))
pbinom(1250,2500,0.51,lower.tail=FALSE)
pbinom(500,1000,0.51,lower.tail=FALSE)
pbinom(250,500,0.51,lower.tail=FALSE)
### Sez 5.2.3 INTERVALLO DI CONFIDENZA PER LA VARIANZA
x
var(x)
ic.var(x)
ic.var(x,FALSE)
### Sez 5.3.1 TEST D'IPOTESI PER LA MEDIA
x <- c(2928, 2997, 2689, 3081, 3011, 2996, 2962,
3007, 3000, 2953, 2792, 2947, 3094, 2913, 3017)
mean(x)
t.test(x, mu=3010, alternative="less")
x <- c(1975, 1869, 1879, 1790, 1860, 1895, 1810, 1831,
1759, 1585, 1553, 1774, 1640, 1761, 1946, 1915, 1894,
1971, 1876, 1716, 1652, 1591, 1700, 1842, 1781)
mean(x)
t.test(x,mu=1730,alternative="greater")
### Sez 5.3.2 VERIFICA DI IPOTESI SULLE PROPORZIONI
z <- (32/50 - 0.5)/sqrt(0.5*0.5/50)
z
pnorm(z,lower.tail=FALSE) # il p-value
binom.test(32,50,alternative="greater")
prop.test(32,50,alternative="greater")
### Sez 5.3.3 VERIFICA DI IPOTESI SULLA VARIANZA
x <- rnorm(100, sd=5)
var(x)
test.var(x,20)
test.var(x,30)
test.var(x,22,"less")
### Sez 5.4.1 VERIFICA DI IPOTESI PER DUE PROPORZIONI
farmaco <- c("Placebo", "Aspirina")
esito <- c("Infartuati", "Non infartuati")
tab <- matrix( c(239,139,10795,10898), 2,2,
dimnames = list(farmaco,esito))
tab
p1 <- 239/11034
p2 <- 139/11037
p1
p2
p <- (239+139)/22071
p
qnorm(0.99)
prop.test( c(239,139), c(11034,11037) )
prop.test( c(239,139), c(11034,11037), alternative = "greater" )
prop.test( c(239,139), c(11034,11037), alternative = "less" )
### Sez 5.4.2 CONFRONTO TRA LE MEDIE DI GRUPPI
qt(0.95,df=10)
m <- rnorm(5,mean=540,sd=299)
f <- rnorm(7,mean=300,sd=238)
t.test(m,f,alternative="greater")
### Sez 5.4.3 CONFRONTO TRA VARIANZE
x <- rnorm(10, sd=5)
y <- rnorm(15, sd=3)
var(x)
var(y)
var.test(x,y,alternative="greater")
var.test(x,y,alternative="less")
### Sez 5.5 VERIFICA DI IPOTESI DI INDIPENDENZA
qchisq(0.95,df=4)
voti <- c("S", "B", "O")
tab <- matrix( c(1,1,2,3,3,1,0,2,2), 3,3, dimnames = list(voti, voti))
tab
summary(tab)
chisq.test(tab)
farmaco <- c("Placebo", "Aspirina")
esito <- c("Infartuati", "Non infartuati")
tab <- as.table(matrix( c(239,139,10795,10898), 2,2,
dimnames = list(farmaco, esito)))
tab
summary(tab)
### Sez 5.6 ANALISI DI REGRESSIONE IN AMBITO INFERENZIALE
x <- rnorm(30,sd=3)
y <- 30+3*x
lm(y~x) -> mod
summary(mod)
anova(mod)
str(mod)
x <- rnorm(30,sd=3)
y <- 30+sin(3*x)
plot(x,y)
lm(y~x) -> mod
summary(mod)
anova(mod)
plot(x,y)
mod <- lm(y~x)
abline(mod)
curve(30+sin(3*x),min(x),max(x),add=TRUE)
abline(h=30,lty=2)
### Sez 5.6.1 BANDE DI CONFIDENZA
data(cars)
attach(cars)
lm(dist~speed) -> mod
pc <- predict(mod,interval="c")
pc
pp <- predict(mod,interval="p")
pp
plot(speed,dist)
abline(mod)
matlines(speed,pc[,2:3],lty=3:3,col=1:1,lwd=2:2)
pp <- predict(mod,interval="p")
matlines(speed,pp[,2:3],lty=2:2,col=1:1,lwd=3:3)
detach()
### Sez 5.7 ESTENSIONI DEL MODELLO DI REGRESSIONE
x1 <- runif(50)
x2 <- sin(runif(50))
y <- 4 + 2*x1 - x2 + rnorm(50)
lm(y ~ x1 + x2)
z <- data.frame(y, x1, x2)
lm( y ~ . , data = z)
# specificando i regressori da includere
lm(y ~ x1 + x3 + x5, data=z)
# o specificando quelli da escludere
lm(y ~ . - x2 - x4)
lm(y ~ x1 + x2 -1)
### Sez 5.7.1 ANALISI DELLA VARIANZA (AD UNA VIA)
x <- c(rep(1,6), rep(2,6), rep(3,6), rep(4,6))
y <- c(64, 72, 68, 77, 56, 95, 78, 91, 97, 82, 85, 77,
75, 93, 78, 71, 63, 76, 55, 66, 49, 64, 70, 68)
bombe <- data.frame(y,x)
lm(y~x)
anova(lm(y~x))
plot(x,y)
abline(lm(y~x))
x <- factor(x)
x
lm(y~x)
anova(lm(y~x))
### Sez 5.7.2 REGRESSIONE LOGISTICA
glm(y~x1+x2+x3, family=binomial("logit"))
glm(y~x1+x2+x3, binomial)
str(interinale,vec.len=0)
glm(avviato~., binomial, data=interinale) -> model
model
summary(model)
attach(interinale)
table(avviato, areares)
ftable(avviato, areares)
table(lingue)
prop.table(table(avviato))
glm(avviato~. - istruzione, binomial, data=interinale) -> model1
summary(model1)
predict(model1,type="response") -> pr
plot(density(pr),xlim=c(0,0.2),main="",xlab="probabilita' di avviamento")
# estraiamo gli indici di x che corrispondono
# all'intervallo (0.03, 0.05)
which(den$x > 0.03 & den$x < 0.05) -> intervallo
# in corrispondenza di questi cerchiamo il minimo
# valore di y
which(den$y[intervallo] == min(den$y[intervallo]))
# estraiamo l'indice di x cui corrisponde il minimo
# valore di y
intervallo[5]
den$x[46] # ecco la soglia cercata!
interinale2 <- data.frame(interinale, pr = pr, avviabile = (pr > 0.034) )
str(interinale2,vec.len=0)
table(interinale2$avviato, interinale2$avviabile) -> tab
tab
prop.table(tab,1)
### Sez 5.8.1 IL Q-Q PLOT
# Normale
x <- rnorm(100)
qqnorm(x)
qqline(x)
# Chi Quadrato
x <- rchisq(100,df=1)
qqnorm(x)
qqline(x)
# Normale vs Normale
x <- rnorm(100)
y <- rnorm(100)
qqplot(x,y)
# Normale vs Chi Quadrato
x <- rnorm(100)
y <- rchisq(100,df=1)
qqplot(x,y)
### Sez 5.8.2 LA FUNZIONE DI RIPARTIZIONE EMPIRICA
x <- c(3,1,4)
library(stepfun)
plot(ecdf(x))
# Normale contro Normale
x <- rnorm(50)
plot(ecdf(x))
curve(pnorm(x),min(x),max(x),add=TRUE)
# t di Student contro Normale
x <- rt(50,df=3)
plot(ecdf(x))
curve(pnorm(x),min(x),max(x),add=TRUE)
curve(pt(x,df=3),min(x),max(x),add=TRUE,lty=3,lwd=2)
# Uniforme contro Normale
x <- runif(50)
plot(ecdf(x))
curve(pnorm(x),min(x),max(x),add=TRUE)
curve(punif(x),min(x),max(x),add=TRUE,lty=3,lwd=2)
# Chi-Quadrato contro Normale
x <- rchisq(50,df=3)
plot(ecdf(x))
curve(pnorm(x),min(x),max(x),add=TRUE)
curve(pchisq(x,df=3),min(x),max(x),add=TRUE,lty=3,lwd=2)
x <- rnorm(50)
y <- rchisq(50,df=1)
xmin <- min( min(x), min(y) )
xmax <- max( max(x), max(y) )
plot(ecdf(x),xlim=c(xmin,xmax))
plot(ecdf(y),add=TRUE)
### Sez 5.8.3 IL TEST DI KOLMOGOROV-SMIRNOV
x <- rnorm(50)
ks.test(x,"pnorm",mean=1,sd=2)
x <- rnorm(50,mean=1,sd=2)
ks.test(x,"pnorm",mean=1,sd=2)
x <- rnorm(50,mean=1,sd=2)
mean(x)
sqrt(var(x))
ks.test(x,"pnorm",mean=mean(x),sd=sqrt(var(x)))
# si "accetta" H0
x <- rchisq(50,df=1)
mean(x)
sqrt(var(x))
ks.test(x,"pnorm",mean=mean(x),sd=sqrt(var(x)))
# si rifiuta H0
x <- rnorm(50)
y <- rchisq(50,df=1)
ks.test(x,y) # Normale vs ChiQuadrato
# si rifiuta H0
x <- rnorm(50)
y <- rnorm(50)
ks.test(x,y) # Normale vs Normale
# si "accetta" H0
### Sez 5.8.4 IL TEST CHI-QUADRATO DI ADATTAMENTO
p <- rep(0,5)
p[1] <- pnorm(-3)
p[2] <- pnorm(-1) - pnorm(-3)
p[3] <- pnorm(1) - pnorm(-1)
p[4] <- pnorm(3) - pnorm(1)
p[5] <- 1 - pnorm(3)
y <- rnorm(50)
n <- numeric(5)
n <- table(cut(y,c(-100,-3,-1,1,3,100)))
chisq.test(x=n,p=p)
y <- rt(50,df=1)
n <- numeric(5)
n <- table(cut(y,c(-100,-3,-1,1,3,100)))
chisq.test(n,p=p)
p <- c(9,3,3,1)/16
n <- c(315, 108, 101, 32)
chisq.test(x=n, p=p)
# EOF Cap5.R
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.