# Nothing
#-*- R -*-
##########################################################
### ###
### Script tratti da `Laboratorio di statistica con R' ###
### ###
### Stefano M. Iacus & Guido Masarotto ###
### ###
### CAPITOLO 2 ###
##########################################################
require(labstatR)
### Sec. 2.1 DATA TYPES
# Qualitative data entered as plain character vectors.
sesso <- c("U", "U", "U", "D", "D", "D", "D")
eta <- c("giovane", "giovane", "adulto", "adulto",
         "anziano", "giovane", "anziano")
str(sesso)
str(eta)

# The same data stored as (unordered) factors.
sesso2 <- factor(sesso)
str(sesso2)
sesso2
eta2 <- factor(eta)
str(eta2)
eta2

# Three equivalent ways of imposing the order giovane < adulto < anziano.
ordered(eta2, levels = c("giovane", "adulto", "anziano"))
eta2 <- factor(eta, levels = c("giovane", "adulto", "anziano"), ordered = TRUE)
eta2
eta2 <- ordered(eta, levels = c("giovane", "adulto", "anziano"))
eta2

# A factor built from numeric codes, then given readable level labels.
sesso3 <- c(1, 1, 1, 2, 2, 2, 2)
sesso3
sesso4 <- factor(sesso3)
sesso4
levels(sesso4) <- c("U", "D")
sesso4

eta2
# Integer codes underlying the ordered factor. The original call to codes()
# is not available in current R; as.integer() returns the level codes.
as.integer(eta2)

# From here on `eta` holds the ages as a numeric vector.
eta <- c(15, 16, 45, 55, 75, 15, 70)
eta
str(eta)
### Sec. 2.2 THE DATA MATRIX
# X: unordered factor with levels N, C, V, S (entered as codes 1..4).
x <- c(1, 4, 3, 3, 2, 1, 2, 2, 3, 1, 1, 1, 4, 2, 1, 2, 3, 4, 2, 2)
x <- factor(x)
levels(x) <- c("N", "C", "V", "S")
x

# Y: factor with levels A, O, S, L, then turned into an ordered factor.
y <- c(4, 2, 1, 2, 4, 3, 3, 2, 4, 2, 3, 1, 3, 3, 3, 4, 2, 2, 3, 3)
y <- factor(y)
levels(y) <- c("A", "O", "S", "L")
y
y <- ordered(y)
y

# Z and W: numeric variables.
z <- c(0, 1, 3, 4, 1, 1, 0, 2, 3, 0, 1, 0, 1, 4, 3, 0, 2, 2, 4, 4)
z
w <- c(72.5, 54.28, 50.02, 88.88, 62.3, 45.21, 57.5, 78.4,
       75.13, 58, 53.7, 91.29, 74.7, 41.22, 65.2, 63.58,
       48.27, 52.52, 69.5, 85.98)
w

# Assemble the data frame and save it to disk.
dati <- data.frame(X = x, Y = y, Z = z, W = w)
dati
save(dati, file = "dati1.rda")

# Empty the workspace, then bring the data frame back with load().
rm(list = ls())
ls()
load("dati1.rda")
ls()
dati$X

# attach() makes the columns visible by name without creating new objects.
attach(dati)
ls()
X
Y
detach(dati)
# After detach() the bare name X is no longer found. The lookup is wrapped in
# try() so the error is displayed without stopping a non-interactive run.
try(X)
dati$X

# Attaching the .rda file itself exposes `dati`, but not its columns.
rm(list = ls())
attach("dati1.rda")
ls()
dati$X
# X is a column of `dati`, not an object in the attached file, so this fails.
try(X)
detach()
ls()
### Sec. 2.3 FREQUENCY DISTRIBUTIONS
# X, Y, Z and W are columns of `dati`. At this point of the script `dati` has
# been removed from the workspace and the .rda file detached, so the data is
# reloaded and attached before the columns are used by name.
load("dati1.rda")
attach(dati)

# Absolute, relative and percentage frequencies of X.
table(X)
table(X) / length(X)
table(X) / length(X) * 100

# The same for Y.
table(Y)
table(Y) / length(Y)
table(Y) / length(Y) * 100

# Y is ordered, so cumulative frequencies can be computed as well.
Y
cumsum(table(Y))
cumsum(table(Y) / length(Y))
cumsum(table(Y) / length(Y) * 100)

table(Z)
table(Z) / length(Z)  # relative frequencies

# W takes many distinct values: tabulate it by classes instead.
table(W)
table(cut(W, breaks = c(40, 50, 58, 70, 95)))
table(cut(W, breaks = c(40, 50, 58, 70, 95), right = FALSE))
hist(W, breaks = c(40, 50, 58, 70, 95), plot = FALSE)
### Sec. 2.4.1 BAR CHARTS AND PIE CHARTS
# Pie chart of the frequencies of X, plain and then with shaded slices.
pie(x = table(X))
pie(x = table(X), density = 10, angle = seq(from = 25, to = 55, by = 10))

### Sec. 2.4.3 HISTOGRAMS
# Same unequal-width classes, drawn with counts and then with the default
# scale; the original marks the first call as wrong and the second as correct.
hist(x = W, breaks = c(40, 50, 58, 70, 95), freq = TRUE)  # wrong
hist(x = W, breaks = c(40, 50, 58, 70, 95))               # correct

# Different rules for choosing the classes automatically.
hist(x = W, main = "Sturges")
hist(x = W, breaks = "Scott", main = "Scott")
hist(x = W, breaks = "FD", main = "Freedman-Diaconis")
hist(x = W, breaks = 11, main = "11 classi")
### Sec. 2.4.4 THE DISTRIBUTION FUNCTION
# ecdf() lives in the 'stats' package, which is attached by default; the
# separate 'stepfun' package loaded by the original script no longer exists,
# so no library() call is needed here.
plot(ecdf(Z), main = "Funzione di ripartizione")
str(ecdf)     # what is ecdf?
str(ecdf(Z))  # and what is ecdf(Z)?
ecdf(Z)(1.5)  # the distribution function of Z evaluated at 1.5
ecdf(1.5)     # NOT the same thing: the ecdf of the single observation 1.5

# Distribution function for data grouped into classes: cumulative relative
# frequencies at the class boundaries, starting from 0 at the first boundary.
classi <- c(30, 40, 50, 58, 70, 95, 100)
Fi <- cumsum(table(cut(W, classi))) / length(W)
Fi <- c(0, Fi)
plot(classi, Fi, type = "b", axes = FALSE,
     main = "Funzione di ripartizione")
axis(2, Fi)
axis(1, classi)
box()

# Histogram with frequency polygon (hist.pf comes from labstatR).
classi <- c(40, 50, 58, 70, 95)
hist.pf(W)
hist.pf(W, classi)
### Sec. 2.5.2
median(c(4, 3, 4, 1, 7))
median(c(4, 3, 1, 7))

# median() refuses factors, ordered ones included. The call is kept as a
# demonstration, wrapped in try() so the error does not stop the script.
try(median(Y))

# Work on the integer level codes instead. The original used codes(), which
# is not available in current R; as.integer() returns the same codes.
me <- median(as.integer(Y))
me
levels(Y)[me]

Y2 <- c("L", "O", "A", "O", "L", "S", "S", "O")
Y2 <- ordered(Y2, levels = c("A", "O", "S", "L"))
sort(Y2)
me <- median(as.integer(Y2))
me
levels(Y2)[me]  # R turns the index 2.5 into 2
                # wrong result!

# Me() from labstatR is applied to each kind of variable in turn.
Me(X)
Me(Y)
Me(Y2)
Me(Z)
Me(W)

quantile(W)
quantile(W, probs = c(0.3, 0.72))
### Sec. 2.5.3 THE BOXPLOT
# Extremes of W, separately and as a pair, then the box-and-whisker plot.
min(W)
max(W)
range(W)
boxplot(x = W)

# 2.5.4 THE ARITHMETIC MEAN
mean(x = Z)
mean(x = W)
# Plain mean of W again, followed by increasingly trimmed means.
mean(x = W)
mean(x = W, trim = 0.1)
mean(x = W, trim = 0.3)
mean(x = W, trim = 0.5)

# Summaries of single variables and of the whole data frame.
summary(object = Z)
summary(object = W)
str(object = dati)
summary(object = dati)
### Sec. 2.5.5 OTHER MEANS
# R handles infinities correctly
3 / Inf
-2 / Inf
4 / Inf
-5 / Inf
Inf / 0
Inf / Inf

# mean.a() and mean.g() (from labstatR) next to the arithmetic mean, for W ...
mean.a(W)
mean.g(W)
mean(x = W)

# ... and for Z.
mean.a(Z)
mean.g(Z)
mean(x = Z)
### Sec. 2.6.1 THE VARIANCE
# Two samples of 100 values each, given as value/frequency pairs.
x <- rep(c(1, 2, 3, 4, 5, 6, 7),
         times = c(5, 10, 20, 30, 20, 10, 5))
y <- rep(c(1, 2, 3, 5, 6, 7),
         times = c(15, 20, 15, 15, 20, 15))

summary(object = x)
summary(object = y)

# Frequency plots of the two samples.
plot(table(x), ylab = "freq", lwd = 10)
plot(table(y), ylab = "freq", lwd = 10)

# sigma2() comes from labstatR.
sigma2(x)
sigma2(y)
### Sec. 2.7 THE SHAPE OF DISTRIBUTIONS
# Two samples of 20 observations each, compared side by side.
x <- c(
  0.75, 2.27, 5.19, 4.8, 1.6,
  3.5, 11.19, 3.42, 4.38, 6.64,
  5.41, 3.12, 9.45, 4.38, 4.77,
  4.98, 3.74, 2.81, 2.04, 8.34
)
y <- c(
  13.79, 12.11, 8.85, 14.01, 9.71,
  11.08, 12.34, 12.16, 7.52, 14.02,
  9.75, 14.15, 12.84, 14.73, 12.88,
  10.40, 12.78, 13.19, 9.59, 12.16
)
boxplot(x, y, names = c("x", "y"))

# skew() and kurt() come from labstatR.
skew(x)
skew(y)
kurt(x)
kurt(y)
### Sec. 2.8 CONCENTRATION
x <- c(1, 1, 1, 4, 4, 5, 7, 10)
y <- c(1, 1, 1, 1, 1, 4, 4, 4, 5, 9, 100, 100, 200)

# gini() comes from labstatR; the second call is added to the first plot.
gini(x, col = "blue")
gini(y, col = "red", add = TRUE)
### Sec. 2.9 HETEROGENEITY
# Load the saved data frame explicitly, so this section does not depend on
# what earlier sections left in the workspace, and address the columns
# through `dati$` instead of attach(), which would stay on the search path.
load("dati1.rda")

# E() comes from labstatR.
E(dati$X)
E(dati$Y)
E(dati$Z)
E(dati$W)
### Sec. 2.10 FROM THE HISTOGRAM TO DENSITY ESTIMATION
load(file = "dati1.rda")

# Histogram with frequency polygon (labstatR), overlaid with the kernel
# density estimate computed with the default bandwidth.
hist.pf(dati$W, br = c(40, 50, 58, 70, 95))
lines(density(x = dati$W), lty = 3)
density(x = dati$W)

# Default bandwidth against a small one (bw = 3).
plot(density(x = dati$W),
     xlab = "W", ylim = c(0, 0.03),
     main = "stima della densita'")
lines(density(x = dati$W, bw = 3), lty = 3)
legend(x = 80, y = 0.025,
       legend = c("bw = ottimale", "bw = 3"), lty = c(1, 3))

# Default bandwidth against a large one (bw = 20).
plot(density(x = dati$W),
     xlab = "W", ylim = c(0, 0.03),
     main = "stima della densita'")
lines(density(x = dati$W, bw = 20), lty = 3)
legend(x = 80, y = 0.025,
       legend = c("bw = ottimale", "bw = 20"), lty = c(1, 3))
# EOF Cap2.R
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.