# Cap2.R
# In labstatR: Libreria Del Laboratorio Di Statistica Con R

#-*- R -*-

##########################################################
###                                                    ###
### Script tratti da `Laboratorio di statistica con R' ###
###                                                    ###
###          Stefano M. Iacus & Guido Masarotto        ###
###                                                    ###
### CAPITOLO 2                                         ###
##########################################################


require(labstatR)

### Sec. 2.1 TYPES OF DATA
# Qualitative data entered as plain character vectors (seven observations).
sesso <- c("U", "U", "U", "D", "D", "D", "D")
eta <- c("giovane", "giovane", "adulto", "adulto",
         "anziano", "giovane", "anziano")
str(sesso)
str(eta)

# factor() turns a character vector into a categorical variable.
sesso2 <- factor(sesso)
str(sesso2)
sesso2

# Without explicit levels, the levels are the sorted unique values.
eta2 <- factor(eta)
str(eta2)
eta2

# Impose the natural order; the result is only shown, eta2 is not modified.
ordered(eta2, levels = c("giovane", "adulto", "anziano"))

# Two equivalent ways of storing the ordered factor.
eta2 <- factor(eta, levels = c("giovane", "adulto", "anziano"), ordered = TRUE)
eta2

eta2 <- ordered(eta, levels = c("giovane", "adulto", "anziano"))
eta2

# Numeric codes can be turned into a factor and relabelled afterwards.
sesso3 <- c(1, 1, 1, 2, 2, 2, 2)
sesso3
sesso4 <- factor(sesso3)
sesso4
levels(sesso4) <- c("U", "D")
sesso4

eta2
# codes() is defunct in current R; as.integer() returns the internal integer
# coding of the factor, which is what codes() gave for ordered factors.
as.integer(eta2)
# From here on `eta` holds a numeric vector instead of the age classes.
eta <- c(15, 16, 45, 55, 75, 15, 70)
eta
str(eta)

### Sec. 2.2 THE DATA MATRIX
# X: unordered factor; the codes 1..4 are relabelled "N", "C", "V", "S".
x <- factor(
  c(1, 4, 3, 3, 2, 1, 2, 2, 3, 1, 1, 1, 4, 2, 1, 2, 3, 4, 2, 2),
  labels = c("N", "C", "V", "S")
)
x

# Y: same construction with labels "A", "O", "S", "L", then made ordered.
y <- factor(
  c(4, 2, 1, 2, 4, 3, 3, 2, 4, 2, 3, 1, 3, 3, 3, 4, 2, 2, 3, 3),
  labels = c("A", "O", "S", "L")
)
y
y <- as.ordered(y)
y

# Z: numeric variable taking the values 0..4.
z <- c(0, 1, 3, 4, 1,
       1, 0, 2, 3, 0,
       1, 0, 1, 4, 3,
       0, 2, 2, 4, 4)
z

# W: numeric variable with decimal values.
w <- c(72.5,  54.28, 50.02, 88.88, 62.3,
       45.21, 57.5,  78.4,  75.13, 58,
       53.7,  91.29, 74.7,  41.22, 65.2,
       63.58, 48.27, 52.52, 69.5,  85.98)
w

# Collect the four variables (20 rows) in one data frame.
dati <- data.frame(X = x, Y = y, Z = z, W = w)
dati

# Write the data frame to disk so it can be restored in a clean workspace.
save(dati, file = "dati1.rda")

# Empty the workspace on purpose, then bring `dati` back from the file.
rm(list = ls())
ls()
load("dati1.rda")
ls()

dati$X

# attach() puts the columns of `dati` on the search path, so X and Y can be
# used by their bare names until the data frame is detached again.
attach(dati)
ls()
X
Y
detach(dati)
# After detach() the bare name is no longer visible. The lookup is expected
# to fail; try() reports the error without aborting a sourced or batch run.
try(X)
dati$X

rm(list = ls())
# Attaching the .rda file exposes the objects saved in it (the data frame
# `dati`), not the columns of that data frame.
attach("dati1.rda")
ls()
dati$X
# Expected to fail for the reason above; kept as a demonstration.
try(X)
detach()
ls()

### Sec. 2.3 FREQUENCY DISTRIBUTIONS
# The preceding code leaves the workspace empty and nothing attached, so the
# data frame is reloaded and attached here: this section and the ones after
# it refer to the columns by their bare names (X, Y, Z, W).
load("dati1.rda")
attach(dati)

# Absolute, relative and percentage frequencies of X.
table(X)
table(X) / length(X)
table(X) / length(X) * 100

# Same for Y, plus the cumulative versions.
table(Y)
table(Y) / length(Y)
table(Y) / length(Y) * 100
Y
cumsum(table(Y))
cumsum(table(Y) / length(Y))
cumsum(table(Y) / length(Y) * 100)

table(Z)
table(Z) / length(Z) # relative frequencies

# Tabulating the raw values of W, then the values grouped into classes.
table(W)
table(cut(W, breaks = c(40, 50, 58, 70, 95)))

# right = FALSE makes the classes closed on the left and open on the right.
table(cut(W, breaks = c(40, 50, 58, 70, 95), right = FALSE))
# plot = FALSE returns the histogram object without drawing it.
hist(W, breaks = c(40, 50, 58, 70, 95), plot = FALSE)

### Sec. 2.4.1 BAR CHARTS AND PIE CHARTS
# Pie chart of the frequencies of X, first plain and then with shaded slices.
pie(x = table(X))
pie(x = table(X), density = 10, angle = 15 + 10 * seq_len(4))

### Sec. 2.4.3 HISTOGRAMS
# The classes have unequal widths. Forcing counts on the y axis is marked as
# wrong by the authors; leaving `freq` unset is marked as correct.
hist(x = W, breaks = c(40, 50, 58, 70, 95), freq = TRUE) # wrong
hist(x = W, breaks = c(40, 50, 58, 70, 95))              # correct

# Different rules for choosing the number of classes.
hist(x = W, main = "Sturges")
hist(x = W, breaks = "Scott", main = "Scott")
hist(x = W, breaks = "FD", main = "Freedman-Diaconis")
hist(x = W, breaks = 11, main = "11 classi")

### Sec. 2.4.4 THE DISTRIBUTION FUNCTION
# ecdf() is provided by the 'stats' package, which is loaded by default.
# The former stand-alone 'stepfun' package was merged into 'stats', so
# library(stepfun) would fail and no library() call is needed here.
plot(ecdf(Z), main = "Funzione di ripartizione")
str(ecdf)    # what is ecdf?
str(ecdf(Z)) # and what is ecdf(Z)?

# ecdf(Z) is itself a function: call it to evaluate it at 1.5.
ecdf(Z)(1.5)
# This, instead, builds the ecdf of the single observation 1.5.
ecdf(1.5)

# Distribution function for data grouped into classes: cumulative relative
# frequencies at the class limits, starting from 0 at the lowest limit.
classi <- c(30, 40, 50, 58, 70, 95, 100)
Fi <- cumsum(table(cut(W, classi))) / length(W)
Fi <- c(0, Fi)
plot(classi, Fi, type = "b", axes = FALSE,
     main = "Funzione di ripartizione")
# Draw the axes by hand so that the ticks fall on the plotted values.
axis(2, Fi)
axis(1, classi)
box()

# hist.pf() comes from labstatR; called with default and explicit classes.
classi <- c(40, 50, 58, 70, 95)
hist.pf(W)
hist.pf(W, classi)

### Sec. 2.5.2
# Median with an odd and with an even number of observations.
median(c(4, 3, 4, 1, 7))
median(c(4, 3, 1, 7))

# median() is not defined for factors and is expected to stop here; try()
# reports the error without aborting a sourced or batch run.
try(median(Y))
# Work on the integer codes instead (codes() is defunct in current R;
# as.integer() gives the internal coding of the ordered factor).
me <- median(as.integer(Y))
me
levels(Y)[me]

Y2 <- c("L", "O", "A", "O", "L", "S", "S", "O")
Y2 <- ordered(Y2, levels = c("A", "O", "S", "L"))
sort(Y2)

me <- median(as.integer(Y2))
me
levels(Y2)[me] # the index 2.5 is truncated to 2 by R:
               # wrong result!

# Me() comes from labstatR.
# NOTE(review): X is an unordered factor; confirm how Me() handles it.
Me(X)
Me(Y)
Me(Y2)
Me(Z)
Me(W)

# Default quantiles of W, then the quantiles of order 0.30 and 0.72.
quantile(W)
quantile(W, probs = c(0.3, 0.72))


### Sec. 2.5.3 THE BOXPLOT
# Smallest value, largest value and both together, then the plot itself.
min(W)
max(W)
range(W)
boxplot(W)

### Sec. 2.5.4 THE ARITHMETIC MEAN
mean(x = Z)
mean(x = W)

# Plain mean followed by trimmed means with an increasing trimmed fraction;
# with trim = 0.5 mean() returns the median.
mean(x = W)
mean(x = W, trim = 0.1)
mean(x = W, trim = 0.3)
mean(x = W, trim = 0.5)


# Summaries of single variables.
summary(object = Z)
summary(object = W)

# Structure and summary of the whole data frame.
str(object = dati)
summary(object = dati)

### Sec. 2.5.5 OTHER MEANS
# R handles infinite values correctly in arithmetic.
3 / Inf
-2 / Inf
4 / Inf
-5 / Inf
Inf / 0
Inf / Inf

# mean.a() and mean.g() come from labstatR (presumably the harmonic and the
# geometric mean -- confirm in the package documentation); each is shown
# next to the arithmetic mean of the same variable.
mean.a(W)
mean.g(W)
mean(W)
mean.a(Z)
mean.g(Z)
mean(Z)

### Sec. 2.6.1 THE VARIANCE
# Two samples of 100 observations each, written as value/frequency pairs:
# x is concentrated around 4, y has no observation equal to 4.
x <- rep(c(1, 2, 3, 4, 5, 6, 7), times = c(5, 10, 20, 30, 20, 10, 5))
y <- rep(c(1, 2, 3, 5, 6, 7), times = c(15, 20, 15, 15, 20, 15))

summary(x)
summary(y)

# Frequency plots of the two samples, drawn with thick vertical lines.
plot(table(x), ylab = "freq", lwd = 10)
plot(table(y), ylab = "freq", lwd = 10)

# sigma2() comes from labstatR.
sigma2(x)
sigma2(y)


### Sec. 2.7 THE SHAPE OF DISTRIBUTIONS
# Two samples of 20 observations each.
x <- c(0.75,  2.27, 5.19, 4.8,  1.6,
       3.5,   11.19, 3.42, 4.38, 6.64,
       5.41,  3.12, 9.45, 4.38, 4.77,
       4.98,  3.74, 2.81, 2.04, 8.34)

y <- c(13.79, 12.11, 8.85,  14.01, 9.71,
       11.08, 12.34, 12.16, 7.52,  14.02,
       9.75,  14.15, 12.84, 14.73, 12.88,
       10.40, 12.78, 13.19, 9.59,  12.16)

# Side-by-side boxplots to compare the two shapes.
boxplot(x, y, names = c("x", "y"))

# skew() and kurt() come from labstatR.
skew(x)
skew(y)

kurt(x)
kurt(y)


### Sec. 2.8 CONCENTRATION
# Two small samples, written with rep() for the repeated values.
x <- c(rep(1, 3), rep(4, 2), 5, 7, 10)
y <- c(rep(1, 5), rep(4, 3), 5, 9, rep(100, 2), 200)
# gini() comes from labstatR; the second call passes add = TRUE so that its
# output is added to the plot produced by the first one.
gini(x, col = "blue")

gini(y, col = "red", add = TRUE)

### Sec. 2.9 HETEROGENEITY
# `dati` was removed from the workspace earlier in the script, so it is
# loaded again before being attached; otherwise attach(dati) fails.
load("dati1.rda")
attach(dati)
# E() comes from labstatR; it is applied to each column in turn.
E(X)
E(Y)
E(Z)
E(W)
# Undo the attach() above so the search path is left as it was found.
detach(dati)

### Sec. 2.10 FROM THE HISTOGRAM TO DENSITY ESTIMATION
load(file = "dati1.rda")
# Histogram of W (labstatR's hist.pf) with the kernel density estimate drawn
# over it as a dotted line.
hist.pf(dati$W, br = c(40, 50, 58, 70, 95))
lines(x = density(dati$W), lty = 3)

# Print the density estimate object.
density(dati$W)

# Default bandwidth (solid) against a small bandwidth (dotted).
plot(
  x = density(dati$W),
  main = "stima della densita'",
  xlab = "W",
  ylim = c(0, 0.03)
)
lines(x = density(dati$W, bw = 3), lty = 3)
legend(x = 80, y = 0.025,
       legend = c("bw = ottimale", "bw = 3"),
       lty = c(1, 3))

# Default bandwidth (solid) against a large bandwidth (dotted).
plot(
  x = density(dati$W),
  main = "stima della densita'",
  xlab = "W",
  ylim = c(0, 0.03)
)
lines(x = density(dati$W, bw = 20), lty = 3)
legend(x = 80, y = 0.025,
       legend = c("bw = ottimale", "bw = 20"),
       lty = c(1, 3))


# EOF Cap2.R