Cap4.R
In labstatR: Libreria Del Laboratorio Di Statistica Con R

#-*- R -*-

##########################################################
###                                                    ###
### Script tratti da `Laboratorio di statistica con R' ###
###                                                    ###
###          Stefano M. Iacus & Guido Masaratto        ###
###                                                    ###
### CAPITOLO 4                                         ###
##########################################################

require(labstatR)


### Sez 4.1 CALCOLO DELLE PROBABILITA' E SPAZIO CAMPIONARIO
Omega <- c(1,2,3,4,5,6)
sample(Omega, 15, replace=TRUE)
sample(Omega, 6)
sample(Omega, 6)
sample(Omega, 6)
sample(Omega, 7)

# Scommessa di De Mere
ptm <- proc.time()
gioco1a()
proc.time() - ptm
ptm <- proc.time()
gioco2a()
proc.time() - ptm
ptm <- proc.time()
gioco1()
proc.time() - ptm
ptm <- proc.time()
gioco2()
proc.time() - ptm

system.time( gioco1a() )
system.time( gioco2a() )
system.time(  gioco1() )
system.time(  gioco2() )

# Probabilita' di compleanni coincidenti
n <- c(5,10,15,20,21,22,23,24,25,30,50,60,
  70,80,90,100,200,300,365)
for(i in n)
  cat("\n n=",i,"P(A)=",birthday(i))

pbirthday(23, classes = 365, coincident = 4)

qbirthday(prob= 0.5, classes = 365, coincident = 4)
pbirthday(168, classes = 365, coincident = 4)
pbirthday(169, classes = 365, coincident = 4)

qbirthday(prob= 0.5, classes = 365, coincident = 2)
pbirthday(23, classes = 365, coincident = 2)

# calcolo combinatorio
prod(1:4)
choose(4,3)

word <- c("R","O", "M", "A")
sample(word,3)
sample(word,3)
sample(word,3)

prod(1:4)
gamma(5)

prod(1:150)
gamma(151)
exp(lgamma(151))
prod(1:170)
prod(1:171)
gamma(172)
log(gamma(172))
lgamma(200)
lgamma(2000)
lgamma(2000000000)

### Sez 4.2.2 MODELLI MEDIA-VARIANZA
p0 <- 25
pr <- c(0.1,0.2,0.4,0.2,0.1)
p <- c(20,22.5,25,30,40)
pm <- sum(p*pr)
pm
vp <- sum((p-pm)^2*pr)
vp
(pm-p0)/p0
vp/p0^2

x <- c(11,9,25,7,-2)/100
y <- c(-3,15,2,20,6)/100
pxy <- matrix(rep(0,25),5,5)
pxy[1,1] <- 0.2
pxy[2,2] <- 0.2
pxy[3,3] <- 0.2
pxy[4,4] <- 0.2
pxy[5,5] <- 0.2

Rpa(0.1,x,y,pxy)
Rpa(0.5,x,y,pxy)
Rpa(0.7,x,y,pxy)


a <- seq(0,1,0.1)
rr <- Rpa(a,x,y,pxy) 
plot(a,rr$Rm,main="rendimento medio", ylab="Rm",type="b")
plot(a,rr$VR,main="varianza del redimento", ylab="Vr",type="b")
plot(sqrt(rr$VR),rr$Rm,main="Trade-off media Varianza",
  ylab=expression(E(R[p])),xlab=expression(sigma(R[p])), type="b")

Rp(x,y,pxy)

VRpa <- function(a,vx,vy,mx,my,rxy){
    vv <- sqrt(a^2*vx+(1-a)^2*vy+2*a*(1-a)*rxy*sqrt(vx)*sqrt(vy))
  	mm <- a*mx+(1-a)*my
  	return(list(vv=vv,mm=mm))
}
 
a <- seq(0,1,0.1)
mx <- 0.10
my <- 0.08
vx <- 0.0872^2
vy <- 0.0841^2
 
rr1 <- VRpa(a,vx,vy,mx,my,0.2)
rr2 <- VRpa(a,vx,vy,mx,my,-0.3)
rr3 <- VRpa(a,vx,vy,mx,my,-1)
rr4 <- VRpa(a,vx,vy,mx,my,1)
rr5 <- VRpa(a,vx,vy,mx,my,0)
plot(c(rr4$vv,rr3$vv),c(rr4$mm,rr3$mm),type="l",
       xlim=c(0,0.10),lwd=2,xlab=expression(sigma(R[p])),
       ylab=expression(E(R[p])))
lines(rr1$vv,rr1$mm)
lines(rr2$vv,rr2$mm)
lines(rr3$vv,rr3$mm)
lines(rr5$vv,rr5$mm,lty=3,lwd=2)
text(0.06,0.09,expression(rho==0))
text(0.04,0.09,expression(rho==+0.2))
text(0.075,0.09,expression(rho==-0.3))
text(0.02,0.095,expression(rho==-1))
text(0.02,0.085,expression(rho==-1))
text(0.095,0.09,expression(rho==+1))

### Sez 4.2.3 ESPERIMENTO DI BERNOULLI E VARIABILI CASUALI DERIVATE
pbinom(3,10,0.3)
pbinom(3,10,0.3, lower.tail=FALSE)
pbinom(3,10,0.3) + pbinom(3,10,0.3, lower.tail=FALSE)
dbinom(3,10,0.3)

k <- 0:10
p <- dgeom(k,1/8)
plot(k,p,type="h",lwd=10)
pnbinom(3,5,0.3)
dnbinom(3,1,0.3)
dgeom(3,0.3)

### Sez 4.2.5 VARIABILE CASUALE DI POISSON
k <- 0:20
p <- dpois(k,lambda=5)
plot(k,p,type="h",lwd=10)
dpois(0,20/60*5)
ppois(10,20/60*10)

### Sez 4.2.10 VARIABILE CASUALE NORMALE
pnorm(3,mean=5,sd=sqrt(3))
pnorm((3-5)/sqrt(3))

curve(dnorm(x,mean=-4),-10,12,ylab="",axes=FALSE)
curve(dnorm(x,mean=7),-10,12,ylab="",add=TRUE)

mu <- 5
sigma <- 2

pnorm(mu+sigma,mean=mu,sd=sigma) - pnorm(mu-sigma,mean=mu,sd=sigma)
pnorm(1) - pnorm(-1)

pnorm(mu+2*sigma,mean=mu,sd=sigma) - pnorm(mu-2*sigma,mean=mu,sd=sigma)
pnorm(2) - pnorm(-2)

pnorm(mu+3*sigma,mean=mu,sd=sigma) - pnorm(mu-3*sigma,mean=mu,sd=sigma)
pnorm(3) - pnorm(-3)

### Sez 4.2.12 VARIABILE CHI-QUADRATO
curve(dchisq(x,df=3),0.,20,ylab="densita'")
curve(dchisq(x,df=5),0.,20,add=TRUE,lty=2)
curve(dchisq(x,df=7),0.,20,add=TRUE,lty=3)
legend(10,0.2,c("gdl = 3","gdl = 5","gdl = 7"), lty=c(1,2,3))

### Sez 4.2.13 VARIABILE t DI STUDENT
curve(dnorm(x),-5,5,ylab="densita'")
curve(dt(x,df=1),-6,6,lty=3,add=TRUE)
legend(2,0.3,c("Z","t"), lty=c(1,3))

### Sez 4.2.14 VARIABILE F DI FISHER
curve(df(x,df1=3,df2=1),0,2,ylab="densita'")

### Sez 4.2.15 VARIABILI CASUALI GAMMA E BETA

gamma(1/2)
sqrt(pi)
gamma(7)
prod(1:6)
beta(1,1)
beta(pi,2*pi)

curve(dbeta(x,1,1),ylim=c(0,3),ylab="densita'")
curve(dbeta(x,0.1,1), add=TRUE,lty=3)
curve(dbeta(x,1,.1),  add=TRUE,lty=3)
curve(dbeta(x,.1,.1), add=TRUE,lty=2,lwd=2)
curve(dbeta(x,4,4),   add=TRUE,lty=2,lwd=2)
curve(dbeta(x,2,6),   add=TRUE,lty=2,lwd=3)
curve(dbeta(x,6,2),   add=TRUE,lty=2,lwd=3)
curve(dbeta(x,2,2),   add=TRUE,lty=3,lwd=3)

dgamma(1,2,3)
dgamma(1,2,scale=3)
dgamma(1,2,1/3)


# Sez 4.3.1 IL METODO DELL'INVERSIONE
1*(runif(5)<1/3)
a <- runif(5)
a
a<1/3
1*(a<1/3)
as.integer(a<1/3)
sum((runif(10)<1/3))

gen.vc2 <- function(x,p)
  x[min(which(cumsum(p)>runif(1)))]

x <- c(-2,3,7,10,12)
p <- c(0.2, 0.1, 0.4, 0.2, 0.1) 
y <- numeric(1000)
for(i in 1:1000) y[i] <- gen.vc(x,p)
table(y)/1000

# secondo generatore
for(i in 1:1000) y[i] <- gen.vc2(x,p)
table(y)/1000

# meglio usare la funzione sample di R
y <- sample( c(-2,3,7,10,12), 1000, c(0.2, 0.1, 0.4, 0.2, 0.1), replace = TRUE)
table(y)/1000

lambda <- 0.25
y <- -log(runif(1000))/lambda

hist(y,freq=FALSE)
curve(dexp(x,lambda),0,25,add=TRUE)

### Sez 4.3.2 IL METODO DEL RIFIUTO
u <- runif(3000)
y <- runif(3000)
w <- 256/27*y*(1-y)^3
z <- which(u <= w)
x <- y[z]
length(x)
plot(density(x),main="")
curve(20*x*(1-x)^3,0,1,add=TRUE,lty=2,lwd=2)

x <- rnorm(1000)
plot(density(x))
plot(density(x), main="Dati simulati e vero modello")
curve(dnorm(x), add=TRUE, lty=2)


### Sez 4.4 I PROCESSI STOCASTICI
x <- rbinom(15,1,0.3)
x
plot(x,type="s",main="Processo di Bernoulli", ylab="Spazio degli stati",xlab="tempo")
points(1:15,x)

### Sez 4.4.1 LA PASSEGGIATA ALEATORIA
n <- 50
x <- rbinom(n,1,0.5)
x
x[which(x==0)] <- -1
x
y <- cumsum(x)
plot(1:n,y,type="l", main="passeggiata aleatoria", xlab="tempo",ylab="posizione")
abline(h=0,lty=3)

n <- 500
x <- rbinom(n,1,0.45)
x[which(x==0)] <- -1
y <- cumsum(x)
plot(1:n,y,type="l", main="passeggiata aleatoria", 
  xlab="tempo",ylab="posizione")
abline(h=0,lty=3)
n <- 500
x <- rbinom(n,1,0.51)
x[which(x==0)] <- -1
y <- cumsum(x)
plot(1:n,y,type="l", main="passeggiata aleatoria", xlab="tempo",ylab="posizione")
abline(h=0,lty=3) 

# una barriera
n <- 50000
L <-  40
t <- numeric(100)
t.na <- numeric(100)
for(i in 1:100){
  x <- rbinom(n,1,0.5)
  x <- 2*x -1
  y <- cumsum(x)
  t1 <- min(which(y==L))
  t2 <- t1 
  if(t1>n) { t1 <- n; t2 <- NA;}
  t[i] <- t1
  t.na[i] <- t2
 }
mean(t)
mean(t.na,na.rm=TRUE)

# una barriera rifettente
n <- 500
L <- 10
continua <- TRUE
x <- 2*rbinom(n,1,0.5) - 1
x1 <- x
while(continua){
  y <- cumsum(x)
  bar <- which(y==L+1) 
  if(length(bar) == 0)
   continua = FALSE
  else{
    h <- min(bar)
    x[h] <- -1
  }
}
  
plot(1:n,cumsum(x1),type="l",lty=3,ylab="", xlab="n",ylim=c(-30,30))
lines(1:n,y)
abline(h=L,lty=2)
text(5,L+2,"L")
legend(60,-15,c("libera","riflessa"),lty=c(3,1))

# Passeggiata con due barriere
n <- 1000
L1 <- 10
L2 <- -5
continua <- TRUE
x <- 2*rbinom(n,1,0.5) - 1
x1 <- x
while(continua){
  y <- cumsum(x)
  bar1 <- which(y==L1+1) 
  bar2 <- which(y==L2-1) 
  if( (length(bar1) == 0) & (length(bar2) == 0))
   continua = FALSE
  else{
    h1 <- min(bar1)
    h2 <- min(bar2)
    if(min(h1,h2) == h1)
     x[h1] <- -1
    else
     x[h2] <- +1
  }
}
plot(1:n,cumsum(x1),type="l",lty=3,ylab="",
  xlab="n",ylim=c(-30,30))
lines(1:n,y)
abline(h=L1,lty=2)
abline(h=L2,lty=2)
text(5,L1+2,"L1")
text(5,L2-4,"L2")
legend(600,-15,c("libera","riflessa"),lty=c(3,1))

### Sez 4.4.2 CATENTE DI MARKOV
p0 <- c(0,1,0)
P <- matrix(c(0.5,0.5,0.25,0.25,0,0.25,0.25,0.5,0.5),3,3)
p0 %*% P
p0 %*% (P %*% P)
p0 %*% (P %*% P %*% P)

x <- c("P","S","N")
P
Markov("S",15,x,P)  -> traj
traj

plot(traj$t,codes(factor(traj$X)),type="s",axes=FALSE, xlab="t",ylab="Che tempo fa")
axis(1)
axis(2,c(1,2,3),levels(factor(traj$X)))
box()

P
P %*% P

P <- matrix( c(1,0.5,0,0.5), 2,2)
P
P %*% P
P %*% P %*% P

# tempo medio di ritorno
P <- matrix( c(0.5,0.5,0.25,0.25,0,0.25,0.25,0.5,0.5), 3,3)
x <- c("P","S","N")
mm <- Markov("S", 10000, x, P)

r1 <- which(mm$X=="P")
n1 <- length(r1)
p1 <- 1/(mean(r1[-1]-r1[-n1]))
p1

r2 <- which(mm$X=="S")
p2 <- 1/(mean( diff(r2) ) 

r3 <- which(mm$X=="N")
p3 <- 1/(mean( diff(r3) )
p2
p3

### Sez 4.3.3 PROCESSI AUTOREGRESSIVI
n <- 100
lambda <- 0.3
x <- rnorm(n)
y <- numeric(n)
y[1] <- 0 
for(i in 2:n)  y[i] <-  y[i-1] * lambda + x[i] 
plot(1:n,y,type="l",xlab="n", ylab=expression(X[n]), main="Modello AR(1)")

curve(0.3^x, 0,4, main=expression(rho(h)==lambda^h), ylab=expression(rho(h)),xlab="h")
library(ts)
acf(y)

### Sez 4.4.4 PROCESSO DI POISSON
n <- 10
x <- rexp(n,rate=1/10)
y <- c(0,cumsum(x))
plot(y,0:n,type="s",xlim=c(0,max(y)),ylim=c(0,n),
 xlab="t",ylab="N(t)",main="Processo di Poisson")

n <- 100
t <- 50
x <- rexp(n,rate=1/10)
y <- c(0,cumsum(x))
evt <- max(which(y<t)) - 1
evt 
plot(y[0:(evt+2)],0:(evt+1),type="s",xlim=c(0,y[evt+2]),
  ylim=c(0,evt+2),xlab="t",ylab="N(t)", main="Processo di Poisson")
abline(v=t,lty=3)

# processo di Poisson non omogeneo
lambda <- 1.1
T <- 20
E <- 0
t <- 0
while(t<T){
  t <- t - 1/lambda * log(runif(1))
  if( runif(1) < sin(t)/lambda )
   E <- c(E, t) 
}
length(E)
E
plot(E,0:(length(E)-1),type="s",ylim=c(-4,length(E)))
curve(-3+sin(x),0,20,add=TRUE,lty=2,lwd=2)

lewis(T,sin)
lewis(T,sin,FALSE)

### Sez 4.4.5 PROCESSI DI DIFFUSIONE
n <- 100 
T <- 1
dt <- T/n
y <- numeric(n+1)
for(i in 2:(n+1))
  y[i] <- y[i-1] + rnorm(1) * sqrt(dt)
plot(seq(0,T,dt),y,type="l",main="Moto browniano", xlab="t",ylab="W(t)")

n <- 100
T <- 1
dt <- T/n
x <- c(0,rnorm(n,sd=sqrt(dt)))
y <- cumsum(x)

n <- 100
T <- 1
dt <- T/n
x0 <- 1

mu <- function(x,t) { -x*t } 
sigma <- function(x,t) { x*t } 

y <- numeric(n+1)
y[1] <- x0
for(i in 2:(n+1)){
  t <- dt*(i-1)
  y[i] <- y[i-1] + mu(y[i-1], t) *dt + sigma(y[i-1], t) * rnorm(1,sd=sqrt(dt))
}	
plot(seq(0,T,dt),y,type="l",main="Processo di diffusione", xlab="t",ylab="X(t)")

diffusione <- trajectory(1,0,1,mu,sigma,100)
plot(diffusione$t,diffusione$y,type="l")
acf(diffusione$y, main="Processo di diffusione")


# EOF Cap4.R