Overview

SC19089 is a simple R package developed to compare the performance of R and C++ (interfaced through the R package Rcpp) for the 'Statistical Computing' course. Three functions are considered: sse (estimate an integral by stratified sampling), xt (estimate the number of events of a Poisson process by a fixed time), and Brown (plot paths of standard Brownian motion). For the first two functions, both R and C++ versions are provided; Brown is implemented in R only.

The R package 'microbenchmark' can be used to benchmark the above R and C++ functions.

Benchmarking sseR and sseC

The source R code for sseR is as follows:

sseR=function(n){
  f=function(x){x^n*exp(-x)*(x>0)*(x<1)}   # integrand of int_0^1 x^n exp(-x) dx
  s=numeric(4)
  for(i in 1:4){
    # stratum i is the interval ((i-1)/4, i/4); draw 4 points in each stratum
    s[i]=mean(f(runif(4,(i-1)*0.25,i*0.25)))
  }
  mean(s)   # equal-width strata, so the estimate is the mean of the stratum means
}
sseR(0);sseR(1);sseR(1.5)
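
To see how much variance the stratification saves, one can compare sseR with a plain Monte Carlo estimator using the same total budget of 16 draws (sseMC is a hypothetical helper introduced here only for this comparison):

sseMC=function(n){
  x=runif(16)                  # same budget as sseR: 4 strata x 4 draws each
  mean(x^n*exp(-x))
}
set.seed(1)
v=replicate(1e4,c(plain=sseMC(1),stratified=sseR(1)))
apply(v,1,var)                 # the stratified variance should be clearly smaller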

Pure R loops are known to be slow. The equivalent Rcpp implementation below is much faster. The source code for sseC is as follows:

library(Rcpp)
cppFunction('double sseC(double n){
  NumericVector s(4);
  for(int i=0;i<4;i++){
    // stratum i is the interval (i/4, (i+1)/4); draw 4 points in each stratum
    NumericVector x=runif(4,i*0.25,(i+1)*0.25);
    NumericVector t(4);
    for(int j=0;j<4;j++){
      t[j]=pow(x[j],n)*exp(-x[j]);
    }
    s[i]=mean(t);
  }
  return mean(s);
}')
sseC(0);sseC(1);sseC(1.5)

In order to empirically compare sseR and sseC, we take n = 5. The R code for benchmarking them is as follows.

library(microbenchmark)
tm2 <- microbenchmark(
  vR = sseR(5),
  vC = sseC(5)
)
knitr::kable(summary(tm2)[,c(1,3,5,6)])

The above results show an evident computational speed gain of C++ over R.

Benchmarking xtR and xtC

The source R code for xtR is as follows:

xtR=function(N,lambda){
  x=rexp(N,lambda)   # inter-arrival times
  Tn=cumsum(x)       # arrival times T_n = x_1 + ... + x_n
  t=10
  xt=0               # number of events by time t (0 if T_1 > t)
  for(i in 1:(N-1)){
    if(Tn[i]<=t && Tn[i+1]>t){
      xt=i
    }
  }
  xt
}
t=numeric(1e3)
for(i in 1:1e3){
  t[i]=xtR(100,1)
}
plot(t,ylab = "xt")
round(mean(t))
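
Since the number of events of a rate-lambda Poisson process on [0, t] is Poisson(lambda*t), the empirical mean and variance of the simulated counts should both be close to lambda*t = 10; a quick check:

# N(10) ~ Poisson(10) for lambda = 1: mean and variance should both be near 10
c(mean=mean(t),var=var(t),theory=10)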

The source R code for xtC is as follows:

library(Rcpp)
cppFunction('int xtC(int N,double lambda){
  NumericVector x=rexp(N,lambda);   // inter-arrival times
  NumericVector Tn(N);
  Tn[0]=x[0];
  for(int i=1;i<N;i++){
    Tn[i]=Tn[i-1]+x[i];             // arrival times
  }
  double t=10;
  int xt=0;                         // number of events by time t
  for(int i=0;i<N-1;i++){           // stop at N-1 to avoid reading past Tn
    if(Tn[i]<=t && Tn[i+1]>t){
      xt=i+1;                       // Tn[i] is the (i+1)-th arrival
    }
  }
  return xt;
}')
t=numeric(1e3)
for(i in 1:1e3){
  t[i]=xtC(100,1)
}
plot(t,ylab = "xt")
round(mean(t))

The R code for benchmarking xtR and xtC is as follows.

tm2 <- microbenchmark(
  vR = xtR(100, 1),
  vC = xtC(100, 1)
)
knitr::kable(summary(tm2)[,c(1,3,5,6)])

The results again show an evident computational speed gain of C++ over R.

Plotting standard Brownian motion with Brown

The source R code for Brown is as follows:

library(e1071)
Brown=function(t,n){
  N=1000                                # grid points per path
  time=seq(0,t,length=N)
  # rwiener simulates on [0, 1]; rescale via W(ts) = sqrt(t) W(s) in distribution
  path=sqrt(t)*rwiener(end=1,frequency=N)
  plot(time,path,xlab="time",type="l",main="Standard BM")
  for(i in 1:(n-1)){                    # overlay the remaining n-1 paths
    path=sqrt(t)*rwiener(end=1,frequency=N)
    lines(time,path)
  }
}
Brown(1,6)
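
A standard Brownian path can equivalently be simulated in base R as cumulative sums of independent Gaussian increments; a minimal sketch without the e1071 dependency:

# one standard Brownian path via cumulative sums of N(0, dt) increments
t0=1; N0=1000
path=cumsum(rnorm(N0,0,sqrt(t0/N0)))
plot(seq(0,t0,length=N0),path,type="l",xlab="time",ylab="W(t)",main="Standard BM (base R)")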

All homework

Homework 9.29

# 3.4
n=1000
u=runif(n)

# inverse transform: Rayleigh(sigma) has F(x) = 1 - exp(-x^2/(2*sigma^2)),
# so x = sigma*sqrt(-2*log(1-u))
for(Sigma in c(.05,.1,.2,.5)){
  x=Sigma*sqrt(-2*log(1-u))
  hist(x,prob=TRUE,main=paste("sigma =",Sigma))
  y=seq(0,1,.01)
  lines(y,(y/Sigma^2)*exp(-y^2/(2*Sigma^2)))   # Rayleigh(sigma) density
}
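
Exercise 3.4 also asks to check that the mode of the generated sample is close to the theoretical mode; the Rayleigh(sigma) density has its mode at x = sigma, so for one value of sigma:

# the sample mode should be close to sigma
sigma=.2
x=sigma*sqrt(-2*log(1-runif(1e5)))
d=density(x)
d$x[which.max(d$y)]          # estimated sample mode, should be close to 0.2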
# 3.11
n=1000
x1=rnorm(n,0,1)
x2=rnorm(n,3,1)
u=runif(n)
p=runif(1)                 # random mixing probability
k=as.integer(u<p)          # component indicator: 1 with probability p
y=k*x1+(1-k)*x2            # mixture p*N(0,1) + (1-p)*N(3,1)
hist(y,freq = FALSE,main = "y")
lines(density(y))

u1=runif(n)
k1=as.integer(u1<.75)
y1=k1*x1+(1-k1)*x2
hist(y1,freq = FALSE,main = "p=0.75")
lines(density(y1),lwd=2)
u2=runif(n)
k2=as.integer(u2<.6)
y2=k2*x1+(1-k2)*x2
hist(y2,freq = FALSE,main = "p=0.6")
lines(density(y2),lwd=2)
# 3.18
X=function(n,d,Sigma){
  # generate n draws from N_d(0, Sigma) via the spectral decomposition of Sigma
  mu=rep(0,d)
  ev=eigen(Sigma,symmetric = TRUE)
  lambda = ev$values
  V = ev$vectors
  C = V %*% diag(sqrt(lambda)) %*% t(V)   # Sigma^{1/2}
  Z = matrix(rnorm(n*d), nrow = n, ncol = d)
  X = Z %*% C + matrix(mu, n, d, byrow = TRUE)
  X
}
# Sigma must be symmetric positive definite for sqrt(lambda) to be real
t=X(3,3,matrix(c(1,.5,.5,.5,1,.5,.5,.5,1),nrow = 3,ncol = 3))
print(t)
cov(t)
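
An alternative factorization is the Choleski decomposition, under the same assumption that Sigma is symmetric positive definite; a minimal sketch:

# Choleski-based alternative: t(Q) %*% Q = Sigma with Q upper triangular
X.chol=function(n,d,Sigma){
  Q=chol(Sigma)
  matrix(rnorm(n*d),n,d) %*% Q
}
cov(X.chol(1000,3,matrix(c(1,.5,.5,.5,1,.5,.5,.5,1),3,3)))   # should be close to Sigma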

Homework 10.11

# 5.1
n=1e3
m=1e5
t1=runif(n,min = 0,max = pi*(1/3))
t2=runif(m,min = 0,max = pi*(1/3))
theta.hat1=mean(sin(t1))*pi*(1/3)
theta.hat2=mean(sin(t2))*pi*(1/3)
print(c(theta.hat1,theta.hat2,cos(0)-cos(pi*(1/3))))
# 5.10
#Monte Carlo estimate
n=1e4
x=runif(n,min = 0,max = 1)
theta.hat=mean(exp(-x)/(1+x^2))
t=integrate(function(x)exp(-x)/(1+x^2),0,1)
theta.hat;t
#antithetic variables
MC.Phi=function(x,n=1e4,antithetic=TRUE){
  u=runif(n/2)
  if(!antithetic) v=runif(n/2) else v=1-u   # antithetic pairs (u, 1-u)
  u=c(u,v)
  cdf=numeric(length(x))
  for(i in 1:length(x)){
    g=x[i]*exp(-u*x[i])/(1+(u*x[i])^2)      # substitution t = u*x[i]
    cdf[i]=mean(g)
  }
  cdf
}
x=1
Phi=integrate(function(y)exp(-y)/(1+y^2),0,x)$value   # exact value for reference
MC1=MC2=numeric(n)
for(i in 1:n){
  MC1[i]=MC.Phi(x,antithetic = FALSE)
  MC2[i]=MC.Phi(x)
}
print(c(mean(MC1),mean(MC2),Phi))
c(sd(MC1),sd(MC2),(sd(MC1)-sd(MC2))/sd(MC1))   # relative reduction in sd
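
The gain comes from the negative correlation between g(U) and g(1-U) for this monotone integrand; a quick check of that correlation at x = 1:

# antithetic pairs are negatively correlated for a monotone integrand
u0=runif(1e4)
g0=function(u) exp(-u)/(1+u^2)
cor(g0(u0),g0(1-u0))         # clearly negative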
# 5.15
m =10000
theta.hat =se =numeric(5)
g =function(x) {
exp(-x - log(1+x^2)) * (x > 0) * (x < 1)
}
# f1: Uniform(0,1)
x=runif(m)
fg=g(x)
theta.hat[1]= mean(fg)
se[1]=sd(fg)
# f2: Exponential(1)
x=rexp(m, 1)
fg=g(x) / exp(-x)
theta.hat[2]=mean(fg)
se[2]=sd(fg)
# f3: standard Cauchy (points outside (0,1) moved to 2, where g = 0)
x=rcauchy(m)
i=c(which(x > 1), which(x < 0))
x[i]=2
fg=g(x) / dcauchy(x)
theta.hat[3]=mean(fg)
se[3]=sd(fg)
# f4: density exp(-x)/(1-exp(-1)) on (0,1), sampled by inverse transform
u =runif(m)
x=- log(1 - u * (1 - exp(-1)))
fg=g(x) / (exp(-x) / (1 - exp(-1)))
theta.hat[4]=mean(fg)
se[4]=sd(fg)
# f5: density 4/(pi*(1+x^2)) on (0,1), sampled by inverse transform
u=runif(m)
x=tan(pi * u / 4)
fg=g(x) / (4 / ((1 + x^2) * pi))
theta.hat[5]=mean(fg)
se[5]=sd(fg)
rbind(theta.hat,se)
# stratified sampling estimate compared with plain Monte Carlo
m=1e4
k=5          # number of strata
n=50         # number of replicates
t=numeric(k)
est=matrix(0, n, 2)
g=function(x){exp(-x)/(1+x^2)*(x>0)*(x<1)}
for (i in 1:n) {
est[i, 1]=mean(g(runif(m)))                          # plain MC
for(j in 1:k)t[j]=mean(g(runif(m/k,(j-1)/k,j/k)))    # stratified
est[i, 2]=mean(t)
}
apply(est,2,mean)
apply(est,2,var)

Homework 10.18

#6.5
n=20
alpha=.05
theta.hat1=replicate(10000,expr ={
  x=rchisq(n,2)
  y=sum(x)
  sigma=var(x)^(1/2)
  theta.hat=mean(x)-(y^(1/2)*sigma*qt(alpha,df=40))/(800^(1/2))
})
print(mean(theta.hat1>=2),4)
theta.hat2=replicate(10000,expr ={
  x=rchisq(n,2)
  s=var(x)^(1/2)
  xbar=mean(x)
  xbar+abs((s*qt(alpha,df=n-1))/n^(1/2))
})
print(mean(theta.hat2>=2),4)
ucl=replicate(10000,expr ={
  x=rchisq(n,2)
  (n-1)*var(x)/qchisq(alpha,df=n-1)
})
mean(ucl>4)

# 6.6
m=1e4
n=1e3
p=c(0.025,0.05,0.95,0.975)
#skewness under normality
ske=function(){            # sample skewness of a fresh N(0,1) sample
  x=rnorm(n)
  x.bar=mean(x)
  m1=mean((x-x.bar)^3)
  m2=mean((x-x.bar)^2)
  return (m1/m2^1.5)
}
k=replicate(m,ske())
c1=quantile(k,p)
c1
z=(p*(1-p))/(n*dnorm(c1,0,sqrt(6/n))^2)
sqrt(z)
ske.hat=rnorm(m,0,sqrt(6/n))
c2=quantile(ske.hat,p)
c2
c3=qnorm(p,0,sqrt(6/n))
c=data.frame(c1,c2,c3,z)
c

Homework 11.01

6.7: We use R to compute the rejection proportion. When the contamination proportion is small, the power is low, so the test is not robust; when sampling from t(ν), the power is better. In the first figure, the power is greatest when ε = 0.8; in the second figure, the smaller ε is, the greater the power. 6.A: When the sampled population is non-normal, comparing the results shows that samples from the uniform distribution give an empirical Type I error rate closest to the nominal rate (0.05).

# 6.7
alpha=0.05
n=20
m=1e4
d=c(seq(0,0.15,0.01),seq(0.15,1,0.05))
N=length(d)
pwr=pwr1=numeric(N)
cv=qnorm(1-alpha/2,0,sqrt(6*(n-2)/((n+1)*(n+3))))
sk=function(x){            # sample skewness
  x.bar=mean(x)
  m1=mean((x-x.bar)^3)
  m2=mean((x-x.bar)^2)
  return (m1/m2^1.5)
}
for(j in 1:N){
  e=d[j]
  sktests=numeric(m)
  for(i in 1:m){
  sigma=sample(c(1,100),replace = TRUE,n,c(1-e,e))
  x=rbeta(n,sigma,sigma)
  sktests[i]=as.integer(abs(sk(x))>=cv)
  }
 pwr[j]=mean(sktests)
}
mean(pwr)
for(j in 1:N){
  e=d[j]
  sktests=numeric(m)
  for(i in 1:m){
  sigma=sample(c(1,10),replace = TRUE,n,c(1-e,e))
  x=rt(n,df=sigma)
  sktests[i]=as.integer(abs(sk(x))>=cv)
  }
  pwr1[j]=mean(sktests)
}
mean(pwr1)
par(mfrow=c(1,2))
plot(d,pwr,type = "b",xlab = bquote(d),ylim = c(0,1),pch=16,col="red3")
se=sqrt(pwr*(1-pwr)/m)
lines(d,pwr+se,col="green",lwd=1)
lines(d,pwr-se,col="blue",lwd=1)
plot(d,pwr1,type = "b",xlab = bquote(d),ylim = c(0,1),pch=16,col="red3")
se1=sqrt(pwr1*(1-pwr1)/m)
lines(d,pwr1+se1,col="green",lwd=1)
lines(d,pwr1-se1,col="blue",lwd=1)
# 6.A
alpha=0.05
mu0=1
n=20
m=1e4
p1=p2=p3=numeric(m)
for(j in 1:m){
  x=rchisq(n,1)
  y=runif(n,0,2)
  z=rexp(n,1)
  t1=t.test(x,alternative = "greater",mu=mu0)
  t2=t.test(y,alternative = "greater",mu=mu0)
  t3=t.test(z,alternative = "greater",mu=mu0)
  p1[j]=t1$p.value
  p2[j]=t2$p.value
  p3[j]=t3$p.value
}
p.hat=c(mean(p1<alpha),mean(p2<alpha),mean(p3<alpha))
se.hat=sqrt(p.hat*(1-p.hat)/m)
data.frame(p.hat,se.hat,row.names = c("(i)χ2(1)","(ii)Uniform(0,2)","(iii)Exponential(1)"))

Homework 11.08

7.6: We use R to analyze the data. In the pairs plot, all subjects are positively correlated, and the tightness of each scatter indicates the size of the correlation; the bootstrap standard errors are all small. 7.B: Comparing the normal and chi-squared populations, the coverage rate is larger under the normal distribution.

# 7.6
library(bootstrap)
pairs(scor[1:5],pch=20,col="blue")
COR=cor(scor)
COR
N=1e4
n=88
r1=r2=r3=r4=numeric(N)
for(j in 1:N){
  i=sample(1:n,n,replace = TRUE)
  x1=scor$mec[i]
  x2=scor$vec[i]
  x3=scor$alg[i]
  x4=scor$ana[i]
  x5=scor$sta[i]
  r1[j]=cor(x1,x2)
  r2[j]=cor(x3,x4)
  r3[j]=cor(x3,x5)
  r4[j]=cor(x4,x5)
}
s.e=c(se.r1=sd(r1),se.r2=sd(r2),se.r3=sd(r3),se.r4=sd(r4))
print(s.e,4)
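
The same standard errors can also be obtained with the boot package; a sketch for the (mec, vec) correlation (boot.r12 is a helper introduced here for illustration):

library(boot)
boot.r12=function(d,i) cor(d$mec[i],d$vec[i])
b=boot(scor,boot.r12,R=1e4)
sd(b$t[,1])                  # should be close to se.r1 above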
# 7.B
m=1000
n=100
mu=0
t=numeric(n)
set.seed(1234)
library(boot)
ci.norm=ci.basic=ci.perc=matrix(NA,m,2)
sk=function(x){            # sample skewness
  x.bar=mean(x)
  m1=mean((x-x.bar)^3)
  m2=mean((x-x.bar)^2)
  return (m1/m2^1.5)
}
boot.sk=function(x,j) mean(sk(x[j]))   # bootstrap statistic: skewness
# normal populations
for(i in 1:m){
for(j in 1:n){
  x=rnorm(n,0,1)
  t[j]=sk(x)
}  
  de=boot(data=t,statistic=boot.sk,R=100)
  ci=boot.ci(de,type = c("norm","basic","perc","bca"))
  ci.norm[i,]=ci$norm[2:3]
  ci.basic[i,]=ci$basic[4:5]
  ci.perc[i,]=ci$percent[4:5]
}
p1.norm=c(mean(ci.norm[,1]<=mu&ci.norm[,2]>=mu),mean(ci.norm[,1]>mu),mean(ci.norm[,2]<mu))
p1.basic=c(mean(ci.basic[,1]<=mu&ci.basic[,2]>=mu),mean(ci.basic[,1]>mu),mean(ci.basic[,2]<mu))
p1.perc=c(mean(ci.perc[,1]<=mu&ci.perc[,2]>=mu),mean(ci.perc[,1]>mu),mean(ci.perc[,2]<mu))
data.frame(p1.norm,p1.basic,p1.perc,row.names = c("coverage","on the left","on the right"))
# χ2(5) population: true skewness is sqrt(8/5)
mu1=(8/5)^(1/2)
t=numeric(n)
set.seed(12345)
for(i in 1:m){
for(j in 1:n){
  x=rchisq(n,5)
  t[j]=sk(x)
}  
  de=boot(data=t,statistic=boot.sk,R=1000)
  ci=boot.ci(de,type = c("norm","basic","perc","bca"))
  ci.norm[i,]=ci$norm[2:3]
  ci.basic[i,]=ci$basic[4:5]
  ci.perc[i,]=ci$percent[4:5]
}
p2.norm=c(mean(ci.norm[,1]<=mu1&ci.norm[,2]>=mu1),mean(ci.norm[,1]>mu1),mean(ci.norm[,2]<mu1))
p2.basic=c(mean(ci.basic[,1]<=mu1&ci.basic[,2]>=mu1),mean(ci.basic[,1]>mu1),mean(ci.basic[,2]<mu1))
p2.perc=c(mean(ci.perc[,1]<=mu1&ci.perc[,2]>=mu1),mean(ci.perc[,1]>mu1),mean(ci.perc[,2]<mu1))
data.frame(p2.norm,p2.basic,p2.perc,row.names = c("coverage","on the left","on the right"))

Homework 11.16

Looking at the results, the quadratic model has the smallest prediction error (0.05452) and also the largest adjusted R²; the fitted model is Y = 24.49262 − 1.39334X + 0.05707X².

# 7.8
library(bootstrap)
m=88                              # number of observations in scor
COV=(87/88)*cov(scor)             # MLE covariance (the scale factor cancels in theta)
e=eigen(COV,only.values=TRUE)$values
theta.hat=max(e)/sum(e)           # proportion of variance of the first principal component
theta.jack=numeric(m)
for(i in 1:m){
  COV.hat=(87/88)*cov(scor[-i,])  # leave-one-out covariance
  e.hat=eigen(COV.hat,only.values=TRUE)$values
  theta.jack[i]=max(e.hat)/sum(e.hat)
}
bias.jack=(m-1)*(mean(theta.jack)-theta.hat)
se.jack=sqrt((m-1)*mean((theta.jack-theta.hat)^2))
bias.jack;se.jack
# 7.10
library(DAAG); attach(ironslag)
# fit the candidate models; a is a grid of chemical values for plotting
a=seq(10, 40, .1)
## linear
J10=lm(magnetic ~ chemical)
yhat10=J10$coef[1] + J10$coef[2] *a
R10=summary(J10)
## quadratic
J20=lm(magnetic~ chemical + I(chemical^2))
yhat20=J20$coef[1] + J20$coef[2] * a+
J20$coef[3] * a^2
R20=summary(J20)
## exponential
J30=lm(log(magnetic) ~ chemical)
logyhat30=J30$coef[1] + J30$coef[2] * a
R30=summary(J30)
## log-log
J40=lm(log(magnetic) ~ log(chemical))
logyhat40=J40$coef[1] + J40$coef[2] * log(a)
R40=summary(J40)
## cubic
J50=lm(magnetic~chemical+I(chemical^2)+I(chemical^3))
yhat5=J50$coef[1]+J50$coef[2] * a+J50$coef[3] * a^2+J50$coef[4] * a^3
R50=summary(J50)
R.squared=c(R10$adj.r.squared,R20$adj.r.squared,R30$adj.r.squared,R40$adj.r.squared,R50$adj.r.squared)
# leave-one-out cross-validation prediction errors
n =length(magnetic)
e1=e2=e3=e4=e5=numeric(n)
for (k in 1:n) {
y=magnetic[-k]
x=chemical[-k]
## linear
J1=lm(y ~ x)
yhat1=J1$coef[1] + J1$coef[2] * chemical[k]
e1[k]=magnetic[k] - yhat1

## quadratic
J2=lm(y ~ x + I(x^2))
yhat2=J2$coef[1] + J2$coef[2] * chemical[k] +
J2$coef[3] * chemical[k]^2
e2[k]=magnetic[k] - yhat2

## exponential
J3=lm(log(y) ~ x)
logyhat3=J3$coef[1] + J3$coef[2] * chemical[k]
yhat3=exp(logyhat3)
e3[k]=magnetic[k] - yhat3

## log-log
J4=lm(log(y) ~ log(x))
logyhat4=J4$coef[1] + J4$coef[2] * log(chemical[k])
yhat4=exp(logyhat4)
e4[k]=magnetic[k] - yhat4

## cubic
J5=lm(y~x+I(x^2)+I(x^3))
yhat5=J5$coef[1]+J5$coef[2] * chemical[k]+J5$coef[3] * chemical[k]^2+J5$coef[4] * chemical[k]^3
e5[k]=magnetic[k]-yhat5

}
error=c(mean(e1^2), mean(e2^2), mean(e3^2), mean(e4^2),mean(e5^2))
data.frame(error,R.squared,row.names = c("linear","quadratic","exponential","log-log","cubic"))
J20   # coefficients of the selected quadratic model
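
The fitted values computed over the grid a can be used to visualize the selected quadratic model:

# scatterplot with the fitted quadratic curve
plot(chemical, magnetic, pch=20)
lines(a, yhat20, col="red", lwd=2)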

Homework 11.22

8.3: We use R to compute the power. When the sample sizes are unequal, the Count Five test's power is smaller than that of its permutation-test version. Page 31: the figures show that the power grows with n, and the distance correlation test performs better.

# 8.3
n=1e3
K=1:50
m=20
power=x=y=z=numeric(n)
set.seed(12345)
# build the Count Five test function
count5test=function(x, y) {
X=x-mean(x)          # center each sample
Y=y-mean(y)
outx=sum(X > max(Y)) + sum(X < min(Y))   # extreme points of each sample
outy=sum(Y > max(X)) + sum(Y < min(X))
return(as.integer(max(c(outx, outy)) > 5))
}
# permutation test method
p=replicate(n,expr={
  x=rnorm(20,0,1)
  y=rnorm(30,0,1)
  x=x-mean(x)
  y=y-mean(y)
  z=c(x,y)
for(i in 1:n){
  k=sample(K,m,replace = FALSE)
  x1=z[k]
  y1=z[-k]
  power[i]=count5test(x1,y1)
}
  mean(power)
  })
p1=mean(p)
p1
# page 31
dCov=function(x, y) {
x=as.matrix(x); y=as.matrix(y)
n=nrow(x); m=nrow(y)
if (n != m || n < 2) stop("Sample sizes must agree")
if (! (all(is.finite(c(x, y)))))
stop("Data contains missing or infinite values")
Akl=function(x) {
d=as.matrix(dist(x))
m=rowMeans(d); M=mean(d)
a=sweep(d, 1, m); b=sweep(a, 2, m)
b + M
}
A=Akl(x); B=Akl(y)
sqrt(mean(A * B))
}
ndCov2=function(z, ix, dims) {
#dims contains dimensions of x and y
p=dims[1]
q=dims[2]
d=p + q
x=z[ , 1:p] #leave x as is
y=z[ix, -(1:p)] #permute rows of y
return(nrow(z) * dCov(x, y)^2)
}
set.seed(123)
library(boot)
library(MASS)
library(Ball)
m=10
sigma=matrix(c(1,0,0,1),2,2)
p.cor=p.ball=numeric(m)
# model 1
p=function(n){
for(i in 1:m){
x=mvrnorm(n,rep(0,2),sigma)
e=mvrnorm(n,rep(0,2),sigma)
y=(1/4)*x+e
z=cbind(x,y)
boot.obj= boot(data = z, statistic = ndCov2, R = 99,sim = "permutation", dims = c(2, 2))
tb = c(boot.obj$t0, boot.obj$t)
p.cor[i] = mean(tb>=tb[1])
p.ball[i] = bcov.test(z[,1:2],z[,3:4],R=99)$p.value
}
c(1-mean(p.cor),1-mean(p.ball))}
t=rbind(p(5),p(10),p(20),p(30),p(40),p(50))
plot(c(5,seq(10,50,10)),t[,1],'l',ylim = c(0,1),xlab='n',main = 'model 1',ylab = 'power',col="blue")
lines(c(5,seq(10,50,10)),t[,2],col='red')
legend(35,0.4,c('p.cor','p.ball'),lty=c(1,1),
       col=c('blue','red'),cex = 0.7)
# model 2
f=function(n){
for(i in 1:m){
x=mvrnorm(n,rep(0,2),sigma)
e=mvrnorm(n,rep(0,2),sigma)
y=(1/4)*x*e
z=cbind(x,y)
boot.obj= boot(data = z, statistic = ndCov2, R = 99,sim = "permutation", dims = c(2, 2))
tb = c(boot.obj$t0, boot.obj$t)
p.cor[i] = mean(tb>=tb[1])
p.ball[i] = bcov.test(z[,1:2],z[,3:4],R=99)$p.value
}
c(1-mean(p.cor),1-mean(p.ball))}
t=rbind(f(5),f(10),f(20),f(30),f(40),f(50))
plot(c(5,seq(10,50,10)),t[,1],'l',ylim = c(0,1),xlab='n',main = 'model 2',ylab = 'power',col="blue")
lines(c(5,seq(10,50,10)),t[,2],col='red')
legend(35,0.4,c('p.cor','p.ball'),lty=c(1,1),
       col=c('blue','red'),cex = 0.7)

Homework 11.29

9.4: We use R to generate the chains. Comparing the plots for the different sigma values, the chain with sigma = 1 mixes well: the acceptance rate is not too small and convergence is fast.

N=1e3
df=function(x)(1/2)*exp(-abs(x))   # standard Laplace density (target)
rw.Metropolis = function(sigma, x0, N) {
x=numeric(N)
x[1] = x0
u=runif(N)
k = 0                              # number of rejected proposals
for (i in 2:N) {
y = rnorm(1, x[i-1], sigma)        # random walk proposal
if (u[i] <= (df(y) / df(x[i-1])))
x[i] = y else {
x[i] = x[i-1]
k = k + 1
}
}
return(list(x=x, k=k))
}
# chains generated with different proposal standard deviations sigma
sigma=c(0.05,0.5,1,10)
x0=10
rw1 = rw.Metropolis(sigma[1], x0, N)
rw2 = rw.Metropolis(sigma[2], x0, N)
rw3 = rw.Metropolis(sigma[3], x0, N)
rw4 = rw.Metropolis(sigma[4], x0, N)
rw=c(rw1$k, rw2$k, rw3$k, rw4$k)   # rejection counts
p=rw/N                             # rejection rates
q=1-p                              # acceptance rates
rw
p
q
# trace plots
index=1:1000
y1=rw1$x[index]
plot(index, y1, type="l", main="sigma=0.05", ylab="x",col="red")
y2=rw2$x[index]
plot(index, y2, type="l", main="sigma=0.5", ylab="x",col="red")
y3=rw3$x[index]
plot(index, y3, type="l", main="sigma=1", ylab="x",col="red")
y4=rw4$x[index]
plot(index, y4, type="l", main="sigma=10", ylab="x",col="red")
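
A further check on the sigma = 1 chain is to compare its quantiles with those of the standard Laplace target; a minimal sketch, assuming a burn-in of 100 iterations:

# chain quantiles (after an assumed burn-in) vs standard Laplace quantiles
b=101
y=rw3$x[b:N]
pq=c(.05,.25,.5,.75,.95)
Qlap=ifelse(pq<.5,log(2*pq),-log(2*(1-pq)))   # Laplace inverse CDF
round(rbind(chain=quantile(y,pq),theory=Qlap),3)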

Homework 12.06

11.1: We use R to compute the logarithm and exponential functions at x = 10. Comparing the values, the identity does not hold exactly in computer arithmetic, but near equality holds. We then use R to compute the roots; comparing with 11.4, the results are approximately equal.

# 11.1
x=10
f=function(x)log(exp(x))
g=function(x)exp(log(x))
print(f(x)-g(x))
# compare f and g
f(x)-g(x)==0
# nearly equal
isTRUE(all.equal(f(x),g(x)))
# 11.4
c.k=function(k,a){return(sqrt((k*(a^2))/(k+1-a^2)))}
f1=function(u){(1+(u^2)/(k-1))^(-k/2)}
f2=function(u){(1+(u^2)/k)^(-(k+1)/2)}
f=function(a){
(2*gamma(k/2))/(sqrt(pi*(k-1))*gamma((k-1)/2))*integrate(f1,0,c.k(k-1,a))$value-2*gamma((k+1)/2)/(sqrt(pi*k)*gamma(k/2))*integrate(f2,0,c.k(k,a))$value
}  
# compute roots
library(rootSolve)
t=c(4:25,100)
n=length(t)
root=numeric(n)
for (i in 1:n) {
  k=t[i]
  root[i]=uniroot(f,c(0.05,sqrt(k)/2+1))$root
}
f.4=function(a){
  pt(sqrt((k-1)*a^2/(k-a^2)),k-1)-pt(sqrt((k*a^2)/(k+1-a^2)),k)
}
K1=c(4:25,100,500,1000)
n=length(K1)
root.4=numeric(n)
for (i in 1:n) {
  k=K1[i]
  root.4[i]=uniroot(f.4,c(0.5,sqrt(k)/2+1))$root
}
#the roots of 11.5
root
#the roots of 11.4
root.4
# A-B-O blood type problem (EM algorithm)
N=1e3                       # maximum number of iterations
n1=28;n2=24;n3=41;n4=70     # observed phenotype counts
L=c(.5,.4)                  # initial estimates of (p, q)
tol=.Machine$double.eps^0.5
L.old=L+1
E=numeric(N)                # expected complete-data log-likelihood at each step
for(j in 1:N){
  E[j]=2*L[1]*n1*log(L[1])/(2-L[1]-2*L[2])+2*L[2]*n2*log(L[2])/(2-L[2]-2*L[1])+2*n3*log(1-L[1]-L[2])+n1*(2-2*L[1]-2*L[2])*log(2*L[1]*(1-L[1]-L[2]))/(2-L[1]-2*L[2])+n2*(2-2*L[1]-2*L[2])*log(2*L[2]*(1-L[1]-L[2]))/(2-L[2]-2*L[1])+n4*log(2*L[1]*L[2])
  model=function(x){
    F1=2*L[1]*n1/((2-L[1]-2*L[2])*x[1])-2*n3/(1-x[1]-x[2])+n1*(2-2*L[1]-2*L[2])*(1-2*x[1]-x[2])/((2-L[1]-2*L[2])*x[1]*(1-x[1]-x[2]))-n2*(2-2*L[1]-2*L[2])/((2-L[2]-2*L[1])*(1-x[1]-x[2]))+n4/x[1]
    F2=2*L[2]*n2/((2-L[2]-2*L[1])*x[2])-2*n3/(1-x[1]-x[2])-n1*(2-2*L[1]-2*L[2])/((2-L[1]-2*L[2])*(1-x[1]-x[2]))+n2*(2-2*L[1]-2*L[2])*(1-2*x[2]-x[1])/((2-L[2]-2*L[1])*x[2]*(1-x[1]-x[2]))+n4/x[2]
    c(F1=F1,F2=F2)
  }
  ss=multiroot(f=model,start=c(.1,.1))
  L=ss$root
  # update p and q
  if (sum(abs(L-L.old)/L.old)<tol) break
  L.old=L
}
L.old
plot(E,type = "l")
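
As a check, the observed-data log-likelihood can be evaluated at the EM solution; a sketch assuming the standard A-B-O model where n1, n2, n3, n4 are the A, B, O, and AB phenotype counts (consistent with the E-step above):

# observed-data log-likelihood at the EM estimates, with r = 1 - p - q
p=L[1]; q=L[2]; r=1-p-q
n1*log(p^2+2*p*r)+n2*log(q^2+2*q*r)+2*n3*log(r)+n4*log(2*p*q)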

Homework 12.13: R loops and the apply family.

# 3
mpg=mtcars$mpg
disp=mtcars$disp
wt=mtcars$wt
formulas=list(
mpg ~ disp,
mpg ~ I(1 / disp),
mpg ~ disp + wt,
mpg ~ I(1 / disp) + wt)
n=4;mod=vector("list",n)   # lm objects must be stored in a list, not a numeric vector
for(i in 1:n){mod[[i]]=lm(formulas[[i]])}
mod
lapply(1:4,function(i)lm(formulas[[i]]))
# 4
bootstraps=lapply(1:10, function(i) {
  rows=sample(1:nrow(mtcars), replace = TRUE)
  mtcars[rows, ]
})
lapply(bootstraps,lm,formula=mpg~disp)
n=10;model=vector("list",n)   # lm objects must go in a list
for(i in 1:n){
  model[[i]]=lm(mpg~disp,data=bootstraps[[i]])
}
model
# 5
mpg=mtcars$mpg
disp=mtcars$disp
wt=mtcars$wt
formulas=list(
mpg ~ disp,
mpg ~ I(1 / disp),
mpg ~ disp + wt,
mpg ~ I(1 / disp) + wt)
mod=lapply(1:4,function(i)lm(formulas[[i]]))
lapply(1:4,function(i)summary(mod[[i]])$r.squared)
bootstraps=lapply(1:10, function(i) {
  rows=sample(1:nrow(mtcars), rep = TRUE)
  mtcars[rows, ]
})
Mod=lapply(bootstraps,lm,formula=mpg~disp)
lapply(1:10,function(i)summary(Mod[[i]])$r.squared)
# 3
trials=replicate(100,t.test(rpois(10, 10), rpois(7, 10)),simplify = FALSE)
sapply(trials,"[[","p.value")   # extract the p-value from each htest object
p=function(n){
  p.value=numeric(n)
  for(i in 1:n){
    result=trials[[i]]
    p.value[i]=result$p.value
  }
  p.value
}
p(100)
# 7
library(parallelsugar)   # GitHub package providing an mclapply that works on Windows
mpg=mtcars$mpg
disp=mtcars$disp
wt=mtcars$wt
formulas=list(
mpg ~ disp,
mpg ~ I(1 / disp),
mpg ~ disp + wt,
mpg ~ I(1 / disp) + wt)
mod=lapply(1:4,function(i)lm(formulas[[i]]))
mclapply(1:4,function(i)summary(mod[[i]])$r.squared,mc.cores = 4)

Homework 12.20

We implement the random walk Metropolis sampler of exercise 9.4 in both R and Rcpp; the example below uses sigma = 10, x0 = 10 and N = 1e3. The R version is rw.Metropolis as defined in Homework 11.29; the Rcpp version follows. A Q-Q plot of the two chains shows that their distributions agree, and comparing the computation times with microbenchmark shows that the Rcpp version is faster.

# R version: rw.Metropolis and df as defined in Homework 11.29 above
# Rcpp version
library(Rcpp)
cppFunction('List RwMetropolis(double sigma, double x0, int N) {
NumericVector x(N);
x[0] = x0;
NumericVector u=runif(N);
int k = 0;                                // number of rejected proposals
for (int i=1;i<N;i++) {
double y = rnorm(1, x[i-1], sigma)[0];    // random walk proposal
// acceptance ratio for the Laplace target: f(y)/f(x) = exp(|x|-|y|)
if (u[i] <= exp(fabs(x[i-1])-fabs(y))){
x[i] = y; }
else {
x[i] = x[i-1];
k += 1;
}
}
return List::create(
    _["x"] = x,
    _["k"] = k
  );
}')
# Q-Q plot comparing the two chains
v1=rw.Metropolis(10,10,1e3)$x
v2=RwMetropolis(10,10,1e3)$x
qqplot(v1,v2)
abline(0,1,col='blue',lwd=2)
# compare time
library(microbenchmark)
ts=microbenchmark(rw.Metropolis(10,10,1e3),RwMetropolis(10,10,1e3))
summary(ts)

