Question

Use knitr to produce at least 3 examples (texts, figures, tables)

Answer

ctl <- c(4.17,5.58,5.18,6.11,4.50,4.61,5.17,4.53,5.33,5.14)
trt <- c(4.81,4.17,4.41,3.59,5.87,3.83,6.03,4.89,4.32,4.69)
group <- gl(2, 10, 20, labels = c("Ctl","Trt"))
weight <- c(ctl, trt)
lm.D9 <- lm(weight ~ group)
summary(lm.D9)$coef       # text example: printed coefficient table
knitr::kable(head(iris))  # table example
par(mar = c(1, 1, 1, 1))
plot(lm.D9)               # figure example: regression diagnostic plots

Exercise 3.4

Develop an algorithm to generate random samples from a Rayleigh($\sigma$) distribution. Generate Rayleigh($\sigma$) samples for several choices of $\sigma > 0$ and check that the mode of the generated samples is close to the theoretical mode $\sigma$ (check the histogram).

Answer

n <- 1e4
for (sigma in 1:4) {
  # If X1, X2 ~ N(0,1) independently, then sigma * sqrt(X1^2 + X2^2) ~ Rayleigh(sigma)
  x1 <- rnorm(n)
  x2 <- rnorm(n)
  y <- sigma * sqrt(x1^2 + x2^2)
  hist(y, freq = FALSE, main = paste("Histogram of y with sigma =", sigma))
  z <- seq(0, 15, 0.1)
  lines(z, (z / sigma^2) * exp(-z^2 / (2 * sigma^2)))  # Rayleigh(sigma) density
}
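
An equivalent inverse-transform sketch (not part of the original answer): since the Rayleigh($\sigma$) CDF is $F(x) = 1 - e^{-x^2/(2\sigma^2)}$, $x = \sigma\sqrt{-2\log u}$ with $u \sim U(0,1)$ also has the target distribution; shown here for $\sigma = 2$.

u <- runif(n)
y2 <- 2 * sqrt(-2 * log(u))  # inverse transform with sigma = 2; u plays the role of 1 - F(x)
hist(y2, freq = FALSE, main = "Rayleigh(2) by inverse transform")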

Exercise 3.11

Generate a random sample of size 1000 from a normal location mixture. The components of the mixture have N(0, 1) and N(3, 1) distributions with mixing probabilities $p_1$ and $p_2 = 1 - p_1$. Graph the histogram of the sample with density superimposed, for $p_1 = 0.75$. Repeat with different values for $p_1$ and observe whether the empirical distribution of the mixture appears to be bimodal. Make a conjecture about the values of $p_1$ that produce bimodal mixtures.

Answer

n <- 1e3
X1 <- rnorm(n, 0, 1)
X2 <- rnorm(n, 3, 1)
z <- seq(-4, 7, 0.1)
for (p in c(0.25, 0.5, 0.75)) {
  r <- sample(c(1, 0), n, replace = TRUE, prob = c(p, 1 - p))
  Z <- r * X1 + (1 - r) * X2
  hist(Z, freq = FALSE, main = paste("Histogram of Z with p1 =", p))
  lines(z, p * dnorm(z, 0, 1) + (1 - p) * dnorm(z, 3, 1))  # superimposed mixture density
}

According to the histograms, the mixture appears bimodal when $p_1 = 0.5$, while the second mode fades as $p_1$ moves toward 0 or 1; we conjecture that values of $p_1$ near 0.5 produce bimodal mixtures.

Exercise 3.18

Write a function to generate a random sample from a $W_d(V, n)$ (Wishart) distribution for $n > d + 1 \ge 1$, based on Bartlett's decomposition.

Answer

Suppose n = 10 and d = 8, and for simplicity take the scale matrix V to be the identity.

rwishart <- function(n, V) {
  # Bartlett's decomposition: if V = L L^T with L lower triangular, and A is
  # lower triangular with A[i,i] = sqrt(chisq(n-i+1)) and A[i,j] ~ N(0,1) for
  # j < i, then X = (L A)(L A)^T ~ W_d(V, n).
  d <- nrow(V)
  A <- matrix(0, d, d)
  diag(A) <- sqrt(rchisq(d, n - (1:d) + 1))
  A[lower.tri(A)] <- rnorm(d * (d - 1) / 2)
  L <- t(chol(V))  # chol() returns the upper triangular factor, so transpose
  (L %*% A) %*% t(L %*% A)
}
n <- 10
d <- 8
X <- rwishart(n, diag(d))
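
A quick sanity check of the generator (a sketch; it uses the fact that $E[W_d(V, n)] = nV$):

set.seed(123)
sims <- replicate(1000, rwishart(10, diag(3)))  # 1000 draws from W_3(I, 10)
round(apply(sims, c(1, 2), mean), 2)            # elementwise mean, should be close to 10 * diag(3)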

Exercise 5.1

Compute a Monte Carlo estimate of $$ \int_{0}^{\pi / 3} \sin t d t $$ and compare your estimate with the exact value of the integral.

Answer

set.seed(12345)
m<-1e4
x<-runif(m,min = 0,max = pi/3)
theta.hat <- mean(sin(x)) * pi / 3  # theta = (pi/3) * E[sin U] for U ~ U(0, pi/3)
print(c(theta.hat,1-cos(pi/3)))
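
The Monte Carlo standard error of the estimate is easy to report as well (a sketch reusing the draws above):

se.hat <- sd(sin(x) * pi / 3) / sqrt(m)  # SE of the sample-mean estimator
print(se.hat)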

Exercise 5.10

Use Monte Carlo integration with antithetic variables to estimate $$ \int_{0}^{1} \frac{e^{-x}}{1+x^{2}} d x $$ and find the approximate reduction in variance as a percentage of the variance without variance reduction.

Answer

set.seed(12345)
m <- 1e4
n <- 50  # number of replicates used to estimate the variances
est <- matrix(0, n, 2)
g <- function(x) exp(-x) / (1 + x^2) * (x > 0) * (x < 1)
for (i in 1:n) {
  u <- runif(m)
  est[i, 1] <- mean(g(u))                   # simple Monte Carlo
  v <- runif(m / 2)
  est[i, 2] <- mean((g(v) + g(1 - v)) / 2)  # antithetic variables, same total cost
}
v.hat <- apply(est, 2, var)
knitr::kable(rbind(apply(est, 2, mean), sqrt(v.hat)), format = 'html')
print((v.hat[1] - v.hat[2]) / v.hat[1])     # approximate reduction in variance

Exercise 5.15

Obtain the stratified importance sampling estimate in Example 5.13 and compare it with the result of Example 5.10.

Answer

set.seed(12345)
m <- 1e4
k <- 5      # number of strata
r <- m / k  # replicates per stratum
n <- 100
T <- numeric(k)
est <- matrix(0, n, 2)
g <- function(x) exp(-x) / (1 + x^2) * (x > 0) * (x < 1)
for (i in 1:n) {
  # importance sampling with f(x) = exp(-x) / (1 - exp(-1)) on (0, 1)
  u <- runif(m)
  x <- -log(1 - u * (1 - exp(-1)))
  fg <- g(x) / (exp(-x) / (1 - exp(-1)))
  est[i, 1] <- mean(fg)
  # stratified importance sampling: u ~ U((j-1)/5, j/5) maps to stratum j
  for (j in 1:k) {
    u <- runif(r, (j - 1) / k, j / k)
    z <- -log(1 - u * (1 - exp(-1)))
    fg1 <- g(z) / (5 * exp(-z) / (1 - exp(-1)))  # conditional density on stratum j
    T[j] <- mean(fg1)
  }
  est[i, 2] <- sum(T)
}
knitr::kable(rbind(apply(est, 2, mean), apply(est, 2, sd)), format = 'html')

Exercise 6.5

Suppose a $95\%$ symmetric t-interval is applied to estimate a mean, but the sample data are non-normal. Then the probability that the confidence interval covers the mean is not necessarily equal to $0.95$. Use a Monte Carlo experiment to estimate the coverage probability of the t-interval for random samples of $\chi^2(2)$ data with sample size $n = 20$. Compare your t-interval results with the simulation results in Example 6.4. (The t-interval should be more robust to departures from normality than the interval for variance.)

Answer

n <- 20
m <- 1e4
set.seed(1234)
mu <- 2  # true mean of chi^2(2)
covered <- numeric(m)
for (i in 1:m) {
  x <- rchisq(n, df = 2)
  half <- qt(0.975, n - 1) * sd(x) / sqrt(n)  # half-width of the 95% t-interval
  covered[i] <- (mean(x) - half <= mu) && (mu <= mean(x) + half)
}
print(mean(covered))  # empirical coverage probability of the t-interval
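
For the comparison with Example 6.4 (a sketch, assuming that example's one-sided 95% upper confidence limit for the variance, $(n-1)s^2/\chi^2_{0.05}(n-1)$; the true variance of $\chi^2(2)$ is 4):

set.seed(1234)
UCL <- replicate(m, {
  y <- rchisq(n, df = 2)
  (n - 1) * var(y) / qchisq(0.05, df = n - 1)
})
mean(UCL > 4)  # empirical coverage of the variance interval under chi^2(2) data

As the exercise's parenthetical suggests, the t-interval's coverage should stay much closer to 0.95 than the variance interval's, which is badly affected by the non-normality.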

Exercise 6.6

Estimate the $0.025$, $0.05$, $0.95$, and $0.975$ quantiles of the skewness $\sqrt{b_{1}}$ under normality by a Monte Carlo experiment. Compute the standard error of the estimates from (2.14) using the normal approximation for the density (with exact variance formula). Compare the estimated quantiles with the quantiles of the large sample approximation $\sqrt{b_{1}} \approx N(0,6 / n)$.

Answer

n<-1e3
m<-1e4
set.seed(1234)
ske<-numeric(m)
for (i in 1:m) {
  x<-rnorm(n)
  ske[i]<-(mean((x-mean(x))^3))/(mean((x-mean(x))^2))^(3/2)
}
est <- quantile(ske, probs = c(0.025, 0.05, 0.95, 0.975))
j <- 1
cv <- se <- numeric(4)
for (p in c(0.025, 0.05, 0.95, 0.975)) {
  cv[j] <- qnorm(p, 0, sqrt(6/n))  # quantile of the large-sample N(0, 6/n)
  # (2.14): Var(x_q) = q(1-q) / (m f(x_q)^2), with the density f approximated
  # by the N(0, 6/n) density and m the number of Monte Carlo replicates
  se[j] <- sqrt(p * (1 - p) / (m * dnorm(est[j], 0, sqrt(6/n))^2))
  j <- j + 1
}
res.compare <- data.frame(est, cv, se)
knitr::kable(res.compare)

Exercise 6.7

Estimate the power of the skewness test of normality against symmetric Beta($\alpha$, $\alpha$) distributions and comment on the results. Are the results different for heavy-tailed symmetric alternatives such as $t(\nu)$?

Answer

alpha <- 0.1  # significance level of the skewness test
n <- 20
m <- 1e4
set.seed(1234)
a <- 1:20     # shape parameters of the symmetric Beta(a, a) alternatives
N <- length(a)
pwr <- numeric(N)
cv <- qnorm(1 - alpha/2, 0, sqrt(6*(n-2) / ((n+1)*(n+3))))  # critical value, exact variance
sk <- function(x) {
  xbar <- mean(x)
  m3 <- mean((x - xbar)^3)
  m2 <- mean((x - xbar)^2)
  return(m3 / m2^1.5)
}
for (j in 1:N) {
  sktests <- numeric(m)
  for (i in 1:m) {
    x <- rbeta(n, a[j], a[j])
    sktests[i] <- as.integer(abs(sk(x)) >= cv)
  }
  pwr[j] <- mean(sktests)
}
plot(a, pwr, type = 'b', xlab = 'alpha (Beta shape)', ylim = c(0, .1))
abline(h = .1, lty = 3)
se <- sqrt(pwr * (1 - pwr) / m)
lines(a, pwr + se, lty = 3)
lines(a, pwr - se, lty = 3)
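
The exercise also asks about heavy-tailed symmetric alternatives; a sketch of the same power estimate against $t(\nu)$ samples (an addition to the original answer, reusing sk, cv, n, m, and N from above, with $\nu = 1, \dots, 20$):

pwr.t <- numeric(N)
for (j in 1:N) {
  pwr.t[j] <- mean(replicate(m, abs(sk(rt(n, df = a[j]))) >= cv))  # power at t(nu = a[j])
}
plot(a, pwr.t, type = 'b', xlab = 'nu (t degrees of freedom)', ylim = c(0, 1))
abline(h = .1, lty = 3)

Against the heavy-tailed $t(\nu)$ alternatives the power should be far higher than against the Beta alternatives, decreasing toward the nominal level as $\nu$ grows and $t(\nu)$ approaches the normal distribution.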

Exercise 6.A

Use Monte Carlo simulation to investigate whether the empirical Type I error rate of the t-test is approximately equal to the nominal significance level $\alpha$, when the sampled population is non-normal. The t-test is robust to mild departures from normality. Discuss the simulation results for the cases where the sampled population is (i) $\chi^2(1)$, (ii) Uniform(0,2), and (iii) Exponential(rate = 1). In each case, test $H_0: \mu = \mu_0$ vs $H_1: \mu \neq \mu_0$, where $\mu_0$ is the mean of $\chi^2(1)$, Uniform(0,2), and Exponential(1), respectively.

Answer

(i)

n <- 100
alpha <- 0.1
mu0 <- 1 
m <- 1e4
set.seed(1234)
p <- numeric(m) 
for (i in 1:m) {
  x <- rchisq(n, df = 1)  # chi^2(1) has mean mu0 = 1
  ttest <- t.test(x, alternative = "two.sided", mu = mu0)
  p[i] <- ttest$p.value
}

p.hat <- mean(p<alpha)
se.hat <- sqrt(p.hat*(1-p.hat)/m)
print(c(p.hat, se.hat))

(ii)

n <- 100
alpha <- 0.1
mu0 <- 1 
m <- 1e4
set.seed(1234)
p <- numeric(m) 
for (i in 1:m) {
  x <- runif(n,0,2)
  ttest <- t.test(x, alternative = "two.sided", mu = mu0)
  p[i] <- ttest$p.value
}
p.hat <- mean(p<alpha)
se.hat <- sqrt(p.hat*(1-p.hat)/m)
print(c(p.hat, se.hat))

(iii)

n <- 100
alpha <- 0.1
mu0 <- 1 
m <-1e4
set.seed(1234)
p <- numeric(m) 
for (i in 1:m) {
  x <- rexp(n)
  ttest <- t.test(x, alternative = "two.sided", mu = mu0)
  p[i] <- ttest$p.value
}

p.hat <- mean(p<alpha)
se.hat <- sqrt(p.hat*(1-p.hat)/m)
print(c(p.hat, se.hat))
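
The three cases differ only in the sampling step, so they can be wrapped in one helper (a sketch; the function t1e is hypothetical, not part of the original answer):

t1e <- function(rsampler, mu0, n = 100, m = 1e4, alpha = 0.1) {
  # empirical Type I error rate of the two-sided t-test at level alpha
  p <- replicate(m, t.test(rsampler(n), mu = mu0)$p.value)
  mean(p < alpha)
}
set.seed(1234)
c(chisq1 = t1e(function(n) rchisq(n, df = 1), mu0 = 1),
  unif02 = t1e(function(n) runif(n, 0, 2), mu0 = 1),
  exp1   = t1e(function(n) rexp(n), mu0 = 1))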

Exercise 7.6

Efron and Tibshirani discuss the scor (bootstrap) test score data on 88 students who took examinations in five subjects. The first two tests (mechanics, vectors) were closed book and the last three tests (algebra, analysis, statistics) were open book. Each row of the data frame is a set of scores $(x_{i1},...x_{i5})$ for the $i^{th}$ student. Use a panel display to display the scatter plots for each pair of test scores. Compare the plot with the sample correlation matrix. Obtain bootstrap estimates of the standard errors for each of the following estimates: $\hat\rho_{12}=\hat\rho(mec,vec),\hat \rho_{34}=\hat\rho(alg,ana), \hat\rho_{35}=\hat\rho(alg,sta), \hat\rho_{45}=\hat\rho(ana,sta)$

Answer

set.seed(12345)
library(bootstrap)
library(boot)
scor <- data.matrix(scor)
pairs(scor, main = 'Scatter plots for each pair of test scores', pch = 18)
round(cor(scor), 3)  # sample correlation matrix, for comparison with the plot
b.cor <- function(x, i) {
  r <- cor(x[i, ])
  c(r[1, 2], r[3, 4], r[3, 5], r[4, 5])  # rho12, rho34, rho35, rho45
}
sc.boot <- boot(data = scor, statistic = b.cor, R = 2000)
apply(sc.boot$t, 2, FUN = sd)  # bootstrap estimates of the standard errors

Project 7.B

Repeat Project 7.A for the sample skewness statistic. Compare the coverage rates for normal populations (skewness 0) and $\chi^2(5)$ distributions (positive skewness $\sqrt\frac8 5$).

Empirical coverage rates: $P(CI[1] \le skew \le CI[2])$. Proportion of misses on the left: $P(skew < CI[1])$. Proportion of misses on the right: $P(skew > CI[2])$.

Answer

set.seed(12345)
library(boot)
# normal
skew = 0
sk = function(x,i) {
  xbar = mean(x[i])
  m3 = mean((x[i] - xbar)^3)
  m2 = mean((x[i] - xbar)^2)
  return( m3 / m2^1.5 )
}
n=10
m=1000
ci.norm = ci.basic = ci.perc = matrix(0,m,2)

for(i in 1:m){
 x = rnorm(n)
 b = boot(data = x, statistic = sk, R = 2000)
 ci = boot.ci(b, type=c("norm","basic","perc"))
 ci.norm[i,] = ci$norm[2:3]
 ci.basic[i,] = ci$basic[4:5]
 ci.perc[i,] = ci$percent[4:5]
}
# output
output1 = output2 = output3 = matrix(0,2,3)
rownames(output1)=rownames(output2)=rownames(output3)=c("N(0,1)","X^2(5)")
colnames(output1)=colnames(output2)=colnames(output3)=c("norm","basic","perc")

output1[1,] = c(mean(ci.norm[,1]<=skew & ci.norm[,2]>=skew), mean(ci.basic[,1]<=skew & ci.basic[,2]>=skew), mean(ci.perc[,1]<=skew & ci.perc[,2]>=skew))
output2[1,] = c(mean(ci.norm[,1]>skew), mean(ci.basic[,1]>skew), mean(ci.perc[,1]>skew)) 
output3[1,] = c(mean(ci.norm[,2]<skew), mean(ci.basic[,2]<skew), mean(ci.perc[,2]<skew)) 
# chi-square
skew = sqrt(8/5)
ci.norm = ci.basic = ci.perc = matrix(0,m,2)
for(i in 1:m){
 x = rchisq(n,5)
 b = boot(data = x, statistic = sk, R = 2000)
 ci = boot.ci(b, type=c("norm","basic","perc"))
 ci.norm[i,] = ci$norm[2:3]
 ci.basic[i,] = ci$basic[4:5]
 ci.perc[i,] = ci$percent[4:5]
}
# output
output1[2,] = c(mean(ci.norm[,1]<=skew & ci.norm[,2]>=skew), mean(ci.basic[,1]<=skew & ci.basic[,2]>=skew), mean(ci.perc[,1]<=skew & ci.perc[,2]>=skew))
output2[2,] = c(mean(ci.norm[,1]>skew), mean(ci.basic[,1]>skew), mean(ci.perc[,1]>skew)) 
output3[2,] = c(mean(ci.norm[,2]<skew), mean(ci.basic[,2]<skew), mean(ci.perc[,2]<skew)) 
output1
output2
output3

Exercise 7.8

Refer to Exercise 7.7. Obtain the jackknife estimates of bias and standard error of $\hat{\theta}$.

Exercise 7.7

Refer to Exercise 7.6. Efron and Tibshirani discuss the following example [84, Ch. 7]. The five-dimensional scores data have a $5 \times 5$ covariance matrix $\Sigma$, with positive eigenvalues $\lambda_{1}>\cdots>\lambda_{5}$. In principal components analysis, $$\theta=\frac{\lambda_{1}}{\sum_{j=1}^{5} \lambda_{j}}$$ measures the proportion of variance explained by the first principal component. Let $\hat{\lambda}_{1}>\cdots>\hat{\lambda}_{5}$ be the eigenvalues of $\hat{\Sigma}$, where $\hat{\Sigma}$ is the MLE of $\Sigma$. Compute the sample estimate of $\theta$. Use bootstrap to estimate the bias and standard error of $$\hat{\theta}=\frac{\hat{\lambda}_{1}}{\sum_{j=1}^{5} \hat{\lambda}_{j}}$$

Answer

data(scor, package = "bootstrap")
n<-nrow(scor)
f<-function(x){
  mat<-cov(x)
  evals<-eigen(mat)$values
  return(evals[1]/sum(evals))
}
theta.hat<-f(scor)
theta.jack<-numeric(n)
for (i in 1:n) {
  theta.jack[i]=f(scor[-i,])
}
bias<-(n-1)*(mean(theta.jack)-theta.hat)
print(bias)
se <- sqrt((n-1)*mean((theta.jack - mean(theta.jack))^2))
print(se)

Exercise 7.10

In Example $7.18,$ leave-one-out $(n$ -fold) cross validation was used to select the best fitting model. Repeat the analysis replacing the Log-Log model with a cubic polynomial model. Which of the four models is selected by the cross validation procedure? Which model is selected according to maximum adjusted $R^{2} ?$

Answer

library(DAAG)
attach(ironslag)
n <- length(magnetic) 
e1 <- e2 <- e3 <- e4 <- numeric(n)
for (k in 1:n) {
  y <- magnetic[-k]
  x <- chemical[-k]

  J1 <- lm(y ~ x)
  yhat1 <- J1$coef[1] + J1$coef[2] * chemical[k]
  e1[k] <- magnetic[k] - yhat1

  J2 <- lm(y ~ x + I(x^2))
  yhat2 <- J2$coef[1] + J2$coef[2] * chemical[k] +J2$coef[3] * chemical[k]^2
  e2[k] <- magnetic[k] - yhat2

  J3 <- lm(log(y) ~ x)
  logyhat3 <- J3$coef[1] + J3$coef[2] * chemical[k]
  yhat3 <- exp(logyhat3)
  e3[k] <- magnetic[k] - yhat3

  J4 <- lm(y ~ x + I(x^2)+I(x^3))
  yhat4 <- J4$coef[1] + J4$coef[2] * chemical[k]+J4$coef[3]*chemical[k]^2+J4$coef[4]*chemical[k]^3
  e4[k] <- magnetic[k] - yhat4
}
c(mean(e1^2), mean(e2^2), mean(e3^2), mean(e4^2))  # LOOCV prediction errors
# refit each model on the full data before comparing adjusted R^2
L1 <- lm(magnetic ~ chemical)
L2 <- lm(magnetic ~ chemical + I(chemical^2))
L3 <- lm(log(magnetic) ~ chemical)
L4 <- lm(magnetic ~ chemical + I(chemical^2) + I(chemical^3))
summary(L1)$adj.r.squared
summary(L2)$adj.r.squared
summary(L3)$adj.r.squared
summary(L4)$adj.r.squared
detach(ironslag)

According to the prediction error criterion, Model 2, the quadratic model, would be the best fit for the data.

According to maximum adjusted $R^{2}$, Model 4, the cubic polynomial model, would be the best fit for the data.

Exercise 8.3

The Count 5 test for equal variances in Section 6.4 is based on the maximum number of extreme points. Example 6.15 shows that the Count 5 criterion is not applicable for unequal sample sizes. Implement a permutation test for equal variance based on the maximum number of extreme points that applies when sample sizes are not necessarily equal.

Answer

count5<- function(x, y) {
  X <- x - mean(x)
  Y <- y - mean(y)
  outx <- sum(X > max(Y)) + sum(X < min(Y))
  outy <- sum(Y > max(X)) + sum(Y < min(X))
  return(max(c(outx, outy)))
}
set.seed(1234)
n1 <- 20
n2 <- 30
K=1:(n1+n2)
mu1 <- mu2 <- 0
sigma1 <-2
sigma2 <-1
R<-1e4
x <- rnorm(n1, mu1, sigma1)
y <- rnorm(n2, mu2, sigma2)
z<-c(x,y)
n<-length(x)
reps<-numeric(R)
t0<-count5(x,y)
for (i in 1:R) {
  k<-sample(K,size = n,replace = F)
  x1<-z[k]
  y1<-z[-k]
  reps[i]=count5(x1,y1)
}
p <- mean(c(t0, reps) >= t0)  # permutation p-value, including the observed statistic
round(p, 3)

We reject the null hypothesis of equal variances at the 10% significance level, as expected since the samples were generated with different variances.

Question 2

Power comparison (distance correlation test versus ball covariance test)

Model 1: $Y=X / 4+e$

Model 2: $Y=X / 4 \times e$

$X \sim N(0_{2}, I_{2})$, $e \sim N(0_{2}, I_{2})$, $X$ and $e$ are independent.

Answer

distcor.test <- function(x, y) {
  # distance covariance statistic
  dCov <- function(x, y) {
    Akl <- function(x) {
      d <- as.matrix(dist(x))
      m <- rowMeans(d); M <- mean(d)
      a <- sweep(d, 1, m); b <- sweep(a, 2, m)
      b + M
    }
    A <- Akl(x); B <- Akl(y)
    sqrt(mean(A * B))
  }
  # permutation replicate of n * dCov^2
  ndCov2 <- function(z, ix, dims) {
    p <- dims[1]
    x <- z[, 1:p]
    y <- z[ix, -(1:p)]
    nrow(z) * dCov(x, y)^2
  }
  z <- cbind(x, y)
  boot.obj <- boot(data = z, statistic = ndCov2, R = 999,
                   sim = "permutation", dims = c(2, 2))
  tb <- c(boot.obj$t0, boot.obj$t)
  mean(tb >= tb[1])  # permutation p-value
}

Model 1

library('MASS')
library('Ball')
library('boot')
set.seed(1234)
n <- 30
mu <- c(0, 0)
sigma <- diag(2)
x <- mvrnorm(n, mu, sigma)
e <- mvrnorm(n, mu, sigma)
y <- x/4 + e  # Model 1
p.dist <- distcor.test(x, y)
p.ball <- bcov.test(x, y, R = 999)$p.value
round(c(p.dist, p.ball), 4)

Model 2

library('MASS')
library('Ball')
library('boot')
set.seed(1234)
n <- 30
mu <- c(0, 0)
sigma <- diag(2)
x <- mvrnorm(n, mu, sigma)
e <- mvrnorm(n, mu, sigma)
y <- (x/4) * e  # Model 2
p.dist <- distcor.test(x, y)
p.ball <- bcov.test(x, y, R = 999)$p.value
round(c(p.dist, p.ball), 4)
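
The question asks for a power comparison, which requires repeating the simulation; a sketch for Model 1 (assumptions not in the original answer: level 0.05, 100 replicates, and fewer permutations in bcov.test to keep the runtime modest; substitute y <- (x/4) * e for Model 2):

m.rep <- 100
alpha <- 0.05
p.d <- p.b <- numeric(m.rep)
set.seed(1234)
for (i in 1:m.rep) {
  x <- mvrnorm(n, mu, sigma)
  e <- mvrnorm(n, mu, sigma)
  y <- x / 4 + e
  p.d[i] <- distcor.test(x, y)
  p.b[i] <- bcov.test(x, y, R = 99)$p.value
}
c(power.dist = mean(p.d < alpha), power.ball = mean(p.b < alpha))  # empirical power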

Exercise 9.4

Implement a random walk Metropolis sampler for generating the standard Laplace distribution (see Exercise 3.2). For the increment, simulate from a normal distribution. Compare the chains generated when different variances are used for the proposal distribution. Also, compute the acceptance rates of each chain.

Answer

library(GeneralizedHyperbolic)
Laplace.Metropolis<-function(sigma,N){
  x<-numeric(N)
  x[1]<-rnorm(1,0,sigma)
  u<-runif(N)
  k<-0
  for (i in 2:N) {
    y<-rnorm(1,x[i-1],sigma)
    if(u[i]<=dskewlap(y)/dskewlap(x[i-1]))
      x[i]=y
    else{
      x[i]=x[i-1]
      k=k+1
    }
  }
  return(list(x=x,k=k))
}
N<-1e4
sigma<-c(1,4,9,16)
set.seed(1234)
laplace1<-Laplace.Metropolis(sigma[1],N)
laplace2<-Laplace.Metropolis(sigma[2],N)
laplace3<-Laplace.Metropolis(sigma[3],N)
laplace4<-Laplace.Metropolis(sigma[4],N)
print(c(1 - laplace1$k/N, 1 - laplace2$k/N, 1 - laplace3$k/N, 1 - laplace4$k/N))  # acceptance rates
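
The chains themselves can be compared visually (a sketch; simple trace plots of the four chains):

par(mfrow = c(2, 2))
plot(laplace1$x, type = "l", ylab = "x", main = "sigma = 1")
plot(laplace2$x, type = "l", ylab = "x", main = "sigma = 4")
plot(laplace3$x, type = "l", ylab = "x", main = "sigma = 9")
plot(laplace4$x, type = "l", ylab = "x", main = "sigma = 16")
par(mfrow = c(1, 1))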

Questions

11.1

11.5

A-B-O blood type problem

Answers

11.1

exp(log(20)) == log(exp(20))
all.equal(exp(log(20)), log(exp(20)))

Exact equality with == can fail because the two round-trips need not produce bit-identical floating-point results, while all.equal() tests for near equality within a tolerance and returns TRUE.

11.5

gammaratio <- function(k){
  if (k %% 2==0) 
    return(exp(sum(log((k/2-1):1))-sum(log(seq(1,k-3,2)/2))-log(gamma(1/2))))
  else
    return(exp(sum(log(seq(1,k-2,2)/2))+log(gamma(1/2))-sum(log(((k-1)/2-1):1))))
}   # computes Gamma(k/2) / Gamma((k-1)/2) on the log scale, stable for large k
g<- function(a,k){
  f <- function(x, N) (1 + x^2/N)^(-(N + 1)/2)
  I1 <- integrate(f,lower = 0,upper = sqrt(a^2*(k-1)/(k-a^2)),rel.tol=.Machine$double.eps^0.25,N = k-1 )$value
  I2 <- integrate(f,lower = 0,upper = sqrt(a^2*k/(k+1-a^2)),rel.tol=.Machine$double.eps^0.25,N = k )$value
  return(2*gammaratio(k)*I1/sqrt(k-1)-2*gammaratio(k+1)*I2/sqrt(k))
}   # the equation in 11.5
f1 <- function(x,k) pt(sqrt(x^2*(k-1)/(k-x^2)),k-1)-pt(sqrt(x^2*k/(k+1-x^2)),k)
##  computing method in 11.4
a <- seq(0.001,6,0.0001)
y1 <-numeric(length(a))
for ( i in 1:length(a)){
  y1[i] <- g(a[i],100) 
}
plot(cbind(a,y1),type = "l",xlab="a",ylab = "value")
nk <- c(4:25,100,500,1000)
ak <-numeric(25)
qk <-numeric(25)
for (i in 1:25) {
  ak[i] <- uniroot(g,c(0.00001,ifelse(nk[i] <=7,sqrt(nk[i])-0.01,2.5)),k=nk[i])$root # the solution of the equation
  qk[i] <- uniroot(f1,c(0.00001,ifelse(nk[i] <=7,sqrt(nk[i])-0.01,2.5)),k=nk[i])$root # the solution of 11.4
}
cbind(ak,qk)
plot(cbind(nk,ak),xlab = "k",ylab = "A(k)")
lines(cbind(nk,ak),lwd =2,col = "red")

A-B-O blood type problem

loglike <- function(pq,ng){  # complete-data negative log-likelihood
  p <- pq[1]
  q <- pq[2]
  r <- 1-p -q
  a <- c(p^2,q^2,r^2,2*p*r,2*q*r,2*p*q)
  return(-sum(log(a)*ng))
}
na <- 28
nb <- 24
noo <- 41
nab <- 70
n <-na + nb +noo+nab
eloglike <- function(pq){  # observed-data negative log-likelihood
  p <- pq[1]
  q <- pq[2]
  r <- 1-p -q
  a <- c(p^2+2*p*r,q^2+2*q*r,r^2,2*p*q)
  ng <- c(na,nb,noo,nab)
  return(-sum(log(a)*ng))
}
r <- sqrt(noo/n)
f2 <- function(x) x^2 +2*x*r - na/n
p <- uniroot(f2,c(0,1))$root
q <- 1-p-r
m <- 200
pqr <- matrix(rep(0,m*3),nrow = m)
ll <- numeric(m)
pqr[1,]<-c(p,q,r)
for (i in 2:m) {
  p <- pqr[i-1,1]
  q <- pqr[i-1,2]
  r <- pqr[i-1,3]
  naa <- p/(p+2*r)*na
  nao <- 2*r/(p+2*r)*na
  nbb <- q/(q+2*r)*nb
  nbo <- 2*r/(q+2*r)*nb
  ng <- c(naa,nbb,noo,nao,nbo,nab)  # E-step: expected complete-data genotype counts
  res <- optim(c(p,q),loglike,method="Nelder-Mead",ng=ng)  # M-step: maximize the complete-data log-likelihood
  pqr[i,]<-c(res$par,1-res$par[1]-res$par[2])
  ll[i] <- -eloglike(res$par)
}
pqr[m,]  # EM estimates of (p, q, r) after m iterations
res <- optim(c(p,q),eloglike,method="Nelder-Mead")  # direct MLE for comparison
res$par
plot(ll[-1],type = "l",ylab = "log-likelihood")
abline(h = ll[m], col = "red")

11.1.2 Exercise 3

Use both for loops and lapply() to fit linear models to the mtcars using the formulas stored in this list:

formulas <- list(mpg ~ disp,mpg ~ I(1 / disp),mpg ~ disp + wt,mpg ~ I(1 / disp) + wt)

Answer

formulas <- list(mpg ~ disp,mpg ~ I(1 / disp),mpg ~ disp + wt,mpg ~ I(1 / disp) + wt)
out1<-vector("list",length(formulas))
for (i in 1:length(formulas)) {
  out1[[i]]<-lm(formulas[[i]],data=mtcars)
}
out2<-lapply(formulas,lm,data=mtcars)
out1
out2

11.1.2 Exercise 4

Fit the model mpg ~ disp to each of the bootstrap replicates of mtcars in the list below by using a for loop and lapply(). Can you do it without an anonymous function?

bootstraps <- lapply(1:10, function(i) { rows <- sample(1:nrow(mtcars), rep = TRUE) mtcars[rows, ] })

Answer

bootstraps<-lapply(1:10, function(i) {
  rows <- sample(1:nrow(mtcars), rep = TRUE)
  mtcars[rows, ]
  }
)
out3<-vector("list",10)
for (i in 1:10) {
  out3[[i]]<-lm(mpg~disp,data=bootstraps[[i]])
}
out4<-lapply(bootstraps,lm,formula=mpg~disp)
out3
out4

11.1.2 Exercise 5

For each model in the previous two exercises, extract R2 using the function below.

rsq <- function(mod) summary(mod)$r.squared

Answer

rsq <- function(mod) summary(mod)$r.squared
lapply(out1,rsq)
lapply(out3,rsq)

11.2.5 Exercise 3

The following code simulates the performance of a t-test for non-normal data. Use sapply() and an anonymous function to extract the p-value from every trial.

trials<-replicate(100, t.test(rpois(10, 10), rpois(7, 10)), simplify = FALSE)

Extra challenge: get rid of the anonymous function by using [[ directly.

Answer

trials<-replicate(100, t.test(rpois(10, 10), rpois(7, 10)), simplify = FALSE)
p_value<-sapply(trials, function(x) x$p.value)
p_value2<-sapply(trials, '[[',3)
p_value
p_value2

11.2.5 Exercise 7

Implement mcsapply(), a multicore version of sapply(). Can you implement mcvapply(), a parallel version of vapply()? Why or why not?
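
Answer

A sketch, assuming the base parallel package (forking via mc.cores > 1 is not available on Windows):

library(parallel)
mcsapply <- function(X, FUN, ..., mc.cores = 2L) {
  res <- mclapply(X, FUN, ..., mc.cores = mc.cores)  # parallel lapply()
  simplify2array(res)                                # the simplification step sapply() applies
}
mcsapply(1:4, function(i) i^2)  # usage example

A true mcvapply() cannot be assembled the same way: vapply() performs its FUN.VALUE type checking inside its C implementation while the results are being produced, so there is no user-level hook to parallelize. The closest substitute is to run mclapply() and verify the results against the template afterwards.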

Question

You have already written an R function for Exercise 9.4 (page 277, Statistical Computing with R). Rewrite an Rcpp function for the same task.

Compare the generated random numbers by the two functions using qqplot.

Campare the computation time of the two functions with microbenchmark.

Answer

library(Rcpp)
library(microbenchmark)
library(GeneralizedHyperbolic)
cppFunction('List Cpplaplace(double Sigma, int N){
  NumericVector x(N);
  int k = 0;
  x[0] = as<double>(rnorm(1, 0, Sigma));  // start the chain near the mode
  for(int i = 1; i < N; i++){
    double y = as<double>(rnorm(1, x[i-1], Sigma));
    double u = as<double>(runif(1));
    // standard Laplace target: f(x) proportional to exp(-|x|)
    if (u <= exp(-fabs(y)) / exp(-fabs(x[i-1]))) {
      x[i] = y;
      k = k + 1;  // count acceptances
    }
    else x[i] = x[i-1];
  }
  return(List::create(Named("x") = x, Named("k") = k));
}')
Laplace.Metropolis<-function(sigma,N){
  x<-numeric(N)
  x[1]<-rnorm(1,0,sigma)
  u<-runif(N)
  k<-0
  for (i in 2:N) {
    y<-rnorm(1,x[i-1],sigma)
    if(u[i]<=dskewlap(y)/dskewlap(x[i-1])){
      x[i]=y
      k=k+1
    }
    else
      x[i]=x[i-1]
  }
  return(list(x=x,k=k))
}
N<-10000
sigma<-c(1,4,9,16)
set.seed(1234)
laplace1<-Laplace.Metropolis(sigma[1],N)
laplace2<-Laplace.Metropolis(sigma[2],N)
laplace3<-Laplace.Metropolis(sigma[3],N)
laplace4<-Laplace.Metropolis(sigma[4],N)
Claplace1<-Cpplaplace(sigma[1],N)
Claplace2<-Cpplaplace(sigma[2],N)
Claplace3<-Cpplaplace(sigma[3],N)
Claplace4<-Cpplaplace(sigma[4],N)
par(mar=c(1,1,1,1))
qqplot(laplace1$x,Claplace1$x)
qqplot(laplace2$x,Claplace2$x)
qqplot(laplace3$x,Claplace3$x)
qqplot(laplace4$x,Claplace4$x)
compare1<-microbenchmark(Laplace.Metropolis(sigma[1],N),Cpplaplace(sigma[1],N)) 
compare2<-microbenchmark(Laplace.Metropolis(sigma[2],N),Cpplaplace(sigma[2],N))
compare3<-microbenchmark(Laplace.Metropolis(sigma[3],N),Cpplaplace(sigma[3],N))
compare4<-microbenchmark(Laplace.Metropolis(sigma[4],N),Cpplaplace(sigma[4],N))
summary(compare1)[c(1,3,5,6)]
summary(compare2)[c(1,3,5,6)]
summary(compare3)[c(1,3,5,6)]
summary(compare4)[c(1,3,5,6)]

