## -----------------------------------------------------------------------------
set.seed(0)
n<-20
a1<-vector()
a2<-vector()
a3<-vector()#produce 3 empty vector to store the figures
#uniform
a1<-runif(n,1,2)
#normal
a2<-rnorm(n,1,4)
#exponential
a3<-rexp(n,rate = 1)
a4<-list(a1,a2,a3)#put all the figures into a list names a4
names(a4)<-c("a1","a2","a3")#name the elements of the list in order to read more convenient
a4
## -----------------------------------------------------------------------------
library(graphics)
x<-1:n
plot(x,y=a1,type="p",main = "Uniform",xlab = "Number",ylim = c(min(a1)-1,max(a1)+1))#picture of Uniform
lines(x,a1,type="l",col=1)#black
plot(x,y=a2,type="p",main = "Normal",xlab = "Number",ylim = c(min(a2)-1,max(a2)+1))#picture of Normal
lines(x,a2,type="l",col=2)#red
plot(x,y=a3,type="p",main = "Exponential",xlab = "Number",ylim = c(min(a3)-1,max(a3)+1))#picture of Exponential
lines(x,a3,type="l",col=3)#green
## -----------------------------------------------------------------------------
a_1<-sample(a1,size = 10,replace = FALSE)
a_2<-sample(a2,size = 10,replace = FALSE)
a_3<-sample(a3,size = 10,replace = FALSE)
a_4<-list(a_1,a_2,a_3)
names(a_4)<-c("a_1","a_2","a_3")
a_4
## -----------------------------------------------------------------------------
data<-data.frame(unif = a_1,norm = a_2, exp = a_3)
data
knitr::kable (data)#produce a table
#write.table(data,'A-21038-2021-09-16.txt',quote = F,row.names = F)
## -----------------------------------------------------------------------------
apply(data,2,mean)
apply(data,2,var)
## -----------------------------------------------------------------------------
set.seed(0)
n<-1000
u<-runif(n)# generate the samples of uniform
sigma<-c(1,2,3,4)
for(i in 1:4){
x<-sqrt(-2*sigma[i]**2*log(1-u)) # generate samples for each sigma
hist(x,prob=T,main=paste("The value of sigma=",sigma[i]))# plot the histogram of each sigma
y<-seq(0,15,.01)
lines(y,y/(sigma[i]**2)*exp(-y**2/(2*sigma[i]**2)))# plot the pdf line
}
## -----------------------------------------------------------------------------
set.seed(1)# fix the random values of samples
n<-1e3# the number of samples
X1<-rnorm(n)# N(0,1)
X2<-rnorm(n,3,1)# N(3,1)
r<-sample(c(0,1),n,replace = T,prob=c(0.25,0.75))
Z<-r*X1+(1-r)*X2
hist(Z,prob=T)# show the density of Z
y<-seq(-8,8,0.01)
lines(y,0.75*dnorm(y,0,1)+(1-0.75)*dnorm(y,3,1))# draw a line show the mixture of X1 and X2
## -----------------------------------------------------------------------------
p1<-seq(0,1,0.05)
k<-length(p1)# the number of the element of p1
for(i in 1:k){
r<-sample(c(0,1),n,replace = T,prob=c(1-p1[i],p1[i]))
Z<-r*X1+(1-r)*X2
hist(Z,prob=T,main = paste("p1 = ",p1[i]))
y<-seq(-6,6,0.01)
lines(y,p1[i]*dnorm(y,0,1)+(1-p1[i])*dnorm(y,3,1))# draw lines in every picture
}
## -----------------------------------------------------------------------------
set.seed(2)
n<-1e4
N<-rpois(n,10)# generate Poisson distribution which lambda = 10
X<-vector()
for(i in 1:n){
Y<-rgamma(N[i],4,2)# generate y according to N(t)
X[i]<-sum(Y)
}
mean(X)# expectation
var(X)# var
## -----------------------------------------------------------------------------
set.seed(3)
n<-1e4
N<-rpois(n,20)# generate Poisson distribution which lambda = 20
X<-vector()
for(i in 1:n){
Y<-rgamma(N[i],9,3)# generate y according to N(t)
X[i]<-sum(Y)
}
mean(X)# expectation
var(X)# var
## -----------------------------------------------------------------------------
set.seed(4)
n<-1e4
N<-rpois(n,30)# generate Poisson distribution which lambda = 30
X<-vector()
for(i in 1:n){
Y<-rgamma(N[i],16,4)# generate y according to N(t)
X[i]<-sum(Y)
}
mean(X)# expectation
var(X)# var
## -----------------------------------------------------------------------------
#define a function called mybeta to estimate the cdf of beta distribution, and the parameters alpha and beta should be given as whatever you need.
mybeta<-function(x,alpha,beta){
n<-1e4 # the number we generate how many samples
U<-runif(n,0,x)
value<-mean(30*x*U**(alpha-1)*(1-U)**(beta-1))
value
}
## -----------------------------------------------------------------------------
alpha<-3;beta<-3
X<-0.1*c(1:9)
m<-length(X)
pvalue1<-vector()# the value of F(x) which are estimated by mento carlo
pvalue2<-vector()# the value of pbeta
for(i in 1:m){
pvalue1[i]<-mybeta(X[i],alpha = alpha,beta = beta)
}
for(i in 1:m){
pvalue2[i]<-pbeta(X[i],alpha,beta)
}
compare<-matrix(c(pvalue1,pvalue2,pvalue1-pvalue2),ncol=3)# compare the pvalue generated by different method
colnames(compare)<-c('pvalue1','pvalue2','difference')
compare
## -----------------------------------------------------------------------------
myrayleigh<-function(n,sigma,optimal=c(1,2)){
if(optimal==1){
x<-vector()
u<-runif(n)
x<-sqrt(-2*sigma**2*log(1-u))
}
if(optimal==2){
x<-vector()
u<-runif(n)
x<-(sqrt(-2*sigma**2*log(1-u))+sqrt(-2*sigma**2*log(u)))/2
}
x
}
## -----------------------------------------------------------------------------
m<-1e4
#variance of (X+X')/2
v1<-var(myrayleigh(m,1,2))
#variance of (X1+X2)/2
X1<-myrayleigh(m,1,1)
X2<-myrayleigh(m,1,1)
cov<-cov(X1,X2)# verify the indenpedence
v2<-var((X1+X2)/2)
print(paste("X1 and X2 are",ifelse(cov<0.01&&cov>-0.01,'independent','dependent')))
print(paste('the value of v1 is ',v1))
print(paste('the value of v2 is ',v2))
print(paste('the percent reduction is ',(v2-v1)/v2))
## -----------------------------------------------------------------------------
x<-seq(1,10,0.01)
y<-seq(1,10,0.01)
plot(x,y,type="n",ylim=c(0,4))
lines(x,x/(2*exp(1)*sqrt(2*pi))*exp(x^2/2),col=2)
lines(x,x/sqrt(2*exp(1)*pi),col=3)
legend(c(8,9),c(3,4),c('g1','g2'),c(2,3))
## -----------------------------------------------------------------------------
f1_i<-function(n){# inverse function of f1
x<-vector()
u<-runif(n)
x<-sqrt(1-log(1-u))
x
}
g1<-function(x){# function g1
y<-vector()
n<-length(x)
for(i in 1:n){
y[i]<-x[i]/(2*exp(1)*sqrt(2*pi))*exp(x[i]**2/2)
}
y
}
n<-1e4
v1<-var(g1(f1_i(n)))/n
v1
## -----------------------------------------------------------------------------
f2_i<-function(n){# inverse function of f2
x<-vector()
u<-runif(n)
x<-sqrt(1-2*log(1-u))
x
}
g2<-function(x){# function g2
y<-vector()
n<-length(x)
for(i in 1:n){
y[i]<-x[i]/sqrt(2*exp(1)*pi)
}
y
}
n<-1e4
v2<-var(g2(f2_i(n)))/n
v2
## -----------------------------------------------------------------------------
f1_i<-function(n){# inverse function of f1
x<-vector()
u<-runif(n)
x<-sqrt(1-log(1-u))
x
}
g1<-function(x){# function g1
y<-vector()
n<-length(x)
for(i in 1:n){
y[i]<-x[i]/(2*exp(1)*sqrt(2*pi))*exp(x[i]**2/2)
}
y
}
n<-1e4
value1<-mean(g1(f1_i(n)))
value1
## -----------------------------------------------------------------------------
f2_i<-function(n){# inverse function of f
x<-vector()
u<-runif(n)
x<-sqrt(1-2*log(1-u))
x
}
g2<-function(x){# function g2
y<-vector()
n<-length(x)
for(i in 1:n){
y[i]<-x[i]/sqrt(2*exp(1)*pi)
}
y
}
n<-1e4
value2<-mean(g2(f2_i(n)))
value2
## -----------------------------------------------------------------------------
# t-interval
n<-20# the number of sample
alpha<-0.05
UCL<-matrix(nrow=1000,ncol=2)
# chi-squre
for(i in 1:1000){# we will sample 1000 times
x<-rchisq(n,df=2)
UCL[i,1]<-mean(x)-sd(x)*qt(1-alpha/2,n-1)/sqrt(n)# the lower
UCL[i,2]<-mean(x)+sd(x)*qt(1-alpha/2,n-1)/sqrt(n)# the upper
}
head(UCL)# observe some interval
sum<-0# initial sum=0
for(i in 1:1000){
if(UCL[i,1]<2&&UCL[i,2]>2){
sum<-sum+1
}
else{
sum<-sum
}
}# if the mean is not in the interval, then the sum will not change, otherwise it will plus 1
print("the total sum and the coverage probablity:")
sum# the total number which means the mean of the distribution is in the interval
sum/1000
# Normal
for(i in 1:1000){# we will sample 1000 times
x<-rnorm(n,0,2)
UCL[i,1]<-mean(x)-sd(x)*qt(1-alpha/2,n-1)/sqrt(n)# the lower
UCL[i,2]<-mean(x)+sd(x)*qt(1-alpha/2,n-1)/sqrt(n)# the upper
}
head(UCL)# observe some interval
sum<-0# initial sum=0
for(i in 1:1000){
if(UCL[i,1]<0&&UCL[i,2]>0){
sum<-sum+1
}
else{
sum<-sum
}
}# if the mean is not in the interval, then the sum will not change, otherwise it will plus 1
print("the total sum and the coverage probablity:")
sum# the total number which means the mean of the distribution is in the interval
sum/1000
## -----------------------------------------------------------------------------
# interval for variance
# Normal
n<-20
alpha<-0.05
UCL<-replicate(1000,expr={
x<-rnorm(n,0,2)
(n-1)*var(x)/qchisq(alpha,df=n-1)
})
mean(UCL>4)
# chi-square
UCL<-replicate(1000,expr={
x<-rchisq(n,df=2)
(n-1)*var(x)/qchisq(alpha,df=n-1)
})
mean(UCL>4)
## -----------------------------------------------------------------------------
# write a function to solve this problem, and this function can generate Type I error rate of these three distributions
power<-function(n,alpha,mu0,m,optimal=c(1,2,3)){
p<-numeric(m)
if(optimal==1){
for(i in 1:m){
x<-rchisq(n,1)
ttest<-t.test(x,mu=mu0)
p[i]<-ttest$p.value
}
}
if(optimal==2){
for(i in 1:m){
x<-runif(n,0,2)
ttest<-t.test(x,mu=mu0)
p[i]<-ttest$p.value
}
}
if(optimal==3){
for(i in 1:m){
x<-rexp(n,1)
ttest<-t.test(x,mu=mu0)
p[i]<-ttest$p.value
}
}
p.hat<-mean(p<alpha)
se.hat<-sqrt(p.hat*(1-p.hat)/m)
print(c(p.hat,se.hat))
}
## -----------------------------------------------------------------------------
set.seed(1)
n<-20
alpha<-0.05
mu0<-1
m<-1000
#chi-square
print("The result of chi-square")
power(n,alpha,mu0,m,1)
#uniform
print("The result of uniform")
power(n,alpha,mu0,m,2)
#exponential
print("The result of exponential")
power(n,alpha,mu0,m,3)
## ---- echo=FALSE--------------------------------------------------------------
data<-data.frame(l=c(6510,6760,13280),m=c(3490,3240,6730),n=c(10000,10000,20000))
rownames(data)<-c("method_1","method_2","total number")
colnames(data)<-c(paste("p<0.05",' '),paste(' ',"p>0.05"),"total number")
knitr::kable (data)#produce a table
## ---- echo=FALSE--------------------------------------------------------------
20000*(6510*3240-6760*3490)^2/(13280*6730*10000^2)
qchisq(0.95,1)
## -----------------------------------------------------------------------------
sk<-function(data,alpha){
if(is.matrix(data)==1){# whether the formula of the data is matrix
d<-ncol(data)# dim
n<-nrow(data)# number of data
cv<-qchisq(1-alpha,d*(d+1)*(d+2)/6)# quantil
colmean<-apply(data,2,mean)# the mean of every col
sigma<-(n-1)*cov(data)/n# the maximum likelihood of corvariance
b<-t(t(data)-colmean)%*%solve(sigma)%*%(t(data)-colmean)
b_1d<-sum(b^{3})/n^2
test<-n*b_1d/6
as.integer(test>=cv)
}
else{
print("Sorry that the data is not a matrix")
}
}
## -----------------------------------------------------------------------------
library(MASS)
set.seed(1)
alpha<-0.05
mu <- c(0,0)
sigma <- matrix(c(1,0,0,1),nrow=2,ncol=2)
m<-5000
N<-c(10, 20, 30, 50, 100, 500)
#m: number of replicates; n: sample size
p.reject<-numeric(length(N))
num.reject<-numeric(m)
for(i in 1:length(N)){
for(j in 1:m){
data<-mvrnorm(N[i],mu,sigma)
num.reject[j]<-sk(data,alpha)
}
p.reject[i]<-mean(num.reject)
}
p.reject
## -----------------------------------------------------------------------------
library(MASS)
alpha<-0.05
n<-30
m<-2500
epsilon<-c(seq(0,0.15,0.01),seq(0.15,1,0.05))
N<-length(epsilon)
power<-numeric(N)
mu<-c(0,0)
sd1<-matrix(c(1,0,0,1),nrow=2)
sd2<-10*sd1
for(i in 1:N){# for each epsilon
e<-epsilon[i]
sktests<-numeric(m)
for(j in 1:m){# for each replicate
c<-sample(c(0,1),replace = T,size=n,prob=c(1-e,e))
data<-(1-c)*mvrnorm(n,mu,sd1)+c*mvrnorm(n,mu,sd2)
sktests[j]<-sk(data,alpha)
}
power[i]<-mean(sktests)
}
plot(epsilon, power, type = "b",xlab = bquote(epsilon), ylim = c(0,1))
abline(h = .05, lty = 3)
se <- sqrt(power * (1-power) / m) #add standard errors
lines(epsilon, power+se, lty = 3)
lines(epsilon, power-se, lty = 3)
## -----------------------------------------------------------------------------
library(bootstrap)
n<-nrow(scor)
sigma_hat<-(n-1)*cov(scor)/n# MLE of sigma_hat
lambda_hat<-eigen(sigma_hat)$values
theta_hat<-lambda_hat[1]/sum(lambda_hat)
theta_hat# the value of theta_hat
## -----------------------------------------------------------------------------
set.seed(1)
B<-5000
theta_star<-numeric(B)
for(b in 1:B){
i<-sample(1:n,size = n, replace = T)
scor1<-scor[i,]
sigma_star<-(n-1)*cov(scor1)/n
lambda_star<-eigen(sigma_star)$values
theta_star[b]<-lambda_star[1]/sum(lambda_star)# the value of theta_star
}
round(c(original=theta_hat, bias=mean(theta_star)-theta_hat, se=sd(theta_star)),3)
## -----------------------------------------------------------------------------
theta_jack<-numeric(n)
for(i in 1:n){
scor2<-scor[-i,]# delete the ith row
sigma_jack<-(n-2)*cov(scor2)/(n-1)# MLE, the row number of data is n-1
lambda_jack<-eigen(sigma_jack)$values
theta_jack[i]<-lambda_jack[1]/sum(lambda_jack)
}
round(c(original=theta_hat,bias.jack=(n-1)*(mean(theta_jack)-theta_hat),
se.jack=sqrt((n-1)*mean((theta_jack-theta_hat)^2))),3)
## -----------------------------------------------------------------------------
library(boot)
set.seed(2)
theta.hat<-function(X,i){
n<-nrow(X)
lambda<-eigen((n-1)*cov(X[i,])/n)$values
theta.boot<-lambda[1]/sum(lambda)
return(theta.boot)
}
theta.obj<-boot(data=scor,statistic = theta.hat,R=5000)
round(c(original=theta.obj$t0,bias=mean(theta.obj$t)-theta.obj$t0,
se=sd(theta.obj$t)),3)
print(boot.ci(theta.obj,conf=0.95,type=c("perc","bca")))
#percentile
alpha<-c(0.025,0.975)
print("caculate teh percentile interval:")
print(quantile(theta.obj$t,alpha,type=6))
## -----------------------------------------------------------------------------
#BCa
#function boot.BCa
boot.BCa<-function(x,t0,t,stat,conf=0.95){
# bootstrap with BCa bootstrap confidence interval
# t0 is the observed statistic
# t is the vector of bootstrap replicates
# stat is the function to compute the statistic
x <- as.matrix(x)
n <- nrow(x) #observations in rows
N <- 1:n
alpha <- (1 + c(-conf, conf))/2
zalpha <- qnorm(alpha)
# the bias correction factor
z0 <- qnorm(sum(t < t0) / length(t))
# the acceleration factor (jackknife est.)
t.jack <- numeric(n)
for (i in 1:n) {
J <- N[1:(n-1)]
t.jack[i] <- stat(x[-i, ], J)
}
L <- mean(t.jack) - t.jack
a <- sum(L^3)/(6 * sum(L^2)^1.5)
# BCa conf. limits
adj.alpha <- pnorm(z0 + (z0+zalpha)/(1-a*(z0+zalpha)))
limits <- quantile(t, adj.alpha, type=6)
return(list("est"=t0, "BCa"=limits))
}
boot.BCa(scor,t0=theta.obj$t0,t=theta.obj$t,stat=theta.hat)
## -----------------------------------------------------------------------------
sk<-function(x,i){# statistic
meanx<-mean(x[i])
m3<-mean((x[i]-meanx)^3)
m2<-mean((x[i]-meanx)^2)
return(m3/m2^1.5)
}
m<-2000# number of mento carlo
n<-100# number of random variables
sktests1<-numeric(m)# normal
sktests1left<-numeric(m)# the mean < the left of normal
sktests2<-numeric(m)# basic
sktests2left<-numeric(m)# the mean < the left of basic
sktests3<-numeric(m)# percentile
sktests3left<-numeric(m)# the mean < the left of percentile
sk_norm<-0
sk_chisq<-sqrt(8/5)
for(i in 1:m){
x<-rnorm(n)
boot.obj<-boot(data=x,statistic = sk, R=2000)# bootstrap
ci<-boot.ci(boot.obj,conf=0.95,type=c("norm","basic","perc"))
interval1<-ci$normal[2:3]#normal
interval2<-ci$basic[4:5]#basic
interval3<-ci$percent[4:5]#percentile
sktests1[i]<-as.integer(sk_norm>interval1[1]&&
sk_norm<interval1[2])
sktests1left[i]<-as.integer(sk_norm<interval1[1])
sktests2[i]<-as.integer(sk_norm>interval2[1]&&
sk_norm<interval2[2])
sktests2left[i]<-as.integer(sk_norm<interval2[1])
sktests3[i]<-as.integer(sk_norm>interval3[1]&&
sk_norm<interval3[2])
sktests3left[i]<-as.integer(sk_norm<interval3[1])
}
coverage_rate1<-mean(sktests1)
coverage_rate2<-mean(sktests2)
coverage_rate3<-mean(sktests3)
left_rate1<-mean(sktests1left)
left_rate2<-mean(sktests2left)
left_rate3<-mean(sktests3left)
right_rate1<-1-mean(sktests1left)-mean(sktests1)
right_rate2<-1-mean(sktests2left)-mean(sktests2)
right_rate3<-1-mean(sktests3left)-mean(sktests3)
print("the coverage rate")
round(c(normal=coverage_rate1,basic=coverage_rate2,percentile=coverage_rate3),4)
print("the proportion of times theta the confidence intervals miss on the left")
round(c(normal=left_rate1,basic=left_rate2,percentile=left_rate3),4)
print("the proportion of times theta the confidence intervals miss on the right")
round(c(normal=right_rate1,basic=right_rate2,percentile=right_rate3),4)
## -----------------------------------------------------------------------------
m<-2000# number of mento carlo
n<-100# number of random variables
sktests1<-numeric(m)# normal
sktests1left<-numeric(m)# the mean < the left of normal
sktests2<-numeric(m)# basic
sktests2left<-numeric(m)# the mean < the left of basic
sktests3<-numeric(m)# percentile
sktests3left<-numeric(m)# the mean < the left of percentile
for(i in 1:m){
x<-rchisq(n,df=5)
boot.obj<-boot(data=x,statistic = sk, R=2000)# bootstrap
ci<-boot.ci(boot.obj,conf=0.95,type=c("norm","basic","perc"))
interval1<-ci$norm[2:3]#normal
interval2<-ci$basic[4:5]#basic
interval3<-ci$perc[4:5]#percentile
sktests1[i]<-as.integer(sk_chisq>interval1[1]&&
sk_chisq<interval1[2])
sktests1left[i]<-as.integer(sk_chisq<interval1[1])
sktests2[i]<-as.integer(sk_chisq>interval2[1]&&
sk_chisq<interval2[2])
sktests2left[i]<-as.integer(sk_chisq<interval2[1])
sktests3[i]<-as.integer(sk_chisq>interval3[1]&&
sk_chisq<interval3[2])
sktests3left[i]<-as.integer(sk_chisq<interval3[1])
}
coverage_rate1<-mean(sktests1)
coverage_rate2<-mean(sktests2)
coverage_rate3<-mean(sktests3)
left_rate1<-mean(sktests1left)
left_rate2<-mean(sktests2left)
left_rate3<-mean(sktests3left)
right_rate1<-1-mean(sktests1left)-mean(sktests1)
right_rate2<-1-mean(sktests2left)-mean(sktests2)
right_rate3<-1-mean(sktests3left)-mean(sktests3)
print("the coverage rate")
round(c(normal=coverage_rate1,basic=coverage_rate2,percentile=coverage_rate3),4)
print("the proportion of times theta the confidence intervals miss on the left")
round(c(normal=left_rate1,basic=left_rate2,percentile=left_rate3),4)
print("the proportion of times theta the confidence intervals miss on the right")
round(c(normal=right_rate1,basic=right_rate2,percentile=right_rate3),4)
## -----------------------------------------------------------------------------
set.seed(1)
# generate the samples
X<-rnorm(20)
Y<-rnorm(20,100,1)
R<-999 # number of replicates
Z<-c(X,Y)# pooled sample
k<-length(Z)
K<-1:k
reps<-numeric(R)
cor0<-cor(X,Y,method = "spearman")
for(i in 1:R){
m<-sample(K,size=20,replace = F)
X1<-Z[m]
Y1<-Z[-m]
reps[i]<-cor(X1,Y1,method = "spearman")
}
p<-mean(c(cor0,reps)>=cor0)
p
## -----------------------------------------------------------------------------
cor.test(X1,Y1,method = "spearman")
## -----------------------------------------------------------------------------
library(RANN)
library(energy)
library(Ball)
library(boot)
# Tn function
Tn <- function(z, ix, sizes,k){
n1 <- sizes[1]; n2 <- sizes[2]; n <- n1 + n2
if(is.vector(z)) z <- data.frame(z,0);
z <- z[ix, ];
NN <- nn2(data=z, k=k+1) # what's the first column?
block1 <- NN$nn.idx[1:n1,-1]
block2 <- NN$nn.idx[(n1+1):n,-1]
i1 <- sum(block1 < n1 + .5); i2 <- sum(block2 > n1+.5)
(i1 + i2) / (k * n)
}
# power for NN
eqdist.nn <- function(z,sizes,k){
boot.obj <- boot(data=z,statistic=Tn,R=R,
sim = "permutation", sizes = sizes,k=k)
ts <-c(boot.obj$t0,boot.obj$t)
p.value <- mean(ts>=ts[1])
list(statistic=ts[1],p.value=p.value)
}
## -----------------------------------------------------------------------------
library(MASS)
m <- 1e2; k<-3; set.seed(2)
n1 <- n2 <- 20; R<-999; n <- n1+n2; N = c(n1,n2)
mu1<-c(0,0,0)
sigma1<-matrix(c(1,0,0,0,1,0,0,0,1),ncol=3)
mu2<-c(0,0,0)
sigma2<-matrix(c(2,0,0,0,4,0,0,0,6),ncol=3)
p.values <- matrix(NA,m,3)
for(i in 1:m){
x <-mvrnorm(n1,mu1,sigma1) ;
y <-mvrnorm(n2,mu2,sigma2) ;
z <- rbind(x,y)
p.values[i,1] <- eqdist.nn(z,N,k)$p.value
p.values[i,2] <- eqdist.etest(z,sizes=N,R=R)$p.value
p.values[i,3] <- bd.test(x=x,y=y,num.permutations=999,seed=i*2)$p.value
}
alpha <- 0.05;
pow <- colMeans(p.values<alpha)
pow
## -----------------------------------------------------------------------------
library(MASS)
m <- 1e2; k<-3; set.seed(2)
n1 <- n2 <- 20; R<-999; n <- n1+n2; N = c(n1,n2)
mu1<-c(0,0,0)
sigma1<-matrix(c(1,0,0,0,1,0,0,0,1),ncol=3)
mu2<-c(0.5,-0.5,0.5)
sigma2<-matrix(c(2,0,0,0,2,0,0,0,2),ncol=3)
p.values <- matrix(NA,m,3)
for(i in 1:m){
x <-mvrnorm(n1,mu1,sigma1) ;
y <-mvrnorm(n2,mu2,sigma2) ;
z <- rbind(x,y)
p.values[i,1] <- eqdist.nn(z,N,k)$p.value
p.values[i,2] <- eqdist.etest(z,sizes=N,R=R)$p.value
p.values[i,3] <- bd.test(x=x,y=y,num.permutations=999,seed=i*2)$p.value
}
alpha <- 0.05;
pow <- colMeans(p.values<alpha)
pow
## -----------------------------------------------------------------------------
m <- 1e2; k<-3;p<-2; set.seed(12345)
n1 <- n2 <- 20; R<-999; n <- n1+n2; N = c(n1,n2)
p.values <- matrix(NA,m,3)
for(i in 1:m){
x <- matrix(rt(n1*p,df=1,ncp =10),ncol=p);
y <- matrix(rt(n2*p,df=1,ncp = 20),ncol=p);
z <- rbind(x,y)
p.values[i,1] <- eqdist.nn(z,N,k)$p.value
p.values[i,2] <- eqdist.etest(z,sizes=N,R=R)$p.value
p.values[i,3] <- bd.test(x=x,y=y,num.permutations=999,seed=i*12345)$p.value
}
alpha <- 0.05;
pow <- colMeans(p.values<alpha)
pow
## -----------------------------------------------------------------------------
bimodel<-function(r,n){
p<-sample(c(0,1),n,replace = T,prob = c(1-r,r))
x<-p*rnorm(n)+(1-p)*rnorm(n,0,2)
return(x)
}
m <- 1e2; k<-3;p<-2; set.seed(12345)
n1 <- n2 <- 20; R<-999; n <- n1+n2; N = c(n1,n2)
p.values <- matrix(NA,m,3)
for(i in 1:m){
x <- matrix(bimodel(0.85,n1*p),ncol=p);
y <- matrix(bimodel(0.15,n2*p),ncol=p);
z <- rbind(x,y)
p.values[i,1] <- eqdist.nn(z,N,k)$p.value
p.values[i,2] <- eqdist.etest(z,sizes=N,R=R)$p.value
p.values[i,3] <- bd.test(x=x,y=y,num.permutations=999,seed=i*12345)$p.value
}
alpha <- 0.05;
pow <- colMeans(p.values<alpha)
pow
## -----------------------------------------------------------------------------
library(MASS)
m <- 1e2; k<-3; set.seed(2)
n1 <-10 ;n2 <- 100; R<-999; n <- n1+n2; N = c(n1,n2)
mu1<-c(0,0,0)
sigma1<-matrix(c(1,0,0,0,1,0,0,0,1),ncol=3)
mu2<-c(0,0,0)
sigma2<-matrix(c(4,0,0,0,4,0,0,0,8),ncol=3)
p.values <- matrix(NA,m,3)
for(i in 1:m){
x <-mvrnorm(n1,mu1,sigma1) ;
y <-mvrnorm(n2,mu2,sigma2) ;
z <- rbind(x,y)
p.values[i,1] <- eqdist.nn(z,N,k)$p.value
p.values[i,2] <- eqdist.etest(z,sizes=N,R=R)$p.value
p.values[i,3] <- bd.test(x=x,y=y,num.permutations=999,seed=i*2)$p.value
}
alpha <- 0.05;
pow <- colMeans(p.values<alpha)
pow
## -----------------------------------------------------------------------------
#function
metropolis<-function(sigma,x0,N,k){
set.seed(k)
x<-numeric(N)
x[1]<-x0
u<-runif(N)
t<-0
for(i in 2:N){
y<-rnorm(1,x[i-1],sigma)
if(u[i]<=(dcauchy(y,0,1)/dcauchy(x[i-1],0,1))){
x[i]<-y
}
else{
x[i]<-x[i-1]
t<-t+1
}
}
return(list(x=x,t=t))
}
#trial
N<-1e4
sigma<-c(1,2,3,4)
x0<-0
m1<-metropolis(sigma[1],x0,N,1)
m2<-metropolis(sigma[2],x0,N,2)
m3<-metropolis(sigma[3],x0,N,3)
m4<-metropolis(sigma[4],x0,N,4)
#compare the deciles
a<-seq(0.1,0.9,0.1)
Q<-qcauchy(a,0,1)#the standard cauchy distribution
m<-cbind(m1$x,m2$x,m3$x,m4$x)
mc<-m[1001:N,]
Qm<-apply(mc,2,function(x) quantile(x,a))
knitr::kable(round(cbind(Q=Q,m1=Qm[,1],m2=Qm[,2],m3=Qm[,3],m4=Qm[,4]),3))
## -----------------------------------------------------------------------------
a<-1
b<-5
n<-10
N<-10000 # length of chain
burn<-1000 # burn-in length
Z<-matrix(0,N,2)
Z[1,]<-c(1,0.1)# initial
for(i in 2:N){
y<-Z[i-1,2]
Z[i,1]<-rbinom(1,n,y)
x<-Z[i,1]
Z[i,2]<-rbeta(1,x+a,n-x+b)
}
b<-burn+1
z<-Z[b:N,]# the chain
colMeans(z)
cov(z)
cor(z)
plot(z,main="",cex=0.5,xlab = bquote(X),ylab = bquote(Y),ylim = range(z[,2]))
## -----------------------------------------------------------------------------
#Gelman.Rubin convergence
Gelman.Rubin <- function(psi) {
psi <- as.matrix(psi)
n <- ncol(psi)
t <- nrow(psi)
psi.means <- rowMeans(psi)
B <- n * var(psi.means)
psi.w <- apply(psi, 1, "var")
W <- mean(psi.w)
v.hat <- W*(n-1)/n + (B/n)
r.hat <- v.hat / W
return(r.hat)
}
## -----------------------------------------------------------------------------
#trial
set.seed(1)
x0<-c(-20,-10,10,20)# initial
k<-4# number of chains
n<-15000# length of chains
b<-10# burn-in length
u<-runif(n)
X<-matrix(0,nrow=k,ncol = n)
rhat<-rep(0,n)
X[,1]<-x0
for(j in 2:n){
for(i in 1:k){
y<-rcauchy(1,X[i,j-1],6)
if(u[j]<=(dcauchy(y,0,1)/dcauchy(X[i,j-1],0,1))){
X[i,j]<-y
}
else{
X[i,j]<-X[i,j-1]
}
}
psi <- t(apply(X[,1:j], 1, cumsum))
for (m in 1:nrow(psi)){
psi[m,] <- psi[m,] / (1:ncol(psi))
}
rhat[j]<-Gelman.Rubin(psi)
if(rhat[j]<=1.2){
count<-j
print(count)
break
}
}
plot(rhat[(b+1):count], type="l", xlab="", ylab="R")
abline(h=1.2, lty=2)
## -----------------------------------------------------------------------------
a<-5
b<-5
n<-20
k<-4
N<-5000 # length of chain
burn<-300 # burn-in length
set.seed(0)
X<-matrix(0,nrow=k,ncol=N)
Y<-matrix(0,nrow=k,ncol=N)
X[,1]<-c(1,2,3,4)
Y[,1]<-c(0.1,0.2,0.3,0.4)
rhatx<-rep(0,N)
rhaty<-rep(0,N)
for(j in 2:N){
for(i in 1:k){
y<-Y[i,j-1]
X[i,j]<-rbinom(1,n,y)
x<-X[i,j]
Y[i,j]<-rbeta(1,x+a,n-x+b)
}
psi1 <- t(apply(X[,1:j], 1, cumsum))
for (m in 1:nrow(psi1)){
psi1[m,] <- psi1[m,] / (1:ncol(psi1))
}
psi2 <- t(apply(Y[,1:j], 1, cumsum))
for (m in 1:nrow(psi2)){
psi2[m,] <- psi2[m,] / (1:ncol(psi2))
}
rhatx[j]<-Gelman.Rubin(psi1)
rhaty[j]<-Gelman.Rubin(psi2)
if(rhatx[j]<=1.2&&rhaty[j]<=1.2){
count<-j
print(count)
break
}
}
## -----------------------------------------------------------------------------
plot(rhatx[(burn+1):count], type="l", xlab="", ylab="R",ylim=c(1.15,2.2),xlim=c(0,1800))
lines(rhaty[(burn+1):count],type="l",col="red")
abline(h=1.2, lty=2)
## -----------------------------------------------------------------------------
kth<-function(a,i){
d<-length(a)
if(i==0){
b<-sum(a^2)*exp(lgamma((d+1)/2)+lgamma(0+3/2)-lgamma(0+d/2+1))/2 # i=0
}
if(i>=1){
b<-(-1)^i*exp((i+1)*log(sum(a^2))-sum(log(1:i))-i*log(2)-log(2*i+1)-log(2*i+2)) *
exp(lgamma((d+1)/2)+lgamma(i+3/2)-lgamma(i+d/2+1))
}
return(b)
}
kth(c(1,2),0);kth(c(1,2),1)
## -----------------------------------------------------------------------------
Sum<-function(a,n){
d<-length(a)
b<-kth(a,0)
if(n==0){
return(b)
}
if(n>=1){
for(i in 1:n){
b<-b+kth(a,i)
}
return(b)
}
}
Sum(c(1,2),10);Sum(c(1,2),100)
## -----------------------------------------------------------------------------
N<-10000 # the total number of trials
a<-c(1,2)
value<-numeric(N)
value[1]<-Sum(a,0)
value[2]<-Sum(a,1)
for(k in 1:N){
value[k+1]<-Sum(a,k)
if(abs(value[k+1]-value[k])<=0.000001){
print(value[k+1])
break
}
}
## -----------------------------------------------------------------------------
ck<-function(a,k){
return(sqrt(a^2*k/(k+1-a^2)))
}
solve<-function(a){
nu<-1/2*log(k)+2*lgamma(k/2)+log(integrate(function(u){(1+u^2/(k-1))^(-k/2)},0,ck(a,k-1))$value)
de<-1/2*log(k-1)+lgamma((k-1)/2)+lgamma((k+1)/2)+
log(integrate(function(u){(1+u^2/k)^(-(k+1)/2)},0,ck(a,k))$value)
return(nu-de)
}
## -----------------------------------------------------------------------------
m<-c(4:25,100,500,1000)
solution<-numeric(length(m))
for(i in 1:length(m)){
k<-m[i]
if(k<=25){
a<-seq(0.01,sqrt(k)-0.01,0.01)
}
if(k>25){
a<-seq(0.01,4,0.01)
}
y<-numeric(length(a))
for(j in 1:length(a)){
y[j]<-solve(a[j])
}
plot(a,y,type="l",main=bquote(k==.(m[i])))
abline(h=0,col="red")
}
## -----------------------------------------------------------------------------
for(i in 1:length(m)){
k<-m[i]
solution[i]<-uniroot(function(a){solve(a)},lower=1,upper=2)$root
}
round(solution,4)
## -----------------------------------------------------------------------------
solve2<-function(k){
value<-uniroot(function(a){pt(ck(a,k),df=k)-pt(ck(a,k-1),df=k-1)},lower=1,upper=2)
value$root
}
for(i in 1:length(m)){
k<-m[i]
solution[i]<-solve2(k)
}
round(solution,4)
## -----------------------------------------------------------------------------
Y<-c(0.54, 0.48, 0.33, 0.43, 1.00, 1.00, 0.91, 1.00, 0.21, 0.85)
lambda<-0.375# initial
N<-10000
L<-numeric(N)
L[1]<-lambda
for(i in 2:N){
L[i]<-(3*L[i-1]+sum(Y))/10 # for the value of 3+Y_1+\dots+Y_4+Y_7+Y_9+Y_{10} equals to sum(Y)
if(abs(L[i]-L[i-1])<=0.0000001){
print(L[i])
break
}
}
## -----------------------------------------------------------------------------
set.seed(0)
trims<-c(0,0.1,0.2,0.5)
x<-rcauchy(100)
lapply(trims, function(trim) mean(x,trim=trim))
lapply(trims, mean, x=x)
## -----------------------------------------------------------------------------
attach(mtcars)
rsq<-function(mod) summary(mod)$r.squared
formulas<-list(
mpg ~ disp,
mpg ~ I(1/disp),
mpg ~ disp + wt,
mpg ~ I(1/disp) + wt
)
la<-lapply(formulas, function(x) lm(formula = x, data = mtcars))
RSQ1<-lapply(la, function(x) rsq(x))
RSQ1
## -----------------------------------------------------------------------------
bootstraps <- lapply(1:10, function(i) {
rows <- sample(1:nrow(mtcars), rep = TRUE)
mtcars[rows, ]
})
la2<-lapply(bootstraps, function(x) lm(mpg ~ disp,data = x))
RSQ2<-lapply(la2,function(x) rsq(x))
RSQ2
## -----------------------------------------------------------------------------
set.seed(1)
n<-5
data1<-data.frame(alpha=rnorm(n),beta=rexp(n,1))
vapply(data1,sd,FUN.VALUE =c(sd=0))
sd(data1$alpha);sd(data1$beta)
## -----------------------------------------------------------------------------
set.seed(1)
n<-3
data2<-data.frame(alpha=rnorm(n,1,1),beta = rbeta(n,1,1), gamma=c(1,"girl","boy"))
data2
# find the numeric column and pick them up to caculate the standard deviation
vapply(data2[,vapply(data2,is.numeric,logical(1))], sd, FUN.VALUE = c(sd=0))
sd(data2$alpha);sd(data2$beta)
## -----------------------------------------------------------------------------
library(parallel)
sapply
## -----------------------------------------------------------------------------
mcsapply<-function(X, FUN, ..., mc.preschedule = TRUE, mc.set.seed = TRUE,
mc.silent = FALSE, mc.cores = getOption("mc.cores", 2L),
mc.cleanup = TRUE, mc.allow.recursive = TRUE, affinity.list = NULL,
simplify = TRUE, USE.NAMES=TRUE) {
FUN <- match.fun(FUN)
answer <- mclapply(X, FUN, ..., mc.preschedule = TRUE, mc.set.seed = TRUE,
mc.silent = FALSE, mc.cores = getOption("mc.cores", 2L),
mc.cleanup = TRUE, mc.allow.recursive = TRUE, affinity.list = NULL)
if (USE.NAMES && is.character(X) && is.null(names(answer)))
names(answer) <- X
if (!isFALSE(simplify))
simplify2array(answer, higher = (simplify == "array"))
else answer
}
boot_df <- function(x) x[sample(nrow(x), rep = T), ]
rsquared <- function(mod) summary(mod)$r.square
boot_lm <- function(i) {
dat <- boot_df(mtcars)
rsquared(lm(mpg ~ wt + disp, data = dat))
}
n <- 1e4
system.time(sapply(1:n, boot_lm))
system.time(mcsapply(1:n,boot_lm, mc.cores = 4))
## -----------------------------------------------------------------------------
vapply
## -----------------------------------------------------------------------------
library(Rcpp)
library(StatComp21038)
## ----eval=FALSE---------------------------------------------------------------
# #include <Rcpp.h>
# using namespace Rcpp;
# // [[Rcpp::export]]
# NumericMatrix gibbsC(int N, int thin) {
# NumericMatrix mat(N, 2);
# double x = 1, y = 0.1;
# int a=1, b=5, n=10;
# for(int i = 0; i < N; i++) {
# for(int j = 0; j < thin; j++) {
# x = rbinom(1, n, y)[0];
# y = rbeta(1, x+a, n-x+b)[0];
# }
# mat(i, 0) = x;
# mat(i, 1) = y;
# }
# return(mat);
# }
## -----------------------------------------------------------------------------
gibbsR <- function(N,thin) {
mat <- matrix(nrow = N, ncol = 2)
x <- 1;y <- 0.1
a<-1;b<-5;n<-10
for (i in 1:N) {
for (j in 1:thin){
x <- rbinom(1, n, y)
y <- rbeta(1, x+a, n-x+b)
}
mat[i, ] <- c(x, y)
}
mat
}
## -----------------------------------------------------------------------------
set.seed(1)
N<-1e3
burn<-1e2
gibbc<-gibbsC(N,10)
gibbr<-gibbsR(N,10)
a<-ppoints(100)
Q11<-quantile(gibbc[(burn+1):N,1],a)
Q21<-quantile(gibbr[(burn+1):N,1],a)
qqplot(Q11,Q21,main="X",
xlab = "Rcpp",ylab="R")
abline(0,1,col="red")
Q12<-quantile(gibbc[(burn+1):N,2],a)
Q22<-quantile(gibbr[(burn+1):N,2],a)
qqplot(Q12,Q22,main="Y",
xlab = "Rcpp",ylab="R")
abline(0,1,col="red")
## -----------------------------------------------------------------------------
library(microbenchmark)
ts <- microbenchmark(gibbR=gibbsR(N,10),
gibbC=gibbsC(N,10))
summary(ts)[,c(1,2,3,4,5,6,7)]
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.