## ----eval=FALSE----------------------------------------------------------
# ts(1:10,start=1959)
# ts(1:47,frequency=12,start=c(1959,2))
# ts(1:10,frequency=4,start=c(1959,2))
# ts(matrix(rpois(36,5),12,3),start=c(1961,1),frequency=12)
## ----eval=FALSE----------------------------------------------------------
# layout(matrix(1:4,2,2))
# layout.show(4)
# layout(matrix(1:6,3,2))
# layout.show(6)
# layout(matrix(1:6,2,3))
# layout.show(6)
# m<-matrix(c(1:3,3),2,2)
# layout(m)
# layout.show(3)
# m<-matrix(1:4,2,2)
# layout(m,widths=c(1,3),heights=c(3,1))
# layout.show(4)
# m<-matrix(c(1,1,2,1),2,2)
# layout(m,widths=c(2,1),heights=c(1,2))
# layout.show(2)
# m<-matrix(0:3,2,2)
# layout(m,c(1,3),c(1,3))
# layout.show(3)
#
## ----eval=FALSE----------------------------------------------------------
# data(InsectSprays)
# InsectSprays
# aov.spray<-aov(sqrt(count)~spray,data=InsectSprays)
# aov.spray<-aov(sqrt(InsectSprays$count)~InsectSprays$spray)
# aov.spray<-aov(sqrt(InsectSprays[,1])~InsectSprays[,2])
# aov.spray
# summary(aov.spray)
# opar<-par()
# par(mfcol=c(2,2))
# plot(aov.spray)
# termplot(aov.spray,se=TRUE,partial.resid=TRUE,rug=TRUE)
## ----eval=FALSE----------------------------------------------------------
# x<-rnorm(10)
# y<-rnorm(10)
# plot(x,y)
# plot(x,y,xlab="Ten random values",ylab="Ten other values",xlim=c(-2,2),ylim=c(-2,2),pch=22,col="red",bg="yellow",bty="l",tcl=0.4,main="How to customize a plot with R",las=1,cex=1.5)
# opar<-par()
# par(bg="lightyellow",col.axis="blue",mar=c(4,4,2.5,0.25))
# plot(x,y,xlab="Ten random values",ylab="Ten other values",xlim=c(-2,2),ylim=c(-2,2),pch=22,col="red",bg="yellow",bty="l",tcl=-.25,las=1,cex=1.5)
# title("How to customize a plot with R(bis)",font.main=3,adj=1)
#
# opar<-par()
# par(bg="lightgray",mar=c(2.5,1.5,2.5,0.25))
# plot(x,y,type="n",xlab="",ylab="",xlim=c(-2,2),ylim=c(-2,2),xaxt="n",yaxt="n")
# rect(-3,-3,3,3,col="cornsilk")
# points(x,y,pch=10,col="red",cex=2)
# axis(side=1,c(-2,0,2),tcl=-0.2,labels=FALSE)
# axis(side=2,-1:1,tcl=-0.2,labels=FALSE)
# title("How to customize a plot with R(ter)",font.main=4,adj=1,cex.main=1)
# mtext("Ten random values",side=1,line=1,at=1,cex=0.9,font=3)
# mtext("Ten other values",line=0.5,at=-1.8,cex=0.9,font=3)
# mtext(c(-2,0,2),side=1,las=1,at=c(-2,0,2),line=0.3,col="blue",cex=0.9)
# mtext(-1:1,side=2,las=1,at=-1:1,line=0.2,col="blue",cex=0.9)
#
## ----eval=FALSE----------------------------------------------------------
# x<-1:30
# 1:10-1
# 1:(10-1)
# seq(1,5,0.5)
# seq(length=9,from=1,to=5)
# c(1,1.5,2,2.5,3,3.5,4,4.5,5)
# rep(1,30)
# sequence(4:5)
# sequence(c(10,5))
# gl(3,5)
# gl(3,5,length=30)
# gl(2,6,label=c("Male","Female"))
# gl(2,10)
# gl(2,1,length=20)
# gl(2,2,length=20)
# expand.grid(h=c(60,80),w=c(100,300),sex=c("Male","Female"))
#
#
## ----eval=FALSE----------------------------------------------------------
# x<-c(0,1,2,3,4)#generate vector x=(0,1,2,3,4),let x take the value of 0,1,2,3,4
# p<-c(0.1,0.2,0.2,0.2,0.3)#generate vector p=(0.1,0.2,0.2,0.2,0.3),Let x get 0,1,2,3,4 with probability of 0.1,0.2,0.2,0.2,0.3 respectively
# cp<-cumsum(p)#returns to the cumulative sums of vector p
# m<-1e3#m is set to 1000
# r<-numeric(m)
# r<-x[findInterval(runif(m),cp)+1]#find the interval containing samples
# r#list the observations of generated sample
# ct<-as.vector(table(r))#calculate the frequency column of different values of the sample
# ct/sum(ct)/p#get the ratio of frequency to probability
## ----eval=FALSE----------------------------------------------------------
# #beta(3,2) with pdf $f(x)=12x^{2}(1-x)$\qquad,0<x<1;g(x)=1,0<x<1
# n<-1e3
# j<-k<-0#both initial values of j and k are assigned to 0
# y<-numeric(n)
# while(k<n){
# u<-runif(1)#generate random numbers U~U(0,1)
# j<-j+1
# x<-runif(1)#random variate from g
#
# #when x equals 2/3,the function $f(x)=12x^{2}(1-x)$\qquad attain the maximum 16/9.Due to c can't be too large, or the effect will be very low,thus the value of c is set to be the maximum of f(x).In this case, the number of iterations will be reduced to 1804.
#
# if(27/4*x*x*(1-x)>u){ #we accept x when U<=f(Y)/(c*g(Y)),c=16/9
# k<-k+1
# y[k]<-x
# }
# }
# y#list the generated random sample of size 1000 from the Beta(3,2) distribution
# j#(experiments)for n random numbers
# hist(y,prob=TRUE,main=expression(f(x)==12*x^2*(1-x)))#Graph the histogram of the sample
# y<-seq(0,1,0.01)
# lines(y,12*y^2*(1-y),col="red",lwd=1.5)# add the theoretical Beta(3,2) density line
## ----eval=FALSE----------------------------------------------------------
# n<-1e3#assign n to 1000 to generate 1000 random observations
# r<-4#set the shape parameter r to 4
# beta<-2#set scale parameter beta to 4
# lambda<-rgamma(n,r,beta)#generate variable lambda subject to gamma distribution whose shape parameter is r,and scale parameter is beta
# #generate 1000 random observations from this mixture
# x<-rexp(n,rate=lambda)#generation for n observations subject to the exponential distribution with rate lambda
# x#list the generated 1000 random observations
## ----eval=FALSE----------------------------------------------------------
# Betacdf <- function( x, alpha=3, beta=3) {#construct cdf of Beta(3,3) by function()
# m<-1e3
# if ( any(x < 0) ) return (0)
# stopifnot( x < 1 )#the range of x is between 0 and 1
# t <- runif( m, min=0, max=x )#generate samples from U(0,x)
# p<-(x-0)*(1/beta(alpha,beta)) * t^(alpha-1) * (1-t)^(beta-1)
# cdf<-mean(p)#compute Monte Carlo estimate of cdf
# return( min(1,cdf) )#ensure that cdf<=1
# } #end function
#
# set.seed(123)
# for (i in 1:9) {
# estimate<-Betacdf(i/10,3,3)#use function constructed to estimate F(x) when x=0.1,0.2,...,0.9
# returnedvalues<-pbeta(i/10,3,3)#obtain values returned by the pbeta function
# print( c(estimate,returnedvalues) )#compare the eatimates with returned values
# } #end for loop
#
#
## ----eval=FALSE----------------------------------------------------------
# Rayleigh_distribution<- function(sigma, n) {#construct funtions to generate samples from Rayleigh distribution
# X <- X_ <- numeric(n)
# for (i in 1:n) {
# U<- runif(n)#generate n observations from U~U(0,1)
# V<- 1-U#then V~U(0,1)
# X<- sigma*sqrt(-2*log(V))
# X_<-sigma*sqrt(-2*log(U))
# #according inverse transform from annalysis above,we obtain that $X=F_{X}^{-1}(U)=\sqrt{-2ln(1-U)}=\sqrt{-2ln(V)}$\qquad
# var1<-var(X)#if X1 and X2 are independent,then var((X1+X2)/2)=var(X)
# var2<-(var(X)+var(X_)+2*cov(X,X_))/4#compute the variance of (X+X')/2 generated by antithetic variables
# reduction <-((var1-var2)/var1)#compute the reduction in variance by using antithetic variables
# }
# percent_reduction<-paste0(format(100*reduction, format = "fg", digits = 4), "%")#set format type by function format
# return(percent_reduction)
# }
# set.seed(123)
# sigma = 1#set the value of sigma-the scale parameter of Rayleigh distribution to 1
# n <- 1000
# Rayleigh_distribution(sigma, n)
## ----eval=FALSE----------------------------------------------------------
# x <- seq(1,4,0.01)#generate sequence x
# g <- (x^2 * exp(-x^2 / 2)) / sqrt(2 *pi)
# f1 <- exp(-x) #set the first importance function
# f2 <- 4 / ((1 + x^2) * pi)#set the second importance function
# gs <- c(expression(g(x)==e^{-x}/(1+x^2)),expression(f[1](x)==e^{-x}),expression(f[2](x)==4/((1+x^2)*pi)))
# #figure "ratio of g and f" including the funtion of g/f1 and g/f2
# plot(x, g/f1, type = "l", ylab = "",ylim = c(0,3.2), lwd = 2, lty = 2,col=2,main='(ratio of g and f)')
# lines(x, g/f2, lty = 3, lwd = 2,col=3)
# legend("topright", legend = gs[-1],lty = 2:3, lwd = 2, inset = 0.02,col=2:3)
## ----eval=FALSE----------------------------------------------------------
# g <- function(x) { (x^2/ sqrt(2*pi) * exp(-x^2 / 2))* (x > 1)}#set the function g
# f1 <- function(x) { exp(-x)}#set the importance funtion f1 and f2
# f2 <- function(x) { 4 / ((1 + x^2) * pi)* (x >= 1)}
# m <- 1e4
# x1 <- rexp(m)#f1 is constructed to be the expression of exponential distribution,so sample x1 should be generated from exponential distribution
# u <- runif(m) #generate samples from exponential distribution by inverse transform method
# x2 <- tan(pi *(u+pi^2/16)/ 4)#obtain the function of x2 via inverse transform method
# x2[which(x2 < 1)] <- 1# set the range of x2,to catch overflow errors in g(x)
# ratio1<-g(x1) / f1(x1)
# ratio2<-g(x2) / f2(x2)
# theta.hat <- se <- numeric(2)
# theta.hat<- c(mean(ratio1), mean(ratio2))#obtain a MC estimate of the integration
# se <- c(sd(ratio1), sd(ratio2))#obtain variance of the estimator
# rbind(theta.hat, se)
## ----eval=FALSE----------------------------------------------------------
# n<-20#number of samples
# m<-1e3#number of MC simulations
# G1<-G2<-G3<-numeric(m)
# set.seed(123)
# for (j in 1:m){# Monte Carlo simulations
# x1<-sort(rlnorm(n))#sort x generated from standard lognormal
# x2<-sort(runif(n))#sort x generated from uniform distribution
# x3<-sort(rbinom(n,size=1,prob=0.1))#sort x generated from Bernoulli(0.1)
# mu1<-mean(x1);mu2<-mean(x2);mu3<-mean(x3)#mu is replaced by mean(x)
# z1<-z2<-z3<-numeric(m)
# for(i in 1:n){
# z1<-(2*i-n-1)*x1[i]
# z2<-(2*i-n-1)*x2[i]
# z3<-(2*i-n-1)*x3[i]
# }
# G1[j]<- sum(z1)/(n^2*mu1)
# G2[j]<- sum(z2)/(n^2*mu2)
# G3[j]<- sum(z3)/(n^2*mu3)#set the function of Gini ratio
# }
#
# #Estimate by simulation the mean of Ghat
# meanG1<-mean(G1);meanG2<-mean(G2);meanG3<-mean(G3,na.rm = TRUE)
# #Estimate by simulation the median of Ghat
# medianG1<-median(G1);medianG2<-median(G2);medianG3<-median(G3,na.rm = TRUE)
# #Estimate by simulation the deciles of Ghat
# decileG1<-quantile(G1, seq(0, 1, 0.1));decileG2<-quantile(G2, seq(0, 1, 0.1))
# decileG3<-quantile(G3,seq(0, 1, 0.1),na.rm = TRUE)
# #print results
# print(c(meanG1,medianG1));print(decileG1)#results of the mean, median and deciles of Ghat if X is standard lognormal
# print(c(meanG2,medianG2));print(decileG2)#results of the mean, median and deciles of Ghat if X~U(0,1)
# print(c(meanG3,medianG3));print(decileG3)#results of the mean, median and deciles of Ghat if X~Bernoulli(0.1)
# hist(G1,prob=TRUE,main="x is from standard lognormal")#construct density histograms if X is standard lognormal
# hist(G2,prob=TRUE,main="x is from U(0,1) distribution")#construct density histograms if X~U(0,1)
# hist(G3,prob=TRUE,main="x is from Bernoulli(0.1)")#construct density histograms if X~Bernoulli(0.1)
#
## ----eval=FALSE----------------------------------------------------------
# n<-20#number of samples
# m<-1000#number of MC simulations
# a=0;b=1#set the parameter of lognormal distribution
# G<-numeric(m)
# alpha <-0.05
# set.seed(123)
# for (j in 1:m){# Monte Carlo simulations
# x<-sort(rlnorm(n,a,b))#sort x generated from standard lognormal
# mu<-mean(x)#?? is replaced by mean(x)
# z<-numeric(m)
# for(i in 1:n){
# z<-(2*i-n-1)*x[i]
# }
# G[j]<- sum(z)/(n^2*mu)#set the function of Gini ratio
#
# }
# UCL <- mean(G)+qt(1-(alpha/2), df=n-1)*sd(G)/sqrt(n)#obtain the confidence interval upper limit
# LCL <- mean(G)-qt(1-(alpha/2), df=n-1)*sd(G)/sqrt(n)#obtain the lower confidence interval
# CI<-(c(LCL,UCL))#results of confidence interval
# print(CI)
# # Start another same simulation,and in this circle,we compare each estimation of G to judge if G is between confidence interval,and count it,then
# for (j in 1:m){# Monte Carlo simulations
# x<-sort(rlnorm(n,a,b))#sort x generated from standard lognormal
# mu<-mean(x)#?? is replaced by mean(x)
# z<-numeric(m)
# for(i in 1:n){
# z<-(2*i-n-1)*x[i]
# }
# G[j]<- sum(z)/(n^2*mu)#set the function of Gini ratio
# c=0#Count parameter starts from 0
# if(G[j]<LCL)c=c
# else{
# if(G[j]>UCL)c=c
# else{
# c=c+1
# }#set conditions if G in each simulation is between confidence interval,then c=c+1
# }
# }
# coverage_rate<-c/m#calculate the coverage rate=times of G falling in the interval/numbers of simulation
# percent_rate<-paste0(format(100*coverage_rate, format = "fg", digits = 3), "%")#set format type by function format
## ----eval=FALSE----------------------------------------------------------
#
# library(MASS)#for mvrnorm
# N <- 1e3 # Number of random samples
# alpha<-0.05#set the significant level to 0.05
# p1<-p2<-p3<-numeric(N)
# set.seed(123)
# rho <- 0#set the correlation coefficient to be 0 due to the null hypothesis of the test is that the correlation coefficient to be 0
# mu1 <- 0; s1 <- 1#set the mean and standard deviation of variable x
# mu2 <- 0; s2 <- 1#set the mean and standard deviation of variable x
# # Parameters for bivariate normal distribution
# mu <- c(mu1,mu2) # Mean
# sigma <- matrix(c(s1^2, s1*s2*rho, s1*s2*rho, s2^2),2,2) # Covariance matrix
# for(i in 1:N){
# bvn<- mvrnorm(N, mu = mu, Sigma = sigma ) # generate bivariate normal variables from MASS package
# x<-bvn[,1]#variable x from bivariate normal distribution
# y<-bvn[,2]#variable y from bivariate normal distribution
# p1[i]<-cor.test(x,y,method="spearman")$p.value#cauculate the p_value under the nonparametric tests based on Spearman???s rank correlation coefficient ??s
# p2[i]<-cor.test(x,y,method="kendall")$p.value#cauculate the p_value under the nonparametric tests based on Kendall???s coefficient ??
# p3[i]<-cor.test(x,y,method="pearson")$p.value#cauculate the p_value under the correlation test
# }
# power<-c(mean(p1<=alpha),mean(p2<=alpha),mean(p3<=alpha))#obtain the power(average rate of test that is a significant ) under three tests
# print(power)
## ----eval=FALSE----------------------------------------------------------
# library(MASS)#for mvrnorm
# N <- 1e3 # Number of random samples
# alpha<-0.05#set the significant level to 0.05
# p1<-p2<-p3<-numeric(N)
# set.seed(123)
# rho <- 0.15#set different correlation coefficient,and after many trials,0.15 is the especial example
# mu1 <- 0; s1 <-1#set the mean and standard deviation of variable x
# mu2 <- 0; s2 <-1#set the mean and standard deviation of variable x
# # Parameters for bivariate normal distribution
# mu <- c(mu1,mu2) # Mean
# sigma <- matrix(c(s1^2, s1*s2*rho, s1*s2*rho, s2^2),2,2) # Covariance matrix
# for(i in 1:N){
# bvn<- mvrnorm(N, mu = mu, Sigma = sigma ) # generate bivariate normal variables from MASS package
# x<-bvn[,1]#variable x from bivariate normal distribution
# y<-bvn[,2]#variable y from bivariate normal distribution
# p1[i]<-cor.test(x,y,method="spearman")$p.value#cauculate the p_value under the nonparametric tests based on Spearman???s rank correlation coefficient ??s
# p2[i]<-cor.test(x,y,method="kendall")$p.value#cauculate the p_value under the nonparametric tests based on Kendall???s coefficient ??
# p3[i]<-cor.test(x,y,method="pearson")$p.value#cauculate the p_value under the correlation test
# }
# power<-c(mean(p1<=alpha),mean(p2<=alpha),mean(p3<=alpha))#obtain the power(average rate of test that is a significant ) under three tests
# print(power)
## ----eval=FALSE----------------------------------------------------------
# set.seed(1)
# library(bootstrap)#for law dataset
# data(law,package = "bootstrap")
# LSAT <- law$LSAT #the variable LAST of dataset law
# GPA <- law$GPA#the variable GPA of dataset law
# n<-length(LSAT)#n means the number of observed sample
# theta.hat <- cor(LSAT, GPA)#the correlation statistic in Example 7.2
# theta.jack <- numeric(n)
# for(i in 1:n){
# theta.jack[i] <-cor(LSAT[-i], GPA[-i])#compute the jackknife replicates, leave out the ith observation xi
# }
# bias.jack <- (n-1)*(mean(theta.jack)-theta.hat)#jackknife estimate of bias
# se.jack <- sqrt((n-1)*mean((theta.jack-mean(theta.hat))^2))#Jackknife estimate of standard error
# round(c(original=theta.hat,bias=bias.jack,se=se.jack),5)#display the results of theta.hat,and the estimator of the bias and the standard error
## ----eval=FALSE----------------------------------------------------------
# set.seed(1)
# m<-1e3#number of cycles
# B<-1e3#The number of bootstrap replicates
# library(boot)#for boot and boot.ci function
# data(aircondit,package = "boot")#for aircondi dataset
# for(i in 1:m){
# boot.obj <- boot(data=aircondit,statistic=function(x,i){mean(x[i,])}, R=B )#generate R bootstrap replicates of a statistic applied to data,statistic is the mean time
# ci <- boot.ci(boot.obj,type=c("norm","basic","perc","bca"))#generates 4 different types of equi-tailed two-sided nonparametric confidence intervals
# }
# print(ci)
#
## ----eval=FALSE----------------------------------------------------------
# library(bootstrap)#for scor dataset
# n <-length( scor[,1] )#number of calculations
# x<-as.matrix(scor)
# theta.jack <- numeric(n)
# data(scor,package = "bootstrap")#attach the data scor
# theta<-function(x){
# eigen(cov(x))$values[1]/sum(eigen(cov(x))$values)
# }#construct funtion theta=lambda1/sum(lambdai)
# theta.hat <- theta(scor)#compute the statistic to be estimated
# for(i in 1:n){
# theta.jack[i] <-theta(x[-i,])#compute the jackknife replicates, leave-one-out estimates
# }
# bias.jack <- (n-1)*(mean(theta.jack)-theta.hat)#jackknife estimate of bias
# se.jack <- sqrt((n-1)*mean((theta.jack-mean(theta.jack))^2))#Jackknife estimate of standard error
# round(c(original=theta.hat,bias=bias.jack,se=se.jack),6)#display the results of theta.hat,and the estimator of the bias and the standard error
## ----eval=FALSE----------------------------------------------------------
# data(ironslag, package = "DAAG")#for data ironslag
# magnetic<-ironslag$magnetic#extract variable magnetic
# chemical<-ironslag$chemical#extract variable chemical
# n <- length(magnetic)
# p<-n#set number of outer circulation
# q<-n-1#set number of inner circulation
# a<-array(dim=c(p,q))
# e1 <- e2 <- e3 <- e4<-a
# # for n-fold cross validation
# #fit models on leave-two-out samples
# for (k in 1:p) {#outer circulations from 1 to n
# y1 <- magnetic[-k]#y1 is magnetic leave yk out
# x1<- chemical[-k]#x1 is chemical leave xk out
# for(l in 1:q){#inner circulations from 1 to n-1
# y <- y1[-l]#y is y1 leave yl out
# x <- x1[-l]#x is x1 leave xl out
# #model selection:cross validation
# #1.linear model and estiamtion of prediction error
# J1 <- lm(y ~ x)
# yhat1 <- J1$coef[1] + J1$coef[2] * x1[l]#mutatis mutandis u for chemical
# e1[k,l] <- y1[l] - yhat1#mutatis mutandis z for magnetic
# #2.Quadratic model and estiamtion of prediction error
# J2 <- lm(y ~ x + I(x^2))
# yhat2 <- J2$coef[1] + J2$coef[2] * x1[l] +J2$coef[3] *x1[l]^2
# e2[k,l] <- y1[l] - yhat2
# #3.Exponential model and estiamtion of prediction error
# J3 <- lm(log(y) ~ x)
# logyhat3 <- J3$coef[1] + J3$coef[2] * x1[l]
# yhat3 <- exp(logyhat3)
# e3[k,l] <- y1[l] - yhat3
# #4.log-log model and estiamtion of prediction error
# J4 <- lm(log(y) ~ log(x))
# logyhat4 <- J4$coef[1] + J4$coef[2] * log(x1[l])
# yhat4 <- exp(logyhat4)
# e4[k,l] <- y1[l] - yhat4
# }
# }
# c(mean(e1^2), mean(e2^2), mean(e3^2), mean(e4^2))#estimates for prediction error
## ----eval=FALSE----------------------------------------------------------
# set.seed(1)
# attach(chickwts)
# x <- sort(as.vector(weight[feed == "soybean"]))
# y <- sort(as.vector(weight[feed == "linseed"]))
# detach(chickwts)
#
#
# R <- 999 #number of replicates
#
# z <- c(x, y) #pooled sample
#
# W <-k<- numeric(R) #storage for replicates
# m<-length(x)
# n<-length(y)
# N<-length(z)
# u<-numeric(N)
#
# cvm<-function(x1,y1){#set the statistic function
# Fn<-ecdf(x1)#Empirical cumulative distribution function of x
# Gm<-ecdf(y1)#Empirical cumulative distribution function of y
# z1<-c(x1,y1)
# for(i in 1:N){
# u[i]<-(Fn(z1[i])-Gm(z1[i]))^2
# }
# m*n/(m+n)^2*sum(u)
# }
# W0 <- cvm(x, y)#statistic
# for (i in 1:R) {
# #generate indices k for the first sample
# k <- sample(1:N, size = m, replace = FALSE)
# x1 <- z[k]
# y1 <- z[-k] #complement of x1
# W[i] <- cvm(x1, y1)
# }
# p <- mean(c(W0, W) >= W0)#calculate p value
# p
## ----eval=FALSE----------------------------------------------------------
# set.seed(1)
# library(RANN) # implementing a fast algorithm
# # for locating nearest neighbors
# # (alternative R package: "yaImpute")
# library(boot)
# library(Ball)
# library(energy)#for enenrgy test
# library(devtools)#for Ball statistic
# attach(chickwts) # chicken weights for various feed methods
# x <- as.vector(weight[feed == "sunflower"])
# y <- as.vector(weight[feed == "linseed"])
# detach(chickwts)
# z <- c(x, y)
# m <- 1e2; k<-3; p<-2;n1 <- n2 <- 20#size for sample 1 and 2
# R<-999; N = c(n1,n2)
# Tn <- function(z, ix, sizes,k) {
# n1 <- sizes[1]
# n2 <- sizes[2]
# n <- n1 + n2
# if(is.vector(z)) z <- data.frame(z,0);
# z <- z[ix, ];
# NN <- nn2(data=z, k=k+1) # what's the first column?
# block1 <- NN$nn.idx[1:n1,-1]
# block2 <- NN$nn.idx[(n1+1):n,-1]
# i1 <- sum(block1 < n1 + .5); i2 <- sum(block2 > n1+.5)
# (i1 + i2) / (k * n)
# }
#
# eqdist.nn <- function(z,sizes,k){
# boot.obj <- boot(data=z,statistic=Tn,R=R,
# sim = "permutation", sizes = sizes,k=k)
# ts <- c(boot.obj$t0,boot.obj$t)
# p.value <- mean(ts>=ts[1])
# list(statistic=ts[1],p.value=p.value)
# }
# p.values <- matrix(NA,m,3)
#
# #Unequal variances and equal expectations
#
# for(i in 1:m){
# x <- matrix(rnorm(n1*p,0,1),ncol=p);
# y <- matrix(rnorm(n2*p,0,2),ncol=p);
# z <- rbind(x,y)
# p.values[i,1] <- eqdist.nn(z,N,k)$p.value#performance of NN method
# p.values[i,2] <- eqdist.etest(z,sizes=N,R=R)$p.value#performance of energy method
# p.values[i,3] <- bd.test(x=x,y=y,R=999,seed=i*12345)$p.value#performance of ball method
# }
# alpha <- 0.05 #confidence level
# pow <- colMeans(p.values<alpha)
# print(pow)
#
#
#
# #Unequal variances and unequal expectations
# for(i in 1:m){
# x <- matrix(rnorm(n1*p),ncol=p);
# y <-cbind(rnorm(n2,0,2),rnorm(n2,1,1))
# z <- rbind(x,y)
# p.values[i,1] <- eqdist.nn(z,N,k)$p.value#performance of NN method
# p.values[i,2] <- eqdist.etest(z,sizes=N,R=R)$p.value#performance of energy method
# p.values[i,3] <- bd.test(x=x,y=y,R=999,seed=i*12345)$p.value#performance of ball method
# }
# alpha <- 0.05 #confidence level
# pow <- colMeans(p.values<alpha)
# print(pow)
#
# #Non-normal distributions: t distribution with 1 df (heavy-taileddistribution), bimodel distribution (mixture of two normal distributions)
# m <- 1e2
# n1 <- n2 <- 20
# p<-2
# for(i in 1:m){
# x <- matrix(rt(n1*p,1),ncol=p);
# y <-cbind(rnorm(n2,0,1),rnorm(n2,0,1))
# z <- rbind(x,y)
# p.values[i,1] <- eqdist.nn(z,N,k)$p.value#performance of NN method
# p.values[i,2] <- eqdist.etest(z,sizes=N,R=R)$p.value#performance of energy method
# p.values[i,3] <- bd.test(x=x,y=y,R=999,seed=i*12345)$p.value#performance of ball method
# }
# alpha <- 0.05 #confidence level
# pow <- colMeans(p.values<alpha)
# print(pow)
#
#
#
# #Unbalanced samples (say, 1 case versus 10 controls)
# n1<-10;n2<-100;N=c(n1,n2)
# for(i in 1:m){
# x <- matrix(rt(n1*p,1),ncol=p);
# y <- matrix(rnorm(n2*p,1,2),ncol=p);
# z <- rbind(x,y)
# p.values[i,1] <- eqdist.nn(z,N,k)$p.value#performance of NN method
# p.values[i,2] <- eqdist.etest(z,sizes=N,R=R)$p.value#performance of energy method
# p.values[i,3] <- bd.test(x=x,y=y,R=999,seed=i*12345)$p.value#performance of ball method
# }
# alpha <- 0.05 #confidence level
# pow <- colMeans(p.values<alpha)
# print(pow)
## ----eval=FALSE----------------------------------------------------------
# m <- 10000
# x <- numeric(m)
# x[1] <- rnorm(1)
# k <- 0
# b<-1000
# #generates random deviates
# u <- runif(m)
# #Use the Metropolis-Hastings sampler to generate random variables from a standard Cauchy distribution.
# for (i in 2:m)
# {
# xt <- x[i-1]
# y <- rnorm(1, mean = xt)
# #Density Cauchy distribution
# num <- dcauchy(y)*dnorm(xt, mean = y)
# den <- dcauchy(xt)*dnorm(y, mean = xt)
#
# if(u[i] <= num/den)
# {
# x[i] <- y
# }
# else
# {
# x[i] <- xt
# k <- k+1#y is rejected
# }
# }
#
# plot(x,type = "l",main="",ylab="x")
# y <- x[b:m]
# b <- x[1000:m]
# sequence <- seq(0,1,0.01)
# standardCauchy <- qcauchy(sequence)
# standardCauchy <- standardCauchy[(standardCauchy> -Inf) & (standardCauchy< Inf)]
# hist(b, freq = FALSE,main = "")
# lines(standardCauchy, dcauchy(standardCauchy), lty = 2)
# qqplot(standardCauchy, quantile(x,sequence), main="",xlab="Cauchy Quantiles", ylab="Sample Quantiles")#compare the deciles of the generated observations with the deciles of the standard Cauchy distribution
#
## ----eval=FALSE----------------------------------------------------------
# set.seed(1)
# t <- c(125,18,20,34) #actual value of theta
# w <- 0.25 #width of the uniform support set
# m <- 5000 #length of the chain
# burn <- 1000 #burn-in time
# animals <- 197
# x <- numeric(m) #the chain
#
#
#
# prob <- function(y, t) {
# # computes (without the constant) the target density
# if (y < 0 || y >1)
# return (0)
# return((1/2+y/4)^t[1] *
# ((1-y)/4)^t[2] * ((1-y)/4)^t[3] *
# (y/4)^t[4])
# }
# u <- runif(m) #for accept/reject step
# v <- runif(m, -w, w) #proposal distribution
# x[1] <- .25
# for (i in 2:m) {
# y <- x[i-1] + v[i]
# if (u[i] <= prob(y, t) / prob(x[i-1], t))
# x[i] <- y else
# x[i] <- x[i-1]
# }
# xt <- x[(burn+1):m]
# theta.hat<-mean(xt)
# print(theta.hat)
# that <- sum(t) * c((2+theta.hat)/4, (1-theta.hat)/4, (1-theta.hat)/4, theta.hat/4)
# print(round(that))
## ----eval=FALSE----------------------------------------------------------
# set.seed(1)
# Gelman.Rubin <- function(psi) {
# # psi[i,j] is the statistic psi(X[i,1:j])
# # for chain in i-th row of X
# psi <- as.matrix(psi)
# n <- ncol(psi)
# k <- nrow(psi)
#
# psi.means <- rowMeans(psi) #row means
# B <- n * var(psi.means) #between variance est.
# psi.w <- apply(psi, 1, "var") #within variances
# W <- mean(psi.w) #within est.
# v.hat <- W*(n-1)/n + (B/n) #upper variance est.
# r.hat <- v.hat / W #G-R statistic
# return(r.hat)
# }
#
# t <- c(125,18,20,34) #actual value
# b<- 1000 #burn-in time
# prob<- function(y, t) {
# # computes (without the constant) the target density
# if (y < 0 || y >1)
# return (0)
# else
# return((1/2+y/4)^t[1] *((1-y)/4)^t[2] * ((1-y)/4)^t[3] *(y/4)^t[4])
# }
#
# #generate chains
# chain<-function(w,N){
# x<-rep(0,N)
# u <- runif(N) #for accept/reject step
# v <- runif(N, -w, w) #proposal distribution
# x[1] <-w
# for (i in 2:N) {
# y <- x[i-1] + v[i]
# if (u[i] <= prob(y, t) / prob(x[i-1], t))
# x[i] <- y
# else
# x[i] <- x[i-1]
# }
# return(x)
# }
# w <- c(0.05,0.2,0.4,0.8)#set the paremeter of proposal distribution
# n<- 15000 #length of the chain
# k<-4 #number of chains to generate
#
# #generate the chains
# X <- matrix(0, nrow=k, ncol=n)
# for (i in 1:k)
# X[i, ] <- chain(w[i],n)
#
# #compute diagnostic statistics
# psi <- t(apply(X, 1, cumsum))
# for (i in 1:nrow(psi))
# psi[i,] <- psi[i,] / (1:ncol(psi))
# print(Gelman.Rubin(psi))
# #plot psi for the four chains
# par(mfrow=c(2,2))
# for (i in 1:k)
# plot(psi[i, (b+1):n], type="l",
# xlab=i, ylab=bquote(psi))
# par(mfrow=c(1,1)) #restore default
#
#
# #plot the sequence of R-hat statistics
# rhat <- rep(0, n)
# for (j in (b+1):n)
# rhat[j] <- Gelman.Rubin(psi[,1:j])
# plot(rhat[(b+1):n], type="l", xlab="", ylab="R",ylim=c(1,1.3))
# abline(h=1.1, lty=2)
# abline(h=1.2, lty=2)
## ----eval=FALSE----------------------------------------------------------
# findIntersection = function (k) {
# s.k.minus.one = function (a) {#set function S_{k-1}(a)
# 1-pt(sqrt(a^2 * (k - 1) / (k - a^2)), df = k-1)
# }
# s.k = function (a) {#set function S_{k}(a)
# 1-pt(sqrt(a^2 * k / (k + 1 - a^2)), df = k)
# }
#
# f = function (a) {#set function S_{k}(a)-S_{k-1}(a)
# s.k(a) - s.k.minus.one(a)
# }#Find the intersection points A(k) in (0,sqrt(k)) of the curves
# eps = .Machine$double.eps^0.5
# return(uniroot(f, interval = c(eps, sqrt(k)-eps))$root)
# }
# k<-c(4:25, 100, 500, 1000)
# rs = sapply(k, function (k) {findIntersection(k)})
# s<-cbind(rs,k)
# s
## ----eval=FALSE----------------------------------------------------------
# dcauchy <- function(t, eta, theta) {#compute the desity function of cauchy distribution
# if (theta<=0) return (0)#let theta > 0
# else
# return (1/(theta*pi*(1+((t-eta)/theta)^2)))
# }
# cdf.cauchy <- function(x, eta, theta){#compute the cdf of cauchy distribution
# res <- integrate(dcauchy, lower=-Inf, upper=x,
# rel.tol=.Machine$double.eps^0.25,
# eta=eta, theta=theta)$value
#
# return(res)
# }
# pcauchy(0,0,1)#Compare your results to the results from the R function pcauchy.
# cdf.cauchy (0,0,1)
# pcauchy(1,0,1)
# cdf.cauchy (1,0,1)
# pcauchy(-0.1,0,1)
# cdf.cauchy (-0.1,0,1)
## ----eval=FALSE----------------------------------------------------------
# #set the likelihood function
# lnL <- function(p, q, nA =28, nB = 24, nAB = 70, nO = 41) {
# r = 1.0 - p - q
# nA * log(p^2 + 2*p*r) + nB * log(p^2 + 2 * q * r) +
# nAB * log(2 * p * q) + 2 * nO * log(r)
# }
#
# EM <- function (p,q,nA =28, nB = 24, nAB = 70, nO = 41, debug = FALSE) {
#
# # Evaluate the likelihood using initial estimates
# llk <- lnL(p, q, nA, nB, nAB, nO)
#
# # Count the number of iterations so far
# iter <- 1
#
# # Loop until convergence ...
# while (TRUE)
# {
# # Estimate the frequency for allele O
# r= 1.0 - p - q
#
# # First we carry out the E-step
#
# # The counts for genotypes O/O and A/B are effectively observed
# # Estimate the counts for the other genotypes
# nAA <- nA * p / (p + 2*r)
# nAO <- nA - nAA
# nBB <- nB * q / (q + 2*r)
# nBO <- nB - nBB
#
# # Print debugging information
# if (debug)
# {
# cat("Round #", iter, "lnLikelihood = ", llk, "\n")
# cat(" Allele frequencies: p = ", p, ", q = ", q, ",r = ", r, "\n")
# cat(" Genotype counts: nAA = ", nAA, ", nAO = ", nAO, ", nBB = ", nBB,
# ", nBO = ", nBO, "\n")
# }
#
# # Then the M-step
# p <- (2 * nAA + nAO + nAB) / (2 * (nA + nB + nO + nAB))
# q <- (2 * nBB + nBO + nAB) / (2 * (nA + nB + nO + nAB))
#
#
# # Then check for convergence ...
# llk1 <- lnL(p, q, nA, nB, nAB, nO)
# if (abs(llk1 - llk) < (abs(llk) + abs(llk1)) * 1e-6) break
#
# # Otherwise keep going
# llk <- llk1
# iter <- iter + 1
# }
#
# list(p = p, q = q)
# }
# EM(0.3,0.2,nA =28, nB = 24, nAB = 70, nO = 41, debug = TRUE)
## ----eval=FALSE----------------------------------------------------------
# formulas <- list(
# mpg ~ disp,
# mpg ~ I(1 / disp),
# mpg ~ disp + wt,
# mpg ~ I(1 / disp) + wt
# )
# #With lapply()
# model_mtcars1<-lapply(formulas,lm,data=mtcars)
# model_mtcars1
# #With a for loop
# model_mtcars2<- vector("list", length(formulas))
# for (i in seq_along(formulas)) {
# model_mtcars2[[i]] <- lm( formulas[[i]],mtcars)
# }
# model_mtcars2
## ----eval=FALSE----------------------------------------------------------
# set.seed(1)
# bootstraps <- lapply(1:10, function(i) {
# rows <- sample(1:nrow(mtcars), rep = TRUE)
# mtcars[rows, ]
# })
# #With lapply()
# model_bootstrap1<-lapply(bootstraps,lm,formula=mpg ~ disp)
# model_bootstrap1
# #With a for loop
# model_bootstrap2 <- vector("list", length(bootstraps))
# for (i in seq_along(bootstraps)) {
# model_bootstrap2[[i]] <- lm( mpg ~ disp,bootstraps[[i]])
# }
# model_bootstrap2
## ----eval=FALSE----------------------------------------------------------
# #model in question 1
# formulas <- list(
# mpg ~ disp,
# mpg ~ I(1 / disp),
# mpg ~ disp + wt,
# mpg ~ I(1 / disp) + wt
# )
# #With lapply()
# model_mtcars1<-lapply(formulas,lm,data=mtcars)
#
# #With a for loop
# model_mtcars2<- vector("list", length(formulas))
# for (i in seq_along(formulas)) {
# model_mtcars2[[i]] <- lm( formulas[[i]],mtcars)
# }
#
#
# #model in question 2
# bootstraps <- lapply(1:10, function(i) {
# rows <- sample(1:nrow(mtcars), rep = TRUE)
# mtcars[rows, ]
# })
# #With lapply()
# model_bootstrap1<-lapply(bootstraps,lm,formula=mpg ~ disp)
#
# #With a for loop
# model_bootstrap2 <- vector("list", length(bootstraps))
# for (i in seq_along(bootstraps)) {
# model_bootstrap2[[i]] <- lm( mpg ~ disp,bootstraps[[i]])
# }
#
#
#
# #extract R2
# rsq <- function(mod) summary(mod)$r.squared
# #For model in question 1
# model1_rsq1<-sapply(model_mtcars1, rsq)
# model1_rsq2<-sapply(model_mtcars2, rsq)
# #For model in question 2
# model2_rsq1<-sapply(model_bootstrap1, rsq)
# model2_rsq2<-sapply(model_bootstrap1, rsq)
# rbind(model1_rsq1,model1_rsq2)
# rbind(model2_rsq1,model2_rsq2)
## ----eval=FALSE----------------------------------------------------------
# set.seed(1)
# # Create some random data
# trials <- replicate(
# 100,
# t.test(rpois(10, 10), rpois(7, 10)),
# simplify = FALSE
# )
# #Use sapply() and an anonymous function to extract the p-value from every trial
# p1<-sapply(trials, function(test) test$p.value)
# #Get rid of the anonymous function by using [[ directly.
# p2<-sapply(trials, '[[', i = "p.value")
# cbind(p1,p2)
## ----eval=FALSE----------------------------------------------------------
# #construct function to compute the expected (theoretical) count
# expected <- function(colsum, rowsum, total) {
# (colsum / total) * (rowsum / total) * total
# }
# #construct function to compute the value of the test-statistic
# chi_stat <- function(observed, expected) {
# ((observed - expected) ^ 2) / expected
# }
#
#
# chisq_test2 <- function(x, y) {
# total <- sum(x) + sum(y)
# rowsum_x <- sum(x)
# rowsum_y <- sum(y)
# chistat <- 0
# # computes the chi-square test statistic which is apparently different from chisq.test function
# for (i in seq_along(x)) {
# colsum <- x[i] + y[i]
# expected_x <- expected(colsum, rowsum_x, total)
# expected_y <- expected(colsum, rowsum_y, total)
# chistat <- chistat + chi_stat(x[i], expected_x)
# chistat <- chistat + chi_stat(y[i], expected_y)
# }
# chistat
# }
# o <- as.table(rbind(c(56, 67, 57,34), c(135, 125, 45,65)))
# #compare the results of chi-square test
# print(chisq_test2(c(56, 67, 57,34), c(135, 125, 45,65)))
# print(chisq.test(as.table(rbind(c(56, 67, 57,34), c(135, 125, 45,65)))))
# #compare the computing time
# print(microbenchmark::microbenchmark(
# chisq_test2(c(56, 67, 57,34), c(135, 125, 45,65)),
# chisq.test(as.table(rbind(c(56, 67, 57,34), c(135, 125, 45,65))))
# ))
#
#
## ----eval=FALSE----------------------------------------------------------
# #With table
# subject1<-c(60,70,60,80)
# subject2<-c(60,60,60,90)
# data<-data.frame(subject1,subject2)
# result_table<-table(data)
# result_table
#
# #with loop
# x<-c(60,70,80,90)
# f<-function(x){
# i<-numeric(4)
# j<-numeric(4)
# q<-numeric(4)
# for (i in 1:4){
# for(j in 1:4){
# for(q in 1:4){
# result_2<-ifelse(subject1[i]==x[j]&subject2[i]==x[q],"1","0")
# }}}}
#
# print(microbenchmark::microbenchmark(
# table(data),
# f(x)))
#
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.