## ----eval=FALSE----------------------------------------------------------
# #The first example:
#
# x <- rnorm(10)
# y <- rnorm(10)
# plot(x,y)
#
#
# #The second example:
#
# data("InsectSprays")
# aov.spray <- aov(sqrt(count) ~ spray,data=InsectSprays)
# summary(aov.spray)
#
#
# #The third example:
#
# m1 <- matrix(1,2,2)
# m2 <- matrix(2,2,2)
# rbind(m1,m2)
# cbind(m1,m2)
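#
# # Quick shape check: m1 and m2 are both 2 x 2, so row-binding gives a 4 x 2
# # matrix and column-binding a 2 x 4 matrix.
# dim(rbind(m1, m2))   # 4 2
# dim(cbind(m1, m2))   # 2 4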
#
## ----eval=FALSE----------------------------------------------------------
# #The answer to the first question:
#
# x <- c(0,1,2,3,4)
# p <- c(0.1,0.2,0.2,0.2,0.3)
# cp <- cumsum(p)
# m <- 1e4
# r <- numeric(m)
# r <- x[findInterval(runif(m),cp)+1]
# ct <- table(r)/m
# x <- sample(c(0,1,2,3,4), m, replace = TRUE, prob = p)
# y <- table(x)/m
# rbind(prob=p,prob1=ct,prob2=y)
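#
# # Optional sanity check (a sketch): a chi-squared goodness-of-fit test of the
# # inverse-transform sample against the target probabilities; with m = 1e4
# # draws, every category should be well represented.
# chisq.test(as.vector(table(r)), p = p)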
#
# #The answer to the second question:
#
# # Acceptance-rejection sampler for Beta(a, b) with a, b >= 1:
# # accept a U(0,1) proposal x with probability x^(a-1) * (1-x)^(b-1)
# beta <- function(n, a, b){
#   j <- k <- 0            # j counts proposals, k counts acceptances
#   y <- numeric(n)
#   while (k < n) {
#     u <- runif(1)
#     j <- j + 1
#     x <- runif(1)        # proposal from U(0,1)
#     if (x^(a-1) * (1-x)^(b-1) > u) {
#       k <- k + 1
#       y[k] <- x
#     }
#   }
#   return(y)
# }
#
# r <- beta(1000, 3, 2)
# hist(r, prob = TRUE, main = "Histogram of Beta(3,2) sample with its density", col = "red")
# t <- seq(0, 1, 0.001)
# s <- dbeta(t, 3, 2)
# lines(t, s, col = "blue", lwd = 3)
#
# #The answer to the third question:
#
# n <- 1e3; r <- 4; beta <- 2
# lambda <- rgamma(n, r, beta)
# y <- rexp(n, lambda)
# y
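#
# # Integrating out lambda, Y follows the Pareto (Lomax) distribution with cdf
# # F(y) = 1 - (beta/(beta + y))^r; a quick visual check (sketch):
# plot(ecdf(y), main = "Empirical cdf of y vs Pareto cdf")
# curve(1 - (beta/(beta + x))^r, from = 0, to = max(y), add = TRUE, col = "red")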
#
## ----eval=FALSE----------------------------------------------------------
# #The answer to the first question:
# # Monte Carlo estimate of the Beta(3,3) cdf: for t ~ U(0, x),
# # x * E[30 t^2 (1-t)^2] estimates the integral of the density over (0, x)
# mtbeta <- function(x){
#   m <- 1e5
#   t <- runif(m, 0, x)
#   mean(30 * t^2 * (1 - t)^2) * x
# }
# mbeta <- Vectorize(mtbeta)
# s <- seq(0.1, 0.9, 0.1)
# print(round(rbind(x = s, p = pbeta(s, 3, 3), phat = mbeta(s),
#                   error = mbeta(s) - pbeta(s, 3, 3)), 5))
#
# #The answer to the second question:
# # Monte Carlo estimate of the Rayleigh(1) cdf at x, optionally with
# # antithetic variates: for u ~ U(0,1), g = x^2 * u * exp(-(u*x)^2 / 2)
# mtrayl <- function(x, R = 10000, antithetic = TRUE) {
#   u <- runif(R/2)
#   v <- if (antithetic) 1 - u else runif(R/2)
#   u <- c(u, v)
#   cdf <- numeric(length(x))
#   for (i in 1:length(x)) {
#     g <- x[i]^2 * u * exp(-(u * x[i])^2 / 2)
#     cdf[i] <- mean(g)
#   }
#   cdf
# }
# x <- seq(0, 2.5, 0.5)
# set.seed(123)
# mtr1 <- mtrayl(x, antithetic = FALSE)
# set.seed(123)
# mtr2 <- mtrayl(x, antithetic = TRUE)
# print(round(rbind(x, mtr1, mtr2), 5))
# m <- 1000
# mtr3 <- mtr4 <- numeric(m)
# x <- 1
# for (i in 1:m) {
#   mtr3[i] <- mtrayl(x, antithetic = FALSE)
#   mtr4[i] <- mtrayl(x)
# }
# print(var(mtr3))
# print(var(mtr4))
# print((var(mtr3) - var(mtr4))/var(mtr3))
#
# #The answer to the third question:
# m <- 10000
# se <- numeric(2)
# # integrand: x^2 * phi(x) restricted to (1, Inf)
# g <- function(x) {
#   exp(-x^2/2) * x^2 / sqrt(2*pi) * (x > 1)
# }
#
# # importance density f1: Weibull(shape 2, scale sqrt(2)), i.e. f1(x) = x * exp(-x^2/2)
# x <- rweibull(m, 2, sqrt(2))
# fg1 <- g(x) / (x * exp(-x^2/2))
#
# se[1] <- sd(fg1)
#
# # importance density f2: Exp(1), f2(x) = exp(-x)
# x <- rexp(m, 1)
# fg2 <- g(x) / exp(-x)
#
# se[2] <- sd(fg2)
# se
#
# #The answer to the fourth question:
# m <- 10000
# g <- function(x) {
#   exp(-x^2/2) * x^2 / sqrt(2*pi) * (x > 1)
# }
# x <- rweibull(m, 2, sqrt(2))
# fg1 <- g(x) / (x * exp(-x^2/2))
# theta1 <- mean(fg1)
# theta1
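#
# # The integral has a closed form: integrating x^2 * phi(x) by parts over
# # (1, Inf) gives phi(1) + 1 - Phi(1), so theta1 can be checked directly:
# dnorm(1) + pnorm(1, lower.tail = FALSE)   # about 0.4006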
## ----eval=FALSE----------------------------------------------------------
#
# #The answer to the first question:
# m <- 1000
# n <- 20
# g <- numeric(m)
# medians <- means <- numeric(3)
# y <- gini1 <- gini2 <- gini3 <- numeric(m)
#
# # Gini ratio: G = sum_j (2j - n - 1) * x_(j) / (n^2 * xbar), x_(j) the sorted sample
# for (i in 1:m) {
#   x <- sort(rlnorm(n))
#   xmean <- mean(x)
#   for (j in 1:n) {
#     y[j] <- (2*j - n - 1) * x[j]
#   }
#   gini1[i] <- 1/n^2/xmean * sum(y[1:n])
# }
#
# for (i in 1:m) {
#   x <- sort(runif(n))
#   xmean <- mean(x)
#   for (j in 1:n) {
#     y[j] <- (2*j - n - 1) * x[j]
#   }
#   gini2[i] <- 1/n^2/xmean * sum(y[1:n])
# }
#
# for (i in 1:m) {
#   x <- sort(rbinom(n, 1, 0.1))   # Bernoulli(0.1) sample
#   xmean <- mean(x)
#   for (j in 1:n) {
#     y[j] <- (2*j - n - 1) * x[j]
#   }
#   gini3[i] <- 1/n^2/xmean * sum(y[1:n])
# }
# # an all-zero sample gives xmean = 0 and an undefined ratio; drop those replicates
# gini3 <- gini3[is.finite(gini3)]
#
# par(mfrow=c(3,1))
# par(pin=c(2,1))
# hist(gini1,prob = TRUE)
# lines(density(gini1),col = "red",lwd = 2)
# hist(gini2,prob = TRUE)
# lines(density(gini2),col = "blue",lwd = 2)
# hist(gini3,prob = TRUE)
# lines(density(gini3),col = "green",lwd = 2)
#
# medians[1] <- median(gini1)
# medians[2] <- median(gini2)
# medians[3] <- median(gini3)
# medians
#
# quantiles1 <- as.vector(quantile(gini1,seq(0.1,0.9,0.1)))
# quantiles1
#
# quantiles2 <- as.vector(quantile(gini2,seq(0.1,0.9,0.1)))
# quantiles2
#
# quantiles3 <- as.vector(quantile(gini3,seq(0.1,0.9,0.1)))
# quantiles3
#
# means[1] <- mean(gini1)
# means[2] <- mean(gini2)
# means[3] <- mean(gini3)
# means
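#
# # Sanity check (known formula): for a standard lognormal (sdlog = 1) the Gini
# # ratio is 2*pnorm(sdlog/sqrt(2)) - 1, so mean(gini1) should be near:
# 2*pnorm(1/sqrt(2)) - 1   # about 0.52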
#
#
# #The answer to the second question:
# m <- 1000
# n <- 20
# gini <- numeric(m)
#
# # generate a series of Gini ratios from a lognormal distribution
# y <- numeric(n)
# for (i in 1:m) {
#   x <- sort(rlnorm(n))
#   xmean <- mean(x)
#   for (j in 1:n) {
#     y[j] <- (2*j - n - 1) * x[j]
#   }
#   gini[i] <- 1/n^2/xmean * sum(y[1:n])
# }
#
# # lower confidence limit
# LCI <- mean(gini) - sd(gini)*qt(0.975, m-1)
# # upper confidence limit
# UCI <- mean(gini) + sd(gini)*qt(0.975, m-1)
# # the confidence interval
# CI <- c(LCI, UCI)
# print(CI)
# # estimate the coverage rate
# covrate <- sum(I(gini > CI[1] & gini < CI[2]))/m
# print(covrate)
#
#
# #The answer to the third question:
# # load the MASS package for mvrnorm
# library(MASS)
# mu <- c(0, 0)   # mean vector (avoid masking base::mean)
# sigma <- matrix(c(25, 5,
#                   5, 25), nrow = 2, ncol = 2)
# m <- 1000
#
# # Estimate the power of the Pearson correlation test (method = "pearson")
# pearvalues <- replicate(m, expr = {
#   mydata1 <- mvrnorm(50, mu, sigma)
#   x <- mydata1[,1]
#   y <- mydata1[,2]
#   peartest <- cor.test(x, y, alternative = "two.sided", method = "pearson")
#   peartest$p.value
# })
# power1 <- mean(pearvalues <= 0.05)
# power1
#
# # Estimate the power of the Spearman correlation test (method = "spearman")
# spearvalues <- replicate(m, expr = {
#   mydata2 <- mvrnorm(50, mu, sigma)
#   x <- mydata2[,1]
#   y <- mydata2[,2]
#   speartest <- cor.test(x, y, alternative = "two.sided", method = "spearman")
#   speartest$p.value
# })
# power2 <- mean(spearvalues <= 0.05)
# power2
#
# # Estimate the power of the Kendall correlation test (method = "kendall")
# kenvalues <- replicate(m, expr = {
#   mydata3 <- mvrnorm(50, mu, sigma)
#   x <- mydata3[,1]
#   y <- mydata3[,2]
#   kentest <- cor.test(x, y, alternative = "two.sided", method = "kendall")
#   kentest$p.value
# })
# power3 <- mean(kenvalues <= 0.05)
# power3
#
# R <- 1000
# m <- 1000
#
# # Power of the Pearson test for the dependent, non-monotone pair (U, sin(U))
# pearvalues <- replicate(m, expr = {
#   u <- runif(R/2, 0, 10)
#   v <- sin(u)
#   peartest <- cor.test(u, v, alternative = "two.sided", method = "pearson")
#   peartest$p.value
# })
# power1 <- mean(pearvalues <= 0.05)
# power1
#
# # Power of the Spearman test for the dependent pair (U, sin(U))
# spearvalues <- replicate(m, expr = {
#   u <- runif(R/2, 0, 10)
#   v <- sin(u)
#   speartest <- cor.test(u, v, alternative = "two.sided", method = "spearman")
#   speartest$p.value
# })
# power2 <- mean(spearvalues <= 0.05)
# power2
#
# # Power of the Kendall test for the dependent pair (U, sin(U))
# kenvalues <- replicate(m, expr = {
#   u <- runif(R/2, 0, 10)
#   v <- sin(u)
#   kentest <- cor.test(u, v, alternative = "two.sided", method = "kendall")
#   kentest$p.value
# })
# power3 <- mean(kenvalues <= 0.05)
# power3
#
## ----eval=FALSE----------------------------------------------------------
# #The answer to the first question:
# LSAT <- c(576,635,558,578,666,580,555,661,651,605,653,575,545,572,594)
# GPA <- c(339,330,281,303,344,307,300,343,336,313,312,274,276,288,296)
# n <- length(LSAT)
# theta.hat <- cor(LSAT, GPA)  # sample correlation between LSAT and GPA
# theta.jack <- numeric(n)
# library('bootstrap')
# #compute the jackknife replicates, leave-one-out estimates
# for (i in 1:n)
#   theta.jack[i] <- cor(LSAT[-i], GPA[-i])
# bias <- (n - 1) * (mean(theta.jack) - theta.hat)
# #jackknife estimate of bias
# print(bias)
# se <- sqrt((n-1)* mean((theta.jack - mean(theta.jack))^2))
# print(se)
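#
# # Cross-check (a sketch): the bootstrap standard error of the correlation,
# # computed with the boot package, should be of similar magnitude to se above
# library(boot)
# cor.boot <- function(d, i) cor(d[i, 1], d[i, 2])
# boot(data = cbind(LSAT, GPA), statistic = cor.boot, R = 2000)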
#
#
# #The answer to the second question:
# set.seed(1)
# air <- c(3,5,7,18,43,85,91,98,100,130,230,487)
# library(boot)
# lambda.boot <- function(data, ind) {
#   n <- length(data[ind])
#   s <- sum(data[ind])
#   # score equation for the Exp(lambda) MLE: n/lambda - s = 0
#   f <- function(lambda) n/lambda - s
#   # uniroot finds the root lambda = n/s
#   root <- uniroot(f, c(0, 1))$root
#   1/root
# }
# boot.obj <- boot(data = air, statistic = lambda.boot, R = 3000)
# print(boot.obj)
# print(boot.ci(boot.obj,type = c("basic", "norm", "perc","bca")))
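#
# # Note: uniroot solves n/lambda = s, so the root is lambda = n/s and the
# # statistic 1/lambda is simply the sample mean; an equivalent, simpler run (sketch):
# mean.boot <- function(data, ind) mean(data[ind])
# print(boot(data = air, statistic = mean.boot, R = 3000))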
#
#
# #The answer to the third question:
# library(bootstrap)
# n <- nrow(scor)
# #get the eigenvalues
# eigenvalues <- eigen(cov(scor))$values
# #the estimate computed from the original observed sample
# theta.hat <- max(eigenvalues)/sum(eigenvalues)
# theta.jack <- numeric(n)
# for (i in 1:n)
#   theta.jack[i] <- max(eigen(cov(scor[-i,]))$values)/sum(eigen(cov(scor[-i,]))$values)
# bias <- (n - 1) * (mean(theta.jack) - theta.hat)
# print(bias) #jackknife estimate of bias
# se <- sqrt((n-1)* mean((theta.jack - mean(theta.jack))^2))
# print(se)
#
# #The answer to the fourth question:
# library(DAAG)
# attach(ironslag)
# # number of observations
# n <- length(chemical)
# # all pairs of indices to leave out, one pair per row
# comb <- t(combn(1:n, 2))
# N <- nrow(comb)   # choose(n, 2) pairs in total
# e1 <- e2 <- e3 <- e4 <- e5 <- e6 <- e7 <- e8 <- numeric(N)
# for (k in 1:N) {
#   k1 <- comb[k,1]
#   k2 <- comb[k,2]
#   # training set: drop the pair; k2 > k1, so the second index shifts down by one
#   x <- chemical[-k1][-(k2-1)]
#   y <- magnetic[-k1][-(k2-1)]
#   # fit the four candidate models on the training set
#   J1 <- lm(y ~ x)
#   # predict the held-out pair and record the errors
#   yhat1 <- J1$coef[1] + J1$coef[2] * chemical[k1]
#   yhat2 <- J1$coef[1] + J1$coef[2] * chemical[k2]
#   e1[k] <- magnetic[k1] - yhat1
#   e2[k] <- magnetic[k2] - yhat2
#   # quadratic model
#   J2 <- lm(y ~ x + I(x^2))
#   yhat3 <- J2$coef[1] + J2$coef[2] * chemical[k1] + J2$coef[3] * chemical[k1]^2
#   yhat4 <- J2$coef[1] + J2$coef[2] * chemical[k2] + J2$coef[3] * chemical[k2]^2
#   e3[k] <- magnetic[k1] - yhat3
#   e4[k] <- magnetic[k2] - yhat4
#   # exponential model
#   J3 <- lm(log(y) ~ x)
#   logyhat5 <- J3$coef[1] + J3$coef[2] * chemical[k1]
#   logyhat6 <- J3$coef[1] + J3$coef[2] * chemical[k2]
#   yhat5 <- exp(logyhat5)
#   yhat6 <- exp(logyhat6)
#   e5[k] <- magnetic[k1] - yhat5
#   e6[k] <- magnetic[k2] - yhat6
#   # log-log model
#   J4 <- lm(log(y) ~ log(x))
#   logyhat7 <- J4$coef[1] + J4$coef[2] * log(chemical[k1])
#   logyhat8 <- J4$coef[1] + J4$coef[2] * log(chemical[k2])
#   yhat7 <- exp(logyhat7)
#   yhat8 <- exp(logyhat8)
#   e7[k] <- magnetic[k1] - yhat7
#   e8[k] <- magnetic[k2] - yhat8
# }
#
# # get the prediction error by leave-two-out cross validation.
# c(mean(e1^2+e2^2), mean(e3^2+e4^2), mean(e5^2+e6^2), mean(e7^2+e8^2))
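#
# # detach the data set attached above so it does not mask later objects
# detach(ironslag)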
#
## ----eval=FALSE----------------------------------------------------------
# #The answer to the first question:
# set.seed(1)
# x <- c(158, 171 ,193 ,199 ,230 ,243 ,248 ,248 ,250 ,267 ,271 ,316 ,327 ,329)
# y <- c(141 ,148 ,169 ,181 ,203 ,213 ,229 ,244 ,257 ,260 ,271 ,309)
# # cvm.test computes the two-sample Cramer-von Mises statistic
# cvm.test <- function(x, y){
#   # empirical distribution functions of x and y
#   F <- ecdf(x)
#   G <- ecdf(y)
#   n <- length(x)
#   m <- length(y)
#   s <- numeric(n)
#   t <- numeric(m)
#   for (i in 1:n) {
#     s[i] <- (F(x[i]) - G(x[i]))^2
#   }
#   s <- sum(s)
#   for (j in 1:m) {
#     t[j] <- (F(y[j]) - G(y[j]))^2
#   }
#   t <- sum(t)
#   # the Cramer-von Mises two-sample statistic
#   return(m*n*(s + t)/(m + n)^2)
# }
# #number of replicates
# R <- 999
# # pooled sample
# z <- c(x, y)
# K <- 1:26   # indices of the pooled sample (n + m = 14 + 12)
# #storage for replicates
# reps <- numeric(R)
# t0 <- cvm.test(x, y)
# for (i in 1:R) {
#   # generate indices k for the first sample
#   k <- sample(K, size = 14, replace = FALSE)
#   x1 <- z[k]
#   y1 <- z[-k]   # complement of x1
#   reps[i] <- cvm.test(x1, y1)
# }
# p <- mean(c(t0, reps) >= t0)
# p
# hist(reps, main = "", freq = FALSE, xlab = "T (p = 0.421)",
# breaks = "scott")
# points(t0, 0, cex = 1, pch = 16)
#
#
# #The answer to the second question:
# library(RANN)
# library(boot)
# library(energy)
# library(Ball)
# m <- 100; k <- 3; p <- 2; mu <- 0.2; set.seed(12345)
# n1 <- n2 <- 20; R <- 50; n <- n1 + n2; N <- c(n1, n2)
# # NN statistic: proportion of k-nearest-neighbor pairs falling within the same sample
# Tn <- function(z, ix, sizes, k) {
#   n1 <- sizes[1]; n2 <- sizes[2]; n <- n1 + n2
#   if (is.vector(z)) z <- data.frame(z, 0)
#   z <- z[ix, ]
#   NN <- nn2(data = z, k = k+1)
#   block1 <- NN$nn.idx[1:n1, -1]
#   block2 <- NN$nn.idx[(n1+1):n, -1]
#   i1 <- sum(block1 < n1 + .5); i2 <- sum(block2 > n1 + .5)
#   (i1 + i2) / (k * n)
# }
# # NN permutation test via boot; returns the statistic and its permutation p-value
# eqdist.nn <- function(z, sizes, k){
#   boot.obj <- boot(data = z, statistic = Tn, R = R,
#                    sim = "permutation", sizes = sizes, k = k)
#   ts <- c(boot.obj$t0, boot.obj$t)
#   p.value <- mean(ts >= ts[1])
#   list(statistic = ts[1], p.value = p.value)
# }
#
# p.values1 <- matrix(NA,m,3)
# p.values2 <- matrix(NA,m,3)
# p.values3 <- matrix(NA,m,3)
# p.values4 <- matrix(NA,m,3)
# for(i in 1:m){
#   # x is standard normal; y is normal with mean 0 and sd 2 (unequal variances)
#   x <- matrix(rnorm(n1*p), ncol = p)
#   y <- cbind(rnorm(n2, sd = 2), rnorm(n2, sd = 2))
#   z <- rbind(x, y)
#   p.values1[i,1] <- eqdist.nn(z, N, k)$p.value
#   p.values1[i,2] <- eqdist.etest(z, sizes = N, R = R)$p.value
#   p.values1[i,3] <- bd.test(x = x, y = y, R = 999, seed = i*12345)$p.value
# }
# alpha <- 0.1
# pow1 <- colMeans(p.values1 < alpha)
# names(pow1) <- c('NN', 'energy', 'ball')
# # empirical powers of the three methods
# pow1
#
# # unequal expectations and unequal variances
# for(i in 1:m){
#   # x is standard normal; y is normal with mean 0.2 and sd 2
#   x <- matrix(rnorm(n1*p), ncol = p)
#   y <- cbind(rnorm(n2, mean = mu, sd = 2), rnorm(n2, mean = mu, sd = 2))
#   z <- rbind(x, y)
#   p.values2[i,1] <- eqdist.nn(z, N, k)$p.value
#   p.values2[i,2] <- eqdist.etest(z, sizes = N, R = R)$p.value
#   p.values2[i,3] <- bd.test(x = x, y = y, R = 999, seed = i*12345)$p.value
# }
# alpha <- 0.1
# pow2 <- colMeans(p.values2 < alpha)
# names(pow2) <- c('NN', 'energy', 'ball')
# # empirical powers of the three methods
# pow2
#
# # non-normal distributions
# for(i in 1:m){
#   # x is t with df = 1 (heavy tails); the two columns of y are normal with
#   # different means and sds (a bimodal-type alternative)
#   x <- matrix(rt(n1*p, df = 1), ncol = p)
#   y <- cbind(rnorm(n2), rnorm(n2, mean = mu, sd = 4))
#   z <- rbind(x, y)
#   p.values3[i,1] <- eqdist.nn(z, N, k)$p.value
#   p.values3[i,2] <- eqdist.etest(z, sizes = N, R = R)$p.value
#   p.values3[i,3] <- bd.test(x = x, y = y, R = 999, seed = i*12345)$p.value
# }
# alpha <- 0.1
# pow3 <- colMeans(p.values3 < alpha)
# names(pow3) <- c('NN', 'energy', 'ball')
# # empirical powers of the three methods
# pow3
#
# # unbalanced samples: 200 observations of x versus 20 of y
# n1 <- 200
# n2 <- 20
# n <- n1 + n2
# N <- c(n1, n2)
# for(i in 1:m){
#   x <- matrix(rnorm(n1*p), ncol = p)
#   y <- cbind(rnorm(n2), rnorm(n2))
#   z <- rbind(x, y)
#   p.values4[i,1] <- eqdist.nn(z, N, k)$p.value
#   p.values4[i,2] <- eqdist.etest(z, sizes = N, R = R)$p.value
#   p.values4[i,3] <- bd.test(x = x, y = y, R = 99, seed = i*12345)$p.value
# }
# alpha <- 0.18
# pow4 <- colMeans(p.values4 < alpha)
# names(pow4) <- c('NN', 'energy', 'ball')
# # here x and y share the same distribution, so these are empirical type-I error rates
# pow4
#
#
# #The answer to the third question:
# set.seed(1)
# # standard Cauchy density
# f <- function(x, x0 = 0, gamma = 1){
#   1/(pi*gamma*(1 + ((x - x0)/gamma)^2))
# }
# # chain length
# m <- 80000
# x <- numeric(m)
# # use a normal proposal distribution with mean xt and sd 1
# x[1] <- rnorm(1, mean = 0, sd = 1)
# k <- 0
# u <- runif(m)
# for (i in 2:m) {
#   xt <- x[i-1]
#   y <- rnorm(1, mean = xt, sd = 1)
#   num <- f(y) * dnorm(xt, mean = y, sd = 1)
#   den <- f(xt) * dnorm(y, mean = xt, sd = 1)
#   if (u[i] <= num/den) x[i] <- y else {
#     x[i] <- xt
#     k <- k + 1   # y is rejected
#   }
# }
# # discard the burn-in sample
# b <- 1001
# y <- x[b:m]
# a <- ppoints(200)
# # theoretical Cauchy quantiles
# Qcauchy <- qcauchy(a)
# # sample quantiles of the chain after burn-in
# Q <- quantile(y, a)
# qqplot(Qcauchy, Q,xlim=c(-2,2),ylim=c(-2,2),xlab="Cauchy Quantiles", ylab="Sample Quantiles",main = expression("Q-Q plot for Cauchy distribution"))
# hist(y, breaks=50, main="", xlab="", freq=FALSE)
# lines(Qcauchy, f(Qcauchy))
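#
# # k counted the rejected proposals, so a rough acceptance rate for the chain is
# 1 - k/(m - 1)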
#
# #The answer to the fourth question:
# size <- c(125,18,20,34)
# # prob computes the (unnormalized) target density given the counts
# prob <- function(y, size) {
#   if (y < 0 || y > 1)
#     return(0)
#   return((1/2 + y/4)^size[1] *
#          ((1-y)/4)^size[2] * ((1-y)/4)^size[3] * (y/4)^size[4])
# }
# # length of the chain
# m <- 5000
# # half-width of the uniform proposal increments
# w <- .25
# # burn-in length
# burn <- 1000
# # for the accept/reject step
# u <- runif(m)
# # proposal increments
# v <- runif(m, -w, w)
# x <- numeric(m)   # allocate the chain
# x[1] <- .25
# for (i in 2:m) {
#   y <- x[i-1] + v[i]
#   if (u[i] <= prob(y, size) / prob(x[i-1], size)) {
#     x[i] <- y
#   } else {
#     x[i] <- x[i-1]
#   }
# }
# xb <- x[(burn+1):m]
# print(mean(xb))
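#
# # Check (sketch): the mode of the target solves d/dy log prob(y) = 0, i.e.
# # 125/(2+y) - 38/(1-y) + 34/y = 0, equivalently 197*y^2 - 15*y - 68 = 0;
# # its admissible root is about 0.6268, and mean(xb) should be close to it.
# (15 + sqrt(15^2 + 4*197*68)) / (2*197)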
## ----eval=FALSE----------------------------------------------------------
# #The answer to the first question:
# set.seed(1)
# size <- c(125,18,20,34)
# # prob computes the (unnormalized) target density, as above
# prob <- function(y, size) {
#   if (y < 0 || y > 1)
#     return(0)
#   return((1/2 + y/4)^size[1] *
#          ((1-y)/4)^size[2] * ((1-y)/4)^size[3] * (y/4)^size[4])
# }
#
# Gelman.Rubin <- function(psi) {
#   # psi[i,j] is the statistic psi(X[i,1:j]) for the chain in the i-th row of X
#   psi <- as.matrix(psi)
#   n <- ncol(psi)
#   k <- nrow(psi)
#   psi.means <- rowMeans(psi)     # row means
#   B <- n * var(psi.means)        # between-chain variance estimate
#   psi.w <- apply(psi, 1, "var")  # within variances
#   W <- mean(psi.w)               # within estimate
#   v.hat <- W*(n-1)/n + (B/n)     # upper variance estimate
#   r.hat <- v.hat / W             # G-R statistic
#   return(r.hat)
# }
#
# multinomial.chain <- function(N, X1) {
#   # generates a Metropolis chain of length N for the multinomial target,
#   # with uniform(-w, w) proposal increments and starting value X1
#   # half-width of the uniform proposal
#   w <- 0.15
#   burn <- 1000   # unused here; burn-in is discarded by the caller
#   # for the accept/reject step
#   u <- runif(N)
#   # proposal increments
#   v <- runif(N, -w, w)
#   x <- rep(0, N)
#   x[1] <- X1
#   for (i in 2:N) {
#     y <- x[i-1] + v[i]
#     r1 <- prob(y, size)
#     r2 <- prob(x[i-1], size)
#     r <- r1/r2
#     if (u[i] <= r) {
#       x[i] <- y
#     } else {
#       x[i] <- x[i-1]
#     }
#   }
#   return(x)
# }
#
# k <- 4 #number of chains to generate
# n <- 10000 #length of chains
# b <- 2000 #burn-in length
# #choose overdispersed initial values
# x0 <- c(0.1,0.4,0.6,0.9)
# #generate the chains
# X <- matrix(0, nrow=k, ncol=n)
# for (i in 1:k)
#   X[i, ] <- multinomial.chain(n, x0[i])
# #compute diagnostic statistics
# psi <- t(apply(X, 1, cumsum))
# for (i in 1:nrow(psi))
#   psi[i,] <- psi[i,] / (1:ncol(psi))
# print(Gelman.Rubin(psi))
# # plot the sequences of running means psi for the four Metropolis-Hastings chains
# par(mfrow=c(2,2))
# for (i in 1:k)
#   plot(psi[i, (b+1):n], type="l",
#        xlab=i, ylab=bquote(psi))
# par(mfrow=c(1,1)) #restore default
# rhat <- rep(0, n)
# for (j in (b+1):n)
#   rhat[j] <- Gelman.Rubin(psi[,1:j])
# plot(rhat[(b+1):n], type="l", xlab="", ylab="R")
# abline(h=1.05, lty=2)
#
#
# #The answer to the second question:
# m <- c(4:25,100,500,1000)
# root <- numeric(length(m))
# f1 <- function(a,k) pt(sqrt(a^2*(k-1)/(k-a^2)),k-1)-pt(sqrt(a^2*(k)/(k+1-a^2)),k)
# for (i in 4:25) {
#   res <- uniroot(f1, c(1e-5, 2), k = i)
#   root[i-3] <- res$root
# }
# res1 <- uniroot(f1,c(1e-5,2),k=100)
# root[23] <- res1$root
# res2 <- uniroot(f1,c(1e-5,2),k=500)
# root[24] <- res2$root
# res3 <- uniroot(f1,c(1e-5,2),k=1000)
# root[25] <- res3$root
# data <- data.frame(k=m,a=root,row.names=1)
# data
## ----eval=FALSE----------------------------------------------------------
# #The answer to the first question:
# options(digits=6)
# # density of the Cauchy distribution with scale theta and location eta
# f <- function(x, theta, eta) {
#   1/theta/pi/(1 + ((x - eta)/theta)^2)
# }
# # x grid for evaluating the cdf when theta = 1 and eta = 0
# x <- seq(-5,5,0.1)
# values <- numeric(length(x))
# # x2 grid for evaluating the cdf when theta = 1 and eta = 10
# x2 <- seq(0,10,0.1)
# values2 <- numeric(length(x2))
# for (i in 1:length(x))
#   values[i] <- integrate(f, lower = -Inf, upper = x[i],
#                          rel.tol = .Machine$double.eps^0.25, theta = 1, eta = 0)$value
# #
# data <- data.frame(value=values,truevalues=pcauchy(x),errors=values-pcauchy(x))
# data[1:20,]
# # compare the two cdfs: one computed with integrate, the other with pcauchy
# plot(x, values, pch = 21, xlab = "x", ylab = "Cauchy cdf",
#      main = "Cauchy cdf (theta = 1, eta = 0): integrate vs pcauchy")
# lines(x, pcauchy(x), col = 'red', lwd = 2)
#
#
# # the case theta = 1 and eta = 10
# for (i in 1:length(x2))
#   values2[i] <- integrate(f, lower = -Inf, upper = x2[i],
#                           rel.tol = .Machine$double.eps^0.25, theta = 1, eta = 10)$value
# data2 <- data.frame(value = values2,
#                     truevalues = pcauchy(x2, location = 10, scale = 1),
#                     error = values2 - pcauchy(x2, location = 10, scale = 1))
# data2[1:20,]
# # compare the two cdfs: one computed with integrate, the other with pcauchy
# plot(x2, values2, pch = 21, xlab = "x", ylab = "Cauchy cdf",
#      main = "Cauchy cdf (theta = 1, eta = 10): integrate vs pcauchy")
# lines(x2, pcauchy(x2, location = 10, scale = 1), col = 'red', lwd = 2)
#
#
# #The answer to the second question:
# library(nloptr)
# # negative of the expected complete-data log-likelihood (to be minimized);
# # x1 holds the estimates (p, q) from the previous EM step
# f <- function(x, x1, nA = 28, nB = 24, nOO = 41, nAB = 70) {
#   r1 <- 1 - sum(x1)
#   nAA <- nA*x1[1]^2/(x1[1]^2 + 2*x1[1]*r1)
#   nBB <- nB*x1[2]^2/(x1[2]^2 + 2*x1[2]*r1)
#   r <- 1 - sum(x)
#   return(-2*nAA*log(x[1]) - 2*nBB*log(x[2]) - 2*nOO*log(r) -
#          (nA - nAA)*log(2*x[1]*r) - (nB - nBB)*log(2*x[2]*r) - nAB*log(2*x[1]*x[2]))
# }
#
# # inequality constraint: p + q must stay below 1
# g <- function(x, x1, nA = 28, nB = 24, nOO = 41, nAB = 70) {
#   return(sum(x) - 0.999999)
# }
#
# opts <- list("algorithm"="NLOPT_LN_COBYLA",
# "xtol_rel"=1.0e-8)
# mle<-NULL
# r<-matrix(0,1,2)
# r<-rbind(r,c(0.2,0.35))# the beginning value of p0 and q0
# j<-2
# while (sum(abs(r[j,]-r[j-1,]))>1e-8) {
# res <- nloptr( x0=c(0.3,0.25),
# eval_f=f,
# lb = c(0,0), ub = c(1,1),
# eval_g_ineq = g,
# opts = opts, x1=r[j,],nA=28,nB=24,nOO=41,nAB=70 )
# j<-j+1
# r<-rbind(r,res$solution)
# mle<-c(mle,f(x=r[j,],x1=r[j-1,]))
# }
# r #the result of EM algorithm
# mle #the max likelihood values
#
## ----eval=FALSE----------------------------------------------------------
# #The answer to the first question:
# formulas <- list(
# mpg ~ disp,
# mpg ~ I(1 / disp),
# mpg ~ disp + wt,
# mpg ~ I(1 / disp) + wt
# )
# lapply(formulas,lm,data=mtcars)
# out <- vector("list", length(formulas))
# for (i in seq_along(formulas)) {
#   out[[i]] <- lm(formulas[[i]], data = mtcars)
# }
# out
#
# #The answer to the second question:
# set.seed(1)
# bootstraps <- lapply(1:10, function(i) {
#   rows <- sample(1:nrow(mtcars), replace = TRUE)
#   mtcars[rows, ]
# })
# # keep only the columns used by the model
# getvalue <- function(x){
#   vars <- c('mpg', 'disp')
#   x[vars]
# }
# data <- lapply(bootstraps, getvalue)
# # lm() applied to the two-column data frame regresses mpg on disp
# lapply(data, lm)
# set.seed(1)
# out1 <- vector("list", length(bootstraps))
# for (i in seq_along(bootstraps)) {
# out1[[i]] <- getvalue(bootstraps[[i]])
# }
#
# out2 <- vector("list", length(out1))
# for (i in seq_along(out1)) {
# out2[[i]] <- lm(out1[[i]])
# }
# out2
#
#
# #The answer to the third question:
# formulas <- list(
# mpg ~ disp,
# mpg ~ I(1 / disp),
# mpg ~ disp + wt,
# mpg ~ I(1 / disp) + wt
# )
# funclist <- lapply(formulas,lm,data=mtcars)
# rsq <- function(mod) summary(mod)$r.squared
# lapply(funclist, rsq)
# getvalue <- function(x){
#   vars <- c('mpg', 'disp')
#   x[vars]
# }
# data <- lapply(bootstraps, getvalue)
# data1 <- lapply(data,lm)
# rsq <- function(mod) summary(mod)$r.squared
# unlist(lapply(data1, rsq))
#
#
# #The answer to the fourth question:
# trials <- replicate(
#   100,
#   t.test(rpois(10, 10), rpois(7, 10)),
#   simplify = FALSE
# )
# #use an anonymous function
# sapply(trials, function(mod) mod$p.value)
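#
# # The same extraction without an anonymous function, using [[ directly:
# sapply(trials, "[[", "p.value")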
#
#
# #The answer to the fifth question:
# a <- c(1,2,3,4,5)
# b <- c(6,7,8,9,10)
# c <- c(2,4,6,8,10)
# d <- data.frame(a,b,c)
# # lapply1 computes the element-wise sum of the input vectors
# lapply1 <- function(x, ...){
#   args <- list(...)
#   for (a in args) x <- x + a
#   x
# }
# lapply1(a,b,c)
# lapply1(d[1],d[2],d[3])
# # lapply2 computes the element-wise product of the input vectors
# lapply2 <- function(x, ...){
#   args <- list(...)
#   for (a in args) x <- x * a
#   x
# }
# lapply2(a,b,c)
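#
# # Equivalent one-liners (sketch) with Reduce, which folds a binary operator
# # over the list of vectors:
# Reduce(`+`, list(a, b, c))   # same as lapply1(a, b, c)
# Reduce(`*`, list(a, b, c))   # same as lapply2(a, b, c)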
## ----eval=FALSE----------------------------------------------------------
# #The answer to the first question:
# # chisq.test1 is a trimmed copy of stats::chisq.test handling only the
# # contingency-table (matrix) case; note the simulate.p.value branch still
# # refers to stats internals (setMETH, C_chisq_sim, almost.1), so it only
# # works where those are accessible
# chisq.test1 <- function (x, y = NULL, correct = TRUE,
#                          p = rep(1/length(x), length(x)),
#                          rescale.p = FALSE, simulate.p.value = FALSE, B = 2000)
# {
#   DNAME <- deparse(substitute(x))
#   # total count
#   n <- sum(x)
#   if (is.matrix(x)) {
# METHOD <- "changed Pearson's Chi-squared test"
# nr <- as.integer(nrow(x))
# nc <- as.integer(ncol(x))
# if (is.na(nr) || is.na(nc) || is.na(nr * nc))
# stop("invalid nrow(x) or ncol(x)", domain = NA)
# #get the rowsums of x
# sr <- rowSums(x)
# #get the colsums of x
# sc <- colSums(x)
# #The sum of rows and columns that's sr,sc divided by n
# E <- outer(sr, sc, "*")/n
# v <- function(r, c, n) c * r * (n - r) * (n - c)/n^3
# V <- outer(sr, sc, v, n)
# dimnames(E) <- dimnames(x)
# #the situation of the sum of rows and columns are all not equal 0
# if (simulate.p.value && all(sr > 0) && all(sc > 0)) {
# setMETH()
# tmp <- .Call(C_chisq_sim, sr, sc, B, E)
# STATISTIC <- sum(sort((x - E)^2/E, decreasing = TRUE))
# PARAMETER <- NA
# PVAL <- (1 + sum(tmp >= almost.1 * STATISTIC))/(B +
# 1)
# }
# else {
# if (simulate.p.value)
# warning("cannot compute simulated p-value with zero marginals")
# if (correct && nrow(x) == 2L && ncol(x) == 2L) {
# YATES <- min(0.5, abs(x - E))
# if (YATES > 0)
# METHOD <- paste(METHOD, "with Yates' continuity correction")
# }
# else YATES <- 0
# STATISTIC <- sum((abs(x - E) - YATES)^2/E)
# PARAMETER <- (nr - 1L) * (nc - 1L)
# PVAL <- pchisq(STATISTIC, PARAMETER, lower.tail = FALSE)
# }
# }
# names(STATISTIC) <- "X-squared"
# names(PARAMETER) <- "df"
# if (any(E < 5) && is.finite(PARAMETER))
# warning("Chi-squared approximation may be incorrect")
# #output the result
# structure(list(statistic = STATISTIC, parameter = PARAMETER,
# p.value = PVAL, method = METHOD, data.name = DNAME, observed = x,
# expected = E, residuals = (x - E)/sqrt(E), stdres = (x -
# E)/sqrt(V)), class = "htest")
# }
# library(vcd)
# Treatment <- Arthritis$Treatment
# Improved <- Arthritis$Improved
# mytable <- table(Treatment,Improved)
# # the modified chisq.test1 gives the same output as the original function
# chisq.test(mytable)
# chisq.test1(mytable)
#
# # compare the speed of the two implementations
# set.seed(1)
# x <- sample(1e5, 1e6, replace = TRUE)
# y <- sample(1e5, 1e6, replace = TRUE)
# system.time(chisq.test(rbind(x,y)))
# system.time(chisq.test1(rbind(x,y)))
#
#
# #The answer to the second question:
# # table2 is a simplified version of base::table (no exclude/useNA handling),
# # kept close to the original for a like-for-like speed comparison
# table2 <- function (... , dnn = list.names(...), deparse.level = 1)
# {
# list.names <- function(...) {
# l <- as.list(substitute(list(...)))[-1L]
# nm <- names(l)
# fixup <- if (is.null(nm))
# seq_along(l)
# else nm == ""
# dep <- vapply(l[fixup], function(x) switch(deparse.level +
# 1, "", if (is.symbol(x)) as.character(x) else "",
# deparse(x, nlines = 1)[1L]), "")
# if (is.null(nm))
# dep
# else {
# nm[fixup] <- dep
# nm
# }
# }
#
# args <- list(...)
# if (length(args) == 1L && is.list(args[[1L]])) {
# args <- args[[1L]]
# if (length(dnn) != length(args))
# dnn <- if (!is.null(argn <- names(args)))
# argn
# else paste(dnn[1L], seq_along(args), sep = ".")
# }
# bin <- 0L
# lens <- NULL
# dims <- integer()
# pd <- 1L
# dn <- NULL
# for (a in args) {
# if (is.null(lens))
# lens <- length(a)
# else if (length(a) != lens)
# stop("all arguments must have the same length")
# fact.a <- is.factor(a)
# if (!fact.a) {
# a0 <- a
# a <- factor(a)
# }
# ll <- levels(a)
# a <- as.integer(a)
# nl <- length(ll)
# dims <- c(dims, nl)
# if (prod(dims) > .Machine$integer.max)
# stop("attempt to make a table with >= 2^31 elements")
# dn <- c(dn, list(ll))
# bin <- bin + pd * (a - 1L)
# pd <- pd * nl
# }
# names(dn) <- dnn
# bin <- bin[!is.na(bin)]
# if (length(bin))
# bin <- bin + 1L
# y <- array(tabulate(bin, pd), dims, dimnames = dn)
# class(y) <- "table"
# y
# }
# set.seed(1)
# z <- sample(1:50,1e7,replace = TRUE)
# system.time(table(z))
# system.time(table2(z))
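#
# # Correctness check (sketch): the counts should agree with base table();
# # attributes may differ slightly, so compare the counts themselves.
# all.equal(as.vector(table(z)), as.vector(table2(z)))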