## ----eval=FALSE----------------------------------------------------------
# function(m,sigma){
# x<-numeric(m)
# u<-runif(m/2)#use inverse transformation method to generate
# v<-1-u #antithetic variables#
# u<-c(u,v)
# x<-sqrt(-2*(sigma^2)*log(1-u))#integrate the Rayleigh density function to get the cdf#
# return(x)
# }
## ----eval=FALSE----------------------------------------------------------
# function(n,a,b){
# j<-k<-0
# y<-numeric(n)
# while(k<n){
# u<-runif(1)
# j<-j+1
# x<-runif(1)
# if(x^(a-1)*(1-x)^(b-1)>u){
# k<-k+1
# y[k]<-x
# }
# }
# return(y)
# }
## ----eval=FALSE----------------------------------------------------------
# function(x, y) {
# x_val <- unique(x)
# y_val <- unique(y)
# mat <- matrix(0L, length(x_val), length(y_val))
# for (i in seq_along(x)) {
# mat[which(x_val == x[[i]]), which(y_val == y[[i]])] <-
# mat[which(x_val == x[[i]]), which(y_val == y[[i]])] + 1L
# }
# dimnames <- list(x_val, y_val)
# names(dimnames) <- as.character(as.list(match.call())[-1]) # R has names for dimnames... :/
# tab <- array(mat, dim = dim(mat), dimnames = dimnames)
# class(tab) <- "table"
# tab
# }
## ------------------------------------------------------------------------
# Vector arithmetic and recycling demonstrations.
x <- 1:4
y <- rep(1, 4)
z <- x + y          # elementwise addition, equal lengths
z
x <- 1:4
y <- 1:2
z <- x + y          # y is recycled to length 4: (1, 2, 1, 2)
z
x <- 1:4
a <- 10
z <- a * x          # scalar times vector
z
## ------------------------------------------------------------------------
# Matrix binding and multiplication demo.
# `nrow=`/`ncol=` spelled out: the original relied on partial argument
# matching (`nr=`, `nc=`), which is fragile and flagged by lintr.
m1 <- matrix(1, nrow = 2, ncol = 2)
m2 <- matrix(2, nrow = 2, ncol = 2)
rbind(m1, m2)                    # 4 x 2
cbind(m1, m2)                    # 2 x 4
rbind(m1, m2) %*% cbind(m1, m2)  # 4 x 4 product
cbind(m1, m2) %*% rbind(m1, m2)  # 2 x 2 product, every entry 10
## ------------------------------------------------------------------------
# layout() demos: split the plotting device into panels with unequal
# widths and heights, then preview the panel numbering.
m <- matrix(1:4, 2, 2)
layout(m, widths = c(1, 3), heights = c(3, 1))
layout.show(4)  # preview all four panels
m <- matrix(c(1, 1, 2, 1), 2, 2)
layout(m, widths = c(2, 1), heights = c(1, 2))
layout.show(2)  # preview the two panels
## ------------------------------------------------------------------------
# Inverse transform method for a discrete distribution on {0,...,4},
# compared with the built-in sample() function.
x <- c(0, 1, 2, 3, 4)
p <- c(0.1, 0.2, 0.2, 0.2, 0.3)
cp <- cumsum(p)
m <- 1e3
# findInterval locates each uniform draw within the cumulative probabilities
r <- x[findInterval(runif(m), cp) + 1]
ct1 <- as.vector(table(r))
table(r)            # frequency table of the sample
ct1 / sum(ct1) / p  # relative frequency divided by target probability
# R's sample() function for comparison
y <- sample(0:4, size = 1000, replace = TRUE, prob = c(0.1, 0.2, 0.2, 0.2, 0.3))
ct2 <- as.vector(table(y))
table(y)            # frequency table of the sample
ct2 / sum(ct2) / p  # relative frequency divided by target probability
## ------------------------------------------------------------------------
# 1. Acceptance-rejection sampler for Beta(a, b) with a U(0, 1) proposal.
g <- function(n, a, b) {
  accepted <- 0
  tries <- 0                 # total proposals drawn (kept for inspection)
  y <- numeric(n)
  while (accepted < n) {
    u <- runif(1)            # acceptance threshold
    tries <- tries + 1
    x <- runif(1)            # proposal from U(0, 1)
    # accept when the (unnormalized) Beta kernel dominates u
    if (x^(a - 1) * (1 - x)^(b - 1) > u) {
      accepted <- accepted + 1
      y[accepted] <- x
    }
  }
  return(y)
}
# 2. Generate a random sample of size 1000 from Beta(3, 2).
x <- g(1000, 3, 2)
head(x, n = 50L)  # show the first 50 values as an example
# 3. Histogram with the theoretical Beta(3, 2) density superimposed.
hist(x, prob = TRUE, xlab = "x", breaks = 20, ylab = "Density", col = "red",
     main = expression(f(x) == 12 * x^2 * (1 - x)))
z <- seq(0, 1, 0.01)
lines(z, 12 * z^2 * (1 - z), col = "blue")
## ------------------------------------------------------------------------
# Exponential-Gamma mixture: lambda ~ Gamma(r, beta), then X | lambda ~ Exp(lambda).
mix <- function(n, r, beta) {
  lambda <- rgamma(n, r, beta)  # one mixing rate per observation
  x <- rexp(n, lambda)          # one exponential draw per rate
  return(x)
}
x <- mix(1000, 4, 2)
head(x, n = 50L)  # show the first 50 values as an example
hist(x, prob = TRUE, breaks = 60, col = "red",
     main = "Exponential-Gamma Mixture")
## ------------------------------------------------------------------------
# Monte Carlo estimate of the Beta(3, 3) cdf at x via the identity
# F(x) = x * E[f(T)] with T ~ U(0, x), where f(t) = 30 t^2 (1-t)^2
# is the Beta(3, 3) density.
f <- function(x) {
  m <- 1e4
  t <- runif(m, min = 0, max = x)  # t uniform on (0, x)
  # x * mean(density at t) estimates the integral of the density over (0, x)
  mean(30 * x * (t^2) * ((1 - t)^2))
}
# Compare the Monte Carlo estimates with pbeta at x = 0.1, ..., 0.9.
est <- matrix(0, 2, 9)
for (i in 1:9) {
  est[1, i] <- pbeta(i/10, 3, 3)
  est[2, i] <- f(i/10)
}
print(est)
## ------------------------------------------------------------------------
# Rayleigh(sigma) sampler using antithetic variables (exercise 5.9).
# Inverse transform: F(x) = 1 - exp(-x^2 / (2 sigma^2)), so
# x = sigma * sqrt(-2 log(1 - u)).
# NOTE(review): the function is displayed but never assigned to a name,
# and an odd m yields 2 * floor(m/2) values -- confirm intended.
function(m, sigma) {
  half <- runif(m / 2)     # first half of the uniforms
  u <- c(half, 1 - half)   # antithetic pairs: u and 1 - u
  x <- sqrt(-2 * (sigma^2) * log(1 - u))
  return(x)
}
## ------------------------------------------------------------------------
# Importance-sampling estimates of theta = E[X^2 * 1{X > 1}], X ~ N(0,1)
# (true value phi(1) + 1 - Phi(1) ~= 0.4006), with two importance densities,
# comparing the standard deviations of the weighted draws.
m <- 10000
theta.hat <- se <- numeric(2)
g <- function(x) { x^2 / sqrt(2*pi) * exp(-x^2/2) * (x > 1) }  # integrand g(x)
# Importance function 1: standard normal pdf.
x <- rnorm(m)
fg <- g(x) / ((1/sqrt(2*pi)) * exp(-x^2/2))
theta.hat[1] <- mean(fg)
se[1] <- sd(fg)
# Importance function 2: Rayleigh(sigma) via inverse transform (exercise 5.9).
fg2 <- function(m, sigma) {
  u <- runif(m)
  sqrt(-2 * (sigma^2) * log(1 - u))
}
x <- fg2(m, 1)
# Rayleigh(1) pdf is x * exp(-x^2 / 2).
fg22 <- g(x) / (x * exp(-x^2/2))
# BUG FIX: the original wrote `theta.hat<-mean(fg22)`, overwriting the whole
# vector (losing estimate 1); store in the second slot instead.
theta.hat[2] <- mean(fg22)
se[2] <- sd(fg22)
se
## ------------------------------------------------------------------------
# Plain importance-sampling estimate of E[X^2 * 1{X > 1}], X ~ N(0,1),
# using the standard normal pdf itself as the importance function.
m <- 10000
g <- function(x) { (x^2) / sqrt(2*pi) * exp(-x^2/2) * (x > 1) }
x <- rnorm(m)
# Weight g(x) / phi(x); since draws come from phi the ratio is x^2 * 1{x > 1}.
fg <- g(x) / ((1/sqrt(2*pi)) * exp(-(x^2)/2))
theta.hat <- mean(fg)
se <- sd(fg)
theta.hat  # estimate
## ------------------------------------------------------------------------
# Monte Carlo sampling distribution of the Gini coefficient for samples of
# size n from the standard lognormal distribution.
m <- 10000  # number of Monte Carlo replicates
n <- 100    # sample size per replicate
gini <- numeric(m)
# Gini for a sorted sample: sum_j (2j - n - 1) x_(j) / (n^2 * xbar).
# The weights are loop-invariant, so hoist them out of the loop.
w <- 2 * seq_len(n) - n - 1
for (i in 1:m) {
  x <- rlnorm(n, meanlog = 0, sdlog = 1)  # standard lognormal sample
  xs <- sort(x)
  # vectorized replacement of the original O(n) inner accumulation loop
  gini[i] <- sum(w * xs) / (n * n * mean(x))
}
gini.mean <- mean(gini)
print(gini.mean)    # mean
gini.median <- median(gini)
print(gini.median)  # median
gini.deciles <- quantile(gini, probs = seq(0.1, 0.9, by = 0.1))
print(gini.deciles) # deciles
hist(gini, freq = FALSE, col = "red")
## ------------------------------------------------------------------------
# Monte Carlo sampling distribution of the Gini coefficient for samples of
# size n from the U(0, 1) distribution (theoretical Gini = 1/3).
m <- 10000  # number of Monte Carlo replicates
n <- 100    # sample size per replicate
gini <- numeric(m)
# Loop-invariant Gini weights for a sorted sample, hoisted out of the loop.
w <- 2 * seq_len(n) - n - 1
for (i in 1:m) {
  x <- runif(n)  # uniform sample
  xs <- sort(x)
  # vectorized replacement of the original O(n) inner accumulation loop
  gini[i] <- sum(w * xs) / (n * n * mean(x))
}
gini.mean <- mean(gini)
print(gini.mean)    # mean
gini.median <- median(gini)
print(gini.median)  # median
gini.deciles <- quantile(gini, probs = seq(0.1, 0.9, by = 0.1))
print(gini.deciles) # deciles
hist(gini, freq = FALSE, col = "blue")
## ------------------------------------------------------------------------
set.seed(2)
m<-500
n<-100
gini<-numeric(m)
for(i in 1:m){
x<-rlnorm(n,meanlog=0,sdlog=1)
xbar<-mean(x)
xs<-sort(x)
t<-0
for(j in 1:n){
t<-t+(2*j-n-1)*xs[j]/(n*n*xbar)
}
gini[i]<-t
}
mu<-mean(gini)
sigma<-sqrt(sd(gini)/m)
cv<-qnorm(.975,0,1) #using Central limit theotem to construct the 95% confidence interval ,standard normal distribution #
UCL1<-mu-cv*sigma
UCL2<-mu+cv*sigma
UCL1 #down confidence interval
UCL2 #up confidence interval #
l<-2000 #sample from the generation to compute coverge rate#
munew<-numeric(l)
for(k in 1:l){
m<-100
n<-50
gini<-numeric(m)
for(i in 1:m){
x<-rlnorm(n,meanlog=0,sdlog=1)
xbar<-mean(x)
xs<-sort(x)
t<-0
for(j in 1:n){
t<-t+(2*j-n-1)*xs[j]/(n*n*xbar)
}
gini[i]<-t
}
munew[k]<-mean(gini)
}
sum((munew<UCL2)&(munew>UCL1))/l #coverage rate#
## ----warning=FALSE,message=FALSE-----------------------------------------
attach(mtcars)
formulas<-list(
mpg~disp,
mpg~I(1/disp),
mpg~disp+wt,
mpg~I(1/disp)+wt
)
#use lapply function
out3_1<-lapply(formulas,lm)
out3_1
#use for loops
out3_2<-vector("list",length(formulas))
for(i in seq_along(formulas)){
out3_2[[i]]<-lm(formulas[[i]],data=mtcars)
}
out3_2
detach(mtcars)
## ----warning=FALSE,message=FALSE-----------------------------------------
attach(mtcars)
bootstraps<-lapply(1:10,function(i){
rows<-sample(1:nrow(mtcars),rep=TRUE)
mtcars[rows,]
})
#use lapply function
out4_1<-lapply(seq_along(bootstraps),function(i){lm(mpg~disp,data = bootstraps[[i]])})
out4_1
#use for loops
out4_2<-vector("list",length(bootstraps))
for(i in seq_along(bootstraps)){
out4_2[[i]]<-lm(mpg~disp,data<-bootstraps[[i]])
}
out4_2
detach(mtcars)
## ------------------------------------------------------------------------
# Extract R^2 from a fitted lm object.
rsq <- function(mod) summary(mod)[["r.squared"]]
# exercise 3: R^2 of the four formula fits (lapply vs for-loop versions)
req3_1 <- lapply(out3_1, rsq)
req3_2 <- lapply(out3_2, rsq)
print(data.frame(unlist(req3_1), unlist(req3_2)))
# exercise 4: R^2 of the ten bootstrap fits
req4_1 <- lapply(out4_1, rsq)
req4_2 <- lapply(out4_2, rsq)
print(data.frame(unlist(req4_1), unlist(req4_2)))
## ------------------------------------------------------------------------
trials <- replicate(100,
t.test(rpois(10, 10), rpois(7, 10)),
simplify = FALSE)
sapply(trials,function(mod){mod[["p.value"]]},simplify=TRUE)
## ------------------------------------------------------------------------
# Example: divide each column of a data frame by its column mean.
# Solution 1: Map() the division over columns and their vapply()-computed means.
data <- matrix(rnorm(20, 0, 10), nrow = 4)
x <- as.data.frame(data)
answer1 <- Map("/", x, vapply(x, mean, c(1)))
# Solution 2: lapply() with an anonymous function.
answer2 <- lapply(x, function(col) col / (mean(col)))
print(data.frame(unlist(answer1), unlist(answer2)))
## ------------------------------------------------------------------------
library("nortest")
set.seed(1)
attach(chickwts)
x<-sort(as.vector(weight[feed=="soybean"]))
y<-sort(as.vector(weight[feed=="linseed"]))
detach(chickwts)
z<-c(x,y)#pooled sample#
R<-999#replicate number#
K<-1:length(z)
D<-numeric(R)
CM<-function(x,y){#compute statistic of Cramer-von Mises
ecdfx<-ecdf(x)
ecdfy<-ecdf(y)
l_x<-length(x)
l_y<-length(y)
sum1<-sum((ecdfx(x)-ecdfy(x))^2)
sum2<-sum((ecdfx(y)-ecdfy(y))^2)
w<-l_x*l_y/(l_x+l_y)^2*(sum1+sum2)
return(w)
}
D0<-CM(x,y)
for(i in 1:R){
k<-sample(K,size=length(x),replace=F)
x1<-z[k]
y1<-z[-k]
D[i]<-CM(x1,y1)
}
p<-mean(c(D0,D)>=D0)
print(p)
## ------------------------------------------------------------------------
# Empirical power comparison of three multivariate two-sample tests:
#   (1) a nearest-neighbor (NN) statistic, permuted via boot(),
#   (2) the energy distance test (energy::eqdist.etest),
#   (3) Ball Divergence (Ball::bd.test).
# Scenario 1: equal means, unequal variances (sd 1 vs sd 1.4).
library(RANN)
library(boot)
library(energy)
library(Ball)
library(ggplot2)
m<-30#number of simulation replicates
k<-3
p<-2
n1<-n2<-50
R<-999#permutation/bootstrap replication number
n<-n1+n2
N=c(n1,n2)
# NN statistic: proportion of each point's k nearest neighbours that come
# from the same sample; ix is the permutation supplied by boot().
Tn<-function(z,ix,sizes,k){
n1<-sizes[1]
n2<-sizes[2]
n<-n1+n2
if(is.vector(z)) z<-data.frame(z,0)
z<-z[ix,]
NN<-nn2(data=z,k=k+1)  # k+1 because each point's nearest neighbour is itself
block1<-NN$nn.idx[1:n1,-1]
block2<-NN$nn.idx[(n1+1):n,-1]
i1<-sum(block1<n1+.5);i2<-sum(block2>n1+.5)  # same-sample neighbour counts
(i1+i2)/(k*n)
}
# Permutation test based on Tn; the p-value includes the observed statistic.
eqdist.nn<-function(z,sizes,k){
boot.obj<-boot(data=z,statistic = Tn,R=R,sim="permutation",sizes=sizes,k=k)
ts<-c(boot.obj$t0,boot.obj$t)
p.value<-mean(ts>=ts[1])
list(statistic=ts[1],p.value=p.value)
}#NN
p.values<-matrix(NA,m,3)
for(i in 1:m){
x<-matrix(rnorm(n1*p,sd=1),ncol=p)#unequal variances
y<-matrix(rnorm(n2*p,sd=1.4),ncol=p)
z<-rbind(x,y)
p.values[i,1]<-eqdist.nn(z,N,k)$p.value#NN
p.values[i,2]<-eqdist.etest(z,size=N,R=R)$p.value#energy test from the energy package
p.values[i,3]<-bd.test(x=x,y=y,R=999,seed=i)$p.value#Ball Divergence from the Ball package
}
alpha<-0.1#significance level
pow<-apply(p.values<alpha,2,mean)#empirical power: fraction of p-values below alpha, per test
print(pow)
## ------------------------------------------------------------------------
# Power comparison, scenario 2: unequal means (0.4 vs 0) AND unequal
# variances (sd 1 vs 1.4).  Reuses m, p, n1, n2, N, k, R, eqdist.nn and
# p.values defined in the previous chunk.
for(i in 1:m){
x<-matrix(rnorm(n1*p,mean=0.4,sd=1),ncol=p)#unequal variances and unequal expectations
y<-matrix(rnorm(n2*p,mean=0,sd=1.4),ncol=p)
z<-rbind(x,y)
p.values[i,1]<-eqdist.nn(z,N,k)$p.value
p.values[i,2]<-eqdist.etest(z,size=N,R=R)$p.value
p.values[i,3]<-bd.test(x=x,y=y,R=999,seed=i)$p.value
}
alpha<-0.1
pow<-apply(p.values<alpha,2,mean)  # empirical power at level alpha
print(pow)
## ------------------------------------------------------------------------
# Power comparison, scenario 3: heavy tails vs a normal scale mixture.
for(i in 1:m){
x<-matrix(rt(n1*p,df=1),ncol=p)#t(1), i.e. Cauchy-like heavy tails
y<-matrix(rnorm(n2*p,sd=sample(c(1,1.3),size=n2*p,prob=c(0.5,0.5),replace=T)),ncol=p)#bimodal scale mixture of normals
z<-rbind(x,y)
p.values[i,1]<-eqdist.nn(z,N,k)$p.value
p.values[i,2]<-eqdist.etest(z,size=N,R=R)$p.value
p.values[i,3]<-bd.test(x=x,y=y,R=999,seed=i)$p.value
}
alpha<-0.01  # NOTE(review): 0.01 here vs 0.1 in the other scenarios -- confirm intended
pow<-apply(p.values<alpha,2,mean)
print(pow)
## ------------------------------------------------------------------------
n1<-50
n2<-5
n<-n1+n2
N=c(n1,n2)
for(i in 1:m){
x<-matrix(rnorm(n1*p,mean=1),ncol=p)#100 samples
y<-matrix(rnorm(n2*p,mean=2),ncol=p)#10 samples
z<-rbind(x,y)
p.values[i,1]<-eqdist.nn(z,N,k)$p.value
p.values[i,2]<-eqdist.etest(z,size=N,R=R)$p.value
p.values[i,3]<-bd.test(x=x,y=y,R=999,seed=i)$p.value
}
alpha<-0.1
pow<-apply(p.values<alpha,2,mean)
print(pow)
## ------------------------------------------------------------------------
library(tidyverse)
set.seed(1)
m<-20000#samples number
x<-numeric(m)#to store the samples
x[1]<-runif(1)#the first sample
u<-runif(m)
standard_Cauchy<-function(x){
return(1/(pi*(1+x^2)))
}
for(i in 1:m){
proposal<-x[i]+runif(1,min = -1,max = 1)
accept<-runif(1)<standard_Cauchy(proposal)/standard_Cauchy(x[i])
x[i+1]<-ifelse(accept==T,proposal,x[i])
}
x<-x[1001:m]
index<-1001:m
quantile(x,probs = seq(0.1,0.9,0.1))
qcauchy(seq(0.1,0.9,0.1),loc=0,scale=1)
## ------------------------------------------------------------------------
set.seed(1)
m<-5000#lengths of the chain
w<-0.25#width of the uniform support set
u<-runif(m)#for accept/reject step
v<-runif(m,-w,w)#proposal distribution
group<-c(125,18,20,34)
x<-numeric(m)#the chain
prob<-function(theta,group){
if(theta<0||theta>=0.8)
return(0)
return((1/2+theta/4)^group[1]*((1-theta)/4)^group[2]*((1-theta)/4)^group[3]*(theta/4)^group[4])
}
x[1]<-0.4#initialize form 0.4
for(i in 2:m){
theta<-x[i-1]+v[i]
if(u[i]<=prob(theta,group)/prob(x[i-1],group))
x[i]<-theta
else
x[i]<-x[i-1]
}
index<-1001:m#discard the first thousand sample
theta_hat<-mean(x[index])
print(theta_hat)
## ------------------------------------------------------------------------
set.seed(12345)
group<-c(125,18,20,34)
k<-4#number if chains to generate
N<-5000#length of the chain
b<-500#burn-in length
Gelman.Robin<-function(psi){
psi<-as.matrix(psi)
n<-ncol(psi)
k<-nrow(psi)
psi.means<-rowMeans(psi)#row means
B<-n*var(psi.means)#between variance est.
psi.w<-apply(psi,1,"var")#within variances
W<-mean(psi.w)#within est.
v.hat<-W*(n-1)/n+(B/n)#uppervariance est.
r.hat<-v.hat/W#G_R statistic
return(r.hat)
}
prob<-function(theta,group){
if(theta<0||theta>=0.9)
return(0)
return((1/2+theta/4)^group[1]*((1-theta)/4)^group[2]*((1-theta)/4)^group[3]*(theta/4)^group[4])
}
Chain<-function(group,N,X1){
x<-numeric(N)
x[1]<-X1#initialize x[1]
w<-0.25
u<-runif(N)
v<-runif(N,-w,w)
for(i in 2:N){
theta<-x[i-1]+v[i]
if(u[i]<=prob(theta,group)/prob(x[i-1],group))
x[i]<-theta
else
x[i]<-x[i-1]
}
return(x)
}
#generate the chains
x0<-c(0.2,0.4,0.6,0.8)
X<-matrix(0,nrow=k,ncol=N)
for(i in 1:k) X[i,]<-Chain(group,N,x0[i])
#compute diagnostic statistics
psi<-t(apply(X,1,cumsum))
for(i in 1:nrow(psi)) psi[i,]<-psi[i,]/(1:ncol(psi))
print(Gelman.Robin(psi))
#plot pso for the four chains
par(mfrow=c(2,2))
par(mfrow=c(1,1))#restore default
#plot the sequence of R-hat statistics
rhat<-rep(0,N)
## ------------------------------------------------------------------------
set.seed(12345)
eps<-.Machine$double.eps^0.25#criterion to judge whether function is near to 0
k<-c(4:25,100,500,1000)#k mentioned in the question
S<-function(k,a){
return((1-pt(sqrt((a^2*k)/(k+1-a^2)),df=k))-(1-pt(sqrt((a^2*(k-1))/(k-a^2)),df=k-1)))
}#S_k(a)-S_{k-1}(a)
Root<-function(k1){
a<-seq(0.1,sqrt(k1)-0.1,length=3)
y<-c(S(k1,a[1]),S(k1,a[2]),S(k1,a[3]))
while(abs(y[2])>eps){
if(y[1]*y[2]<0){
a[3]<-a[2]
y[3]<-y[2]
}
else{
a[1]<-a[2]
y[1]<-y[2]
}
a[2]<-(a[1]+a[3])/2
y[2]<-S(k1,a[2])
}
result<-list(k1,a[2],y[2])
return(result)
}
for(i in k){#print the out put of each k
cat('k:',Root(i)[[1]],'root:',Root(i)[[2]],'value of function:',Root(i)[[3]],'\n')
}
## ------------------------------------------------------------------------
theta1<-1;ita1<-0#standard cauchy ditribution
f<-function(x,theta,ita){
(theta*pi*(1+((x-ita)/theta)^2))^(-1)
}
k<-seq(-30,30,2)
F1<-rep(0,length(k))
for(i in 1:length(k)){
up<-k[i]
F1[i]<-integrate(f,lower=-Inf,upper=up,rel.tol = .Machine$double.eps^0.25,theta<-theta1,ita<-ita1)$value
}#integrate function
F2<-pcauchy(k)
F1
F2
## ------------------------------------------------------------------------
N=1000
na=28
nb=24
noo=41
nab=70
p=0.6 #initial est. for p
q=0.3
r=0.1
pm=numeric(0)
qm=numeric(0)
rm=numeric(0)
lofm=numeric(0)
lof=function(p,q,r){ #log maximum likelihood values
return(log(choose(n,naa)*choose(n-naa,nbb)*choose(n-naa-nbb,noo)*choose(nao+nbo+nab,nao)*choose(nbo+nab,nbo))+(nao+nbo+nab)*log(2)+(2*naa+nao+nab)*log(p)+(2*nbb+nbo+nab)*log(q)+(2*noo+nao+nbo)*log(r))
}
for (j in 1:N) {
naa=round(na*p^2/(p^2+2*p*r))
nbb=round(nb*q^2/(q^2+2*q*r))
nao=na-naa
nbo=nb-nbb
n=naa+nbb+noo+nao+nbo+nab
if(abs(p-(2*naa+nao+nab)/2/n)<0.000000001&&abs(q-(2*nbb+nbo+nab)/2/n)<0.000000001&&abs(r-(2*noo+nbo+nao)/2/n)<0.000000001&&j>5) break
p=(2*naa+nao+nab)/2/n #update estimation
q=(2*nbb+nbo+nab)/2/n
r=(2*noo+nbo+nao)/2/n
pm=c(pm,p)
qm=c(qm,q)
rm=c(rm,r)
lofm=c(lofm,lof(p,q,r))
}
c(p,q,r)
exp(lofm)
## ----warning=FALSE,message=FALSE-----------------------------------------
attach(mtcars)
formulas<-list(
mpg~disp,
mpg~I(1/disp),
mpg~disp+wt,
mpg~I(1/disp)+wt
)
#use lapply function
out3_1<-lapply(formulas,lm)
out3_1
#use for loops
out3_2<-vector("list",length(formulas))
for(i in seq_along(formulas)){
out3_2[[i]]<-lm(formulas[[i]],data=mtcars)
}
out3_2
detach(mtcars)
## ----warning=FALSE,message=FALSE-----------------------------------------
attach(mtcars)
bootstraps<-lapply(1:10,function(i){
rows<-sample(1:nrow(mtcars),rep=TRUE)
mtcars[rows,]
})
#use lapply function
out4_1<-lapply(seq_along(bootstraps),function(i){lm(mpg~disp,data = bootstraps[[i]])})
out4_1
#use for loops
out4_2<-vector("list",length(bootstraps))
for(i in seq_along(bootstraps)){
out4_2[[i]]<-lm(mpg~disp,data<-bootstraps[[i]])
}
out4_2
detach(mtcars)
## ------------------------------------------------------------------------
# Extract R^2 from a fitted lm object.
rsq <- function(mod) summary(mod)[["r.squared"]]
# exercise 3: R^2 of the four formula fits (lapply vs for-loop versions)
req3_1 <- lapply(out3_1, rsq)
req3_2 <- lapply(out3_2, rsq)
print(data.frame(unlist(req3_1), unlist(req3_2)))
# exercise 4: R^2 of the ten bootstrap fits
req4_1 <- lapply(out4_1, rsq)
req4_2 <- lapply(out4_2, rsq)
print(data.frame(unlist(req4_1), unlist(req4_2)))
## ------------------------------------------------------------------------
# 100 Welch t-tests on small Poisson samples; extract every p-value.
trials <- replicate(100,
  t.test(rpois(10, 10), rpois(7, 10)),
  simplify = FALSE)
# vapply instead of sapply: guaranteed numeric(100) result regardless of
# input (sapply's simplification is type-unstable).
vapply(trials, function(mod) mod[["p.value"]], numeric(1))
## ------------------------------------------------------------------------
# Example: divide each column of a data frame by its column mean.
# Solution 1: Map() the division over columns and their vapply()-computed means.
data <- matrix(rnorm(20, 0, 10), nrow = 4)
x <- as.data.frame(data)
answer1 <- Map("/", x, vapply(x, mean, c(1)))
# Solution 2: lapply() with an anonymous function.
answer2 <- lapply(x, function(col) col / (mean(col)))
print(data.frame(unlist(answer1), unlist(answer2)))
## ------------------------------------------------------------------------
# Expected cell count under independence for a two-way contingency table:
# (column total / n) * (row total / n) * n.
expected <- function(colsum, rowsum, total) {
  (colsum / total) * (rowsum / total) * total
}
# Per-cell contribution to Pearson's chi-square statistic: (O - E)^2 / E
# (vectorized over observed/expected).
chisq.stat <- function(observed, expected) {
  ((observed - expected) ^ 2) / expected
}
## ------------------------------------------------------------------------
library(microbenchmark)
table2 <- function(x, y) {
  # Cross-tabulate two equal-length vectors, mimicking base table().
  x_val <- unique(x)
  y_val <- unique(y)
  # PERF FIX: precompute each element's row/column index once with match();
  # the original evaluated which(x_val == x[[i]]) twice per loop iteration
  # (O(n * k) comparisons overall).
  xi <- match(x, x_val)
  yi <- match(y, y_val)
  mat <- matrix(0L, length(x_val), length(y_val))
  for (i in seq_along(x)) {
    mat[xi[i], yi[i]] <- mat[xi[i], yi[i]] + 1L
  }
  dimnames <- list(x_val, y_val)
  # label the dimensions with the caller's argument expressions, like table()
  names(dimnames) <- as.character(as.list(match.call())[-1])
  tab <- array(mat, dim = dim(mat), dimnames = dimnames)
  class(tab) <- "table"
  tab
}
a <- c(2,3,4)#an example
identical(table(a, a), table2(a, a))  # should print TRUE
microbenchmark::microbenchmark(table(a, a), table2(a, a))  # timing comparison
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.