inst/doc/introhomework.R

## ----eval=FALSE----------------------------------------------------------
#  function(m,sigma){
#    x<-numeric(m)
#    u<-runif(m/2)#use inverse transformation method to generate
#    v<-1-u #antithetic variables#
#    u<-c(u,v)
#    x<-sqrt(-2*(sigma^2)*log(1-u))#integrate the Rayleigh density function to get the cdf#
#    return(x)
#  }

## ----eval=FALSE----------------------------------------------------------
#  function(n,a,b){
#     j<-k<-0
#     y<-numeric(n)
#     while(k<n){
#       u<-runif(1)
#       j<-j+1
#       x<-runif(1)
#       if(x^(a-1)*(1-x)^(b-1)>u){
#         k<-k+1
#         y[k]<-x
#       }
#     }
#     return(y)
#  }

## ----eval=FALSE----------------------------------------------------------
#  function(x, y) {
#    x_val <- unique(x)
#    y_val <- unique(y)
#    mat <- matrix(0L, length(x_val), length(y_val))
#    for (i in seq_along(x)) {
#      mat[which(x_val == x[[i]]), which(y_val == y[[i]])] <-
#        mat[which(x_val == x[[i]]),  which(y_val == y[[i]])] + 1L
#    }
#    dimnames <- list(x_val, y_val)
#    names(dimnames) <- as.character(as.list(match.call())[-1])  # R has names for dimnames... :/
#    tab <- array(mat, dim = dim(mat), dimnames = dimnames)
#    class(tab) <- "table"
#    tab
#  }

## ------------------------------------------------------------------------
x<-1:4
y<-rep(1,4)
z<-x+y#element-wise addition of equal-length vectors#
z
x<-1:4
y<-1:2
z<-x+y#the shorter vector is recycled#
z
x<-1:4
a<-10
z<-a*x#scalar multiplication#
z

## ------------------------------------------------------------------------
m1<-matrix(1,nrow=2,ncol=2)
m2<-matrix(2,nrow=2,ncol=2)
rbind(m1,m2)#stack by rows: 4x2#
cbind(m1,m2)#join by columns: 2x4#
rbind(m1,m2)%*%cbind(m1,m2)#4x4 matrix product#
cbind(m1,m2)%*%rbind(m1,m2)#2x2 matrix product#

## ------------------------------------------------------------------------
m <- matrix(1:4,2,2)
layout(m,widths=c(1,3),heights=c(3,1))#divide the device into four panels#
layout.show(4)
m<-matrix(c(1,1,2,1),2,2)
layout(m,widths=c(2,1),heights=c(1,2))#panel 1 occupies an L-shaped region#
layout.show(2)

## ------------------------------------------------------------------------
#Inverse transform method#
x<-c(0,1,2,3,4);p<-c(0.1,0.2,0.2,0.2,0.3)
cp<-cumsum(p);m<-1e3;r<-numeric(m)
r<-x[findInterval(runif(m),cp)+1]
ct1<-as.vector(table(r));
table(r);#frequency table of sample#
ct1/sum(ct1)/p#ratio of empirical to theoretical frequencies (should be near 1)#

#R sample function#
y<-sample(0:4,size=1000,replace=TRUE,prob=c(0.1,0.2,0.2,0.2,0.3))
ct2<-as.vector(table(y));
table(y);#frequency table of sample#
ct2/sum(ct2)/p#ratio of empirical to theoretical frequencies (should be near 1)#
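#A quick goodness-of-fit sanity check (added; it assumes all five support
#points appear in each sample, which is virtually certain for m=1000):
chisq.test(table(r),p=p)#inverse-transform sample vs. target pmf#
chisq.test(table(y),p=p)#sample() draw vs. target pmf#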

## ------------------------------------------------------------------------
#1.Write the function#
g<-function(n,a,b){
    j<-k<-0;y<-numeric(n)
    while(k<n){
      u<-runif(1)
      j<-j+1
      x<-runif(1)
      if(x^(a-1)*(1-x)^(b-1)>u){
        k<-k+1
        y[k]<-x
      }
    }
    return(y)
}
#2.Generation#
x<-g(1000,3,2)#a random sample of size 1000 from Beta(3,2) distribution#
head(x,n=50L)#print the first 50 values as an example#
#3.Graph#
hist(x,prob=TRUE,xlab="x",breaks=20,ylab="Density",col="red",main=expression(f(x)==12*x^2*(1-x)))#the histogram of the sample #
z<-seq(0,1,0.01)
lines(z,12*z^2*(1-z),col="blue")#the theoretical Beta(3,2) density superimposed#

## ------------------------------------------------------------------------
mix<-function(n,r,beta){
  lambda<-rgamma(n,r,beta)#generate the parameter lambda#
  x<-rexp(n,lambda)#generate the mixture#
  return(x)
}
x<-mix(1000,4,2)
head(x,n=50L)#print the first 50 values as an example#
hist(x,prob=TRUE,breaks=60,col="red",main="Exponential-Gamma Mixture")#graph#

## ------------------------------------------------------------------------
f<-function(x){
 m<-1e4;t<-runif(m,min=0,max=x)#t follows uniform distribution#
 cd<-mean(30*x*(t^2)*((1-t)^2))#Monte Carlo estimate: x*E[30*t^2*(1-t)^2] for t~U(0,x) equals the Beta(3,3) cdf at x#
 return(cd)
}
est<-matrix(0,2,9,dimnames=list(c("pbeta","MC estimate"),paste0("x=",(1:9)/10))) #compare the Monte Carlo estimate with pbeta#
for(i in 1:9){
  est[1,i]<-pbeta(i/10,3,3) 
  est[2,i]<-f(i/10)
}
print(est)

## ------------------------------------------------------------------------
function(m,sigma){
    x<-numeric(m)
    u<-runif(m/2)#use inverse transformation method to generate
    v<-1-u #antithetic variables#
    u<-c(u,v)
    x<-sqrt(-2*(sigma^2)*log(1-u))#intagrate the Rayleigh density function to get the cdf#
    return(x) 
}
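#A brief added check: antithetic pairing should shrink the variance of
#pair averages relative to independent draws (ray.anti as named above).
set.seed(123)
anti<-ray.anti(1000,1)
indep<-sqrt(-2*log(1-runif(1000)))#independent inverse-transform draws, sigma=1
c(sd.antithetic=sd((anti[1:500]+anti[501:1000])/2),
  sd.independent=sd((indep[1:500]+indep[501:1000])/2))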

## ------------------------------------------------------------------------
m<-10000
theta.hat<-se<-numeric(2)
g<-function(x){x^2/sqrt(2*pi)*exp(-x^2/2)*(x>1)} #g(x)#

x<-rnorm(m) #importance function 1--normal distribution pdf#
fg<-g(x)/((1/sqrt(2*pi))*exp(-x^2/2))
theta.hat[1]<-mean(fg)
se[1]<-sd(fg)

fg2<-function(m,sigma){
    x<-numeric(m)
    u<-runif(m)
    x<-sqrt(-2*(sigma^2)*log(1-u))
    return(x) 
}  #importance function 2: sampler for the Rayleigh(sigma) pdf from exercise 5.9#
x<-fg2(m,1)
fg22<-g(x)/(x*exp(-x^2/2))
theta.hat[2]<-mean(fg22)
se[2]<-sd(fg22)
theta.hat #the two estimates#
se #the Rayleigh importance function gives the smaller standard error#
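#Added cross-check: numerical quadrature of the target integral, which
#both importance-sampling estimates should approximate.
integrate(function(x) x^2/sqrt(2*pi)*exp(-x^2/2),1,Inf)$value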

## ------------------------------------------------------------------------
m<-10000
g<-function(x){(x^2)/sqrt(2*pi)*exp(-x^2/2)*(x>1)}
x<-rnorm(m)
fg<-g(x)/((1/sqrt(2*pi))*exp(-(x^2)/2)) #importance function: standard normal pdf#
theta.hat<-mean(fg)
se<-sd(fg)
theta.hat #estimation#

## ------------------------------------------------------------------------
m<-10000
n<-100
gini<-numeric(m)
for(i in 1:m){
  x<-rlnorm(n,meanlog=0,sdlog=1)#standard lognormal distribution#
  xbar<-mean(x)
  xs<-sort(x)
  t<-0
  for(j in 1:n){
    t<-t+(2*j-n-1)*xs[j]/(n*n*xbar)
  }
  gini[i]<-t
}
gini.mean<-mean(gini)
print(gini.mean)#mean#
gini.median<-median(gini)
print(gini.median)#median#
gini.deciles<-quantile(gini,probs = c(0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9))
print(gini.deciles)#deciles#
hist(gini,freq = F,col = "red")

## ------------------------------------------------------------------------
m<-10000
n<-100
gini<-numeric(m)
for(i in 1:m){
  x<-runif(n) #uniform distribution#
  xbar<-mean(x)
  xs<-sort(x)
  t<-0
  for(j in 1:n){
    t<-t+(2*j-n-1)*xs[j]/(n*n*xbar)
  }
  gini[i]<-t
}
gini.mean<-mean(gini)
print(gini.mean)#mean#
gini.median<-median(gini)
print(gini.median)#median#
gini.deciles<-quantile(gini,probs = c(0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9))
print(gini.deciles)#deciles#
hist(gini,freq = F,col = "blue")

## ------------------------------------------------------------------------
set.seed(2)
m<-500
n<-100
gini<-numeric(m)
for(i in 1:m){
  x<-rlnorm(n,meanlog=0,sdlog=1)
  xbar<-mean(x)
  xs<-sort(x)
  t<-0
  for(j in 1:n){
    t<-t+(2*j-n-1)*xs[j]/(n*n*xbar)
  }
  gini[i]<-t
}
mu<-mean(gini)
sigma<-sd(gini)/sqrt(m)#standard error of the mean#
cv<-qnorm(.975,0,1) #use the central limit theorem and the standard normal quantile to construct the 95% confidence interval#
UCL1<-mu-cv*sigma
UCL2<-mu+cv*sigma
UCL1 #lower confidence limit#
UCL2 #upper confidence limit#
l<-2000 #resample to estimate the coverage rate#
munew<-numeric(l)
for(k in 1:l){
  m<-100
  n<-50
  gini<-numeric(m)
  for(i in 1:m){
    x<-rlnorm(n,meanlog=0,sdlog=1)
    xbar<-mean(x)
    xs<-sort(x)
    t<-0
    for(j in 1:n){
      t<-t+(2*j-n-1)*xs[j]/(n*n*xbar)
    }
  gini[i]<-t
  }
  munew[k]<-mean(gini)
}
sum((munew<UCL2)&(munew>UCL1))/l #coverage rate#

## ----warning=FALSE,message=FALSE-----------------------------------------
attach(mtcars)
formulas<-list(
  mpg~disp,
  mpg~I(1/disp),
  mpg~disp+wt,
  mpg~I(1/disp)+wt
)

#use lapply function
out3_1<-lapply(formulas,lm,data=mtcars)
out3_1

#use for loops
out3_2<-vector("list",length(formulas))
for(i in seq_along(formulas)){
  out3_2[[i]]<-lm(formulas[[i]],data=mtcars)
}
out3_2
detach(mtcars)

## ----warning=FALSE,message=FALSE-----------------------------------------
attach(mtcars)
bootstraps<-lapply(1:10,function(i){
  rows<-sample(1:nrow(mtcars),replace=TRUE)
  mtcars[rows,]
})

#use lapply function
out4_1<-lapply(seq_along(bootstraps),function(i){lm(mpg~disp,data = bootstraps[[i]])})
out4_1
#use for loops
out4_2<-vector("list",length(bootstraps))
for(i in seq_along(bootstraps)){
  out4_2[[i]]<-lm(mpg~disp,data=bootstraps[[i]])
}
out4_2
detach(mtcars)

## ------------------------------------------------------------------------
rsq<-function(mod) summary(mod)$r.squared
#exercise 3 
req3_1 <- lapply(out3_1, rsq)
req3_2 <- lapply(out3_2, rsq)
print(data.frame(unlist(req3_1),unlist(req3_2)))
#exercise 4
req4_1 <- lapply(out4_1, rsq)
req4_2 <- lapply(out4_2, rsq)
print(data.frame(unlist(req4_1),unlist(req4_2)))

## ------------------------------------------------------------------------
trials <- replicate(100,
                    t.test(rpois(10, 10), rpois(7, 10)),
                    simplify = FALSE)

sapply(trials,function(mod){mod[["p.value"]]},simplify=TRUE)
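# An equivalent form (added note): sapply can pass the index as an extra
# argument to `[[` instead of using an anonymous function.
sapply(trials,"[[","p.value")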

## ------------------------------------------------------------------------
# the example normalizes the data: each value is divided by its column mean
# use Map and vapply to solve
data <- matrix(rnorm(20, 0, 10), nrow = 4)
x <- as.data.frame(data)
answer1 <- Map("/",x,vapply(x,mean,c(1)))

# use the lapply with an anonymous function to solve
answer2 <- lapply(x,function(data){data/(mean(data))})
print(data.frame(unlist(answer1),unlist(answer2)))

## ------------------------------------------------------------------------
library("nortest")
set.seed(1)
attach(chickwts)
x<-sort(as.vector(weight[feed=="soybean"]))
y<-sort(as.vector(weight[feed=="linseed"]))
detach(chickwts)
z<-c(x,y)#pooled sample#
R<-999#replicate number#
K<-1:length(z)
D<-numeric(R)
CM<-function(x,y){#compute the Cramér-von Mises statistic
  ecdfx<-ecdf(x)
  ecdfy<-ecdf(y)
  l_x<-length(x)
  l_y<-length(y)
  sum1<-sum((ecdfx(x)-ecdfy(x))^2)
  sum2<-sum((ecdfx(y)-ecdfy(y))^2)
  w<-l_x*l_y/(l_x+l_y)^2*(sum1+sum2)
  return(w)
}
D0<-CM(x,y)
for(i in 1:R){
  k<-sample(K,size=length(x),replace=F)
  x1<-z[k]
  y1<-z[-k]
  D[i]<-CM(x1,y1)
}
p<-mean(c(D0,D)>=D0)
print(p)
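#Added visualization: the permutation distribution of the statistic with
#the observed value marked; the p-value is the area at or beyond D0.
hist(D,breaks=30,main="Permutation distribution of W",xlab="W")
abline(v=D0,col="red")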

## ------------------------------------------------------------------------
library(RANN)
library(boot)
library(energy)
library(Ball)
library(ggplot2)
m<-30#number of simulation replicates
k<-3
p<-2
n1<-n2<-50
R<-999#bd.test replication number
n<-n1+n2
N=c(n1,n2)
Tn<-function(z,ix,sizes,k){
  n1<-sizes[1]
  n2<-sizes[2]
  n<-n1+n2
  if(is.vector(z)) z<-data.frame(z,0)
  z<-z[ix,]
  NN<-nn2(data=z,k=k+1)
  block1<-NN$nn.idx[1:n1,-1]
  block2<-NN$nn.idx[(n1+1):n,-1]
  i1<-sum(block1<n1+.5);i2<-sum(block2>n1+.5)
  (i1+i2)/(k*n)
}
eqdist.nn<-function(z,sizes,k){
  boot.obj<-boot(data=z,statistic = Tn,R=R,sim="permutation",sizes=sizes,k=k)
  ts<-c(boot.obj$t0,boot.obj$t)
  p.value<-mean(ts>=ts[1])
  list(statistic=ts[1],p.value=p.value)
}#NN
p.values<-matrix(NA,m,3)
for(i in 1:m){
  x<-matrix(rnorm(n1*p,sd=1),ncol=p)#unequal variances
  y<-matrix(rnorm(n2*p,sd=1.4),ncol=p)
  z<-rbind(x,y)
  p.values[i,1]<-eqdist.nn(z,N,k)$p.value#NN
  p.values[i,2]<-eqdist.etest(z,size=N,R=R)$p.value#in the energy package
  p.values[i,3]<-bd.test(x=x,y=y,R=999,seed=i)$p.value#BAll Divergence in the ball package
}
alpha<-0.1#significance level
pow<-apply(p.values<alpha,2,mean)#proportion of p-values below alpha in each column (empirical power)
print(pow)

## ------------------------------------------------------------------------
for(i in 1:m){
  x<-matrix(rnorm(n1*p,mean=0.4,sd=1),ncol=p)#unequal variances and unequal expectations
  y<-matrix(rnorm(n2*p,mean=0,sd=1.4),ncol=p)
  z<-rbind(x,y)
  p.values[i,1]<-eqdist.nn(z,N,k)$p.value
  p.values[i,2]<-eqdist.etest(z,size=N,R=R)$p.value
  p.values[i,3]<-bd.test(x=x,y=y,R=999,seed=i)$p.value
}
alpha<-0.1
pow<-apply(p.values<alpha,2,mean)
print(pow)

## ------------------------------------------------------------------------
for(i in 1:m){
  x<-matrix(rt(n1*p,df=1),ncol=p)#t distribution
  y<-matrix(rnorm(n2*p,sd=sample(c(1,1.3),size=n2*p,prob=c(0.5,0.5),replace=T)),ncol=p)#mixture of two normal distributions
  z<-rbind(x,y)
  p.values[i,1]<-eqdist.nn(z,N,k)$p.value
  p.values[i,2]<-eqdist.etest(z,size=N,R=R)$p.value
  p.values[i,3]<-bd.test(x=x,y=y,R=999,seed=i)$p.value
}
alpha<-0.01
pow<-apply(p.values<alpha,2,mean)
print(pow)

## ------------------------------------------------------------------------
n1<-50
n2<-5
n<-n1+n2
N=c(n1,n2)
for(i in 1:m){
  x<-matrix(rnorm(n1*p,mean=1),ncol=p)#50 samples
  y<-matrix(rnorm(n2*p,mean=2),ncol=p)#5 samples
  z<-rbind(x,y)
  p.values[i,1]<-eqdist.nn(z,N,k)$p.value
  p.values[i,2]<-eqdist.etest(z,size=N,R=R)$p.value
  p.values[i,3]<-bd.test(x=x,y=y,R=999,seed=i)$p.value
}
alpha<-0.1
pow<-apply(p.values<alpha,2,mean)
print(pow)

## ------------------------------------------------------------------------
library(tidyverse)
set.seed(1)
m<-20000#samples number
x<-numeric(m)#to store the samples
x[1]<-runif(1)#the first sample
standard_Cauchy<-function(x){
  return(1/(pi*(1+x^2)))
}
for(i in 1:(m-1)){
  proposal<-x[i]+runif(1,min = -1,max = 1)
  accept<-runif(1)<standard_Cauchy(proposal)/standard_Cauchy(x[i])
  x[i+1]<-if(accept) proposal else x[i]
}
x<-x[1001:m]#discard the first 1000 samples as burn-in
quantile(x,probs = seq(0.1,0.9,0.1))
qcauchy(seq(0.1,0.9,0.1),location=0,scale=1)#theoretical deciles for comparison
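#Added visual check: Q-Q plot of the retained chain against theoretical
#Cauchy quantiles (extreme quantiles scatter because of the heavy tails).
qqplot(qcauchy(ppoints(length(x))),x,
       xlab="theoretical quantiles",ylab="sample quantiles")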

## ------------------------------------------------------------------------
set.seed(1)
m<-5000#lengths of the chain
w<-0.25#half-width of the uniform proposal increment
u<-runif(m)#for accept/reject step
v<-runif(m,-w,w)#proposal distribution
group<-c(125,18,20,34)
x<-numeric(m)#the chain
prob<-function(theta,group){
  if(theta<0||theta>=0.8)
    return(0)
  return((1/2+theta/4)^group[1]*((1-theta)/4)^group[2]*((1-theta)/4)^group[3]*(theta/4)^group[4])
}
x[1]<-0.4#initialize from 0.4
for(i in 2:m){
  theta<-x[i-1]+v[i]
  if(u[i]<=prob(theta,group)/prob(x[i-1],group))
    x[i]<-theta
  else
    x[i]<-x[i-1]
}
index<-1001:m#discard the first 1000 samples as burn-in
theta_hat<-mean(x[index])
print(theta_hat)

## ------------------------------------------------------------------------
set.seed(12345)
group<-c(125,18,20,34)
k<-4#number of chains to generate
N<-5000#length of the chain
b<-500#burn-in length
Gelman.Rubin<-function(psi){
  psi<-as.matrix(psi)
  n<-ncol(psi)
  k<-nrow(psi)
  psi.means<-rowMeans(psi)#row means
  B<-n*var(psi.means)#between variance est.
  psi.w<-apply(psi,1,"var")#within variances
  W<-mean(psi.w)#within est.
  v.hat<-W*(n-1)/n+(B/n)#upper variance est.
  r.hat<-v.hat/W#Gelman-Rubin statistic
  return(r.hat)
}
prob<-function(theta,group){
  if(theta<0||theta>=0.9)
    return(0)
  return((1/2+theta/4)^group[1]*((1-theta)/4)^group[2]*((1-theta)/4)^group[3]*(theta/4)^group[4])
}
Chain<-function(group,N,X1){
  x<-numeric(N)
  x[1]<-X1#initialize x[1]
  w<-0.25
  u<-runif(N)
  v<-runif(N,-w,w)
  for(i in 2:N){
    theta<-x[i-1]+v[i]
    if(u[i]<=prob(theta,group)/prob(x[i-1],group))
      x[i]<-theta
    else
      x[i]<-x[i-1]
  }
  return(x)
}
#generate the chains
x0<-c(0.2,0.4,0.6,0.8)
X<-matrix(0,nrow=k,ncol=N)
for(i in 1:k) X[i,]<-Chain(group,N,x0[i])
#compute diagnostic statistics
psi<-t(apply(X,1,cumsum))
for(i in 1:nrow(psi)) psi[i,]<-psi[i,]/(1:ncol(psi))
print(Gelman.Rubin(psi))
#plot psi for the four chains
par(mfrow=c(2,2))
for(i in 1:k)
  plot(psi[i,(b+1):N],type="l",xlab=i,ylab=bquote(psi))
par(mfrow=c(1,1))#restore default
#plot the sequence of R-hat statistics
rhat<-rep(0,N)
for(j in (b+1):N)
  rhat[j]<-Gelman.Rubin(psi[,1:j])
plot(rhat[(b+1):N],type="l",xlab="",ylab="R")
abline(h=1.2,lty=2)#common convergence threshold

## ------------------------------------------------------------------------
set.seed(12345)
eps<-.Machine$double.eps^0.25#stopping tolerance for the bisection root search
k<-c(4:25,100,500,1000)#k mentioned in the question
S<-function(k,a){
  return((1-pt(sqrt((a^2*k)/(k+1-a^2)),df=k))-(1-pt(sqrt((a^2*(k-1))/(k-a^2)),df=k-1)))
}#S_k(a)-S_{k-1}(a)
Root<-function(k1){
  a<-seq(0.1,sqrt(k1)-0.1,length=3)
  y<-c(S(k1,a[1]),S(k1,a[2]),S(k1,a[3]))
  while(abs(y[2])>eps){
    if(y[1]*y[2]<0){
      a[3]<-a[2]
      y[3]<-y[2]
    }
    else{
      a[1]<-a[2]
      y[1]<-y[2]
    }
    a[2]<-(a[1]+a[3])/2
    y[2]<-S(k1,a[2])
  }
  result<-list(k1,a[2],y[2])
  return(result)
}
for(i in k){#print the result for each k
  res<-Root(i)
  cat('k:',res[[1]],'root:',res[[2]],'value of function:',res[[3]],'\n')
}
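#Added cross-check with stats::uniroot for a single k; it relies on the
#same sign change over the interval that the bisection above assumes.
uniroot(function(a) S(25,a),interval=c(0.1,sqrt(25)-0.1))$root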

## ------------------------------------------------------------------------
theta1<-1;eta1<-0#standard Cauchy distribution#
f<-function(x,theta,eta){#Cauchy density with scale theta and location eta
  (theta*pi*(1+((x-eta)/theta)^2))^(-1)
}
k<-seq(-30,30,2)
F1<-rep(0,length(k))
for(i in 1:length(k)){
  up<-k[i]
  F1[i]<-integrate(f,lower=-Inf,upper=up,rel.tol=.Machine$double.eps^0.25,theta=theta1,eta=eta1)$value
}#integrate function
F2<-pcauchy(k)
F1
F2
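#Added check: the quadrature and pcauchy results should agree closely.
max(abs(F1-F2))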

## ------------------------------------------------------------------------
N=1000
na=28
nb=24
noo=41
nab=70
p=0.6   #initial est. for p
q=0.3
r=0.1
pm=numeric(0)
qm=numeric(0)
rm=numeric(0)
lofm=numeric(0)
lof=function(p,q,r){   #observed-data log-likelihood
  return(log(choose(n,naa)*choose(n-naa,nbb)*choose(n-naa-nbb,noo)*
               choose(nao+nbo+nab,nao)*choose(nbo+nab,nbo))+
           (nao+nbo+nab)*log(2)+(2*naa+nao+nab)*log(p)+
           (2*nbb+nbo+nab)*log(q)+(2*noo+nao+nbo)*log(r))
}
for (j in 1:N) {
  naa=round(na*p^2/(p^2+2*p*r))
  nbb=round(nb*q^2/(q^2+2*q*r))
  nao=na-naa
  nbo=nb-nbb
  n=naa+nbb+noo+nao+nbo+nab
  if(abs(p-(2*naa+nao+nab)/2/n)<1e-9&&
     abs(q-(2*nbb+nbo+nab)/2/n)<1e-9&&
     abs(r-(2*noo+nbo+nao)/2/n)<1e-9&&j>5) break
  p=(2*naa+nao+nab)/2/n  #update estimation
  q=(2*nbb+nbo+nab)/2/n
  r=(2*noo+nbo+nao)/2/n
  pm=c(pm,p)
  qm=c(qm,q)
  rm=c(rm,r)
  lofm=c(lofm,lof(p,q,r))
}
c(p,q,r)#final EM estimates of p, q, r
lofm#log-likelihood at each iteration (should be non-decreasing)
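#Added plot: EM guarantees a non-decreasing log-likelihood, which the
#trajectory should confirm.
plot(lofm,type="b",xlab="iteration",ylab="log-likelihood")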

## ------------------------------------------------------------------------
expected <- function(colsum, rowsum, total) {
  (colsum / total) * (rowsum / total) * total
}
chisq.stat <- function(observed, expected) {
  ((observed - expected) ^ 2) / expected
}#per-cell contributions to Pearson's chi-square statistic (sum to obtain the statistic)
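#Added usage sketch with hypothetical counts: the per-cell contributions
#are summed and compared against R's chisq.test (without Yates correction).
obs<-matrix(c(20,30,25,25),2,2)#hypothetical 2x2 table
exp.cells<-outer(rowSums(obs),colSums(obs),
                 function(r,c) expected(c,r,sum(obs)))
sum(chisq.stat(obs,exp.cells))#Pearson's X^2
chisq.test(obs,correct=FALSE)$statistic#should match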

## ------------------------------------------------------------------------
library(microbenchmark)
table2 <- function(x, y) {
  x_val <- unique(x)
  y_val <- unique(y)
  mat <- matrix(0L, length(x_val), length(y_val))
  for (i in seq_along(x)) {
    mat[which(x_val == x[[i]]), which(y_val == y[[i]])] <-
      mat[which(x_val == x[[i]]),  which(y_val == y[[i]])] + 1L
  }
  dimnames <- list(x_val, y_val)
  names(dimnames) <- as.character(as.list(match.call())[-1])  # R has names for dimnames... :/
  tab <- array(mat, dim = dim(mat), dimnames = dimnames)
  class(tab) <- "table"
  tab
}
a <- c(2,3,4)#an example
identical(table(a, a), table2(a, a))
microbenchmark::microbenchmark(table(a, a), table2(a, a))