inst/doc/homework18081.R

## ------------------------------------------------------------------------
cells <- c(16,25,32,45)
rnames <- c("R1","R2")
cnames <- c("C1","C2")
mymatrix <- matrix(cells,nrow = 2,ncol = 2,byrow = TRUE,
                   dimnames = list(rnames,cnames))
print(mymatrix)

## ------------------------------------------------------------------------
m1 <- matrix(1, nrow = 2, ncol = 2)
m2 <- matrix(2, nrow = 2, ncol = 2)
rbind(m1, m2)
cbind(m1, m2)

## ------------------------------------------------------------------------
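# Spinogram of Improved by Treatment, using the Arthritis data from vcd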
library(vcd)
attach(Arthritis)
counts <- table(Treatment,Improved)
spine(counts,main="Spinogram Example")
detach(Arthritis)

## ------------------------------------------------------------------------
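# Inverse transform sampling from a discrete distribution on 0:4, checked against sample()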
x <- 0:4
p <- c(.1, .2, .2, .2, .3)
cp <- cumsum(p)
m <- 1e3
r <- x[findInterval(runif(m), cp) + 1]
ct <- as.vector(table(r))
ratio <- ct / sum(ct) / p  # empirical over theoretical frequencies; should be close to 1
cat(ratio)
s <- sample(0:4, size = 1000, replace = TRUE, prob = c(.1, .2, .2, .2, .3))
print(s[1:100]) # to save space, only the first 100 values are shown
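# As a quick sanity check, a chi-square goodness-of-fit test of the empirical
# counts against the target probabilities p:
chisq.test(table(factor(r, levels = 0:4)), p = p)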

## ------------------------------------------------------------------------
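# Acceptance-rejection sampling from the Beta(3, 2) density f(x) = 12 x^2 (1 - x), with a Uniform(0, 1) proposal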
n <- 1e3
j <- 0
k <- 0
y <- numeric(n)
while (k < n) {
  u <- runif(1)
  j <- j + 1
  x <- runif(1) #random variate from g
  if (x^2 * (1-x) > u) {
    #we accept x
    k <- k + 1
    y[k] <- x
  }
}
j # total candidates generated for n accepted values; about 12 * n on average, since P(accept) = 1/12
hist(y, prob = TRUE)
y <- seq(0, 1, .01)
lines(y, 12*y^2*(1-y))

## ------------------------------------------------------------------------
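# Exponential-Gamma mixture: draw lambda ~ Gamma(r, beta), then Y | lambda ~ Exp(lambda)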
n <- 1e3
r <- 4
beta <- 2
lambda <- rgamma(n, r, beta)
y <- rexp(n, lambda)
y[1:10]  # first 10 generated values

## ------------------------------------------------------------------------
# Monte Carlo estimate of the Beta(3, 3) cdf at x, averaging the density over Uniform(0, x) draws
F <- function(x){
  a <- 3
  b <- 3
  m <- 1e4
  y <- runif(m, min = 0,max = x)
  F.hat <- x*mean(1/beta(a,b)*y^(a-1)*(1-y)^(b-1))
  return(F.hat)
}

# evaluate at x = 0.1, 0.2, ..., 0.9
x_seq <- seq(.1,.9,by=.1)
F_seq <- numeric(length(x_seq))  
P_seq <- numeric(length(x_seq))
for (i in 1:length(x_seq)) {
  F_seq[i] <- F(x_seq[i])           # Monte Carlo estimate of the cdf
  P_seq[i] <- pbeta(x_seq[i], 3, 3) # exact value from pbeta
}

# generate a table to compare
print(round(rbind(x_seq, F_seq, P_seq), 3))

## ------------------------------------------------------------------------
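# Antithetic variates for estimating the Rayleigh(1) mean E[X], using the inverse-cdf transform X = sqrt(-2 log(1 - U))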
set.seed(123)
R <- 1000
Ray <- function(R,antithetic = TRUE){
  u <- runif(R/2)
  if (!antithetic) v <- runif(R/2) else
    v <- 1-u
  u <- c(u,v)
  return(mean(sqrt(-2*log(1-u))))
}

m <- 1000
Ray1 <- Ray2 <- numeric(m)
for (i in 1:m) {
  Ray1[i] <- Ray(R,FALSE)
  Ray2[i] <- Ray(R,TRUE)
}

print(sd(Ray1))  # X1 and X2 are independent
print(sd(Ray2))  # X1 and X2 are antithetic variables
print((var(Ray1) - var(Ray2))/var(Ray1))  # the percent reduction in variance


## ------------------------------------------------------------------------
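# Importance sampling for theta = integral of g over (1, Inf), with the Rayleigh(1) density f as importance function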
set.seed(12345)
g <- function(x) {
  (x^2*exp(-x^2/2))/sqrt(2*pi) * (x > 1) 
}
f <- function(x){
   x*exp(-x^2/2)
}

R <- 10000
Ray_cdf <- function(R,antithetic = TRUE){
  u <- runif(R/2)
  if (!antithetic) v <- runif(R/2) else
    v <- 1-u
  u <- c(u,v)
  return(sqrt(-2*log(1-u)))
}

x <- Ray_cdf(R, FALSE)  # draw R Rayleigh(1) variates (independent, not antithetic)
fg <- g(x)/f(x)
theta.hat <- mean(fg)
se <- sd(fg)
print(theta.hat)
print(se)


## ------------------------------------------------------------------------
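# Check the importance-sampling estimate by numerically integrating g over (1, Inf)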
ff <- function(x) {
  (x^2*exp(-x^2/2))/sqrt(2*pi) 
}
theta.right <- integrate(ff,1,Inf)
print(theta.right)


## ------------------------------------------------------------------------
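# Monte Carlo study of the Gini ratio estimator G = sum_i (2i - n - 1) * x_(i) / (n^2 * mean(x)) under three sampling distributions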
set.seed(123)
G.est <- function(type = "xtype") {
  n <- 1000
  m <- 1000
  Sum <- numeric(m)  # one running sum per replicate
  G.hat <- numeric(m)
  for (j in 1:m) {
    if (type == "rlnorm") {
      general <- rlnorm(n)
    }
    else if (type == "runif") {
      general <- runif(n)
    }
    else if (type == "rbinom") {
      general <- rbinom(n, 100, .1)
    }
    x <- general
    mu.hat <- mean(x)
    x_order <- sort(x)
    for (i in 1:n) {
      Sum[j] <- Sum[j] + (2 * i - n - 1) * x_order[i]
    }
    G.hat[j] <- Sum[j] / (n ^ 2 * mu.hat)
  }
  print(mean(G.hat))
  print(median(G.hat))
  print(quantile(G.hat, seq(.1, .9, length = 9)))
}

G.est("rlnorm") #x is generated by standard lognormal
G.est("runif")  #x is generated by  uniform distribution
G.est("rbinom") #x is generated by   Bernoulli(0.1)


## ------------------------------------------------------------------------
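# Lognormal case again: a normal-approximation confidence interval for the mean of G.hat, and its empirical coverage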
set.seed(123)
n <- 1000
m <- 1000
Sum <- numeric(m)  # one running sum per replicate
G.hat <- numeric(m)

for (j in 1:m) {
  x <- rlnorm(n)
  mu.hat <- mean(x)
  x_order <- sort(x)
  for (i in 1:n) {
    Sum[j] <- Sum[j] + (2 * i - n - 1) * x_order[i]
  }
  G.hat[j] <- Sum[j] / (n ^ 2 * mu.hat)
}

# Construct a normal-approximation confidence interval
G.mu <- mean(G.hat)
G.sigma <- sd(G.hat)
CT1 <- G.mu - G.sigma*1.96
CT2 <- G.mu + G.sigma*1.96
print(c(CT1,CT2))

# Estimate the coverage of the confidence interval
h <- 0
for (j in 1:m) {
  if(CT1 < G.hat[j] & G.hat[j] < CT2)
    h <- h +1
}
print(h/m)


## ------------------------------------------------------------------------
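# Empirical power of the Pearson, Kendall, and Spearman tests under a bivariate normal with correlation 0.15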
set.seed(123)
library(MASS)
options(digits = 3)
m <- 1000
nump <- numk <- nums <- numeric(m)
p1 <- p2 <- p3 <- numeric(m)
for (i in 1:m) {
  mu <- c(0, 1)
  sigma <- matrix(c(1, 0.15, 0.15, 1), ncol = 2, nrow = 2)
  data <- mvrnorm(n = 500, mu, sigma)
  x <- data[, 1]
  y <- data[, 2] 
  
  a1 <- cor.test(x, y, method =  "pearson")
  a2 <- cor.test(x, y, method =  "kendall")
  a3 <- cor.test(x, y, method =  "spearman")
  
  p1[i] <- a1$p.value
  p2[i] <- a2$p.value
  p3[i] <- a3$p.value
}

nump[p1 < 0.05] <- 1
numk[p2 < 0.05] <- 1
nums[p3 < 0.05] <- 1

ratio <- c(sum(nump), sum(numk), sum(nums)) / m
print(ratio)
barplot(ratio,col = c("red","green","green"),names.arg = c("pearson","kendall","spearman"),main = "Power of a Test")


## ------------------------------------------------------------------------
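# The same comparison with zero covariance and independent uniform noise added to Y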
set.seed(123)
library(MASS)
m <- 1000
n <- 1000
nump <- numk <- nums <- numeric(m)
p1 <- p2 <- p3 <- numeric(m)
for (i in 1:m) {
  mu <- c(0, 1)
  sigma <- matrix(c(1, 0, 0, 3), ncol = 2, nrow = 2)
  data <- mvrnorm(n, mu, sigma)
  x <- data[, 1]
  y <- data[, 2] + runif(n, min = 0, max = 0.2)
  
  a1 <- cor.test(x, y, method =  "pearson")
  a2 <- cor.test(x, y, method =  "kendall")
  a3 <- cor.test(x, y, method =  "spearman")
  
  p1[i] <- a1$p.value
  p2[i] <- a2$p.value
  p3[i] <- a3$p.value
}

nump[p1 < 0.05] <- 1
numk[p2 < 0.05] <- 1
nums[p3 < 0.05] <- 1

ratio <- c(sum(nump), sum(numk), sum(nums)) / m
print(ratio)
barplot(ratio,col = c("red","green","green"),names.arg = c("pearson","kendall","spearman"),main = "Power of a Test")


## ------------------------------------------------------------------------
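# Jackknife estimates of the bias and standard error of the LSAT-GPA sample correlation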
library(bootstrap)  #for the law data
r <- cor(law$LSAT, law$GPA)

n <- length(law$LSAT)
R <- numeric(n)  #storage for replicates
for (i in 1:n) {
  R[i] <- cor(law$LSAT[-i],law$GPA[-i])
}

bias <- (n - 1) * (mean(R) - r)
se.R <- sqrt((n - 1) *mean((R - mean(R))^2))

print(bias)
print(se.R)
hist(R,prob = TRUE, col = "pink")


## ------------------------------------------------------------------------
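# Bootstrap the sample mean: replicate distribution plus basic, normal, percentile, and BCa intervals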
set.seed(123)
library(boot)
Data <- c(3,5,7,18,43,85,91,98,100,130,230,487)
mu <- function(Data,i){
  return(mean(Data[i]))
}

bootobject <- boot(data = Data, statistic = mu,  R=1000)
print(bootobject)
plot(bootobject)

boot.ci(bootobject, type = c("basic", "norm", "perc","bca"))


## ------------------------------------------------------------------------
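# Jackknife for theta = lambda_1 / sum(lambda_j), the proportion of variance explained by the first principal component of scor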

library(bootstrap)
n <- nrow(scor)
theta.hat <- numeric(n)

sigma <- cov(scor)
results <- eigen(sigma)
theta <- results$values[1]/sum(results$values)

for (i in 1:n) {
  sigma <- cov(scor[-i, ])
  result <- eigen(sigma)
  theta.hat[i] <- result$values[1] / sum(result$values)
}

bias <- (n - 1) * (mean(theta.hat) - theta)
se <- sqrt((n - 1) *mean((theta.hat - mean(theta.hat))^2))

print(bias)
print(se)
hist(theta.hat,col = "lightgreen")

## ------------------------------------------------------------------------
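# Compare four regression models for the ironslag data by cross-validated squared prediction error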
library(ggplot2)
library(DAAG)
attach(ironslag)
n <- length(magnetic) #in DAAG ironslag
e1 <- e2 <- e3 <- e4 <- numeric(n-1)

# leave-two-out cross-validation over adjacent pairs (k, k+1):
# fit each model with observations k and k+1 held out
for (k in 1:(n - 1)) {  # note: 1:n-1 would parse as 0:(n-1) in R
  y <- magnetic[-k:-(k + 1)]
  x <- chemical[-k:-(k + 1)]

  # Model 1: linear
  J1 <- lm(y ~ x)
  yhat1_1 <- J1$coef[1] + J1$coef[2] * chemical[k]
  yhat1_2 <- J1$coef[1] + J1$coef[2] * chemical[k + 1]
  e1[k] <- (magnetic[k] - yhat1_1)^2 + (magnetic[k + 1] - yhat1_2)^2

  # Model 2: quadratic
  J2 <- lm(y ~ x + I(x^2))
  yhat2_1 <- J2$coef[1] + J2$coef[2] * chemical[k] + J2$coef[3] * chemical[k]^2
  yhat2_2 <- J2$coef[1] + J2$coef[2] * chemical[k + 1] + J2$coef[3] * chemical[k + 1]^2
  e2[k] <- (magnetic[k] - yhat2_1)^2 + (magnetic[k + 1] - yhat2_2)^2

  # Model 3: exponential (log-linear)
  J3 <- lm(log(y) ~ x)
  logyhat3_1 <- J3$coef[1] + J3$coef[2] * chemical[k]
  logyhat3_2 <- J3$coef[1] + J3$coef[2] * chemical[k + 1]
  yhat3_1 <- exp(logyhat3_1)
  yhat3_2 <- exp(logyhat3_2)
  e3[k] <- (magnetic[k] - yhat3_1)^2 + (magnetic[k + 1] - yhat3_2)^2

  # Model 4: log-log
  J4 <- lm(log(y) ~ log(x))
  logyhat4_1 <- J4$coef[1] + J4$coef[2] * log(chemical[k])
  logyhat4_2 <- J4$coef[1] + J4$coef[2] * log(chemical[k + 1])
  yhat4_1 <- exp(logyhat4_1)
  yhat4_2 <- exp(logyhat4_2)
  e4[k] <- (magnetic[k] - yhat4_1)^2 + (magnetic[k + 1] - yhat4_2)^2
}

model <- c("1_Linear","2_Quadratic","3_Exponential","4_Log_Log")
pred_error <- c(mean(e1), mean(e2), mean(e3), mean(e4))
data <- data.frame(model,pred_error)

print(pred_error)
ggplot(data, aes(x = model, y = pred_error)) +
  geom_bar(stat = "identity", fill = "lightblue", colour = "black")
detach(ironslag)


## ------------------------------------------------------------------------
set.seed(12)
#define the Cramer-von Mises statistic:
CVM <- function(x,y){
  n <- length(x)
  m <- length(y)
  Fn <- ecdf(x)
  Gm <- ecdf(y)
  
  W = (m*n)/(m+n)^2*(sum((Fn(x)-Gm(x))^2) + sum((Fn(y)-Gm(y))^2))
  return(W)
}

attach(chickwts)
x <- sort(as.vector(weight[feed == "soybean"]))
y <- sort(as.vector(weight[feed == "linseed"]))
detach(chickwts)

R <- 999 #number of replicates
z <- c(x, y) #pooled sample
K <- 1:26  # indices of the pooled sample (14 soybean + 12 linseed)
reps <- numeric(R) #storage for replicates
t0 <- CVM(x, y)

#data in 8.1 and 8.2
for (i in 1:R) {
#generate indices k for the first sample
k <- sample(K, size = 14, replace = FALSE)
x1 <- z[k]
y1 <- z[-k] #complement of x1
reps[i] <- CVM(x1, y1)
}

p <- mean(c(t0, reps) >= t0)
print(p)

hist(reps, main = "Two-sample Cramer-von Mises permutation test", freq = FALSE,
     xlab = paste0("W (p = ", round(p, 3), ")"), breaks = "scott")
points(t0, 0, cex = 1, pch = 16, col = "red")  # observed statistic


## ----warning=FALSE, message=FALSE----------------------------------------
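# Power of the nearest-neighbor (NN), energy, and Ball tests: equal means, unequal variances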
library(RANN)
library(boot)
library(energy)
library(Ball)

m <- 50; k<-3; p<-2; 
n1 <- n2 <- 50; R<-999; n <- n1+n2; N = c(n1,n2)

Tn <- function(z, ix, sizes,k) {
n1 <- sizes[1]; n2 <- sizes[2]; n <- n1 + n2
if(is.vector(z)) z <- data.frame(z,0);
z <- z[ix, ];
NN <- nn2(data=z, k=k+1) # the first column is each point's own index; dropped below
block1 <- NN$nn.idx[1:n1,-1]
block2 <- NN$nn.idx[(n1+1):n,-1]
i1 <- sum(block1 < n1 + .5); i2 <- sum(block2 > n1+.5)
(i1 + i2) / (k * n)
}

eqdist.nn <- function(z,sizes,k){
  boot.obj <- boot(data=z,statistic=Tn,R=R,sim = "permutation", sizes = sizes,k=k)
  ts <- c(boot.obj$t0,boot.obj$t)
  p.value <- mean(ts>=ts[1])
  list(statistic=ts[1],p.value=p.value)
}

p.values <- matrix(NA,m,3)
for(i in 1:m){
  x <- matrix(rnorm(n1*p,mean = 1,sd = 0.6),ncol=p);
  y <- matrix(rnorm(n2*p,mean = 1,sd = 0.85),ncol=p);
  z <- rbind(x,y)
  p.values[i,1] <- eqdist.nn(z,N,k)$p.value
  p.values[i,2] <- eqdist.etest(z,sizes=N,R=R)$p.value
  p.values[i,3] <- bd.test(x=x,y=y,R=999,seed = i*12)$p.value
}
alpha <- 0.1;
pow <- colMeans(p.values<alpha)
print(pow)
barplot(pow,col = "lightgreen",main = "Unequal variances and equal expectations",
        names.arg=c("NN","energy","ball"))

## ----warning=FALSE, message=FALSE----------------------------------------
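# Same three tests: unequal means and unequal variances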
library(RANN)
library(boot)
library(energy)
library(Ball)

m <- 50; k<-3; p<-2; 
n1 <- n2 <- 50; R<-999; n <- n1+n2; N = c(n1,n2)

Tn <- function(z, ix, sizes,k) {
n1 <- sizes[1]; n2 <- sizes[2]; n <- n1 + n2
if(is.vector(z)) z <- data.frame(z,0);
z <- z[ix, ];
NN <- nn2(data=z, k=k+1) # the first column is each point's own index; dropped below
block1 <- NN$nn.idx[1:n1,-1]
block2 <- NN$nn.idx[(n1+1):n,-1]
i1 <- sum(block1 < n1 + .5); i2 <- sum(block2 > n1+.5)
(i1 + i2) / (k * n)
}

eqdist.nn <- function(z,sizes,k){
  boot.obj <- boot(data=z,statistic=Tn,R=R,sim = "permutation", sizes = sizes,k=k)
  ts <- c(boot.obj$t0,boot.obj$t)
  p.value <- mean(ts>=ts[1])
  list(statistic=ts[1],p.value=p.value)
}

p.values <- matrix(NA,m,3)
for(i in 1:m){
  x <- matrix(rnorm(n1*p,mean = 0.4,sd = 0.6),ncol=p);
  y <- matrix(rnorm(n2*p,mean = 0.5,sd = 0.85),ncol=p);
  z <- rbind(x,y)
  p.values[i,1] <- eqdist.nn(z,N,k)$p.value
  p.values[i,2] <- eqdist.etest(z,sizes=N,R=R)$p.value
  p.values[i,3] <- bd.test(x=x,y=y,R=999,seed = i*12)$p.value
}
alpha <- 0.1;
pow <- colMeans(p.values<alpha)
print(pow)
barplot(pow,col = "lightblue",main = "Unequal variances and unequal expectations",
        names.arg=c("NN","energy","ball"))

## ----warning=FALSE, message=FALSE----------------------------------------
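# Same three tests on non-normal data: heavy-tailed t(1) versus normal coordinates with shifted means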
library(RANN)
library(boot)
library(energy)
library(Ball)

m <- 50; k<-3; p<-2; 
n1 <- n2 <- 20; R<-999; n <- n1+n2; N = c(n1,n2)

Tn <- function(z, ix, sizes,k) {
n1 <- sizes[1]; n2 <- sizes[2]; n <- n1 + n2
if(is.vector(z)) z <- data.frame(z,0);
z <- z[ix, ];
NN <- nn2(data=z, k=k+1) # the first column is each point's own index; dropped below
block1 <- NN$nn.idx[1:n1,-1]
block2 <- NN$nn.idx[(n1+1):n,-1]
i1 <- sum(block1 < n1 + .5); i2 <- sum(block2 > n1+.5)
(i1 + i2) / (k * n)
}

eqdist.nn <- function(z,sizes,k){
  boot.obj <- boot(data=z,statistic=Tn,R=R,sim = "permutation", sizes = sizes,k=k)
  ts <- c(boot.obj$t0,boot.obj$t)
  p.value <- mean(ts>=ts[1])
  list(statistic=ts[1],p.value=p.value)
}

p.values <- matrix(NA,m,3)
for(i in 1:m){
  # t distribution with 1 df (heavy-tailed distribution)
  x <- matrix(rt(n1*p,df = 1),ncol=p); 
  # second sample: two normal coordinates with means 0.4 and 0.5
  y <- cbind(rnorm(n2,mean = 0.4),rnorm(n2,mean = 0.5));
  z <- rbind(x,y)
  p.values[i,1] <- eqdist.nn(z,N,k)$p.value
  p.values[i,2] <- eqdist.etest(z,sizes=N,R=R)$p.value
  p.values[i,3] <- bd.test(x=x,y=y,R=999,seed = i*12)$p.value
}
alpha <- 0.1;
pow <- colMeans(p.values<alpha)
print(pow)
barplot(pow,col = "pink",main = "Non-normal distributions",
        names.arg=c("NN","energy","ball"))

## ----warning=FALSE, message=FALSE----------------------------------------
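# Same three tests with unbalanced sample sizes (n1 = 10, n2 = 100)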
library(RANN)
library(boot)
library(energy)
library(Ball)

m <- 50; k<-3; p<-2; 
n1 <- 10;n2 <- 100;R<-999; n <- n1+n2; N = c(n1,n2)

Tn <- function(z, ix, sizes,k) {
n1 <- sizes[1]; n2 <- sizes[2]; n <- n1 + n2
if(is.vector(z)) z <- data.frame(z,0);
z <- z[ix, ];
NN <- nn2(data=z, k=k+1) # the first column is each point's own index; dropped below
block1 <- NN$nn.idx[1:n1,-1]
block2 <- NN$nn.idx[(n1+1):n,-1]
i1 <- sum(block1 < n1 + .5); i2 <- sum(block2 > n1+.5)
(i1 + i2) / (k * n)
}

eqdist.nn <- function(z,sizes,k){
  boot.obj <- boot(data=z,statistic=Tn,R=R,sim = "permutation", sizes = sizes,k=k)
  ts <- c(boot.obj$t0,boot.obj$t)
  p.value <- mean(ts>=ts[1])
  list(statistic=ts[1],p.value=p.value)
}

p.values <- matrix(NA,m,3)
for(i in 1:m){
  x <- c(rnorm(n1,mean = 1,sd = 1)); # n1 = 10
  y <- c(rnorm(n2,mean = 2,sd = 2)); # n2 = 100
  z <- c(x,y)
  p.values[i,1] <- eqdist.nn(z,N,k)$p.value
  p.values[i,2] <- eqdist.etest(z,sizes=N,R=R)$p.value
  p.values[i,3] <- bd.test(x=x,y=y,R=999,seed = i*12)$p.value
}
alpha <- 0.1;
pow <- colMeans(p.values<alpha)
print(pow)
barplot(pow,col = "lightgreen",main = "Unbalanced sample sizes (n1 = 10, n2 = 100)",names.arg=c("NN","energy","ball"))

## ------------------------------------------------------------------------
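# Random-walk Metropolis sampler for the standard Cauchy target, with Uniform(-20, 20) increments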
set.seed(1)

m=10000

# target density: the Cauchy distribution with scale theta and location eta
cauchy <- function(x, theta = 1, eta = 0){
  1 / (pi * theta * (1 + ((x - eta) / theta)^2))
}

chain <- 0  # initial state

for(i in 1:m){
  proposal <- chain[i] + runif(1, min = -20, max = 20)
  accept <- runif(1) < cauchy(proposal) / cauchy(chain[i])
  chain[i+1] <- ifelse(accept, proposal, chain[i])
}

# plot over time
plot(chain, type="l")

# compare the chain's quantiles with the target's in a quantile-quantile (QQ) plot

b <- 1001  # discard the burn-in sample
y <- chain[b:m]
a <- ppoints(100)
QR <- qcauchy(a)  # theoretical quantiles of the standard Cauchy
Q <- quantile(y, a)  # sample quantiles after burn-in
qqplot(QR, Q, main = "", xlab = "Cauchy Quantiles", ylab = "Sample Quantiles",
       xlim = c(-20, 20), ylim = c(-20, 20))
hist(y, breaks = "scott", main = "", xlab = "", freq = FALSE)
lines(QR, cauchy(QR))  # overlay the standard Cauchy density


## ------------------------------------------------------------------------
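# Random-walk Metropolis for theta in the multinomial model with cell probabilities
# (1/2 + theta/4, (1 - theta)/4, (1 - theta)/4, theta/4) and observed counts (125, 18, 20, 34)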
m <- 5000
w <- 0.25
u <- runif(m)
v <- runif(m,-w,w)
group <- c(125,18,20,34)
x <- numeric(m)

prob <- function(theta,group){
  if(theta<0 || theta>=0.8)
    return(0)
  return((1/2+theta/4)^group[1]*((1-theta)/4)^group[2]*((1-theta)/4)^group[3]*(theta/4)^group[4])
}

x[1] <- 0.4
for (i in 2:m) {
  theta <- x[i-1]+v[i]
  if(u[i]<= prob(theta,group)/prob(x[i-1],group))
    x[i] <- theta
  else
    x[i] <- x[i-1]
}

index <- 1001:m
theta_hat <- mean(x[index])
print(theta_hat)

## ------------------------------------------------------------------------
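# For each k, find the root a = A(k) in (0, sqrt(k)) where the two Student t curves intersect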

k <- c(4:25, 100, 500, 1000)
n <- length(k)
A <- rep(0, n)
eps <- .Machine$double.eps ^ 0.25

for (i in 1:n) {
  f <- function(a) {
    num1 <- sqrt(a ^ 2 * (k[i] - 1) / (k[i] - a ^ 2))
    num2 <- sqrt(a ^ 2 * k[i] / (k[i] + 1 - a ^ 2))
    pt(num2, k[i]) - pt(num1, k[i] - 1)
  }
  out <- uniroot(f,lower = eps, upper = sqrt(k[i] - eps))
  A[i] <- out$root
}
print(A)

## ------------------------------------------------------------------------
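# Cauchy cdf by numerical integration of the density, compared with pcauchy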
options(digits=4)
x <- 0:10
n <- length(x)
value_function <- rep(0,n)
value_pcauchy<- rep(0,n)

f <- function(x,theta,eta){
  1/(theta*pi*(1+((x-eta)/theta)^2))
}

# use theta = 1, eta = 0 (the standard Cauchy)
for (i in 1:n) {
  # cdf by numerically integrating the density
  value_fun <- integrate(f, lower = -Inf, upper = x[i],
                         rel.tol = .Machine$double.eps^0.25, theta = 1, eta = 0)
  value_function[i] <- value_fun$value
  # cdf from R's built-in pcauchy
  value_pcauchy[i] <- pcauchy(x[i])
}
value <- data.frame(x,value_function,value_pcauchy)
print(t(value))


## ----warning=FALSE, message=FALSE----------------------------------------
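# EM algorithm: estimate the allele frequencies p and q from the counts nA, nB, nOO, nAB
# by repeatedly maximizing the expected complete-data log-likelihood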
library(stats4)
nA<-28;nB<-24;nOO<-41;nAB<-70;
n<-nA+nB+nOO+nAB;
i <- 0
p1 <- 0.3; q1 <- 0.3
p0 <- 0; q0 <- 0
options(warn=-1)
# EM algorithm: iterate the p, q updates until both converge
while(!isTRUE(all.equal(p0, p1, tolerance = 1e-7)) || !isTRUE(all.equal(q0, q1, tolerance = 1e-7)))
{
  p0 <- p1
  q0 <- q1
  mlogL <- function(p, q){
    # minus the expected complete-data log-likelihood, with expected genotype
    # counts evaluated at the current estimates p0 and q0
    return(-(2*n*p0^2*log(p) + 2*n*q0^2*log(q) + 2*nOO*log(1-p-q) +
             (nA - n*p0^2)*log(2*p*(1-p-q)) + (nB - n*q0^2)*log(2*q*(1-p-q)) +
             nAB*log(2*p*q)))
  }
  fit <- mle(mlogL, start = list(p = 0.2, q = 0.3))
  p1 <- fit@coef[1]
  q1 <- fit@coef[2]
  i <- i + 1
}
print(c(i, p1, q1)) # i: number of EM iterations; p1, q1: the final estimates

## ----warning=FALSE, message=FALSE----------------------------------------
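# Exercise 3: fit the four formulas to mtcars, first with a for loop, then with lapply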
formulas <- list(
mpg ~ disp,
mpg ~ I(1 / disp),
mpg ~ disp + wt,
mpg ~ I(1 / disp) + wt
)


attach(mtcars)

# for loops
out3_1 <- vector("list", length(formulas))
for (i in seq_along(formulas)) {
out3_1[[i]] <- lm(formulas[[i]],data=mtcars)
}

# lapply
out3_2 <- lapply(formulas, lm, data = mtcars)

detach(mtcars)


## ----warning=FALSE, message=FALSE----------------------------------------
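# Exercise 4: fit mpg ~ disp to each of ten bootstrap resamples of mtcars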
attach(mtcars)

bootstraps <- lapply(1:10, function(i) {
  rows <- sample(1:nrow(mtcars), replace = TRUE)
  mtcars[rows, ]
})

# for loops
out4_1 <- vector("list", length(bootstraps))
for (i in seq_along(bootstraps)) {
out4_1[[i]] <- lm(mpg ~ disp,data=bootstraps[[i]])
}

# lapply
out4_2 <- lapply(seq_along(bootstraps), function(i) {lm(mpg ~ disp,data=bootstraps[[i]])})

detach(mtcars)

## ------------------------------------------------------------------------
rsq <- function(mod) {summary(mod)$r.squared}
# for exercise 3
Rreq3_1 <- lapply(out3_1, rsq)
Rreq3_2 <- lapply(out3_2, rsq)
print(data.frame(unlist(Rreq3_1),unlist(Rreq3_2)))


# for exercise 4
Rreq4_1 <- lapply(out4_1, rsq)
Rreq4_2 <- lapply(out4_2, rsq)
print(data.frame(unlist(Rreq4_1),unlist(Rreq4_2)))



## ------------------------------------------------------------------------
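# Extract each test's p-value with sapply and an anonymous function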
trials <- replicate(100,
                    t.test(rpois(10, 10), rpois(7, 10)),
                    simplify = FALSE)

sapply(trials,function(mod){mod[["p.value"]]},simplify=TRUE)


## ------------------------------------------------------------------------
# Example: normalize a data frame by dividing each column by its column mean.
# First with Map() and vapply():
data <- matrix(rnorm(20, 0, 10), nrow = 4)
x <- as.data.frame(data)
ans1 <- Map("/", x, vapply(x, mean, numeric(1)))

# The same with lapply() and an anonymous function:
ans2 <- lapply(x,function(data){data/(mean(data))})
print(data.frame(unlist(ans1),unlist(ans2)))


## ----warning=FALSE, message=FALSE----------------------------------------
expected <- function(colsum, rowsum, total) {
  (colsum / total) * (rowsum / total) * total
}

chi_stat <- function(observed, expected) {
  ((observed - expected) ^ 2) / expected
}


# Note: chisq.test(x, y) cross-tabulates x and y rather than treating them as
# two rows of counts, so it answers a different question than chisq_test2
chisq_test2 <- function(x, y) {
  total <- sum(x) + sum(y)
  rowsum_x <- sum(x)
  rowsum_y <- sum(y)
  chistat <- 0
  for (i in seq_along(x)) {
    colsum <- x[i] + y[i]
    expected_x <- expected(colsum, rowsum_x, total)
    expected_y <- expected(colsum, rowsum_y, total)
    chistat <- chistat + chi_stat(x[i], expected_x)
    chistat <- chistat + chi_stat(y[i], expected_y)
  }
  chistat
}

print(chisq_test2(seq(1, 9), seq(2, 10)))

print(chisq.test(seq(1, 9), seq(2, 10)))

print(microbenchmark::microbenchmark(
  chisq_test2(seq(1, 9), seq(2, 10)),
  chisq.test(seq(1, 9), seq(2, 10))
))


## ----warning=FALSE, message=FALSE----------------------------------------
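# A simplified two-vector table(), built by hand and benchmarked against base table()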
table2 <- function(x, y) {
  x_val <- unique(x)
  y_val <- unique(y)
  mat <- matrix(0L, length(x_val), length(y_val))
  for (i in seq_along(x)) {
    mat[which(x_val == x[[i]]), which(y_val == y[[i]])] <-
      mat[which(x_val == x[[i]]),  which(y_val == y[[i]])] + 1L
  }
  dimnames <- list(x_val, y_val)
  names(dimnames) <- as.character(as.list(match.call())[-1])  # R has names for dimnames... :/
  tab <- array(mat, dim = dim(mat), dimnames = dimnames)
  class(tab) <- "table"
  tab
}

# for example 
a <- c(4, 5, 6)
identical(table(a, a), table2(a, a))

microbenchmark::microbenchmark(table(a, a), table2(a, a))

b <- c(7, 8, 9)
identical(table(a, b), table2(a, b))


u <- c(1, 2, 3, 1, 2, 3)
v <- c(2, 3, 4, 2, 3, 4)
identical(table(u, v), table2(u, v))