## ------------------------------------------------------------------------
cells <- c(16,25,32,45)
rnames <- c("R1","R2")
cnames <- c("C1","C2")
mymatrix <- matrix(cells,nrow = 2,ncol = 2,byrow = TRUE,
dimnames = list(rnames,cnames))
print(mymatrix)
## ------------------------------------------------------------------------
m1 <- matrix(1, nrow = 2, ncol = 2)
m2 <- matrix(2, nrow = 2, ncol = 2)
rbind(m1, m2)
cbind(m1, m2)
## ------------------------------------------------------------------------
library(vcd)
attach(Arthritis)
counts <- table(Treatment,Improved)
spine(counts,main="Spinogram Example")
detach(Arthritis)
## ------------------------------------------------------------------------
x <- 0:4
p <- c(.1,.2,.2,.2,.3)
cp <- cumsum(p)
m <- 1e3
r <- numeric(m)
r <- x[findInterval(runif(m),cp)+1]
ct <- as.vector(table(r))
ratio <- ct/sum(ct)/p #empirical frequency divided by target probability; should be close to 1
print(ratio)
s <- sample(0:4,size=1000,replace = TRUE,prob = c(.1,.2,.2,.2,.3))
print(s[1:100]) #to save space, only the first 100 values are shown
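# A quick sanity check (my addition, not part of the original exercise):
# the empirical relative frequencies of s should be close to the target p.
print(round(rbind(p = p, freq = as.vector(table(s))/length(s)), 3))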
## ------------------------------------------------------------------------
n <- 1e3
j <- 0
k <- 0
y <- numeric(n)
while (k < n) {
u <- runif(1)
j <- j + 1
x <- runif(1) #random variate from g
if (x^2 * (1-x) > u) {
#we accept x
k <- k + 1
y[k] <- x
}
}
j #number of trials needed to accept n random numbers
hist(y, prob = TRUE)
xx <- seq(0, 1, .01) #kept separate from y so the sample is not overwritten
lines(xx, 12*xx^2*(1-xx)) #overlay the Beta(3,2) density
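# A hedged check (my addition): with the U(0,1) proposal above, the
# acceptance probability is E[X^2(1-X)] = 1/12, so j should be about 12*n.
cat("empirical acceptance rate:", n/j, " theoretical:", 1/12, "\n")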
## ------------------------------------------------------------------------
n <- 1e3
r <- 4
beta <- 2
lambda <- rgamma(n, r, beta)
y <- rexp(n, lambda)
y[1:10]
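# A hedged check (my addition): integrating lambda out gives a Pareto
# marginal with E[Y] = beta/(r-1) for r > 1, so the sample mean should agree.
print(c(empirical = mean(y), theoretical = beta/(r - 1)))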
## ------------------------------------------------------------------------
#Monte Carlo estimate of the Beta(3,3) cdf at the point x
beta.cdf <- function(x){ #renamed from F to avoid masking the FALSE shortcut
a <- 3
b <- 3
m <- 1e4
y <- runif(m, min = 0,max = x)
F.hat <- x*mean(1/beta(a,b)*y^(a-1)*(1-y)^(b-1))
return(F.hat)
}
#when x=c(.1,.2,...,.9)
x_seq <- seq(.1,.9,by=.1)
F_seq <- numeric(length(x_seq))
P_seq <- numeric(length(x_seq))
for (i in seq_along(x_seq)) {
F_seq[i] <- beta.cdf(x_seq[i]) # Monte Carlo estimate of the cdf
P_seq[i] <- pbeta(x_seq[i],3,3) # exact value from pbeta for comparison
}
# generate a table to compare
print(round(rbind(x_seq, F_seq, P_seq), 3))
## ------------------------------------------------------------------------
set.seed(123)
R <- 1000
Ray <- function(R,antithetic = TRUE){
u <- runif(R/2)
if (!antithetic) v <- runif(R/2) else
v <- 1-u
u <- c(u,v)
return(mean(sqrt(-2*log(1-u))))
}
m <- 1000
Ray1 <- Ray2 <- numeric(m)
for (i in 1:m) {
Ray1[i] <- Ray(R,FALSE)
Ray2[i] <- Ray(R,TRUE)
}
print(sd(Ray1)) # X1 and X2 are independent
print(sd(Ray2)) # X1 and X2 are antithetic variables
print((var(Ray1) - var(Ray2))/var(Ray1)) # the percent reduction in variance
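# A hedged check (my addition): both estimators target E[X] for a
# Rayleigh(1) variable, which is sqrt(pi/2).
print(c(mc = mean(Ray1), antithetic = mean(Ray2), true = sqrt(pi/2)))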
## ------------------------------------------------------------------------
set.seed(12345)
g <- function(x) {
(x^2*exp(-x^2/2))/sqrt(2*pi) * (x > 1)
}
f <- function(x){
x*exp(-x^2/2)
}
R <- 10000
Ray_cdf <- function(R,antithetic = TRUE){
u <- runif(R/2)
if (!antithetic) v <- runif(R/2) else
v <- 1-u
u <- c(u,v)
return(sqrt(-2*log(1-u)))
}
x <- Ray_cdf(R,FALSE)
fg <- g(x)/f(x)
theta.hat <- mean(fg)
se <- sd(fg) #sd of the integrand ratio; divide by sqrt(R) for the se of theta.hat
print(theta.hat)
print(se)
## ------------------------------------------------------------------------
ff <- function(x) {
(x^2*exp(-x^2/2))/sqrt(2*pi)
}
theta.right <- integrate(ff,1,Inf)
print(theta.right)
## ------------------------------------------------------------------------
set.seed(123)
G.est <- function(type = "rlnorm") {
n <- 1000
m <- 1000
Sum <- numeric(m) #one running sum per replicate
G.hat <- numeric(m)
for (j in 1:m) {
if (type == "rlnorm") {
general <- rlnorm(n)
}
else if (type == "runif") {
general <- runif(n)
}
else if (type == "rbinom") {
general <- rbinom(n, 100, .1)
}
x <- general
mu.hat <- mean(x)
x_order <- sort(x)
for (i in 1:n) {
Sum[j] <- Sum[j] + (2 * i - n - 1) * x_order[i]
}
G.hat[j] <- Sum[j] / (n ^ 2 * mu.hat)
}
print(mean(G.hat))
print(median(G.hat))
print(quantile(G.hat, seq(.1, .9, length = 9)))
}
G.est("rlnorm") #x is generated by standard lognormal
G.est("runif") #x is generated by uniform distribution
G.est("rbinom") #x is generated by Bernoulli(0.1)
## ------------------------------------------------------------------------
set.seed(123)
n <- 1000
m <- 1000
Sum <- numeric(m) #one running sum per replicate
G.hat <- numeric(m)
for (j in 1:m) {
x <- rlnorm(n)
mu.hat <- mean(x)
x_order <- sort(x)
for (i in 1:n) {
Sum[j] <- Sum[j] + (2 * i - n - 1) * x_order[i]
}
G.hat[j] <- Sum[j] / (n ^ 2 * mu.hat)
}
#Construct a normal-approximation confidence interval from the replicates
G.mu <- mean(G.hat)
G.sigma <- sd(G.hat)
CT1 <- G.mu - G.sigma*1.96
CT2 <- G.mu + G.sigma*1.96
print(c(CT1,CT2))
#Estimate the coverage: the fraction of replicates falling in the interval
#(note the interval is built from these same replicates)
h <- 0
for (j in 1:m) {
if(CT1 < G.hat[j] & G.hat[j] < CT2)
h <- h +1
}
print(h/m)
## ------------------------------------------------------------------------
set.seed(123)
library(MASS)
options(digits = 3)
m <- 1000
nump <- numk <- nums <- numeric(m)
p1 <- p2 <- p3 <- numeric(m)
for (i in 1:m) {
mu <- c(0, 1) #named mu to avoid masking base::mean
sigma <- matrix(c(1, 0.15, 0.15, 1), ncol = 2, nrow = 2)
data <- mvrnorm(n = 500, mu, sigma)
x <- data[, 1]
y <- data[, 2]
a1 <- cor.test(x, y, method = "pearson")
a2 <- cor.test(x, y, method = "kendall")
a3 <- cor.test(x, y, method = "spearman")
p1[i] <- a1$p.value
p2[i] <- a2$p.value
p3[i] <- a3$p.value
}
nump[p1 < 0.05] <- 1
numk[p2 < 0.05] <- 1
nums[p3 < 0.05] <- 1
ratio <- c(sum(nump), sum(numk), sum(nums)) / m
print(ratio)
barplot(ratio,col = c("red","green","green"),names.arg = c("pearson","kendall","spearman"),main = "Power of a Test")
## ------------------------------------------------------------------------
set.seed(123)
library(MASS)
m <- 1000
n <- 1000
nump <- numk <- nums <- numeric(m)
p1 <- p2 <- p3 <- numeric(m)
for (i in 1:m) {
mu <- c(0, 1) #named mu to avoid masking base::mean
sigma <- matrix(c(1, 0, 0, 3), ncol = 2, nrow = 2)
data <- mvrnorm(n, mu, sigma)
x <- data[, 1]
y <- data[, 2] + runif(n, min = 0, max = 0.2)
a1 <- cor.test(x, y, method = "pearson")
a2 <- cor.test(x, y, method = "kendall")
a3 <- cor.test(x, y, method = "spearman")
p1[i] <- a1$p.value
p2[i] <- a2$p.value
p3[i] <- a3$p.value
}
nump[p1 < 0.05] <- 1
numk[p2 < 0.05] <- 1
nums[p3 < 0.05] <- 1
ratio <- c(sum(nump), sum(numk), sum(nums)) / m
print(ratio)
barplot(ratio,col = c("red","green","green"),names.arg = c("pearson","kendall","spearman"),main = "Power of a Test")
## ------------------------------------------------------------------------
library(bootstrap) #for the law data
r <- cor(law$LSAT, law$GPA)
n <- length(law$LSAT)
R <- numeric(n) #storage for replicates
for (i in 1:n) {
R[i] <- cor(law$LSAT[-i],law$GPA[-i])
}
bias <- (n - 1) * (mean(R) - r)
se.R <- sqrt((n - 1) *mean((R - mean(R))^2))
print(bias)
print(se.R)
hist(R,prob = TRUE, col = "pink")
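# A hedged cross-check (my addition): a plain bootstrap SE of the
# correlation should be of the same order as the jackknife SE above.
B <- 2000
R.boot <- replicate(B, {
idx <- sample(1:n, size = n, replace = TRUE)
cor(law$LSAT[idx], law$GPA[idx])
})
print(sd(R.boot))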
## ------------------------------------------------------------------------
set.seed(123)
library(boot)
Data <- c(3,5,7,18,43,85,91,98,100,130,230,487)
mu <- function(Data,i){
return(mean(Data[i]))
}
bootobject <- boot(data = Data, statistic = mu, R=1000)
print(bootobject)
plot(bootobject)
boot.ci(bootobject, type = c("basic", "norm", "perc","bca"))
## ------------------------------------------------------------------------
library(bootstrap)
n <- nrow(scor)
theta.hat <- numeric(n)
sigma <- cov(scor)
results <- eigen(sigma)
theta <- results$values[1]/sum(results$values)
for (i in 1:n) {
sigma <- cov(scor[-i, ])
result <- eigen(sigma)
theta.hat[i] <- result$values[1] / sum(result$values)
}
bias <- (n - 1) * (mean(theta.hat) - theta)
se <- sqrt((n - 1) *mean((theta.hat - mean(theta.hat))^2))
print(bias)
print(se)
hist(theta.hat,col = "lightgreen")
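# A hedged follow-up (my addition): the usual jackknife bias-corrected estimate.
print(theta - bias)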
## ------------------------------------------------------------------------
library(ggplot2)
library(DAAG)
attach(ironslag)
n <- length(magnetic) #in DAAG ironslag
e1 <- e2 <- e3 <- e4 <- numeric(n-1)
# leave-two-out cross validation: hold out the adjacent pair (k, k+1),
# fit each model on the remaining data, and record squared prediction errors
for (k in 1:(n-1)) {
y <- magnetic[-k:-(k+1)]
x <- chemical[-k:-(k+1)]
J1 <- lm(y ~ x)
yhat1_1 <- J1$coef[1] + J1$coef[2] * chemical[k]
yhat1_2 <- J1$coef[1] + J1$coef[2] * chemical[k+1]
e1[k] <- (magnetic[k] - yhat1_1)^2 + (magnetic[k+1] - yhat1_2)^2
J2 <- lm(y ~ x + I(x^2))
yhat2_1 <- J2$coef[1] + J2$coef[2] * chemical[k] +J2$coef[3] * chemical[k]^2
yhat2_2 <- J2$coef[1] + J2$coef[2] * chemical[k+1] +J2$coef[3] * chemical[k+1]^2
e2[k] <- (magnetic[k] - yhat2_1)^2 + (magnetic[k+1] - yhat2_2)^2
J3 <- lm(log(y) ~ x)
logyhat3_1 <- J3$coef[1] + J3$coef[2] * chemical[k]
logyhat3_2 <- J3$coef[1] + J3$coef[2] * chemical[k+1]
yhat3_1 <- exp(logyhat3_1)
yhat3_2 <- exp(logyhat3_2)
e3[k] <- (magnetic[k] - yhat3_1)^2 + (magnetic[k+1] - yhat3_2)^2
J4 <- lm(log(y) ~ log(x))
logyhat4_1 <- J4$coef[1] + J4$coef[2] * log(chemical[k])
logyhat4_2 <- J4$coef[1] + J4$coef[2] * log(chemical[k+1])
yhat4_1 <- exp(logyhat4_1)
yhat4_2 <- exp(logyhat4_2)
e4[k] <- (magnetic[k] - yhat4_1)^2 + (magnetic[k+1] - yhat4_2)^2
}
model <- c("1_Liner","2_Quadratic","3_Exponential","4_Log_Log")
pred_error <- c(mean(e1), mean(e2), mean(e3), mean(e4))
data <- data.frame(model,pred_error)
print(pred_error)
ggplot(data, aes(x = model, y = pred_error)) +
geom_bar(stat = "identity",fill = "lightblue", colour = "black")
## ------------------------------------------------------------------------
set.seed(12)
#define the Cramer-von Mises statistic:
CVM <- function(x,y){
n <- length(x)
m <- length(y)
Fn <- ecdf(x)
Gm <- ecdf(y)
W <- (m*n)/(m+n)^2*(sum((Fn(x)-Gm(x))^2) + sum((Fn(y)-Gm(y))^2))
return(W)
}
attach(chickwts)
x <- sort(as.vector(weight[feed == "soybean"]))
y <- sort(as.vector(weight[feed == "linseed"]))
detach(chickwts)
R <- 999 #number of replicates
z <- c(x, y) #pooled sample
K <- 1:26 #indices of the pooled sample (14 soybean + 12 linseed)
reps <- numeric(R) #storage for replicates
t0 <- CVM(x, y)
#data in 8.1 and 8.2
for (i in 1:R) {
#generate indices k for the first sample
k <- sample(K, size = 14, replace = FALSE)
x1 <- z[k]
y1 <- z[-k] #complement of x1
reps[i] <- CVM(x1, y1)
}
p <- mean(c(t0, reps) >= t0)
print(p)
hist(reps, main = "Permutation distribution of the two-sample Cramer-von Mises statistic", freq = FALSE, xlab = paste("T (p =", round(p, 3), ")"), breaks = "scott")
points(t0, 0, cex = 1, pch = 16, col = "red") #observed statistic
## ----warning=FALSE, message=FALSE----------------------------------------
library(RANN)
library(boot)
library(energy)
library(Ball)
m <- 50; k<-3; p<-2;
n1 <- n2 <- 50; R <- 999; n <- n1+n2; N <- c(n1,n2)
Tn <- function(z, ix, sizes,k) {
n1 <- sizes[1]; n2 <- sizes[2]; n <- n1 + n2
if(is.vector(z)) z <- data.frame(z,0);
z <- z[ix, ];
NN <- nn2(data=z, k=k+1) #the first neighbor returned is the point itself, so drop column 1
block1 <- NN$nn.idx[1:n1,-1]
block2 <- NN$nn.idx[(n1+1):n,-1]
i1 <- sum(block1 < n1 + .5); i2 <- sum(block2 > n1+.5)
(i1 + i2) / (k * n)
}
eqdist.nn <- function(z,sizes,k){
boot.obj <- boot(data=z,statistic=Tn,R=R,sim = "permutation", sizes = sizes,k=k)
ts <- c(boot.obj$t0,boot.obj$t)
p.value <- mean(ts>=ts[1])
list(statistic=ts[1],p.value=p.value)
}
p.values <- matrix(NA,m,3)
for(i in 1:m){
x <- matrix(rnorm(n1*p,mean = 1,sd = 0.6),ncol=p);
y <- matrix(rnorm(n2*p,mean = 1,sd = 0.85),ncol=p);
z <- rbind(x,y)
p.values[i,1] <- eqdist.nn(z,N,k)$p.value
p.values[i,2] <- eqdist.etest(z,sizes=N,R=R)$p.value
p.values[i,3] <- bd.test(x=x,y=y,R=999,seed = i*12)$p.value
}
alpha <- 0.1;
pow <- colMeans(p.values<alpha)
print(pow)
barplot(pow,col = "lightgreen",main = "Unequal variances and unequal expectations",
names.arg=c("NN","energy","ball"))
## ----warning=FALSE, message=FALSE----------------------------------------
library(RANN)
library(boot)
library(energy)
library(Ball)
m <- 50; k<-3; p<-2;
n1 <- n2 <- 50; R <- 999; n <- n1+n2; N <- c(n1,n2)
Tn <- function(z, ix, sizes,k) {
n1 <- sizes[1]; n2 <- sizes[2]; n <- n1 + n2
if(is.vector(z)) z <- data.frame(z,0);
z <- z[ix, ];
NN <- nn2(data=z, k=k+1) #the first neighbor returned is the point itself, so drop column 1
block1 <- NN$nn.idx[1:n1,-1]
block2 <- NN$nn.idx[(n1+1):n,-1]
i1 <- sum(block1 < n1 + .5); i2 <- sum(block2 > n1+.5)
(i1 + i2) / (k * n)
}
eqdist.nn <- function(z,sizes,k){
boot.obj <- boot(data=z,statistic=Tn,R=R,sim = "permutation", sizes = sizes,k=k)
ts <- c(boot.obj$t0,boot.obj$t)
p.value <- mean(ts>=ts[1])
list(statistic=ts[1],p.value=p.value)
}
p.values <- matrix(NA,m,3)
for(i in 1:m){
x <- matrix(rnorm(n1*p,mean = 0.4,sd = 0.6),ncol=p);
y <- matrix(rnorm(n2*p,mean = 0.5,sd = 0.85),ncol=p);
z <- rbind(x,y)
p.values[i,1] <- eqdist.nn(z,N,k)$p.value
p.values[i,2] <- eqdist.etest(z,sizes=N,R=R)$p.value
p.values[i,3] <- bd.test(x=x,y=y,R=999,seed = i*12)$p.value
}
alpha <- 0.1;
pow <- colMeans(p.values<alpha)
print(pow)
barplot(pow,col = "lightblue",main = "Unequal variances and equal expectations",
names.arg=c("NN","energy","ball"))
## ----warning=FALSE, message=FALSE----------------------------------------
library(RANN)
library(boot)
library(energy)
library(Ball)
m <- 50; k<-3; p<-2;
n1 <- n2 <- 20; R <- 999; n <- n1+n2; N <- c(n1,n2)
Tn <- function(z, ix, sizes,k) {
n1 <- sizes[1]; n2 <- sizes[2]; n <- n1 + n2
if(is.vector(z)) z <- data.frame(z,0);
z <- z[ix, ];
NN <- nn2(data=z, k=k+1) #the first neighbor returned is the point itself, so drop column 1
block1 <- NN$nn.idx[1:n1,-1]
block2 <- NN$nn.idx[(n1+1):n,-1]
i1 <- sum(block1 < n1 + .5); i2 <- sum(block2 > n1+.5)
(i1 + i2) / (k * n)
}
eqdist.nn <- function(z,sizes,k){
boot.obj <- boot(data=z,statistic=Tn,R=R,sim = "permutation", sizes = sizes,k=k)
ts <- c(boot.obj$t0,boot.obj$t)
p.value <- mean(ts>=ts[1])
list(statistic=ts[1],p.value=p.value)
}
p.values <- matrix(NA,m,3)
for(i in 1:m){
# t distribution with 1 df (heavy-tailed distribution)
x <- matrix(rt(n1*p,df = 1),ncol=p);
#bivariate normal with slightly different means in the two coordinates
y <- cbind(rnorm(n2,mean = 0.4),rnorm(n2,mean = 0.5));
z <- rbind(x,y)
p.values[i,1] <- eqdist.nn(z,N,k)$p.value
p.values[i,2] <- eqdist.etest(z,sizes=N,R=R)$p.value
p.values[i,3] <- bd.test(x=x,y=y,R=999,seed = i*12)$p.value
}
alpha <- 0.1;
pow <- colMeans(p.values<alpha)
print(pow)
barplot(pow,col = "pink",main = "Non-normal distributions",
names.arg=c("NN","energy","ball"))
## ----warning=FALSE, message=FALSE----------------------------------------
library(RANN)
library(boot)
library(energy)
library(Ball)
m <- 50; k<-3; p<-2;
n1 <- 10; n2 <- 100; R <- 999; n <- n1+n2; N <- c(n1,n2) #unbalanced sample sizes
Tn <- function(z, ix, sizes,k) {
n1 <- sizes[1]; n2 <- sizes[2]; n <- n1 + n2
if(is.vector(z)) z <- data.frame(z,0);
z <- z[ix, ];
NN <- nn2(data=z, k=k+1) #the first neighbor returned is the point itself, so drop column 1
block1 <- NN$nn.idx[1:n1,-1]
block2 <- NN$nn.idx[(n1+1):n,-1]
i1 <- sum(block1 < n1 + .5); i2 <- sum(block2 > n1+.5)
(i1 + i2) / (k * n)
}
eqdist.nn <- function(z,sizes,k){
boot.obj <- boot(data=z,statistic=Tn,R=R,sim = "permutation", sizes = sizes,k=k)
ts <- c(boot.obj$t0,boot.obj$t)
p.value <- mean(ts>=ts[1])
list(statistic=ts[1],p.value=p.value)
}
p.values <- matrix(NA,m,3)
for(i in 1:m){
x <- c(rnorm(n1,mean = 1,sd = 1)); # n1 = 10
y <- c(rnorm(n2,mean = 2,sd = 2)); # n2 = 100
z <- c(x,y)
p.values[i,1] <- eqdist.nn(z,N,k)$p.value
p.values[i,2] <- eqdist.etest(z,sizes=N,R=R)$p.value
p.values[i,3] <- bd.test(x=x,y=y,R=999,seed = i*12)$p.value
}
alpha <- 0.1;
pow <- colMeans(p.values<alpha)
print(pow)
barplot(pow,col = "lightgreen",main = "Unequal variances and unequal expectations",names.arg=c("NN","energy","ball"))
## ------------------------------------------------------------------------
set.seed(1)
m <- 10000
# target density: the cauchy distribution
cauchy<-function(x, theta=1,eta=0){
out<-1/(pi*theta*(1+((x-eta)/theta)^2))
return(out)
}
chain<-c(0)
for(i in 1:m){
proposal <- chain[i] + runif(1, min = -20, max = 20)
accept <- runif(1) < cauchy(proposal)/cauchy(chain[i])
chain[i+1] <- ifelse(accept, proposal, chain[i])
}
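# A hedged diagnostic (my addition): the chain's acceptance rate; since the
# proposal is continuous, an unchanged value almost surely means a rejection.
cat("acceptance rate:", mean(diff(chain) != 0), "\n")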
# plot over time
plot(chain, type="l")
# compare the chain's quantiles with the target in a quantile-quantile (QQ) plot
b <- 1001 #discard the burn-in sample
y <- chain[b:m]
a <- ppoints(100)
QC <- qcauchy(a) #quantiles of the standard Cauchy target
Q <- quantile(y, a)
qqplot(QC, Q, main="", xlab="Cauchy Quantiles", ylab="Sample Quantiles", xlim=c(-20,20), ylim=c(-20,20))
hist(y, breaks="scott", main="", xlab="", freq=FALSE)
lines(QC, cauchy(QC)) #overlay the standard Cauchy density
## ------------------------------------------------------------------------
m <- 5000
w <- 0.25
u <- runif(m)
v <- runif(m,-w,w)
group <- c(125,18,20,34)
x <- numeric(m)
prob <- function(theta,group){
#unnormalized posterior of theta under a flat prior restricted to (0, 0.8)
if(theta<0 || theta>=0.8)
return(0)
return((1/2+theta/4)^group[1]*((1-theta)/4)^group[2]*((1-theta)/4)^group[3]*(theta/4)^group[4])
}
x[1] <- 0.4
for (i in 2:m) {
theta <- x[i-1]+v[i]
if(u[i]<= prob(theta,group)/prob(x[i-1],group))
x[i] <- theta
else
x[i] <- x[i-1]
}
index <- 1001:m
theta_hat <- mean(x[index])
print(theta_hat)
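# A hedged cross-check (my addition): maximizing the same unnormalized
# posterior directly should land near the chain's estimate above.
opt <- optimize(function(t) prob(t, group), interval = c(0.01, 0.79), maximum = TRUE)
print(opt$maximum)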
## ------------------------------------------------------------------------
k <- c(4:25, 100, 500, 1000)
n <- length(k)
A <- rep(0, n)
eps <- .Machine$double.eps ^ 0.25
for (i in 1:n) {
f <- function(a) {
num1 <- sqrt(a ^ 2 * (k[i] - 1) / (k[i] - a ^ 2))
num2 <- sqrt(a ^ 2 * k[i] / (k[i] + 1 - a ^ 2))
pt(num2, k[i]) - pt(num1, k[i] - 1)
}
out <- uniroot(f,lower = eps, upper = sqrt(k[i] - eps))
A[i] <- out$root
}
print(A)
## ------------------------------------------------------------------------
options(digits=4)
x <- 0:10
n <- length(x)
value_function <- rep(0,n)
value_pcauchy<- rep(0,n)
f <- function(x,theta,eta){
1/(theta*pi*(1+((x-eta)/theta)^2))
}
# I chose theta=1,eta=0
for (i in 1:n) {
# by function to compute the cdf
value_fun <- integrate(f, lower=-Inf, upper=x[i],rel.tol=.Machine$double.eps^0.25,theta=1,eta=0)
value_function[i] <- value_fun$value
# by R function "pcauchy" to compute the cdf
value_pcauchy[i] <- pcauchy(x[i])
}
value <- data.frame(x,value_function,value_pcauchy)
print(t(value))
## ----warning=FALSE, message=FALSE----------------------------------------
library(stats4)
nA<-28;nB<-24;nOO<-41;nAB<-70;
n<-nA+nB+nOO+nAB;
i <- 0
p1<-0.3;q1<-0.3;
p0<-0;q0<-0;
options(warn=-1)
while(!isTRUE(all.equal(p0,p1,tolerance=10^(-7)))||!isTRUE(all.equal(q0,q1,tolerance=10^(-7)))) #EM algorithm: iterate until p and q converge
{
p0<-p1;
q0<-q1;
mlogL <- function(p,q){
# minus log-likelihood, with the latent genotype counts filled in
# using the current estimates (p0, q0) from the E-step
return(-(2*n*p0^2*log(p) + 2*n*q0^2*log(q) + 2*nOO*log(1-p-q) +
(nA-n*p0^2)*log(2*p*(1-p-q)) + (nB-n*q0^2)*log(2*q*(1-p-q)) +
nAB*log(2*p*q)))
}
fit<-mle(mlogL,start=list(p=0.2,q=0.3))
p1<-fit@coef[1]
q1<-fit@coef[2]
i <- i+1
}
print(c(i,p1,q1)) #i: number of iterations to converge; p1, q1: the final estimates
## ----warning=FALSE, message=FALSE----------------------------------------
formulas <- list(
mpg ~ disp,
mpg ~ I(1 / disp),
mpg ~ disp + wt,
mpg ~ I(1 / disp) + wt
)
attach(mtcars)
# for loops
out3_1 <- vector("list", length(formulas))
for (i in seq_along(formulas)) {
out3_1[[i]] <- lm(formulas[[i]],data=mtcars)
}
# lapply
out3_2 <- lapply(formulas, lm, data = mtcars)
detach(mtcars)
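# A quick check (my addition): the loop and lapply fits should agree.
print(all.equal(coef(out3_1[[1]]), coef(out3_2[[1]])))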
## ----warning=FALSE, message=FALSE----------------------------------------
attach(mtcars)
bootstraps <- lapply(1:10, function(i) {
rows <- sample(1:nrow(mtcars), replace = TRUE)
mtcars[rows, ]
})
# for loops
out4_1 <- vector("list", length(bootstraps))
for (i in seq_along(bootstraps)) {
out4_1[[i]] <- lm(mpg ~ disp,data=bootstraps[[i]])
}
# lapply
out4_2 <- lapply(bootstraps, function(d) lm(mpg ~ disp, data = d))
detach(mtcars)
## ------------------------------------------------------------------------
rsq <- function(mod) {summary(mod)$r.squared}
# for exercise 3
Rreq3_1 <- lapply(out3_1, rsq)
Rreq3_2 <- lapply(out3_2, rsq)
print(data.frame(unlist(Rreq3_1),unlist(Rreq3_2)))
# for exercise 4
Rreq4_1 <- lapply(out4_1, rsq)
Rreq4_2 <- lapply(out4_2, rsq)
print(data.frame(unlist(Rreq4_1),unlist(Rreq4_2)))
## ------------------------------------------------------------------------
trials <- replicate(100,
t.test(rpois(10, 10), rpois(7, 10)),
simplify = FALSE)
sapply(trials,function(mod){mod[["p.value"]]},simplify=TRUE)
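# An equivalent shorthand (my addition): sapply can extract by name directly.
sapply(trials, "[[", "p.value")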
## ------------------------------------------------------------------------
# example task: normalize the data by dividing each column by its column mean
# use Map and vapply to solve
data <- matrix(rnorm(20, 0, 10), nrow = 4)
x <- as.data.frame(data)
ans1 <- Map("/", x, vapply(x, mean, numeric(1)))
# use the lapply with an anonymous function to solve
ans2 <- lapply(x,function(data){data/(mean(data))})
print(data.frame(unlist(ans1),unlist(ans2)))
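# A quick check (my addition): the two approaches should agree.
print(all.equal(as.data.frame(ans1), as.data.frame(ans2)))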
## ----warning=FALSE, message=FALSE----------------------------------------
expected <- function(colsum, rowsum, total) {
(colsum / total) * (rowsum / total) * total
}
chi_stat <- function(observed, expected) {
((observed - expected) ^ 2) / expected
}
# note: chisq.test(x, y) cross-tabulates x and y as factors, so calling it
# on the raw vectors gives a different answer than this statistic
chisq_test2 <- function(x, y) {
total <- sum(x) + sum(y)
rowsum_x <- sum(x)
rowsum_y <- sum(y)
chistat <- 0
for (i in seq_along(x)) {
colsum <- x[i] + y[i]
expected_x <- expected(colsum, rowsum_x, total)
expected_y <- expected(colsum, rowsum_y, total)
chistat <- chistat + chi_stat(x[i], expected_x)
chistat <- chistat + chi_stat(y[i], expected_y)
}
chistat
}
print(chisq_test2(seq(1, 9), seq(2, 10)))
print(chisq.test(seq(1, 9), seq(2, 10)))
print(microbenchmark::microbenchmark(
chisq_test2(seq(1, 9), seq(2, 10)),
chisq.test(seq(1, 9), seq(2, 10))
))
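# A hedged note (my addition): applied to the 2 x 9 count table instead of
# the raw vectors, chisq.test should reproduce the hand-rolled statistic.
print(chisq.test(rbind(seq(1, 9), seq(2, 10)))$statistic)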
## ----warning=FALSE, message=FALSE----------------------------------------
table2 <- function(x, y) {
x_val <- unique(x)
y_val <- unique(y)
mat <- matrix(0L, length(x_val), length(y_val))
for (i in seq_along(x)) {
mat[which(x_val == x[[i]]), which(y_val == y[[i]])] <-
mat[which(x_val == x[[i]]), which(y_val == y[[i]])] + 1L
}
dimnames <- list(x_val, y_val)
names(dimnames) <- as.character(as.list(match.call())[-1]) # R has names for dimnames... :/
tab <- array(mat, dim = dim(mat), dimnames = dimnames)
class(tab) <- "table"
tab
}
# for example
a <- c(4, 5, 6)
identical(table(a, a), table2(a, a))
microbenchmark::microbenchmark(table(a, a), table2(a, a))
b <- c(7, 8, 9)
identical(table(a, b), table2(a, b))
c <- c(1, 2, 3, 1, 2, 3)
d <- c(2, 3, 4, 2, 3, 4)
identical(table(c, d), table2(c, d))