Write a .Rmd file implementing at least three examples of different types from the book "R for Beginners".
# Example 1: matrix algebra
A <- matrix(1:9, 3, 3)  # 3 x 3 matrix filled column-wise with 1:9
B <- diag(2, 3, 3)      # diagonal matrix with 2 on the diagonal
C <- A %*% B            # matrix product
C
# Example 2: lattice graphics
library(lattice)
n <- seq(5, 20, 5)
x <- rnorm(sum(n))
y <- factor(rep(n, n), labels = paste("n =", n))
densityplot(~ x | y,
            panel = function(x, ...) {
              panel.densityplot(x, col = "DarkOliveGreen", ...)
              panel.mathdensity(dmath = dnorm,
                                args = list(mean = mean(x), sd = sd(x)),
                                col = "darkblue")
            })
# Example 3: a monthly time series starting in September 2018
ts(1:47, frequency = 12, start = c(2018, 9))
| x    | 0   | 1   | 2   | 3   | 4   |
|-----:|----:|----:|----:|----:|----:|
| p(x) | 0.1 | 0.2 | 0.2 | 0.2 | 0.3 |
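The inverse transform method draws $U \sim \mathrm{Uniform}(0,1)$ and returns the support point whose cumulative-probability interval contains $U$:

$$X = x_j \quad \text{whenever} \quad F(x_{j-1}) < U \le F(x_j),$$

where $F$ is the CDF of $X$. In the code below, `findInterval(u, cp) + 1` performs exactly this lookup against the cumulative probabilities `cp`.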
set.seed(123)
# The inverse transform method
n <- 1000                  # size of the sample
u <- runif(n)              # generate u from the Uniform(0,1) distribution
x <- 0:4                   # the support of the variable X
p <- c(.1, .2, .2, .2, .3) # the probabilities of X
cp <- cumsum(p)            # the cumulative probabilities of X
r <- x[findInterval(u, cp) + 1] # find the corresponding value of x
ct <- as.vector(table(r))
# Compare with the true probabilities: ratios close to 1 indicate an accurate result.
ct / sum(ct) / p
# Repeat using the R sample function.
r.sample <- sample(x, size = 1000, replace = TRUE, prob = p)
ct.sample <- as.vector(table(r.sample))
ct.sample / sum(ct.sample) / p
# Acceptance-rejection method for Beta(a, b)
random.Beta <- function(a, b, n) {
  if (a <= 0 || b <= 0)
    stop("alpha and beta must be positive")
  if (a <= 1 || b <= 1)
    stop("the maximum of the Beta(a,b) density is infinite, so this method is not applicable")
  x0 <- (a - 1) / (a + b - 2) # x0 is the mode of the theoretical density of Beta(a,b)
  c <- dbeta(x0, a, b)        # c is the maximum of the theoretical density of Beta(a,b)
  j <- k <- 0
  y <- numeric(n)
  while (k < n) {
    u <- runif(1)
    j <- j + 1
    x <- runif(1)             # random variate from the Uniform(0,1) proposal g
    if (dbeta(x, a, b) > c * u) { # we accept x with probability f(x)/(c*g(x))
      k <- k + 1
      y[k] <- x
    }
  }
  return(y)
}
x <- random.Beta(3, 2, 1000)
hist(x, prob = TRUE, breaks = seq(0, 1, 0.05),
     main = "Random observations from Beta(3,2) vs. the theoretical density")
curve(dbeta(x, 3, 2), add = TRUE, col = "red")
# Theoretical (Pareto) density of the Gamma-Exponential mixture
pdf.Y <- function(x, r, beta) {
  r * beta^r / ((beta + x)^(r + 1))
}
n <- 1000
r <- 4; beta <- 2
lambda <- rgamma(n, r, beta) # Lambda ~ Gamma(r, beta)
x <- rexp(n, lambda)         # Y | Lambda = lambda ~ Exp(lambda)
hist(x, prob = TRUE, breaks = seq(0, ceiling(max(x)), 0.5),
     main = "Random observations from the mixture vs. the theoretical density of Y")
y <- seq(0, 12, .1)
lines(y, pdf.Y(y, r, beta), col = "red") # red line: theoretical density of Y
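A short justification for the red curve (the standard Gamma-Exponential mixture result): if $\Lambda \sim \Gamma(r,\beta)$ and $Y \mid \Lambda = \lambda \sim \mathrm{Exp}(\lambda)$, then integrating out $\Lambda$ gives

$$f_Y(y) = \int_0^\infty \lambda e^{-\lambda y}\,\frac{\beta^r}{\Gamma(r)}\,\lambda^{r-1} e^{-\beta\lambda}\,d\lambda = \frac{r\beta^r}{(\beta+y)^{r+1}}, \qquad y > 0,$$

the Pareto (Lomax) density implemented by `pdf.Y`.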
set.seed(15656)
MC.Beta33 <- function(x) {
  m <- 10000 # the number of random numbers
  n <- length(x)
  y <- numeric(n)
  if (any(x <= 0 | x >= 1))
    stop("x must be in (0,1)")
  for (i in 1:n) {
    g <- x[i] * dbeta(runif(m, min = 0, max = x[i]), 3, 3) # MC random numbers
    y[i] <- mean(g) # the estimated values
  }
  return(y)
}
x <- seq(.1, .9, .1)
true <- pbeta(x, 3, 3)
y <- MC.Beta33(x)
print(true)     # the values returned by the pbeta function in R
print(y)        # the values simulated by the MC method
print(y / true) # the ratios
library(VGAM)
MC.Rayleigh <- function(x, sigma, antithetic = TRUE) {
  m <- 10000 # the number of random numbers
  n <- length(x)
  y <- numeric(n)
  if (any(x < 0) || sigma <= 0)
    stop("x should be non-negative and sigma should be positive")
  for (i in 1:n) {
    # use antithetic variables to generate the MC random numbers
    u <- runif(m / 2, min = 0, max = x[i])
    if (!antithetic) v <- runif(m / 2, min = 0, max = x[i])
    else v <- x[i] - u
    U <- c(u, v)
    g <- x[i] * drayleigh(U, sigma)
    y[i] <- mean(g) # the estimated values
  }
  return(y)
}
# Use the Rayleigh distribution function in R to check the correctness of the estimates.
y1 <- MC.Rayleigh(0.3, 1, antithetic = FALSE) # estimate without antithetic variables
y2 <- MC.Rayleigh(0.3, 1)                     # estimate with antithetic variables
true <- prayleigh(0.3, 1)
print(c(y1 / true, y2 / true)) # ratios near 1 show the estimates are correct
# Compute the percent reduction in standard deviation.
m <- 1000
MC1 <- MC2 <- numeric(m)
for (i in 1:m) {
  MC1[i] <- MC.Rayleigh(1, 2, antithetic = FALSE)
  MC2[i] <- MC.Rayleigh(1, 2)
}
c(sd(MC1), sd(MC2), sd(MC2) / sd(MC1))
x <- seq(1, 3, .1)
w <- 2
g  <- x^2 / sqrt(2 * pi) * exp(-x^2 / 2)
f1 <- exp(-x^2 / 2) / sqrt(2 * pi)
f2 <- x^2 * exp(-x / 2) / 16
gs <- c(expression(g(x) == x^2 / sqrt(2 * pi) * e^{-x^2 / 2}),
        expression(f[1](x) == e^{-x^2 / 2} / sqrt(2 * pi)),
        expression(f[2](x) == e^{-x / 2} * {x^2} / 16))
par(mfrow = c(1, 2))
# figure (a)
plot(x, g, type = "l", ylab = "", ylim = c(0, 1), lwd = w, col = 1, main = '(A)')
lines(x, f1, lty = 2, lwd = w, col = 2)
lines(x, f2, lty = 3, lwd = w, col = 3)
legend("topleft", legend = gs, lty = 1:3, lwd = w, inset = 0.02, col = 1:3, cex = 0.7)
# figure (b)
plot(x, g / f1, type = "l", ylab = "", ylim = c(0, 10), lwd = w, lty = 2, col = 2, main = '(B)')
lines(x, g / f2, lty = 3, lwd = w, col = 3)
legend("topleft", legend = gs[-1], lty = 2:3, lwd = w, inset = 0.02, col = 2:3, cex = 0.7)
g.cut <- function(x, t) { # t is the lower limit of the integral
  n <- length(x)
  y <- numeric(n)
  for (i in 1:n) {
    if (x[i] > t) y[i] <- x[i]^2 / sqrt(2 * pi) * exp(-x[i]^2 / 2)
    else y[i] <- 0
  }
  return(y)
}
MC.g <- function(t) { # t is the lower limit of the integral
  m <- 1000000 # the number of random numbers
  n <- length(t)
  y <- numeric(n)
  for (i in 1:n) {
    u <- rchisq(m, 6)    # importance-sampling draws from f2 = chi-squared(6)
    G <- g.cut(u, t[i])
    f2 <- u^2 * exp(-u / 2) / 16
    y[i] <- mean(G / f2) # the estimated values
  }
  return(y)
}
MC.g(0) # this outcome shows the program is correct
MC.g(1) # the estimated value
In order to verify the correctness of the program, I also computed $\int_{0}^{\infty} \frac{x^2}{\sqrt{2\pi}} e^{-x^2/2}\,dx$ with it, whose true value is 0.5. The program gives $\int_{0}^{\infty} \frac{x^2}{\sqrt{2\pi}} e^{-x^2/2}\,dx \approx 0.4994781$, which is close to 0.5, so we can infer that the program is correct.
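As an additional sanity check (a minimal sketch; `integrate()` is used here only as an independent numerical reference, not as part of the MC exercise):

```r
g <- function(x) x^2 / sqrt(2 * pi) * exp(-x^2 / 2)
integrate(g, lower = 0, upper = Inf) # should be close to 0.5
integrate(g, lower = 1, upper = Inf) # quadrature reference for MC.g(1)
```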
set.seed(15411)
library(MASS)
Gini <- function(x) {
  n <- length(x)
  a <- seq(1 - n, n - 1, 2)
  x.i <- sort(x)
  Gini.hat <- sum(a * x.i) / (n * n * mean(x))
  return(Gini.hat)
}
# Gini() estimates G.hat from a sample.
## X is standard lognormal
n <- 200
m <- 2000
Gini.hat1 <- numeric(m)
for (i in 1:m) {
  y <- rnorm(n)
  x <- exp(y) # then x is standard lognormal
  Gini.hat1[i] <- Gini(x)
}
result1 <- c(mean(Gini.hat1), quantile(Gini.hat1, probs = c(0.5, 0.1)))
names(result1) <- c("mean", "median", "1st decile")
print(result1)
hist(Gini.hat1, breaks = seq(min(Gini.hat1) - 0.01, max(Gini.hat1) + 0.01, 0.01),
     freq = FALSE, main = "Histogram of Gini", xlab = "Standard lognormal")
## X is uniform
Gini.hat2 <- numeric(m)
for (i in 1:m) {
  x <- runif(n) # then x is uniform
  Gini.hat2[i] <- Gini(x)
}
result2 <- c(mean(Gini.hat2), quantile(Gini.hat2, probs = c(0.5, 0.1)))
names(result2) <- c("mean", "median", "1st decile")
print(result2)
hist(Gini.hat2, breaks = seq(min(Gini.hat2) - 0.01, max(Gini.hat2) + 0.01, 0.01),
     freq = FALSE, main = "Histogram of Gini", xlab = "Uniform")
## X is Bernoulli(0.1)
Gini.hat3 <- numeric(m)
for (i in 1:m) {
  x <- rbinom(n, 1, 0.1) # then x is Bernoulli(0.1)
  Gini.hat3[i] <- Gini(x)
}
result3 <- c(mean(Gini.hat3), quantile(Gini.hat3, probs = c(0.5, 0.1)))
names(result3) <- c("mean", "median", "1st decile")
print(result3)
hist(Gini.hat3, breaks = seq(min(Gini.hat3) - 0.01, max(Gini.hat3) + 0.01, 0.01),
     freq = FALSE, main = "Histogram of Gini", xlab = "Bernoulli(0.1)")
n <- 200
m <- 2000
mu <- 0
sigma <- 10
Gini1 <- function(n, mu, sigma) {
  y <- rnorm(n, mu, sigma)
  x <- exp(y)
  Gini.sample <- Gini(x)
  return(Gini.sample)
}
Gini.sp <- numeric(m)
for (i in 1:m) {
  Gini.sp[i] <- Gini1(n, mu, sigma)
}
CI <- c(mean(Gini.sp) - sd(Gini.sp) * qt(0.975, m - 1),
        mean(Gini.sp) + sd(Gini.sp) * qt(0.975, m - 1))
print(CI) # the approximate 95% confidence interval
cover.rate <- sum(I(Gini.sp > CI[1] & Gini.sp < CI[2])) / m
print(cover.rate) # the coverage rate shows the CI is effective
m <- 1000
sigma1 <- matrix(c(1.5, 0.375, 0.375, 4), 2, 2)
p.value1 <- p.value2 <- p.value3 <- numeric(m)
for (i in 1:m) {
  x1 <- mvrnorm(20, mu = c(0, 0), Sigma = sigma1)
  p.value1[i] <- cor.test(x1[, 1], x1[, 2], method = "pearson")$p.value
  p.value2[i] <- cor.test(x1[, 1], x1[, 2], method = "spearman")$p.value
  p.value3[i] <- cor.test(x1[, 1], x1[, 2], method = "kendall")$p.value
}
# the power
power <- c(mean(p.value1 <= 0.05), mean(p.value2 <= 0.05), mean(p.value3 <= 0.05))
names(power) <- c("Pearson", "Spearman", "Kendall")
print(power)
# This result shows that the power of Spearman's test is greater than that of Pearson's.
library(boot)
library(bootstrap)
library(DAAG)
set.seed(1)
attach(law)
b.cor <- function(x, i) {
  y <- cor(x[1, i], x[2, i])
  return(y)
}
x <- t(law)
n <- nrow(law)
theta.hat <- b.cor(x, 1:n)
theta.jack <- numeric(n)
for (i in 1:n) {
  theta.jack[i] <- b.cor(x, (1:n)[-i])
}
bias.jack <- (n - 1) * (mean(theta.jack) - theta.hat)
se.jack <- sqrt((n - 1) * mean((theta.jack - theta.hat)^2))
round(c(original = theta.hat, bias = bias.jack, se = se.jack), 3)
attach(aircondit)
X <- aircondit$hours
mle.boot <- function(X, i) mean(X[i])
de <- boot(data = X, statistic = mle.boot, R = 1000)
ci <- boot.ci(de, type = c("norm", "basic", "perc", "bca"))
ci$norm[2:3]
ci$basic[4:5]
ci$percent[4:5]
ci$bca[4:5]
attach(scor)
X <- scor
n <- dim(X)[1]
p <- dim(X)[2]
theta.jac <- numeric(n)
eigen <- eigen(cov(X))$values
theta.hat <- max(eigen) / sum(eigen)
for (i in 1:n) {
  A <- cov(scor[-i, ])
  eigen <- eigen(A)$values
  theta.jac[i] <- max(eigen) / sum(eigen)
}
bias.jac <- (n - 1) * (mean(theta.jac) - theta.hat)
se.jac <- sqrt((n - 1) * mean((theta.jac - theta.hat)^2))
round(c(original = theta.hat, bias = bias.jac, se = se.jac), 3)
library(DAAG)
attach(ironslag)
n <- length(magnetic) # in DAAG ironslag
e1 <- e2 <- e3 <- e4 <- matrix(0, n * (n - 1) / 2, 2)
k <- 1
for (i in 1:(n - 1)) {
  for (j in (i + 1):n) {
    y <- magnetic[-c(i, j)]
    x <- chemical[-c(i, j)]

    J1 <- lm(y ~ x)
    yhat11 <- J1$coef[1] + J1$coef[2] * chemical[i]
    yhat12 <- J1$coef[1] + J1$coef[2] * chemical[j]
    e1[k, 1] <- yhat11 - magnetic[i]
    e1[k, 2] <- yhat12 - magnetic[j]

    J2 <- lm(y ~ x + I(x^2))
    yhat21 <- J2$coef[1] + J2$coef[2] * chemical[i] + J2$coef[3] * chemical[i]^2
    yhat22 <- J2$coef[1] + J2$coef[2] * chemical[j] + J2$coef[3] * chemical[j]^2
    e2[k, 1] <- yhat21 - magnetic[i]
    e2[k, 2] <- yhat22 - magnetic[j]

    J3 <- lm(log(y) ~ x)
    logyhat31 <- J3$coef[1] + J3$coef[2] * chemical[i]
    logyhat32 <- J3$coef[1] + J3$coef[2] * chemical[j]
    yhat31 <- exp(logyhat31)
    yhat32 <- exp(logyhat32)
    e3[k, 1] <- yhat31 - magnetic[i]
    e3[k, 2] <- yhat32 - magnetic[j]

    J4 <- lm(log(y) ~ log(x))
    logyhat41 <- J4$coef[1] + J4$coef[2] * log(chemical[i])
    logyhat42 <- J4$coef[1] + J4$coef[2] * log(chemical[j])
    yhat41 <- exp(logyhat41)
    yhat42 <- exp(logyhat42)
    e4[k, 1] <- yhat41 - magnetic[i]
    e4[k, 2] <- yhat42 - magnetic[j]

    k <- k + 1
  }
}
c(mean(e1^2), mean(e2^2), mean(e3^2), mean(e4^2))
set.seed(1)
attach(chickwts)
cvm.ts <- function(x, y) { # two-sample Cramer-von Mises statistic
  n <- length(x); m <- length(y)
  fn <- ecdf(x); gn <- ecdf(y)
  w2 <- m * n / ((m + n)^2) *
    (sum((fn(x) - gn(x))^2) + sum((fn(y) - gn(y))^2))
  return(w2)
}
N <- 999
x <- sort(as.vector(weight[feed == "soybean"]))
y <- sort(as.vector(weight[feed == "linseed"]))
detach(chickwts)
z <- c(x, y)
K <- 1:26
reps <- numeric(N)
cvm0 <- cvm.ts(x, y)
for (i in 1:N) { # permutation test
  k <- sample(K, size = 14, replace = FALSE)
  x1 <- z[k]
  y1 <- z[-k]
  reps[i] <- cvm.ts(x1, y1)
}
p <- mean(c(cvm0, reps) >= cvm0)
p # 0.421 does not support the alternative hypothesis that the distributions differ
hist(reps, main = "", freq = FALSE, xlab = "W2 (p = 0.421)", breaks = "scott")
points(cvm0, 0, cex = 1, pch = 16)
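For reference, `cvm.ts` implements the two-sample Cramér–von Mises statistic

$$W_2 = \frac{mn}{(m+n)^2}\left[\sum_{i=1}^{n}\bigl(F_n(x_i)-G_m(x_i)\bigr)^2 + \sum_{j=1}^{m}\bigl(F_n(y_j)-G_m(y_j)\bigr)^2\right],$$

where $F_n$ and $G_m$ are the empirical CDFs of the two samples.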
library(Ball)
library(energy)
library(boot)
library(RANN)
m <- 1e2; k <- 3; p <- 2; mu <- 0.5; R <- 999
p.values <- matrix(0, m, 3)
Tn <- function(z, ix, sizes, k) { # nearest-neighbor statistic
  n1 <- sizes[1]; n2 <- sizes[2]; n <- n1 + n2
  if (is.vector(z)) z <- data.frame(z, 0)
  z <- z[ix, ]
  NN <- nn2(data = z, k = k + 1)
  block1 <- NN$nn.idx[1:n1, -1]
  block2 <- NN$nn.idx[(n1 + 1):n, -1]
  i1 <- sum(block1 <= n1)
  i2 <- sum(block2 > n1)
  (i1 + i2) / (k * n)
}
eqdist.nn <- function(z, sizes, k) { # NN permutation test
  boot.obj <- boot(data = z, statistic = Tn, R = R, sim = "permutation",
                   sizes = sizes, k = k)
  ts <- c(boot.obj$t0, boot.obj$t)
  p.value <- mean(ts >= ts[1])
  list(statistic = ts[1], p.value = p.value)
}
# Unequal variances, equal means
n1 <- n2 <- 50; n <- n1 + n2; N <- c(n1, n2)
mu1 <- mu2 <- 0; sigma1 <- 1; sigma2 <- 2
for (i in 1:m) {
  x <- rnorm(n1, mu1, sigma1)
  y <- rnorm(n2, mu2, sigma2)
  z <- c(x, y)
  p.values[i, 1] <- eqdist.nn(z, N, k)$p.value
  p.values[i, 2] <- eqdist.etest(z, sizes = N, R = R)$p.value
  p.values[i, 3] <- bd.test(x = x, y = y, R = 999, seed = i * 12345)$p.value
}
alpha <- 0.1
pow <- colMeans(p.values < alpha)
pow
# Unequal variances and unequal means
n1 <- n2 <- 50; n <- n1 + n2; N <- c(n1, n2)
mu1 <- 1; mu2 <- 2; sigma1 <- 1; sigma2 <- 2
for (i in 1:m) {
  x <- rnorm(n1, mu1, sigma1)
  y <- rnorm(n2, mu2, sigma2)
  z <- c(x, y)
  p.values[i, 1] <- eqdist.nn(z, N, k)$p.value
  p.values[i, 2] <- eqdist.etest(z, sizes = N, R = R)$p.value
  p.values[i, 3] <- bd.test(x = x, y = y, R = 999, seed = i * 12345)$p.value
}
alpha <- 0.1
pow <- colMeans(p.values < alpha)
pow
# Non-normal data: t(1) vs. a two-component normal mixture
n1 <- n2 <- 50; n <- n1 + n2; N <- c(n1, n2)
mu1 <- 1; mu2 <- 2; sigma1 <- 1; sigma2 <- 2
for (i in 1:m) {
  x <- rt(n1, 1)
  y <- c(rnorm(n2 / 2, mu1, sigma1), rnorm(n2 / 2, mu2, sigma2))
  z <- c(x, y)
  p.values[i, 1] <- eqdist.nn(z, N, k)$p.value
  p.values[i, 2] <- eqdist.etest(z, sizes = N, R = R)$p.value
  p.values[i, 3] <- bd.test(x = x, y = y, R = 999, seed = i * 12345)$p.value
}
alpha <- 0.1
pow <- colMeans(p.values < alpha)
pow
# Unbalanced sample sizes
n1 <- 10; n2 <- 100; n <- n1 + n2; N <- c(n1, n2)
for (i in 1:m) {
  x <- rnorm(n1)
  y <- rnorm(n2)
  z <- c(x, y)
  p.values[i, 1] <- eqdist.nn(z, N, k)$p.value
  p.values[i, 2] <- eqdist.etest(z, sizes = N, R = R)$p.value
  p.values[i, 3] <- bd.test(x = x, y = y, R = 999, seed = i * 12345)$p.value
}
alpha <- 0.1
pow <- colMeans(p.values < alpha)
pow
f_cauchy <- function(x, u, lambda) { # the density function of the Cauchy distribution
  return(lambda / (pi * (lambda^2 + (x - u)^2)))
}
cauchy.chain <- function(n, sigma, x0, u, lambda) {
  # n: length of the chain
  # sigma: standard deviation of the proposal distribution
  # x0: initial value
  # u, lambda: parameters of the Cauchy distribution
  x <- numeric(n)
  x[1] <- x0
  e <- runif(n)
  k <- 0
  for (i in 2:n) {
    y <- rnorm(1, x[i - 1], sigma)
    if (e[i] <= (f_cauchy(y, u, lambda) / f_cauchy(x[i - 1], u, lambda)))
      x[i] <- y
    else {
      x[i] <- x[i - 1]
      k <- k + 1
    }
  }
  return(list(x = x, k = k))
}
n <- 10000
cauchy <- cauchy.chain(n, sigma = 0.5, x0 = 0, u = 0, lambda = 1)
refuse.pr <- cauchy$k / n
refuse.pr # rejection rate
# QQ plot
par(mfrow = c(1, 1))
qqplot(rcauchy(n - 1000, 0, 1), cauchy$x[1001:n])
qqline(cauchy$x[1001:n])
# histogram
hist(cauchy$x[1001:n], freq = FALSE, main = "density of Cauchy", breaks = 60)
curve(f_cauchy(x, 0, 1), add = TRUE)
w <- 0.25    # width of the uniform support set
n <- 5000    # length of the chain
burn <- 1000 # burn-in time
y <- c(125, 18, 20, 34)
x <- numeric(n) # the chain
prob <- function(b, y) {
  # computes the target density (without the constant)
  if (b < 0 || b >= 1) return(0)
  return((1/2 + b/4)^y[1] * ((1 - b)/4)^y[2] * ((1 - b)/4)^y[3] * (b/4)^y[4])
}
u <- runif(n)        # for the accept/reject step
v <- runif(n, -w, w) # proposal distribution
x[1] <- 0.25
for (i in 2:n) {
  z <- x[i - 1] + v[i]
  if (u[i] <= prob(z, y) / prob(x[i - 1], y)) x[i] <- z
  else x[i] <- x[i - 1]
}
xb <- x[(burn + 1):n]
xc <- mean(xb)
print(xc) # estimated value of theta
print(y / sum(y))
print(c(1/2 + xc/4, (1 - xc)/4, (1 - xc)/4, xc/4))
set.seed(1)
Gelman.Rubin <- function(psi) {
  # psi[i,j] is the statistic psi(X[i, 1:j]) for the chain in the i-th row of X
  psi <- as.matrix(psi)
  n <- ncol(psi)
  k <- nrow(psi)
  psi.means <- rowMeans(psi)         # row means
  B <- n * var(psi.means)            # between-chain variance estimate
  psi.w <- apply(psi, 1, "var")      # within variances
  W <- mean(psi.w)                   # within-chain estimate
  v.hat <- W * (n - 1) / n + (B / n) # upper variance estimate
  r.hat <- v.hat / W                 # G-R statistic
  return(r.hat)
}
prob <- function(y, ob) {
  # computes the target density
  if (y < 0 || y >= 1) {
    return(0)
  } else {
    return((0.5 + y/4)^ob[1] * ((1 - y)/4)^ob[2] * ((1 - y)/4)^ob[3] * (y/4)^ob[4])
  }
}
mult.chain <- function(ob, w, m, x1) {
  x <- numeric(m)      # the chain
  u <- runif(m)        # for the accept/reject step
  v <- runif(m, -w, w) # proposal distribution
  x[1] <- x1
  for (i in 2:m) {
    y <- x[i - 1] + v[i]
    if (u[i] <= prob(y, ob) / prob(x[i - 1], ob)) {
      x[i] <- y
    } else {
      x[i] <- x[i - 1]
    }
  }
  return(x)
}
w <- .5                      # width of the uniform support set
ob <- c(125, 18, 20, 34)     # the observed sample
m <- 10000                   # length of the chains
burn <- 1000                 # burn-in time
k <- 4                       # number of chains to generate
x0 <- c(0.5, 0.9, 0.1, 0.75) # overdispersed initial values
# generate the chains
X <- matrix(0, nrow = k, ncol = m)
for (i in 1:k) {
  X[i, ] <- mult.chain(ob, w, m, x0[i])
}
# compute the diagnostic statistics
psi <- t(apply(X, 1, cumsum))
for (i in 1:nrow(psi)) {
  psi[i, ] <- psi[i, ] / (1:ncol(psi))
}
# The chains have approximately converged to the target distribution
# within about 3000 iterations.
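A minimal sketch to back up the convergence comment above: plot the Gelman-Rubin statistic $\hat{R}$ as the chains lengthen (this reuses `psi`, `m`, `burn`, and `Gelman.Rubin` from the chunk above; the 1.2 threshold is the usual rule of thumb).

```r
rhat <- rep(0, m)
for (j in (burn + 1):m)
  rhat[j] <- Gelman.Rubin(psi[, 1:j]) # R-hat based on the first j iterations
plot((burn + 1):m, rhat[(burn + 1):m], type = "l",
     xlab = "iteration", ylab = expression(hat(R)))
abline(h = 1.2, lty = 2) # common convergence threshold
```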
Find the intersection points $A(k)$ in $(0,\sqrt{k})$ of the curves $$S_{k-1}(a)=P\left( t(k-1)>\sqrt{\frac{a^2(k-1)}{k-a^2}} \right)$$ and $$S_{k}(a)=P\left( t(k)>\sqrt{\frac{a^2k}{k+1-a^2}} \right)$$ for $k=4,\dots,25,100,500,1000$, where $t(k)$ is a Student $t$ random variable with $k$ degrees of freedom. (These intersection points determine the critical values for a $t$-test for scale-mixture errors proposed by Székely [260].)
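Because both curves are upper-tail probabilities of $t$ distributions, the intersection point solves

$$F_{t(k-1)}\!\left(\sqrt{\frac{a^2(k-1)}{k-a^2}}\right)=F_{t(k)}\!\left(\sqrt{\frac{a^2k}{k+1-a^2}}\right),$$

so it suffices to find the zero of the difference of the two CDFs, which is exactly what the function `Ak` below computes.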
Ak <- function(a, k) {
  t1 <- sqrt(a^2 * (k - 1) / (k - a^2))
  t2 <- sqrt(a^2 * k / (k + 1 - a^2))
  return(pt(t1, df = k - 1) - pt(t2, df = k))
}
# Ak(a, k) = 0 means that the two curves intersect at a.
K <- c(4:25, 100, 500, 1000)
ipts <- numeric(length(K))
for (i in 1:length(K)) {
  ipts[i] <- uniroot(Ak, lower = 1e-4, upper = sqrt(K[i]) - 1e-4, k = K[i])$root
}
ipts # the first attempt (wrong for large k)
# Checking ipts, we find that for K >= 23 the root lands at the right endpoint,
# which must be wrong. So we change the right endpoint based on the curves.
k <- 23 # for example
x <- seq(0.01, sqrt(k) - 1e-4, length = 1000)
y <- Ak(x, k)
plot(x, y, type = "l")
# The curve shows that the zero point is smaller than 3, and the same holds
# for the other k. So change the right endpoint to 3.
wrong <- which(K > 22)
for (i in wrong) {
  ipts[i] <- uniroot(Ak, lower = 1e-4, upper = 3, k = K[i])$root
}
names(ipts) <- K
ipts # the final result
library(nloptr)
f <- function(y, eta, theta) {
  # theta is the scale parameter, eta is the location parameter
  1 / (theta * pi * (1 + ((y - eta) / theta)^2))
} # the Cauchy pdf
p_cauchy <- function(x, eta, theta, lower.tail = TRUE) {
  if (lower.tail)
    res <- integrate(f, lower = -Inf, upper = x,
                     rel.tol = .Machine$double.eps^0.25, theta = theta, eta = eta)
  else
    res <- integrate(f, lower = x, upper = Inf,
                     rel.tol = .Machine$double.eps^0.25, theta = theta, eta = eta)
  return(res$value)
}
# compare with R's pcauchy
p_cauchy(x = 0, eta = 0, theta = 1)
pcauchy(0, location = 0, scale = 1)
p_cauchy(x = 1, eta = 3, theta = 2, lower.tail = FALSE)
pcauchy(1, location = 3, scale = 2, lower.tail = FALSE)
options(warn = -1) # suppress warnings in the knitted output
dat <- rbind(
  Genotype  = c('AA',  'BB',  'OO',  'AO',  'BO',  'AB',  'Sum'),
  Frequency = c('p^2', 'q^2', 'r^2', '2pr', '2qr', '2pq', '1'),
  Count     = c('nAA', 'nBB', 'nOO', 'nAO', 'nBO', 'nAB', 'n')
)
knitr::kable(dat, format = 'markdown')
Observed data: $n_{A\cdot}=n_{AA}+n_{AO}=28$ (A-type), $n_{B\cdot}=n_{BB}+n_{BO}=24$ (B-type), $n_{OO}=41$ (O-type), $n_{AB}=70$ (AB-type).
Use the EM algorithm to find the MLE of $p$ and $q$ (treating the genotype counts $n_{AA}$ and $n_{BB}$ as missing data).
Record the maximum likelihood values in the M-steps: are they increasing?
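For reference, the E-step imputes the expected genotype counts given the current estimates $(p_0, q_0)$, with $r_0 = 1 - p_0 - q_0$:

$$\hat{n}_{AA} = n_{A\cdot}\,\frac{p_0^2}{p_0^2 + 2p_0 r_0},\qquad \hat{n}_{BB} = n_{B\cdot}\,\frac{q_0^2}{q_0^2 + 2q_0 r_0},$$

and the M-step maximizes the complete-data log-likelihood

$$\ell(p,q) = 2\hat{n}_{AA}\log p + (n_{A\cdot}-\hat{n}_{AA})\log(2pr) + 2\hat{n}_{BB}\log q + (n_{B\cdot}-\hat{n}_{BB})\log(2qr) + 2n_{OO}\log r + n_{AB}\log(2pq).$$

The function `mlef` below encodes exactly this objective (negated, since the maximization is carried out numerically with `optim`).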
N <- 10000 # maximum number of iterations
tol <- .Machine$double.eps
L.old <- c(.2, .35) # initial estimates for p and q
M.value <- 0
L.list <- data.frame(p = 0, q = 0)
mlef <- function(l, l.old, n.A = 28, n.B = 24, n.OO = 41, n.AB = 70) {
  r <- 1 - sum(l)
  r.old <- 1 - sum(l.old)
  # E-step: expected counts of AA and BB given the current estimates
  n.AA <- n.A * l.old[1]^2 / (l.old[1]^2 + 2 * l.old[1] * r.old)
  n.BB <- n.B * l.old[2]^2 / (l.old[2]^2 + 2 * l.old[2] * r.old)
  llh <- 2 * n.OO * log(r) + n.AB * log(2 * l[1] * l[2]) +
    2 * n.AA * log(l[1]) + 2 * n.BB * log(l[2]) +
    (n.A - n.AA) * log(2 * l[1] * r) + (n.B - n.BB) * log(2 * l[2] * r)
  -llh
}
for (j in 1:N) {
  res <- optim(c(0.3, 0.2), mlef, l.old = L.old) # M-step
  L <- res$par
  L.list[j, ] <- L
  M.value[j] <- -res$value
  if (sum(abs(L - L.old) / L.old) < tol) break
  L.old <- L
}
L.list # p, q
print(M.value) # the maximum log-likelihood values
Use both for loops and `lapply()` to fit linear models to `mtcars` using the formulas stored in this list:

formulas <- list(
  mpg ~ disp,
  mpg ~ I(1 / disp),
  mpg ~ disp + wt,
  mpg ~ I(1 / disp) + wt
)
set.seed(123)
attach(mtcars)
formulas <- list(
  mpg ~ disp,
  mpg ~ I(1 / disp),
  mpg ~ disp + wt,
  mpg ~ I(1 / disp) + wt
)
# for loops
output <- vector("list", length(formulas))
for (i in seq_along(formulas)) {
  output[[i]] <- lm(formulas[[i]])
}
output
# lapply
lapply(formulas, lm)
bootstraps <- lapply(1:10, function(i) {
  rows <- sample(1:nrow(mtcars), replace = TRUE)
  mtcars[rows, ]
})
# for loops
for (i in seq_along(bootstraps)) {
  print(lm(mpg ~ disp, data = bootstraps[[i]]))
}
# lapply
lapply(bootstraps, lm, formula = mpg ~ disp)
For each model in the previous two exercises, extract $R^2$ using the function below.

rsq <- function(mod) summary(mod)$r.squared
rsq <- function(mod) summary.lm(mod)$r.squared
# for loops over the formulas
for (i in seq_along(formulas)) {
  print(rsq(lm(formulas[[i]])))
}
# lapply
lapply(lapply(formulas, lm), rsq)
# for loops over the bootstrap samples
for (i in seq_along(bootstraps)) {
  print(rsq(lm(mpg ~ disp, data = bootstraps[[i]])))
}
# lapply
lapply(lapply(bootstraps, lm, formula = mpg ~ disp), rsq)
# using an anonymous function
trials <- replicate(
  100,
  t.test(rpois(10, 10), rpois(7, 10)),
  simplify = FALSE
)
sapply(trials, function(mod) mod$p.value)
Mvap <- function(f, n, type, ...) {
  # n is the length of the output of f; type is the mode of the output of f
  f <- match.fun(f)
  tt <- Map(f, ...)
  if (type == "numeric") y <- vapply(tt, cbind, numeric(n))
  else if (type == "character") y <- vapply(tt, cbind, character(n))
  else if (type == "complex") y <- vapply(tt, cbind, complex(n))
  else if (type == "logical") y <- vapply(tt, cbind, logical(n))
  return(y)
}
# example
kk <- function(x, w) {
  y <- weighted.mean(x, w, na.rm = TRUE)
  return(c(1, y))
}
xs <- replicate(5, runif(10), simplify = FALSE)
ws <- replicate(5, rpois(10, 5) + 1, simplify = FALSE)
Mvap(kk, 2, "numeric", xs, ws)
is.matrix(Mvap(kk, 2, "numeric", xs, ws))
library(microbenchmark)
new.chisq.test <- function(x, y) {
  if (!is.vector(x) && !is.vector(y))
    stop("at least one of 'x' and 'y' is not a vector")
  if (typeof(x) == "character" || typeof(y) == "character")
    stop("at least one of 'x' and 'y' is not a numeric vector")
  if (any(x < 0) || anyNA(x))
    stop("all entries of 'x' must be nonnegative and finite")
  if (any(y < 0) || anyNA(y))
    stop("all entries of 'y' must be nonnegative and finite")
  if ((n <- sum(x)) == 0)
    stop("at least one entry of 'x' must be positive")
  if ((n <- sum(y)) == 0)
    stop("at least one entry of 'y' must be positive")
  if (length(x) != length(y))
    stop("'x' and 'y' must have the same length")
  DNAME <- paste(deparse(substitute(x)), "and", deparse(substitute(y)))
  METHOD <- "Pearson's Chi-squared test"
  x <- rbind(x, y)
  nr <- as.integer(nrow(x)); nc <- as.integer(ncol(x))
  sr <- rowSums(x); sc <- colSums(x); n <- sum(x)
  E <- outer(sr, sc, "*") / n # expected counts
  STATISTIC <- sum((x - E)^2 / E)
  names(STATISTIC) <- "X-squared"
  structure(list(statistic = STATISTIC, method = METHOD, data.name = DNAME),
            class = "htest")
}
# example
a <- c(568, 327, 494); b <- c(484, 339, 444)
new.chisq.test(a, b)
chisq.test(rbind(a, b))
microbenchmark(t1 = new.chisq.test(a, b), t2 = chisq.test(rbind(a, b)))
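For reference, `new.chisq.test` computes the classical Pearson statistic on the $2 \times c$ table formed by `rbind(x, y)`:

$$E_{ij} = \frac{n_{i\cdot}\,n_{\cdot j}}{n}, \qquad X^2 = \sum_{i,j}\frac{(O_{ij}-E_{ij})^2}{E_{ij}},$$

which corresponds to `E <- outer(sr, sc, "*") / n` and `sum((x - E)^2 / E)` in the code.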
new.table <- function(..., dnn = list.names(...), deparse.level = 1) {
  list.names <- function(...) {
    l <- as.list(substitute(list(...)))[-1L]
    nm <- names(l)
    fixup <- if (is.null(nm)) seq_along(l) else nm == ""
    dep <- vapply(l[fixup], function(x)
      switch(deparse.level + 1,
             "",
             if (is.symbol(x)) as.character(x) else "",
             deparse(x, nlines = 1)[1L]), "")
    if (is.null(nm)) dep
    else {
      nm[fixup] <- dep
      nm
    }
  }
  args <- list(...)
  if (!length(args)) stop("nothing to tabulate")
  if (length(args) == 1L && is.list(args[[1L]])) {
    args <- args[[1L]]
    if (length(dnn) != length(args))
      dnn <- if (!is.null(argn <- names(args))) argn
             else paste(dnn[1L], seq_along(args), sep = ".")
  }
  bin <- 0L
  lens <- NULL
  dims <- integer()
  pd <- 1L
  dn <- NULL
  for (a in args) {
    if (is.null(lens)) lens <- length(a)
    else if (length(a) != lens)
      stop("all arguments must have the same length")
    fact.a <- is.factor(a)
    if (!fact.a) {
      a0 <- a
      a <- factor(a)
    }
    ll <- levels(a)
    a <- as.integer(a)
    nl <- length(ll)
    dims <- c(dims, nl)
    dn <- c(dn, list(ll))
    bin <- bin + pd * (a - 1L)
    pd <- pd * nl
  }
  names(dn) <- dnn
  bin <- bin[!is.na(bin)]
  if (length(bin)) bin <- bin + 1L
  y <- array(tabulate(bin, pd), dims, dimnames = dn)
  class(y) <- "table"
  y
}
# example
a <- b <- c(1, 1, 2, 3)
new.table(a, b)
table(a, b)
microbenchmark(t1 = new.table(a, b), t2 = table(a, b))