knitr::opts_chunk$set(echo = TRUE)
getwd()
unif_sample <-runif(10,0,5) unif_sample
In the case where a=0, and b=5:
$\mu =E(\bar Y) =\frac{a+b}{2} =\frac{0+5}{2} = 2.5$
$\sigma^2 =\frac{(b-a)^2}{12} = \frac{(5-0)^2}{12} = \frac{25}{12} = 2.0833$
# mu mean(unif_sample) # sigma^2 var(unif_sample)
The mean and variance of the sample are about accurate.
$E(T) = nE(Y_i)$
$V(T) = nV(Y_i)$
$E(\bar Y) = E(\frac{Y_1 +...+Y_n}{n}) = \frac{nE(Y_i)}{n} = E(Y_i)$
$V(\bar Y) = V(\frac{T}{N}) = \frac{1}{n^2}V(T) = \frac{nV(Y_i)}{n^2} = \frac{V(Y_i)}{n}$
A Creates a sample from the uniform distribution from 0 to 5 of size n*iter
B Stores the sample created from line A into a matrix called data that consists of n rows, and iter columns, arranging the data by row
C Applies the function sum to the columns of the data matrix and stores the result in sm
D Runs the function myclt with n = 10 and iter = 10000, stores the result in w
myclt=function(n,iter){ y=runif(n*iter,0,5) data=matrix(y,nr=n,nc=iter,byrow=TRUE) sm=apply(data,2,sum) hist(sm) sm }
w=myclt(n=10, iter=10000)
paste0("Mean of w: ",round(mean(w),4)) paste0("Variance of w: ", round(var(w),4))
myclt=function(n,iter){ y=runif(n*iter,0,5) data=matrix(y,nr=n,nc=iter,byrow=TRUE) sm=apply(data,2,mean) hist(sm) sm } w = myclt(n=10, iter=10000) paste0("Mean of w: ",round(mean(w),4)) paste0("Variance of w: ", round(var(w),4))
Being that data is a matrix, it has columns and rows. Apply uses the 2 to indicate that the function will be applied by column. A 1 would indicate that the function be applied by row.
100,000
In a population that is normally distributed the standard deviation can be calculated using the formula we learned in earlier chapters. However, when we are "normalizing" a distribution using the CLT, the formula for the variance will be different, and therefore the formula for the standard deviation will follow.
mycltu=function(n,iter,a=0,b=10){ ## r-random sample from the uniform y=runif(n*iter,a,b) ## Place these numbers into a matrix ## The columns will correspond to the iteration and the rows will equal the sample size n data=matrix(y,nr=n,nc=iter,byrow=TRUE) ## apply the function mean to the columns (2) of the matrix ## these are placed in a vector w w=apply(data,2,mean) ## We will make a histogram of the values in w ## How high should we make y axis? ## All the values used to make a histogram are placed in param (nothing is plotted yet) param=hist(w,plot=FALSE) ## Since the histogram will be a density plot we will find the max density ymax=max(param$density) ## To be on the safe side we will add 10% more to this ymax=1.1*ymax ## Now we can make the histogram hist(w,freq=FALSE, ylim=c(0,ymax), main=paste("Histogram of sample mean", "\n", "sample size= ",n,sep=""),xlab="Sample mean") ## add a density curve made from the sample distribution lines(density(w),col="Blue",lwd=3) # add a density plot ## Add a theoretical normal curve curve(dnorm(x,mean=(a+b)/2,sd=(b-a)/(sqrt(12*n))),add=TRUE,col="Red",lty=2,lwd=3) # add a theoretical curve ## Add the density from which the samples were taken curve(dunif(x,a,b),add=TRUE,lwd=4) } mycltu(n=1, iter=10000) mycltu(n=2, iter=10000) mycltu(n=3, iter=10000) mycltu(n=5, iter=10000) mycltu(n=10, iter=10000) mycltu(n=30, iter=10000)
My conclusion is that it seems n does not need to be very large to get a normally distributed sample
mycltb=function(n,iter,p=0.5,...){ ## r-random sample from the Binomial y=rbinom(n*iter,size=n,prob=p) ## Place these numbers into a matrix ## The columns will correspond to the iteration and the rows will equal the sample size n data=matrix(y,nr=n,nc=iter,byrow=TRUE) ## apply the function mean to the columns (2) of the matrix ## these are placed in a vector w w=apply(data,2,mean) ## We will make a histogram of the values in w ## How high should we make y axis? ## All the values used to make a histogram are placed in param (nothing is plotted yet) param=hist(w,plot=FALSE) ## Since the histogram will be a density plot we will find the max density ymax=max(param$density) ## To be on the safe side we will add 10% more to this ymax=1.1*ymax ## Now we can make the histogram ## freq=FALSE means take a density hist(w,freq=FALSE, ylim=c(0,ymax), main=paste("Histogram of sample mean","\n", "sample size= ",n,sep=""), xlab="Sample mean",...) ## add a density curve made from the sample distribution #lines(density(w),col="Blue",lwd=3) # add a density plot ## Add a theoretical normal curve curve(dnorm(x,mean=n*p,sd=sqrt(p*(1-p))),add=TRUE,col="Red",lty=2,lwd=3) }
mycltb(n=4, iter=10000, p=0.3) mycltb(n=5, iter=10000, p=0.3) mycltb(n=10, iter=10000, p=0.3) mycltb(n=20, iter=10000, p=0.3) mycltb(n=4, iter=10000, p=0.7) mycltb(n=5, iter=10000, p=0.7) mycltb(n=10, iter=10000, p=0.7) mycltb(n=20, iter=10000, p=0.7) mycltb(n=4, iter=10000, p=0.5) mycltb(n=5, iter=10000, p=0.5) mycltb(n=10, iter=10000, p=0.5) mycltb(n=20, iter=10000, p=0.5)
My conclusion is that unlike the uniform distribution, n must be larger to normalize the distribution
mycltp=function(n,iter,lambda=10,...){ ## r-random sample from the Poisson y=rpois(n*iter,lambda=lambda) ## Place these numbers into a matrix ## The columns will correspond to the iteration and the rows will equal the sample size n data=matrix(y,nr=n,nc=iter,byrow=TRUE) ## apply the function mean to the columns (2) of the matrix ## these are placed in a vector w w=apply(data,2,mean) ## We will make a histogram of the values in w ## How high should we make y axis? ## All the values used to make a histogram are placed in param (nothing is plotted yet) param=hist(w,plot=FALSE) ## Since the histogram will be a density plot we will find the max density ymax=max(param$density) ## To be on the safe side we will add 10% more to this ymax=1.1*ymax ## Make a suitable layout for graphing layout(matrix(c(1,1,2,3),nr=2,nc=2, byrow=TRUE)) ## Now we can make the histogram hist(w,freq=FALSE, ylim=c(0,ymax), col=rainbow(max(w)), main=paste("Histogram of sample mean","\n", "sample size= ",n," iter=",iter," lambda=",lambda,sep=""), xlab="Sample mean",...) ## add a density curve made from the sample distribution #lines(density(w),col="Blue",lwd=3) # add a density plot ## Add a theoretical normal curve curve(dnorm(x,mean=lambda,sd=sqrt(lambda/n)),add=TRUE,col="Red",lty=2,lwd=3) # add a theoretical curve # Now make a new plot # Since y is discrete we should use a barplot barplot(table(y)/(n*iter),col=rainbow(max(y)), main="Barplot of sampled y", ylab ="Rel. Freq",xlab="y" ) x=0:max(y) plot(x,dpois(x,lambda=lambda),type="h",lwd=5,col=rainbow(max(y)), main="Probability function for Poisson", ylab="Probability",xlab="y") }
mycltp(n=2, iter=1000, lambda = 4) mycltp(n=3, iter=1000, lambda = 4) mycltp(n=4, iter=1000, lambda = 4) mycltp(n=10, iter=1000, lambda = 4) mycltp(n=20, iter=1000, lambda = 4) mycltp(n=2, iter=1000, lambda = 10) mycltp(n=3, iter=1000, lambda = 10) mycltp(n=4, iter=1000, lambda = 10) mycltp(n=10, iter=1000, lambda = 10) mycltp(n=20, iter=1000, lambda = 10)
grac0009MATH4753::mycltp(n=100, iter= 10000)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.