knitr::opts_chunk$set(echo = TRUE)

Task 1

getwd()

Task 2

Create a sample of size n=10 from a uniform distribution that has lower limit 0 and upper limit 5 by using runif(10,0,5). Record the results here.

unif_sample <-runif(10,0,5)
unif_sample

Give the mean and variance of the uniform for the case where a=0, b=5, i.e. μ=(a+b)/2,σ^2=(b-a)^2/12

In the case where a=0, and b=5:

$\mu =E(\bar Y) =\frac{a+b}{2} =\frac{0+5}{2} = 2.5$

$\sigma^2 =\frac{(b-a)^2}{12} = \frac{(5-0)^2}{12} = \frac{25}{12} = 2.0833$

Use the sample you made to calculate x bar and s^2. How do they compare to the population parameters?

# mu
mean(unif_sample)

# sigma^2
var(unif_sample)

The mean and variance of the sample are about accurate.

Mean and variance of Y-bar and T

$E(T) = nE(Y_i)$

$V(T) = nV(Y_i)$

$E(\bar Y) = E(\frac{Y_1 +...+Y_n}{n}) = \frac{nE(Y_i)}{n} = E(Y_i)$

$V(\bar Y) = V(\frac{T}{N}) = \frac{1}{n^2}V(T) = \frac{nV(Y_i)}{n^2} = \frac{V(Y_i)}{n}$

Explain what lines A-D do

A Creates a sample from the uniform distribution from 0 to 5 of size n*iter

B Stores the sample created from line A into a matrix called data that consists of n rows, and iter columns, arranging the data by row

C Applies the function sum to the columns of the data matrix and stores the result in sm

D Runs the function myclt with n = 10 and iter = 10000, stores the result in w

myclt=function(n,iter){
  y=runif(n*iter,0,5)
  data=matrix(y,nr=n,nc=iter,byrow=TRUE)
  sm=apply(data,2,sum)
  hist(sm)
  sm
}

Record the plot made when D is executed

w=myclt(n=10, iter=10000)

Find w-bar and var(w)

paste0("Mean of w: ",round(mean(w),4))
paste0("Variance of w: ", round(var(w),4))

Change the code in myclt()

myclt=function(n,iter){
  y=runif(n*iter,0,5)
  data=matrix(y,nr=n,nc=iter,byrow=TRUE)
  sm=apply(data,2,mean)
  hist(sm)
  sm
}
w = myclt(n=10, iter=10000)
paste0("Mean of w: ",round(mean(w),4))
paste0("Variance of w: ", round(var(w),4))

Task 3

How is the 2 used?

Being that data is a matrix, it has columns and rows. Apply uses the 2 to indicate that the function will be applied by column. A 1 would indicate that the function be applied by row.

How many terms are in w?

100,000

Explain the sd formula

In a population that is normally distributed the standard deviation can be calculated using the formula we learned in earlier chapters. However, when we are "normalizing" a distribution using the CLT, the formula for the variance will be different, and therefore the formula for the standard deviation will follow.

Record the plots

mycltu=function(n,iter,a=0,b=10){
## r-random sample from the uniform
y=runif(n*iter,a,b)
## Place these numbers into a matrix
## The columns will correspond to the iteration and the rows will equal the sample size n
data=matrix(y,nr=n,nc=iter,byrow=TRUE)
## apply the function mean to the columns (2) of the matrix
## these are placed in a vector w
w=apply(data,2,mean)
## We will make a histogram of the values in w
## How high should we make y axis?
## All the values used to make a histogram are placed in param (nothing is plotted yet)
param=hist(w,plot=FALSE)
## Since the histogram will be a density plot we will find the max density

ymax=max(param$density)
## To be on the safe side we will add 10% more to this
ymax=1.1*ymax
## Now we can make the histogram
hist(w,freq=FALSE,  ylim=c(0,ymax), main=paste("Histogram of sample mean",
"\n", "sample size= ",n,sep=""),xlab="Sample mean")
## add a density curve made from the sample distribution
lines(density(w),col="Blue",lwd=3) # add a density plot
## Add a theoretical normal curve 
curve(dnorm(x,mean=(a+b)/2,sd=(b-a)/(sqrt(12*n))),add=TRUE,col="Red",lty=2,lwd=3) # add a theoretical curve
## Add the density from which the samples were taken
curve(dunif(x,a,b),add=TRUE,lwd=4)

}

mycltu(n=1, iter=10000)
mycltu(n=2, iter=10000)
mycltu(n=3, iter=10000)
mycltu(n=5, iter=10000)
mycltu(n=10, iter=10000)
mycltu(n=30, iter=10000)

My conclusion is that it seems n does not need to be very large to get a normally distributed sample

Task 4

mycltb=function(n,iter,p=0.5,...){

## r-random sample from the Binomial
y=rbinom(n*iter,size=n,prob=p)
## Place these numbers into a matrix
## The columns will correspond to the iteration and the rows will equal the sample size n
data=matrix(y,nr=n,nc=iter,byrow=TRUE)
## apply the function mean to the columns (2) of the matrix
## these are placed in a vector w
w=apply(data,2,mean)
## We will make a histogram of the values in w
## How high should we make y axis?
## All the values used to make a histogram are placed in param (nothing is plotted yet)
param=hist(w,plot=FALSE)
## Since the histogram will be a density plot we will find the max density

ymax=max(param$density)
## To be on the safe side we will add 10% more to this
ymax=1.1*ymax

## Now we can make the histogram
## freq=FALSE means take a density
hist(w,freq=FALSE,  ylim=c(0,ymax),
main=paste("Histogram of sample mean","\n", "sample size= ",n,sep=""),
xlab="Sample mean",...)
## add a density curve made from the sample distribution
#lines(density(w),col="Blue",lwd=3) # add a density plot
## Add a theoretical normal curve 
curve(dnorm(x,mean=n*p,sd=sqrt(p*(1-p))),add=TRUE,col="Red",lty=2,lwd=3) 

}

Make graphs

mycltb(n=4, iter=10000, p=0.3)
mycltb(n=5, iter=10000, p=0.3)
mycltb(n=10, iter=10000, p=0.3)
mycltb(n=20, iter=10000, p=0.3)

mycltb(n=4, iter=10000, p=0.7)
mycltb(n=5, iter=10000, p=0.7)
mycltb(n=10, iter=10000, p=0.7)
mycltb(n=20, iter=10000, p=0.7)

mycltb(n=4, iter=10000, p=0.5)
mycltb(n=5, iter=10000, p=0.5)
mycltb(n=10, iter=10000, p=0.5)
mycltb(n=20, iter=10000, p=0.5)

My conclusion is that unlike the uniform distribution, n must be larger to normalize the distribution

Task 5

mycltp=function(n,iter,lambda=10,...){

## r-random sample from the Poisson
y=rpois(n*iter,lambda=lambda)
## Place these numbers into a matrix
## The columns will correspond to the iteration and the rows will equal the sample size n
data=matrix(y,nr=n,nc=iter,byrow=TRUE)
## apply the function mean to the columns (2) of the matrix
## these are placed in a vector w
w=apply(data,2,mean)
## We will make a histogram of the values in w
## How high should we make y axis?
## All the values used to make a histogram are placed in param (nothing is plotted yet)
param=hist(w,plot=FALSE)
## Since the histogram will be a density plot we will find the max density

ymax=max(param$density)
## To be on the safe side we will add 10% more to this
ymax=1.1*ymax

## Make a suitable layout for graphing
layout(matrix(c(1,1,2,3),nr=2,nc=2, byrow=TRUE))

## Now we can make the histogram
hist(w,freq=FALSE,  ylim=c(0,ymax), col=rainbow(max(w)),
main=paste("Histogram of sample mean","\n", "sample size= ",n," iter=",iter," lambda=",lambda,sep=""),
xlab="Sample mean",...)
## add a density curve made from the sample distribution
#lines(density(w),col="Blue",lwd=3) # add a density plot
## Add a theoretical normal curve 
curve(dnorm(x,mean=lambda,sd=sqrt(lambda/n)),add=TRUE,col="Red",lty=2,lwd=3) # add a theoretical curve

# Now make a new plot
# Since y is discrete we should use a barplot
barplot(table(y)/(n*iter),col=rainbow(max(y)), main="Barplot of sampled y", ylab ="Rel. Freq",xlab="y" )
x=0:max(y)
plot(x,dpois(x,lambda=lambda),type="h",lwd=5,col=rainbow(max(y)),
main="Probability function for Poisson", ylab="Probability",xlab="y")
}

Make graphs

mycltp(n=2, iter=1000, lambda = 4)
mycltp(n=3, iter=1000, lambda = 4)
mycltp(n=4, iter=1000, lambda = 4)
mycltp(n=10, iter=1000, lambda = 4)
mycltp(n=20, iter=1000, lambda = 4)

mycltp(n=2, iter=1000, lambda = 10)
mycltp(n=3, iter=1000, lambda = 10)
mycltp(n=4, iter=1000, lambda = 10)
mycltp(n=10, iter=1000, lambda = 10)
mycltp(n=20, iter=1000, lambda = 10)

Task 6

grac0009MATH4753::mycltp(n=100, iter= 10000)


agracy2246/MATH4753grac0009 documentation built on April 26, 2020, 9:39 a.m.