In rgamble819/MATH4753ouGamb0004: Functions for my statistics class

knitr::opts_chunk$set(echo = FALSE)

Task 1

getwd()

Task 2

sample = runif(10,0,5)
sample
sum(sample)
y = (5+0)/2
sigsq = ((5-0)^2) / 12

ybar = mean(sample)
ssq = sd(sample) ^ 2

sprintf("Distribution Mean: %.4f Distribution Variance: %.4f", y, sigsq)
sprintf("Sample Mean: %.4f Sample Variance: %.4f", ybar, ssq)

Conclusion: The sample mean and variance are not equal to the population or distribution mean and variance but are close.

$\begin{equation}E(T)=nE(Y_{i})=n\mu=10\cdot 2.5=25.0\end{equation}$ $\begin{equation}V(T)=nV(Y_{i})=10\cdot 2.0833 = 20.833\end{equation}$

$\begin{equation}E(\bar{Y})=E(Y)=\mu=2.5\end{equation}$ $\begin{equation}V(\bar{Y})=\frac{\sigma ^{2}}{n}=\frac{2.0833}{10}=0.20833\mu\end{equation}$

myclt=function(n,iter){
y=runif(n*iter,0,5) # A
data=matrix(y,nr=n,nc=iter,byrow=TRUE) #B
sm=apply(data,2,sum) #C
hist(sm)
sm
}
w=myclt(n=10,iter=10000) #D

mean(w)
var(w)

myclt_modified=function(n,iter){
  y=runif(n*iter,0,5)
  data=matrix(y,nr=n,nc=iter,byrow=TRUE)
  sm=apply(data,2,mean)
  hist(sm)
  sm
}

w=myclt_modified(n=10,iter=10000)
mean(w)

A) Creates iter samples with n elements within the sample from a uniform distribution with a lower limit of 0 and upper limit of 6. These values are stored in one list of size n*iter.

B) Groups the samples into a matrix where each sample is a column.

C) Apply the sum function to the columns in the matrix. This is going to return the sum of each sample. It will be size = iter.

D) Calls the function for a sample population of 10,000 samples of size 10. It stores the output in w.

Task 3

mycltu=function(n,iter,a=0,b=10){
  ## r-random sample from the uniform
  y=runif(n*iter,a,b)
  ## Place these numbers into a matrix
  ## The columns will correspond to the iteration and the rows will equal the sample size n
  data=matrix(y,nr=n,nc=iter,byrow=TRUE)
  ## apply the function mean to the columns (2) of the matrix
  ## these are placed in a vector w
  w=apply(data,2,mean)
  ## We will make a histogram of the values in w
  ## How high should we make y axis?
  ## All the values used to make a histogram are placed in param (nothing is plotted yet)
  param=hist(w,plot=FALSE)
  ## Since the histogram will be a density plot we will find the max density
  ymax=max(param$density)
  ## To be on the safe side we will add 10% more to this
  ymax=1.1*ymax
  ## Now we can make the histogram
  hist(w,freq=FALSE,  ylim=c(0,ymax), main=paste("Histogram of sample mean",
                                                 "\n", "sample size= ",n,sep=""),xlab="Sample mean")
  ## add a density curve made from the sample distribution
  lines(density(w),col="Blue",lwd=3) # add a density plot
  ## Add a theoretical normal curve 
  curve(dnorm(x,mean=(a+b)/2,sd=(b-a)/(sqrt(12*n))),add=TRUE,col="Red",lty=2,lwd=3) # add a theoretical curve
  ## Add the density from which the samples were taken
  curve(dunif(x,a,b),add=TRUE,lwd=4)

}

Apply function uses the parameter 2 to indicate we want to apply the function to the columns.

When n=20 and iter=100,0000. The size of w is 100,000.

w=mycltu(n=1,iter=10000)
w=mycltu(n=2,iter=10000)
w=mycltu(n=3,iter=10000)
w=mycltu(n=5,iter=10000)
w=mycltu(n=10,iter=10000)
w=mycltu(n=30,iter=10000)

Conclusion: As sample size increases, or sampling distribution gets closer to normal.

Task 4

mycltb=function(n,iter,p=0.5,...){

  ## r-random sample from the Binomial
  y=rbinom(n*iter,size=n,prob=p)
  ## Place these numbers into a matrix
  ## The columns will correspond to the iteration and the rows will equal the sample size n
  data=matrix(y,nr=n,nc=iter,byrow=TRUE)
  ## apply the function mean to the columns (2) of the matrix
  ## these are placed in a vector w
  w=apply(data,2,mean)
  ## We will make a histogram of the values in w
  ## How high should we make y axis?
  ## All the values used to make a histogram are placed in param (nothing is plotted yet)
  param=hist(w,plot=FALSE)
  ## Since the histogram will be a density plot we will find the max density

  ymax=max(param$density)
  ## To be on the safe side we will add 10% more to this
  ymax=1.1*ymax

  ## Now we can make the histogram
  ## freq=FALSE means take a density
  hist(w,freq=FALSE,  ylim=c(0,ymax),
       main=paste("Histogram of sample mean","\n", "sample size= ",n,sep=""),
       xlab="Sample mean",...)
  ## add a density curve made from the sample distribution
  #lines(density(w),col="Blue",lwd=3) # add a density plot
  ## Add a theoretical normal curve 
  curve(dnorm(x,mean=n*p,sd=sqrt(p*(1-p))),add=TRUE,col="Red",lty=2,lwd=3) 

}

w=mycltb(n=3,iter=10000, p=0.3)
w=mycltb(n=5,iter=10000, p=0.3)
w=mycltb(n=10,iter=10000, p=0.3)
w=mycltb(n=20,iter=10000, p=0.3)

w=mycltb(n=3,iter=10000, p=0.7)
w=mycltb(n=5,iter=10000, p=0.7)
w=mycltb(n=10,iter=10000, p=0.7)
w=mycltb(n=20,iter=10000, p=0.7)

w=mycltb(n=3,iter=10000, p=0.5)
w=mycltb(n=5,iter=10000, p=0.5)
w=mycltb(n=10,iter=10000, p=0.5)
w=mycltb(n=20,iter=10000, p=0.5)

Conclusion: Again, as the sample size increases, we get a sample distribution closer to normal.

Task 5

mycltp=function(n,iter,lambda=10,...){

  ## r-random sample from the Poisson
  y=rpois(n*iter,lambda=lambda)
  ## Place these numbers into a matrix
  ## The columns will correspond to the iteration and the rows will equal the sample size n
  data=matrix(y,nr=n,nc=iter,byrow=TRUE)
  ## apply the function mean to the columns (2) of the matrix
  ## these are placed in a vector w
  w=apply(data,2,mean)
  ## We will make a histogram of the values in w
  ## How high should we make y axis?
  ## All the values used to make a histogram are placed in param (nothing is plotted yet)
  param=hist(w,plot=FALSE)
  ## Since the histogram will be a density plot we will find the max density

  ymax=max(param$density)
  ## To be on the safe side we will add 10% more to this
  ymax=1.1*ymax

  ## Make a suitable layout for graphing
  layout(matrix(c(1,1,2,3),nr=2,nc=2, byrow=TRUE))

  ## Now we can make the histogram
  hist(w,freq=FALSE,  ylim=c(0,ymax), col=rainbow(max(w)),
       main=paste("Histogram of sample mean","\n", "sample size= ",n," iter=",iter," lambda=",lambda,sep=""),
       xlab="Sample mean",...)
  ## add a density curve made from the sample distribution
  #lines(density(w),col="Blue",lwd=3) # add a density plot
  ## Add a theoretical normal curve 
  curve(dnorm(x,mean=lambda,sd=sqrt(lambda/n)),add=TRUE,col="Red",lty=2,lwd=3) # add a theoretical curve

  # Now make a new plot
  # Since y is discrete we should use a barplot
  barplot(table(y)/(n*iter),col=rainbow(max(y)), main="Barplot of sampled y", ylab ="Rel. Freq",xlab="y" )
  x=0:max(y)
  plot(x,dpois(x,lambda=lambda),type="h",lwd=5,col=rainbow(max(y)),
       main="Probability function for Poisson", ylab="Probability",xlab="y")
}

mycltp(n=2,iter=10000, lambda=4)
mycltp(n=3,iter=10000, lambda=4)
mycltp(n=5,iter=10000, lambda=4)
mycltp(n=10,iter=10000, lambda=4)
mycltp(n=20,iter=10000, lambda=4)

mycltp(n=2,iter=10000, lambda=10)
mycltp(n=3,iter=10000, lambda=10)
mycltp(n=5,iter=10000, lambda=10)
mycltp(n=10,iter=10000, lambda=10)
mycltp(n=20,iter=10000, lambda=10)

Task 6

w = MATH4753ouGamb0004::mycltp(n=10, iter=10000, lambda = 10)

rgamble819/MATH4753ouGamb0004 documentation built on April 20, 2021, 10:34 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

rgamble819/MATH4753ouGamb0004
Functions for my statistics class

In rgamble819/MATH4753ouGamb0004: Functions for my statistics class

Task 1

Task 2

Task 3

Task 4

Task 5

Task 6

R Package Documentation

Browse R Packages

We want your feedback!

rgamble819/MATH4753ouGamb0004 Functions for my statistics class

In rgamble819/MATH4753ouGamb0004: Functions for my statistics class

Task 1

Task 2

Task 3

Task 4

Task 5

Task 6

R Package Documentation

Browse R Packages

We want your feedback!

rgamble819/MATH4753ouGamb0004
Functions for my statistics class