knitr::opts_chunk$set(echo = TRUE)

Task 1

getwd()

Task 2

Line A and B

A will take a sample, of size 'n*iter' from data held in 'x', with replacement, and store in the data in 'y'

B will form a confidence interval, using the xstat vector (vector of the means) and upper and lower boundaries given as a c() vector

Why is equal probability sampling necessary

To get a normal distribution

With replacement

set.seed(35)
sam = round (rnorm(20,mean=10, sd=4),4)
sam
paste(" ")
paste(" ")
paste(" ")

unique(sample(sam,20, replace = TRUE))
unique(sample(sam,20, replace = TRUE))
unique(sample(sam,20, replace = TRUE))
unique(sample(sam,20, replace = TRUE))
unique(sample(sam,20, replace = TRUE))

The size of sam is 20, and when we use unique() on that same of sam, we can see that we get less than 20 unique values. This happens because we are using replacement. Once we draw a number, we put it back and are able to sample it again.

Without replacement

unique(sample(sam,20, replace = FALSE))
unique(sample(sam,20, replace = FALSE))
unique(sample(sam,20, replace = FALSE))
unique(sample(sam,20, replace = FALSE))
unique(sample(sam,20, replace = FALSE))

Since replace=FALSE we get 20 unique values

Sample 21

#sample(sam,21, replace=FALSE)

An error because the requested sample is larger than the population, and replace is FALSE.

Task 3

CI95% plots

myboot2<-function(iter=10000,x,fun="mean",alpha=0.05,cx=1.5,...){  #Notice where the ... is repeated in the code
n=length(x)   #sample size

y=sample(x,n*iter,replace=TRUE)
rs.mat=matrix(y,nr=n,nc=iter,byrow=TRUE)
xstat=apply(rs.mat,2,fun) # xstat is a vector and will have iter values in it 
ci=quantile(xstat,c(alpha/2,1-alpha/2))# Nice way to form a confidence interval
# A histogram follows
# The object para will contain the parameters used to make the histogram
para=hist(xstat,freq=FALSE,las=1,
main=paste("Histogram of Bootstrap sample statistics","\n","alpha=",alpha," iter=",iter,sep=""),
...)

#mat will be a matrix that contains the data, this is done so that I can use apply()
mat=matrix(x,nr=length(x),nc=1,byrow=TRUE)

#pte is the point estimate
#This uses whatever fun is
pte=apply(mat,2,fun)
abline(v=pte,lwd=3,col="Black")# Vertical line
segments(ci[1],0,ci[2],0,lwd=4)      #Make the segment for the ci
text(ci[1],0,paste("(",round(ci[1],2),sep=""),col="Red",cex=cx)
text(ci[2],0,paste(round(ci[2],2),")",sep=""),col="Red",cex=cx)

# plot the point estimate 1/2 way up the density
text(pte,max(para$density)/2,round(pte,2),cex=cx)

invisible(list(ci=ci,fun=fun,x=x))# Some output to use if necessary
}
#A
set.seed(39)
sam = rnorm(25,mean=25,sd=10)
mean(sam)
catch <- myboot2(sam,iter = 10000, alpha = .05)


#B
set.seed(30)
sam = rchisq(25,df=3)
mean(sam)
catch <- myboot2(sam,iter = 10000, alpha = .05)

#C
set.seed(40)
sam = rgamma(30,shape=2, scale=3)
mean(sam)

catch <- myboot2(sam,iter = 10000, alpha = .05)

#D
set.seed(10)
sam = rbeta(20, shape1=3, shape2=4)
mean(sam)
catch <- myboot2(sam,iter = 10000, alpha = .05)

In each case the point estimate is the actual population value, rounded.

Yes the interval contains the population value in each case

CI80% plots

#A
set.seed(39)
sam = rnorm(25,mean=25,sd=10)
mean(sam)
catch <- myboot2(sam,iter = 10000, alpha = .2)


#B
set.seed(30)
sam = rchisq(25,df=3)
mean(sam)
catch <- myboot2(sam,iter = 10000, alpha = .2)

#C
set.seed(40)
sam = rgamma(30,shape=2, scale=3)
mean(sam)

catch <- myboot2(sam,iter = 10000, alpha = .2)

#D
set.seed(10)
sam = rbeta(20, shape1=3, shape2=4)
mean(sam)
catch <- myboot2(sam,iter = 10000, alpha = .2)

Task 4

Adjust myboot2

myboot2<-function(iter=10000,x,fun="mean",alpha=0.05,cx=1.5,...){  #Notice where the ... is repeated in the code
n=length(x)   #sample size

y=sample(x,n*iter,replace=TRUE)
rs.mat=matrix(y,nr=n,nc=iter,byrow=TRUE)
xstat=apply(rs.mat,2,fun) # xstat is a vector and will have iter values in it 
ci=quantile(xstat,c(alpha/2,1-alpha/2))# Nice way to form a confidence interval
# A histogram follows
# The object para will contain the parameters used to make the histogram
para=hist(xstat,freq=FALSE,las=1,
main=paste("Histogram of Bootstrap sample statistics","\n","alpha=",alpha," iter=",iter,sep=""),
...)

#mat will be a matrix that contains the data, this is done so that I can use apply()
mat=matrix(x,nr=length(x),nc=1,byrow=TRUE)

#pte is the point estimate
#This uses whatever fun is
pte=apply(mat,2,fun)
abline(v=pte,lwd=3,col="Black")# Vertical line
segments(ci[1],0,ci[2],0,lwd=4)      #Make the segment for the ci
text(ci[1],0,paste("(",round(ci[1],2),sep=""),col="Red",cex=cx)
text(ci[2],0,paste(round(ci[2],2),")",sep=""),col="Red",cex=cx)

# plot the point estimate 1/2 way up the density
text(pte,max(para$density)/2,round(pte,2),cex=cx)

invisible(list(ci=ci,fun=fun,x=x,xstat=xstat))# Some output to use if necessary
}
sam <- c(1,1,1,2,2,2,2,3,3,4,4)
object <- myboot2(x=sam, fun="median")

Boostrap interval

(1,3)

Barplot

tab <- table(object$xstat)
barplot(tab)

Task 5

95% interval estimate

set.seed(39)
sam = rnorm(25, mean=25, sd=10)

norm.mm = myboot2(x=sam, fun=function(x) {mean(x) / median(x)}, alpha = 0.05, xlab="Mean/Median", col="Cyan")

set.seed(30)
sam = rchisq(20,df=3)

chisq.mm = myboot2(x=sam, fun=function(x) {mean(x) / median(x)}, alpha = 0.05, xlab="Mean/Median", col="Red")

set.seed(40)
sam = rgamma(30,shape=2, scale=3)

gamma.mm = myboot2(x=sam, fun=function(x) {mean(x) / median(x)}, alpha = 0.05, xlab="Mean/Median", col="Brown")

set.seed(10)
sam= rbeta(20, shape1=3, shape2=4)

beta.mm = myboot2(x=sam, fun=function(x) {mean(x) / median(x)}, alpha = 0.05, xlab="Mean/Median", col="Purple")

70% interval estimate

set.seed(39)
sam = rnorm(25, mean=25, sd=10)

norm.mm = myboot2(x=sam, fun=function(x) {mean(x) / median(x)}, alpha = 0.3, xlab="Mean/Median", col="Cyan")

set.seed(30)
sam = rchisq(20,df=3)

chisq.mm = myboot2(x=sam, fun=function(x) {mean(x) / median(x)}, alpha = 0.3, xlab="Mean/Median", col="Red")

set.seed(40)
sam = rgamma(30,shape=2, scale=3)

gamma.mm = myboot2(x=sam, fun=function(x) {mean(x) / median(x)}, alpha = 0.3, xlab="Mean/Median", col="Brown")

set.seed(10)
sam= rbeta(20, shape1=3, shape2=4)

beta.mm = myboot2(x=sam, fun=function(x) {mean(x) / median(x)}, alpha = 0.3, xlab="Mean/Median", col="Purple")

Task 6

binom

sam = rbinom(20,69, prob = .6)
#mean
catch <- myboot2(sam, iter=10000,fun="mean", alpha = .2)

#var
catch <- myboot2(sam, iter=10000,fun="var", alpha = .2)

weibull

sam = rweibull(20,shape = 3, scale = 4)
#mean
catch <- myboot2(sam, iter=10000,fun="mean", alpha = .2)

#var
catch <- myboot2(sam, iter=10000,fun="var", alpha = .2)

poisson

sam = rpois(20,lambda = 3)
#mean
catch <- myboot2(sam, iter=10000,fun="mean", alpha = .2)

#var
catch <- myboot2(sam, iter=10000,fun="var", alpha = .2)

exponential

sam = rexp(20)
#mean
catch <- myboot2(sam, iter=10000,fun="mean", alpha = .2)

#var
catch <- myboot2(sam, iter=10000,fun="var", alpha = .2)

Task 7

set.seed(68)
sam = rnorm(20, mean =10, sd=4)

#mean
catch <- myboot2(sam, iter=10000,fun="IQR", alpha = .05)

#var
catch <- myboot2(sam, iter=10000,fun="mean", alpha = .05)

mp = c(1,-1)
z_a2 = qnorm(1-.05, mean=0, sd=1)

mean(sam) - mp*z_a2*sd(sam)/sqrt(20)

myboot2(sam, iter=10000, fun = "mean", alpha = .05)

I think they are close enough to be considered similar

Task 8

fire <- grac0009MATH4753::fire
grac0009MATH4753::myboot2(x=fire$DAMAGE)


agracy2246/MATH4753grac0009 documentation built on April 26, 2020, 9:39 a.m.