knitr::opts_chunk$set(echo = TRUE)

Task 1

getwd()

Task 2

set.seed(55);x1=rnorm(30,mean=25,sd=5)

$H_0: \mu = 22, H_1: \mu \neq 22$

t.test(x1, mu = 22)

The interval does not contain 22, thus we reject the null hypothesis. We can also reject because p-value < 0.05.

$H_0: \mu = 23, H_1: \mu \neq 23$

t.test(x1, mu = 23)

The interval does not contain 23, thus we reject the null hypothesis. We can also reject because p-value < 0.05.

$H_0: \mu = 24, H_1: \mu \neq 24$

t.test(x1, mu = 24)

The interval contains 24, thus we accept the null hypothesis. We can also accept because the p-value > 0.05.

$H_0: \mu = 25, H_1: \mu \neq 25$

t.test(x1, mu = 25)

The interval contains 25, thus we accept the null hypothesis. We can also accept because the p-value > 0.05.

$H_0: \mu = 26, H_1: \mu \neq 26$

t.test(x1, mu = 26)

The interval contains 26, thus we accept the null hypothesis. We can also accept because the p-value > 0.05.

Boxplot

boxplot(x1, main="Sample x1")
ci=t.test(x1,mu=mean(x1))$conf.int
abline(h=c(ci,mean(x1)),col=c("Red","Red","Green"))

Find $t_{calc}$ with $H_0:\mu=24$

tcalc = (mean(x1) - 24) / (sd(x1) / sqrt(length(x1)))
paste0("tcalc = " ,tcalc)

mypvalue

mypvalue=function(t0,xmax=4,n=20, alpha=0.05){
#calculate alpha/2
va=round(pt(-t0,df=n-1),4)
pv=2*va

# plot the t dist
curve(dt(x,df=n-1),xlim=c(-xmax,xmax),ylab="T Density",xlab=expression(t),
main=substitute(paste("P-value=", pv, " alpha=", alpha)))


# set up points on the polygon to the right
xcurve=seq(t0,xmax,length=1000)
ycurve=dt(xcurve,df=n-1)

# set up points to the left
xlcurve=seq(-t0,-xmax,length=1000)
ylcurve=dt(xcurve,df=n-1)

# Shade in the polygon defined by the line segments
polygon(c(t0,xcurve,xmax),c(0,ycurve,0),col="green")
polygon(c(-t0,xlcurve,-xmax),c(0,ylcurve,0),col="green")

# make quantiles
q=qt(1-alpha/2,n-1)
abline( v=c(q,-q),lwd=2) # plot the cut off t value 
axis(3,c(q,-q),c(expression(abs(t[alpha/2])),expression(-abs(t[alpha/2]))))


# Annotation
text(0.5*(t0+xmax),max(ycurve),substitute(paste(area, "=",va)))
text(-0.5*(t0+xmax),max(ycurve),expression(area))

return(list(q=q,pvalue=pv))
}

mypvalue(t0=tcalc, n = length(x1), alpha = 0.05)

What is rejection region?

Rejection region is $T \geq t_{\frac{\alpha}{2}} = 2.04523$ and $T \leq -t_{\frac{\alpha}{2}} = -2.04523$

pvalue that determines reject/accept

p-value = 0.1952 and since alpha = 0.05 the pvalue is greater, we accept the null hypothesis

tcalc in rejection region?

No, tcalc is not in the rejection region.

Bootstrap pvalues

### bootstrap pvalues
bootpval<-function(x,conf.level=0.95,iter=3000,mu0=0, test="two"){
n=length(x)
y=x-mean(x)+mu0  # transform the data so that it is centered at the NULL
rs.mat<-c()    #rs.mat will become a resample matrix -- now it is an empty vector
xrs.mat<-c()
for(i in 1:iter){ # for loop - the loop will go around iter times
rs.mat<-cbind(rs.mat,sample(y,n,replace=TRUE)) #sampling from y cbind -- column bind -- binds the vectors together by columns
xrs.mat<-cbind(xrs.mat,sample(x,n,replace=TRUE)) #sampling from x cbind -- column bind -- binds the vectors together by columns

}

tstat<-function(z){ # The value of t when the NULL is assumed true (xbar-muo)/z/sqrt(n)
sqrt(n)*(mean(z)-mu0)/sd(z)
}

tcalc=tstat(x) # t for the data collected
ytstat=apply(rs.mat,2,tstat) # tstat of resampled y's, ytstat is a vector and will have iter values in it
xstat=apply(xrs.mat,2,mean)  # mean of resampled x's
alpha=1-conf.level # calculating alpha
ci=quantile(xstat,c(alpha/2,1-alpha/2))# Nice way to form a confidence interval
pvalue=ifelse(test=="two",length(ytstat[ytstat>abs(tcalc) | ytstat < -abs(tcalc)])/iter,
ifelse(test=="upper",length(ytstat[ytstat>tcalc])/iter,
length(ytstat[ytstat<xstat])/iter))

h=hist(ytstat,plot=FALSE)
mid=h$mid
if(test=="two"){
ncoll=length(mid[mid<= -abs(tcalc)])
ncolr=length(mid[mid>=  abs(tcalc)])
col=c(rep("Green",ncoll),rep("Gray",length(mid)-ncoll-ncolr),rep("Green",ncolr))
}
if(test=="upper"){
ncolr=length(mid[mid>=  abs(tcalc)])
col=c(rep("Gray",length(mid)-ncolr),rep("Green",ncolr))
}

if(test=="lower"){
ncoll=length(mid[mid<=  -abs(tcalc)])
col=c(rep("Green",ncoll),rep("Gray",length(mid)-ncoll))
}
hist(ytstat,col=col,freq=FALSE,las=1,main="",xlab=expression(T[stat]))
#segments(ci[1],0,ci[2],0,lwd=2)
pround=round(pvalue,4)
title(substitute(paste(P[value],"=",pround)))
return(list(pvalue=pvalue,tcalc=tcalc,n=n,x=x,test=test,ci=ci))
}

$H_0: \mu = 22, H_1: \mu \neq 22$

bootpval(x=x1, mu=22)

$H_0: \mu = 23, H_1: \mu \neq 23$

bootpval(x=x1, mu=23)

$H_0: \mu = 24, H_1: \mu \neq 24$

bootpval(x=x1, mu=24)

$H_0: \mu = 25, H_1: \mu \neq 25$

bootpval(x=x1, mu=25)

$H_0: \mu = 26, H_1: \mu \neq 26$

bootpval(x=x1, mu=26)

Conclusion

I can conclude that both of the tests we have conducted returned the same accept/rejection results

Task 3

#x
set.seed(30); x=rnorm(15,mean=10, sd=7)
#y
set.seed(40); y=rnorm(20, mean=12,sd=4)

var.test()

var.test(x,y)

I conclude that we must reject the null hypothesis. The variances are unequal. I gather this by looking at the p-value which is < 0.05.

I will assign var.equal = FALSE

t.test()

t.test(y,x, var.equal=F, mu = 0)
t.test(y,x, var.equal=F, mu = 2)

I have learned how to apply my knowledge of hypothesis testing to accept or reject the null hypothesis. I have learned that multiple methods do indeed give similar results.

Task 4

#x
set.seed(30); x= rnorm(15,mean=10, sd=4)
#y
set.seed(40); y= rnorm(20, mean=12, sd=4)

var.equal

var.test(x,y)

I will assign var.equal = TRUE because the pvalue is > 0.05.

t.tests()

t.test(y,x,mu=0, var.equal=T)
t.test(y,x,mu=2, var.equal=T)

Reject mu = 0, Accept mu=2

Task 5

## Bootstrap interval for a two sample test
boot2pval<-function(x1,x2,conf.level=0.95,iter=3000,mudiff=0, test="two"){
n1=length(x1)
n2=length(x2)
y1=x1-mean(x1)+mean(c(x1,x2))  # transform the data so that it is centered at the NULL
y2=x2-mean(x2)+mean(c(x1,x2))
y1rs.mat<-c()    #rs.mat will be come a resample matrix -- now it is an empty vector
x1rs.mat<-c()
y2rs.mat<-c()
x2rs.mat<-c()
for(i in 1:iter){ # for loop - the loop will go around iter times
y1rs.mat<-cbind(y1rs.mat,sample(y1,n1,replace=TRUE)) #sampling from y cbind -- column bind -- binds the vectors together by columns
y2rs.mat<-cbind(y2rs.mat,sample(y2,n2,replace=TRUE))

}
x1rs.mat<-y1rs.mat+mean(x1)-mean(c(x1,x2))
x2rs.mat<-y2rs.mat+mean(x2)-mean(c(x1,x2))

xbar1=mean(x1)
xbar2=mean(x2)
sx1sq=var(x1)
sx2sq=var(x2)

tcalc=(xbar1-xbar2-mudiff)/sqrt(sx1sq/n1+sx2sq/n2)

sy1sq=apply(y1rs.mat,2,var)
sy2sq=apply(y2rs.mat,2,var) 
y1bar=apply(y1rs.mat,2,mean)
y2bar=apply(y2rs.mat,2,mean)

tstat=(y1bar-y2bar-mudiff)/sqrt(sy1sq/n1+sy2sq/n2)


alpha=1-conf.level # calculating alpha
#ci=quantile(xstat,c(alpha/2,1-alpha/2))# Nice way to form a confidence interval
pvalue=ifelse(test=="two",length(tstat[tstat>abs(tcalc) | tstat < -abs(tcalc)])/iter,
ifelse(test=="upper",length(tstat[tstat>tcalc])/iter,
length(ytstat[tstat<tcalc])/iter))

h=hist(tstat,plot=FALSE)
mid=h$mid
if(test=="two"){
ncoll=length(mid[mid<= -abs(tcalc)])
ncolr=length(mid[mid>=  abs(tcalc)])
col=c(rep("Green",ncoll),rep("Gray",length(mid)-ncoll-ncolr),rep("Green",ncolr))
}
hist(tstat,col=col,freq=FALSE)
#segments(ci[1],0,ci[2],0,lwd=2)

return(list(pvalue=pvalue))
#return(list(pvalue=pvalue,tcalc=tcalc,n=n,x=x,test=test,ci=ci))
}
#x
set.seed(30); x3=rnorm(15,mean=10, sd=7)
#y
set.seed(40); y3=rnorm(20, mean=12,sd=4)

boot2pval(x1 = x3, x2=y3)

Task 6

#x
set.seed(30); x= rnorm(15,mean=10, sd=4)
#y
set.seed(40); y= rnorm(20, mean=12, sd=4)

boot2pval(x1=x, x2=y)

Task 7

A

Conducting a t test using the data set x1 with mu = 23

B

Indicates that the test was ran on only 1 sample. Had we used two samples, it would sasve Two sample or Welsh.

C

t = 2.3, df (degrees of freedom) is length(x1) -1, and p-value is 0.025 which indicates we would reject the null hypothesis

D

$H_1: \mu \neq 23$

E

Shows that the confidence interval used for the test is 95%, giving us an alpha of 0.05.

F

The actual interval. Since the t.test was give mu=23, and being that 23 is not within the interval, it shows that we reject the null hypothesis.

G

The actual mean of x1

Task 8

set.seed(30);x1=rnorm(15,mean=10,sd=4)
library(grac0009MATH4753)
grac0009MATH4753::mybootpval(x1)


agracy2246/MATH4753grac0009 documentation built on April 26, 2020, 9:39 a.m.