knitr::opts_chunk$set(echo = TRUE)
getwd()
set.seed(55);x1=rnorm(30,mean=25,sd=5)
t.test(x1, mu = 22)
The interval does not contain 22, thus we reject the null hypothesis. We can also reject because p-value < 0.05.
t.test(x1, mu = 23)
The interval does not contain 23, thus we reject the null hypothesis. We can also reject because p-value < 0.05.
t.test(x1, mu = 24)
The interval contains 24, thus we accept the null hypothesis. We can also accept because the p-value > 0.05.
t.test(x1, mu = 25)
The interval contains 25, thus we accept the null hypothesis. We can also accept because the p-value > 0.05.
t.test(x1, mu = 26)
The interval contains 26, thus we accept the null hypothesis. We can also accept because the p-value > 0.05.
boxplot(x1, main="Sample x1") ci=t.test(x1,mu=mean(x1))$conf.int abline(h=c(ci,mean(x1)),col=c("Red","Red","Green"))
tcalc = (mean(x1) - 24) / (sd(x1) / sqrt(length(x1))) paste0("tcalc = " ,tcalc)
mypvalue=function(t0,xmax=4,n=20, alpha=0.05){ #calculate alpha/2 va=round(pt(-t0,df=n-1),4) pv=2*va # plot the t dist curve(dt(x,df=n-1),xlim=c(-xmax,xmax),ylab="T Density",xlab=expression(t), main=substitute(paste("P-value=", pv, " alpha=", alpha))) # set up points on the polygon to the right xcurve=seq(t0,xmax,length=1000) ycurve=dt(xcurve,df=n-1) # set up points to the left xlcurve=seq(-t0,-xmax,length=1000) ylcurve=dt(xcurve,df=n-1) # Shade in the polygon defined by the line segments polygon(c(t0,xcurve,xmax),c(0,ycurve,0),col="green") polygon(c(-t0,xlcurve,-xmax),c(0,ylcurve,0),col="green") # make quantiles q=qt(1-alpha/2,n-1) abline( v=c(q,-q),lwd=2) # plot the cut off t value axis(3,c(q,-q),c(expression(abs(t[alpha/2])),expression(-abs(t[alpha/2])))) # Annotation text(0.5*(t0+xmax),max(ycurve),substitute(paste(area, "=",va))) text(-0.5*(t0+xmax),max(ycurve),expression(area)) return(list(q=q,pvalue=pv)) } mypvalue(t0=tcalc, n = length(x1), alpha = 0.05)
Rejection region is $T \geq t_{\frac{\alpha}{2}} = 2.04523$ and $T \leq -t_{\frac{\alpha}{2}} = -2.04523$
p-value = 0.1952 and since alpha = 0.05 the pvalue is greater, we accept the null hypothesis
No, tcalc is not in the rejection region.
### bootstrap pvalues bootpval<-function(x,conf.level=0.95,iter=3000,mu0=0, test="two"){ n=length(x) y=x-mean(x)+mu0 # transform the data so that it is centered at the NULL rs.mat<-c() #rs.mat will become a resample matrix -- now it is an empty vector xrs.mat<-c() for(i in 1:iter){ # for loop - the loop will go around iter times rs.mat<-cbind(rs.mat,sample(y,n,replace=TRUE)) #sampling from y cbind -- column bind -- binds the vectors together by columns xrs.mat<-cbind(xrs.mat,sample(x,n,replace=TRUE)) #sampling from x cbind -- column bind -- binds the vectors together by columns } tstat<-function(z){ # The value of t when the NULL is assumed true (xbar-muo)/z/sqrt(n) sqrt(n)*(mean(z)-mu0)/sd(z) } tcalc=tstat(x) # t for the data collected ytstat=apply(rs.mat,2,tstat) # tstat of resampled y's, ytstat is a vector and will have iter values in it xstat=apply(xrs.mat,2,mean) # mean of resampled x's alpha=1-conf.level # calculating alpha ci=quantile(xstat,c(alpha/2,1-alpha/2))# Nice way to form a confidence interval pvalue=ifelse(test=="two",length(ytstat[ytstat>abs(tcalc) | ytstat < -abs(tcalc)])/iter, ifelse(test=="upper",length(ytstat[ytstat>tcalc])/iter, length(ytstat[ytstat<xstat])/iter)) h=hist(ytstat,plot=FALSE) mid=h$mid if(test=="two"){ ncoll=length(mid[mid<= -abs(tcalc)]) ncolr=length(mid[mid>= abs(tcalc)]) col=c(rep("Green",ncoll),rep("Gray",length(mid)-ncoll-ncolr),rep("Green",ncolr)) } if(test=="upper"){ ncolr=length(mid[mid>= abs(tcalc)]) col=c(rep("Gray",length(mid)-ncolr),rep("Green",ncolr)) } if(test=="lower"){ ncoll=length(mid[mid<= -abs(tcalc)]) col=c(rep("Green",ncoll),rep("Gray",length(mid)-ncoll)) } hist(ytstat,col=col,freq=FALSE,las=1,main="",xlab=expression(T[stat])) #segments(ci[1],0,ci[2],0,lwd=2) pround=round(pvalue,4) title(substitute(paste(P[value],"=",pround))) return(list(pvalue=pvalue,tcalc=tcalc,n=n,x=x,test=test,ci=ci)) }
bootpval(x=x1, mu=22)
bootpval(x=x1, mu=23)
bootpval(x=x1, mu=24)
bootpval(x=x1, mu=25)
bootpval(x=x1, mu=26)
I can conclude that both of the tests we have conducted returned the same accept/rejection results
#x set.seed(30); x=rnorm(15,mean=10, sd=7) #y set.seed(40); y=rnorm(20, mean=12,sd=4)
var.test(x,y)
I conclude that we must reject the null hypothesis. The variances are unequal. I gather this by looking at the p-value which is < 0.05.
I will assign var.equal = FALSE
t.test(y,x, var.equal=F, mu = 0) t.test(y,x, var.equal=F, mu = 2)
I have learned how to apply my knowledge of hypothesis testing to accept or reject the null hypothesis. I have learned that multiple methods do indeed give similar results.
#x set.seed(30); x= rnorm(15,mean=10, sd=4) #y set.seed(40); y= rnorm(20, mean=12, sd=4)
var.test(x,y)
I will assign var.equal = TRUE because the pvalue is > 0.05.
t.test(y,x,mu=0, var.equal=T) t.test(y,x,mu=2, var.equal=T)
Reject mu = 0, Accept mu=2
## Bootstrap interval for a two sample test boot2pval<-function(x1,x2,conf.level=0.95,iter=3000,mudiff=0, test="two"){ n1=length(x1) n2=length(x2) y1=x1-mean(x1)+mean(c(x1,x2)) # transform the data so that it is centered at the NULL y2=x2-mean(x2)+mean(c(x1,x2)) y1rs.mat<-c() #rs.mat will be come a resample matrix -- now it is an empty vector x1rs.mat<-c() y2rs.mat<-c() x2rs.mat<-c() for(i in 1:iter){ # for loop - the loop will go around iter times y1rs.mat<-cbind(y1rs.mat,sample(y1,n1,replace=TRUE)) #sampling from y cbind -- column bind -- binds the vectors together by columns y2rs.mat<-cbind(y2rs.mat,sample(y2,n2,replace=TRUE)) } x1rs.mat<-y1rs.mat+mean(x1)-mean(c(x1,x2)) x2rs.mat<-y2rs.mat+mean(x2)-mean(c(x1,x2)) xbar1=mean(x1) xbar2=mean(x2) sx1sq=var(x1) sx2sq=var(x2) tcalc=(xbar1-xbar2-mudiff)/sqrt(sx1sq/n1+sx2sq/n2) sy1sq=apply(y1rs.mat,2,var) sy2sq=apply(y2rs.mat,2,var) y1bar=apply(y1rs.mat,2,mean) y2bar=apply(y2rs.mat,2,mean) tstat=(y1bar-y2bar-mudiff)/sqrt(sy1sq/n1+sy2sq/n2) alpha=1-conf.level # calculating alpha #ci=quantile(xstat,c(alpha/2,1-alpha/2))# Nice way to form a confidence interval pvalue=ifelse(test=="two",length(tstat[tstat>abs(tcalc) | tstat < -abs(tcalc)])/iter, ifelse(test=="upper",length(tstat[tstat>tcalc])/iter, length(ytstat[tstat<tcalc])/iter)) h=hist(tstat,plot=FALSE) mid=h$mid if(test=="two"){ ncoll=length(mid[mid<= -abs(tcalc)]) ncolr=length(mid[mid>= abs(tcalc)]) col=c(rep("Green",ncoll),rep("Gray",length(mid)-ncoll-ncolr),rep("Green",ncolr)) } hist(tstat,col=col,freq=FALSE) #segments(ci[1],0,ci[2],0,lwd=2) return(list(pvalue=pvalue)) #return(list(pvalue=pvalue,tcalc=tcalc,n=n,x=x,test=test,ci=ci)) }
#x set.seed(30); x3=rnorm(15,mean=10, sd=7) #y set.seed(40); y3=rnorm(20, mean=12,sd=4) boot2pval(x1 = x3, x2=y3)
#x set.seed(30); x= rnorm(15,mean=10, sd=4) #y set.seed(40); y= rnorm(20, mean=12, sd=4) boot2pval(x1=x, x2=y)
Conducting a t test using the data set x1 with mu = 23
Indicates that the test was ran on only 1 sample. Had we used two samples, it would sasve Two sample or Welsh.
t = 2.3, df (degrees of freedom) is length(x1) -1, and p-value is 0.025 which indicates we would reject the null hypothesis
$H_1: \mu \neq 23$
Shows that the confidence interval used for the test is 95%, giving us an alpha of 0.05.
The actual interval. Since the t.test was give mu=23, and being that 23 is not within the interval, it shows that we reject the null hypothesis.
The actual mean of x1
set.seed(30);x1=rnorm(15,mean=10,sd=4) library(grac0009MATH4753) grac0009MATH4753::mybootpval(x1)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.