class: fullscreen, inverse, top, center, text-black
background-image: url("../inst/images/test_chair.jpeg")
.font150[hypothesis testing]
```r
knitr::opts_chunk$set(fig.width = 4.25, fig.height = 3.5, fig.retina = 3,
                      message = FALSE, warning = FALSE,
                      cache = TRUE, autodep = TRUE, hiline = TRUE)
knitr::opts_hooks$set(fig.callout = function(options) {
  if (options$fig.callout) {
    options$echo <- FALSE
    options$out.height <- "99%"
    options$fig.width <- 16
    options$fig.height <- 8
  }
  options
})
hook_source <- knitr::knit_hooks$get('source')
knitr::knit_hooks$set(source = function(x, options) {
  if (!is.null(options$hiline) && options$hiline) {
    x <- stringr::str_replace(x, "^ ?(.+)\\s?#<<", "*\\1")
  }
  hook_source(x, options)
})
options(htmltools.dir.version = FALSE, width = 90)
as_table <- function(...) knitr::kable(..., format = 'html', digits = 3)
library(tidyverse)
```
Dr. Muriel Bristol, a colleague of Fisher, claimed to be able to tell whether the tea or the milk was added first to a cup.
.img-right[
]
The test statistic was a simple count of the number of successes
in selecting the 4 cups out of 8.
--
She got all correct. What was the probability of getting all correct?
.left-code[
```r
truth <- c(0,1,0,1,1,0,0,1)
x <- combn(truth, m = 4) #<<
nrcor <- apply(x, 2, sum) #<<
nulldistr <- table(nrcor)
nulldistr
plot(nulldistr, xlab = "nr correct")
```
There are 70 combinations of the elements in x taken m at a time.

Count the number of successes for each combination.

Count how often 0, 1, 2, 3, or 4 successes occur.
]
.right-plot[
]
```r
probs <- nulldistr / sum(nulldistr) # compute probabilities
probs <- round(probs, digits = 3)
probs
```
Hence, at $\alpha = 0.05$ we reject the hypothesis that she cannot recognize whether milk or tea was added first, since the probability of getting all 4 right is $P(x = 4) =$ `r probs[5]`.
If she had gotten only 3 right, would you accept the null hypothesis?
--
$P(x \geq 3) =$ `r probs[5]` $+$ `r probs[4]` $=$ `r probs[5] + probs[4]`.
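As a cross-check (a sketch using base R's `dhyper`): the number of correctly chosen cups follows a hypergeometric distribution, which reproduces the null distribution tabulated above.

```r
# P(k correct) when choosing 4 cups out of 8, of which 4 are "milk first"
correct <- 0:4
round(dhyper(correct, m = 4, n = 4, k = 4), 3)
# matches nulldistr / 70; P(all 4 correct) = 1/70 ~ 0.014
```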
layout: false
```r
# Plot distribution of T under the null hypothesis
x <- seq(-3, 5, by = 0.1)
plot(x, dnorm(x), type = "l", xlim = c(-3, 5))
# Specify alpha and show critical regions
alpha <- 0.05
crl <- qnorm(alpha/2)
crh <- qnorm(1 - alpha/2)
abline(h = 0)
abline(v = c(crl, crh), col = 2)
text( 2.1, 0.1, "critical region", adj = 0, srt = 90)
text(-2.1, 0.3, "critical region", adj = 0, srt = -90)
# Show a non-significant test statistic
abline(v = 1.2, col = 3)
text(1.2, 0.1, expression(t[obs]), srt = 90)
# Show a significant test statistic
abline(v = 4, col = "magenta")
text(4, 0.1, expression(t[obs]), srt = 90)
```
.img-right[
]
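The same picture expressed as p-values (a sketch, using the two $t_{obs}$ values marked in the plot):

```r
# two-sided p-value under the standard normal null
t_obs <- 1.2
2 * pnorm(-abs(t_obs))  # ~0.23: inside the acceptance region
t_obs <- 4
2 * pnorm(-abs(t_obs))  # ~6.3e-05: in the critical region
```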
exclude: true
The null hypothesis is that the mean of the observed data is equal to $\mu$.

Observations are independent and identically distributed (iid).

A suitable test statistic is $\bar{X} - \mu$.

It will depend on the sample size $n$ and on the variance $\sigma^2$.
.left-code[
```r
decimalplaces <- function(x) {
  if (abs(x - round(x)) > .Machine$double.eps^0.5) {
    nchar(strsplit(sub('0+$', '', as.character(x)), ".", fixed = TRUE)[[1]][[2]])
  } else {
    return(0)
  }
}
getBreaks <- function(T0, by = 0.1){
  res <- seq(round(min(T0), digits = decimalplaces(by)) - by,
             round(max(T0), digits = decimalplaces(by)) + by,
             by = by)
  return(res)
}
```
```r
# Simulating data from the null
N <- 10000; N_obs <- 4; mu <- 0; sigma <- 1
bb <- function(y){
  x <- rnorm( N_obs, mu, sigma )
  # compute mean and sd
  data.frame(mean = mean( x ), #<<
             sd = sd(x)) #<<
}
res <- purrr::map_df(1:N, bb)
res %>%
  tidyr::gather() %>%
  ggplot(aes(x = value)) +
  geom_histogram() +
  facet_grid(~key, scales = "free_x")
```
]
.right-plot[
]
.left-code[
if $\sigma$ known

```r
# statistic
T0 <- mu - res$mean
hist(T0, breaks = getBreaks(T0, by = 0.05),
     probability = TRUE, main = "")
x <- seq(-10, 10, 0.01)
lines(x, # H_0 distribution
      dnorm(x, # sigma is known
            sd = (sigma/sqrt(N_obs))), #<<
      col = 2)
```
$T|H_0 \sim N(0,\sigma/\sqrt{N_{obs}})$
]
.right-plot[
]
The t-statistic

$$T = \frac{\bar{X} - \mu}{\sigma/\sqrt{n}}$$

z-transforms the sample mean, so under the null $T \sim N(0,1)$.
The variance of the sampling distribution of the mean is the population variance divided by $n$ (given $iid$ data).
$$\sigma^2_{mean} = \sigma^2/n$$
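A quick simulation check of this identity (a sketch, assuming iid normal draws):

```r
set.seed(1)
n <- 4; sigma <- 1
means <- replicate(10000, mean(rnorm(n, 0, sigma)))
var(means)    # close to ...
sigma^2 / n   # ... the theoretical value 0.25
```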
.left-code[
if $\sigma$ known

```r
# improved statistic
T0 <- (mu - res$mean) / (sigma/sqrt(N_obs)) #<<
hist(T0, breaks = getBreaks(T0, by = 0.05),
     probability = TRUE, main = "")
x <- seq(-10, 10, 0.01)
# simplifies the H_0 distribution
lines(x, dnorm(x, sd = 1), #<<
      col = 2)
```
$T|H_0 \sim N(0,1)$ ]
.right-plot[
]
.left-code[
if $\sigma$ UNKNOWN

```r
T0 <- (mu - res$mean) / (res$sd/sqrt(N_obs)) #<<
hist(T0, breaks = getBreaks(T0),
     probability = TRUE, xlim = c(-6,6),
     ylim = c(0,0.4), main = "")
x <- seq(-10, 10, 0.1)
lines(x, dnorm(x), #<<
      col = "red")
lines(x, dt(x, df = N_obs - 1), #<<
      type = "l", col = "green", lwd = 2)
```
$T|H_0 \sim T(\mu=0, df=N_{obs}-1)$
]
.right-plot[
]
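For a single sample, base R's `t.test` computes this statistic (up to the sign convention $\bar{X}-\mu$ vs. $\mu-\bar{X}$) and compares it to the $t_{N_{obs}-1}$ distribution; a self-contained sketch:

```r
set.seed(1)
y <- rnorm(4, mean = 0, sd = 1)
(mean(y) - 0) / (sd(y) / sqrt(4))  # T computed by hand
t.test(y, mu = 0)$statistic        # the same value from t.test
```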
.left-code[
Sampling from a skewed distribution.

$$x \sim Exp(1)$$

We know $\mu = 1/\lambda = 1$.

```r
x <- seq(0, 5, length = 100)
plot(c(0, x), c(0, dexp(x, rate = 1)),
     type = "l")
abline(h = 0)
abline(v = 1)
```
]
.right-plot[
]
.left-code[
```r
N <- 10000; rate <- 1; N_obs <- 4; mu <- 1
bb_exp <- function(y){
  x <- rexp( N_obs, rate = rate ) #<<
  data.frame(mean = mean( x ),
             sd = sd(x))
}
res <- purrr::map_df(1:N, bb_exp)
T0 <- (res$mean - mu) / #<<
  (res$sd/sqrt(N_obs)) #<<
hist(T0, breaks = getBreaks(T0),
     probability = TRUE, xlim = c(-12,6),
     ylim = c(0,0.4), main = "")
x <- seq(-10, 10, 0.1)
lines(x, dt(x, df = N_obs - 1), #<<
      type = "l", col = "green", lwd = 2)
abline(v = 0, col = 2)
```
We simulate $4$ data points from an exponential distribution. Observe how the null distribution changed.
]
.right-plot[
]
.footnote[]
exclude: true
.left-code[
```r
# Simulating data from the null
N <- 10000; rate <- 1
N_obs <- 40 #<<
mu <- 1
bb_exp <- function(y){
  x <- rexp( N_obs, rate = rate )
  data.frame(mean = mean( x ),
             sd = sd(x))
}
res <- purrr::map_df(1:N, bb_exp)
T0 <- (res$mean - mu) /
  (res$sd/sqrt(N_obs))
hist(T0, breaks = getBreaks(T0),
     probability = TRUE, xlim = c(-6,6),
     ylim = c(0,0.4), main = "")
x <- seq(-10, 10, 0.1)
lines(x, dt(x, df = N_obs - 1), type = "l",
      col = "green", lwd = 2)
abline(v = 0, col = 2)
```
]
.right-plot[
]
.footnote[The assumption is that $x$ is generated from a normal distribution, but this time we simulate $40$ data points from an exponential distribution.]
.left-code[
```r
# Simulating data from the null
N <- 10000; rate <- 1
N_obs <- 100 #<<
mu <- 1
bb_exp <- function(y){
  x <- rexp( N_obs, rate = rate )
  data.frame(mean = mean( x ),
             sd = sd(x))
}
res <- purrr::map_df(1:N, bb_exp)
T0 <- (res$mean - mu) /
  (res$sd/sqrt(N_obs))
hist(T0, breaks = getBreaks(T0),
     probability = TRUE, xlim = c(-6,6),
     ylim = c(0,0.4), main = "")
x <- seq(-10, 10, 0.1)
lines(x, dt(x, df = N_obs - 1), type = "l",
      col = "green", lwd = 2)
abline(v = 0, col = 2)
```

We simulate $100$ data points from an exponential distribution. Observe how the null distribution changed.
]
.right-plot[
]
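One way to quantify how closely the simulated null follows the assumed $t$ distribution is a QQ plot (a sketch, reusing `T0` and `N_obs` from the chunk above):

```r
qqplot(qt(ppoints(500), df = N_obs - 1), T0,
       xlab = "theoretical t quantiles",
       ylab = "simulated T0")
abline(0, 1, col = 2)  # perfect agreement would follow this line
```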
--
The error of transformed intensities in an LFQ experiment is approximately normally distributed because it is the sum of biological, biochemical, and technical variability.

Sample sizes are small. Therefore, great care has to be taken to meet the requirement of normally distributed observations when using the t-test.

Alternatives to the t-test:

- asymptotic tests (e.g. in the multcomp package)
- nonparametric tests, e.g. the randomization test
Null hypothesis: there is no such difference, i.e. the two group means are equal.

Test statistic:

$$T = \frac{\bar{Y}_1 - \bar{Y}_2}{\sqrt{\frac{s_1^2}{N_1} + \frac{s_2^2}{N_2}}}$$

Significance level $\alpha$.

Reject the null hypothesis that the two means are equal if $|T| > t_{1-\alpha/2,\nu}$, where the degrees of freedom $\nu$ are given by the Welch-Satterthwaite equation:

$$\nu = \frac{\left(s_1^2/N_1 + s_2^2/N_2\right)^2}{\left(s_1^2/N_1\right)^2/(N_1-1) + \left(s_2^2/N_2\right)^2/(N_2-1)}$$
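Base R's `t.test` with `var.equal = FALSE` (the default) computes exactly this statistic and the Welch degrees of freedom; a sketch with made-up numbers:

```r
y1 <- c(5.1, 4.8, 6.0, 5.5)        # hypothetical group 1
y2 <- c(4.2, 3.9, 4.5, 4.1)        # hypothetical group 2
t.test(y1, y2, var.equal = FALSE)  # reports T, the Welch df, and the p-value
```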
tmp <- list("Diet A"=as.integer(c( 1, 2, 3, 4, 5)), "Diet B" = as.integer(c( 6, 7, 8 , 9, 10))) library(flextable) tmp <- (data.frame(tmp)) tmp %>% head() %>% t() %>% as.data.frame() %>% rownames_to_column() %>% flextable()
$$\frac{1}{B} \sum_{i=1}^{B} I\left(|T_i| \geq |T_{obs}|\right)$$
.footnote[Steps 1-3 generate a sample from the null distribution.]
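A minimal base-R sketch of this recipe, using the two diet groups from the table above:

```r
a <- 1:5; b <- 6:10   # Diet A, Diet B
t_stat <- function(y1, y2){
  (mean(y1) - mean(y2)) /
    sqrt(var(y1)/length(y1) + var(y2)/length(y2))
}
t_obs <- t_stat(a, b)
pooled <- c(a, b)
set.seed(1)
B <- 10000
# steps 1-3: permute the group labels, recompute T
t_perm <- replicate(B, {
  idx <- sample(seq_along(pooled), length(a))
  t_stat(pooled[idx], pooled[-idx])
})
mean(abs(t_perm) >= abs(t_obs))  # randomization p-value
```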
A type I error (false positive) occurs when the null hypothesis ($H_0$) is true but is rejected. The type I error rate, or significance level $\alpha$, is the probability of rejecting the null hypothesis given that it is true.

A type II error (false negative) occurs when the null hypothesis is false but erroneously fails to be rejected. The type II error rate is denoted by the Greek letter $\beta$ and is related to the power of a test, which equals $1-\beta$.

For a given test, the only way to reduce both error rates is to increase the sample size, and this may not be feasible.
.img-right[
]
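Base R's `power.t.test` makes this trade-off concrete (a sketch): fixing the effect size, $\alpha$, and the desired power returns the required sample size.

```r
# sample size per group needed to detect a one-sd difference
# at alpha = 0.05 with power = 0.8
power.t.test(delta = 1, sd = 1, sig.level = 0.05, power = 0.8)
```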
.pull-left[
```r
dist1 <- function(N){ rexp(N, 1) }
dist2 <- function(N){ rexp(N, 10) }
xx <- p3003PBC::sample_stats(N = 10, dist1, dist2, samples = 100)
x <- cbind(
  coin       = table(xx$res_p[,"coin"] < 0.01),
  t.test     = table(xx$res_p[,"t.test"] < 0.01),
  asymp.test = table(xx$res_p[,"asymp.test"] < 0.01))
rownames(x) <- c("Accept H0", "Reject H0")
knitr::kable(x, format = "html")
```
```r
par(mfrow = c(2,1))
old <- c(5.1, 4.1, 4.1, 2.1)
par(mar = c(5.1, 4.1, 1, 2.1))
scatter.smooth(xx$res_p[,"coin"], xx$res_p[,"t.test"], log = "xy",
               xlab = "log(p.value) randomization test",
               ylab = "log(p.value) t.test",
               pch = 16, cex = 0.5, ylim = c(1e-3, 1))
abline(c(0,1), col = "blue")
abline(h = 0.01, v = 0.01, col = "gray")
scatter.smooth(xx$res_p[,"coin"], xx$res_p[,"asymp.test"], log = "xy",
               xlab = "log(p.value) randomization test",
               ylab = "log(p.value) asymptotic test",
               pch = 16, cex = 0.5, ylim = c(1e-3, 1))
abline(c(0,1), col = "blue")
abline(h = 0.01, v = 0.01, col = "gray")
```
]
.right-plot[
]
.pull-left[
```r
dist1 <- function(N){ rexp(N, 1) }
dist2 <- function(N){ rexp(N, 10) }
xx <- p3003PBC::sample_stats(N_obs = 20, dist1, dist2, samples = 100)
x <- cbind(
  coin       = table(xx$res_p[,"coin"] < 0.01),
  t.test     = table(xx$res_p[,"t.test"] < 0.01),
  asymp.test = table(xx$res_p[,"asymp.test"] < 0.01))
rownames(x) <- c("Accept H0", "Reject H0")
knitr::kable(x, format = "html")
```
```r
par(mfrow = c(2,1))
old <- c(5.1, 4.1, 4.1, 2.1)
par(mar = c(5.1, 4.1, 1, 2.1))
scatter.smooth(xx$res_p[,"coin"], xx$res_p[,"t.test"], log = "xy",
               xlab = "log(p.value) randomization test",
               ylab = "log(p.value) t.test",
               pch = 16, cex = 0.5, ylim = c(1e-3, 1))
abline(c(0,1), col = "blue")
abline(h = 0.01, v = 0.01, col = "gray")
scatter.smooth(xx$res_p[,"coin"], xx$res_p[,"asymp.test"], log = "xy",
               xlab = "log(p.value) randomization test",
               ylab = "log(p.value) asymptotic test",
               pch = 16, cex = 0.5, ylim = c(1e-3, 1))
abline(c(0,1), col = "blue")
abline(h = 0.01, v = 0.01, col = "gray")
```
]
.right-plot[
]
.pull-left[
```r
dist1 <- function(N){ log2(rexp(N, 1)) }
dist2 <- function(N){ log2(rexp(N, 10)) }
xx <- p3003PBC::sample_stats(N_obs = 10, dist1, dist2, samples = 100)
x <- cbind(
  coin       = table(xx$res_p[,"coin"] < 0.01),
  t.test     = table(xx$res_p[,"t.test"] < 0.01),
  asymp.test = table(xx$res_p[,"asymp.test"] < 0.01))
rownames(x) <- c("Accept H0", "Reject H0")
knitr::kable(x, format = "html")
```
```r
par(mfrow = c(2,1))
old <- c(5.1, 4.1, 4.1, 2.1)
par(mar = c(5.1, 4.1, 1, 2.1))
scatter.smooth(xx$res_p[,"coin"], xx$res_p[,"t.test"], log = "xy",
               xlab = "log(p.value) randomization test",
               ylab = "log(p.value) t.test",
               pch = 16, cex = 0.5, ylim = c(1e-3, 1))
abline(c(0,1), col = "blue")
abline(h = 0.01, v = 0.01, col = "gray")
scatter.smooth(xx$res_p[,"coin"], xx$res_p[,"asymp.test"], log = "xy",
               xlab = "log(p.value) randomization test",
               ylab = "log(p.value) asymptotic test",
               pch = 16, cex = 0.5, ylim = c(1e-3, 1))
abline(c(0,1), col = "blue")
abline(h = 0.01, v = 0.01, col = "gray")
```
]
.right-plot[
]
exclude: true
.left-code[
```r
?sleep # help(sleep)
class(sleep)
head(sleep, n = 4)
table(sleep$group)
table(sleep$ID)
```
]
.right-code[
```r
# model extra as function of group
a <- extra ~ group
# model extra as function of group and ID
b <- extra ~ group + ID
c <- extra ~ group + ID + group:ID
d <- extra ~ group * ID
e <- extra ~ group * ID - group:ID
class(e)
```
Most functions in R work with the formula interface and data.frames:

```r
some_function("formula", data = "data.frame")
```
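For instance, with the `sleep` dataset used on the next slides:

```r
boxplot(extra ~ group, data = sleep)
t.test(extra ~ group, data = sleep)
lm(extra ~ group + ID, data = sleep)
```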
]

---

# Repeated - correlated measurements

.left-code[

```r
par(mfrow = c(3,1), mar = c(4,4,0,0))
plot(extra ~ group, data = sleep) #<<
sleepwide <- tidyr::spread(sleep, group, extra)
plot(sleepwide$`1`, sleepwide$`2`) #<<
legend("topleft",
       legend = paste("cor = ",
                      round(cor(sleepwide$`1`, sleepwide$`2`),
                            digits = 2)))
sleepwide <- sleepwide %>%
  dplyr::mutate(diff = `2` - `1`)
boxplot(sleepwide$diff) #<<
```
]
.right-plot[
]
.footnote[To see the `sleep` dataset documentation, run `?sleep` in R.]
$$t_{unpaired} = \frac{\bar{x}_1 - \bar{x}_2}{\sqrt{s^2(\frac{1}{n_1} + \frac{1}{n_2})}}$$
$$t_{paired} = \frac{\bar{d}}{\frac{s_d}{\sqrt{n}}}$$
.left-code[
```r
test.p.values <- data.frame(
  unpaired.p = t.test(extra ~ group, #<<
                      data = sleep, #<<
                      paired = FALSE)$p.value, #<<
  paired.p   = t.test(extra ~ group, #<<
                      data = sleep, #<<
                      paired = TRUE)$p.value, #<<
  diff.p     = t.test(sleepwide$diff)$p.value #<<
)
# Traditional interface
with(sleep,
     t.test(extra[group == 1], extra[group == 2]))$p.value
```
.footnote[Top code block: two-sample t-test; middle code block: paired t-test; bottom code block: one-sample t-test on the differences. Note that the paired t-test gives the same result as the one-sample t-test on the differences.]
]
.pull-right[
knitr::kable(signif(test.p.values, digits=2), format = "html")
]
```r
sleepless <- datasets::sleep
sleepless$extra[c(1,4,6,12)] <- NA
sleepless$extra[1:4]
tryCatch(
  t.test(extra ~ group, data = sleepless, paired = TRUE), #<<
  error = function(e) e)
tryCatch(
  t.test(extra ~ group, data = sleepless, paired = FALSE), #<<
  error = function(e) e)
```
.footnote[Running the paired t-test with missing data fails: it cannot be specified which observations are paired.]
.left-code[
```r
lm1 <- lm(extra ~ group, data = sleep) #<<
lm2 <- lm(extra ~ group + ID, data = sleep) #<<
lmermod <- lmerTest::lmer(extra ~ group + (1|ID), data = sleep) #<<
# collect coefficients into a table
x <- bind_rows(
  broom::tidy(anova(lm1))[1,],
  broom::tidy(anova(lm2))[1,],
  broom::tidy(anova(lmermod))[1,]
)
# format table
xx <- add_column(x, model = c("lm_1","lm_2","lmer"), .before = 1) %>%
  dplyr::select(model, p.value) %>%
  dplyr::mutate(p.value = signif(p.value, digits = 2))
```
]
.pull-right[
knitr::kable(xx, format="html")
]
.footnote[The same test can be performed using linear models or mixed-effects linear models.]
.left-code[
```r
lm1 <- lm(extra ~ group, #<<
          data = sleepless) #<<
lm2 <- lm(extra ~ group + ID, #<<
          data = sleepless) #<<
lmermod <- lmerTest::lmer( #<<
  extra ~ group + (1|ID), #<<
  data = sleepless) #<<
x <- bind_rows(
  broom::tidy(anova(lm1))[1,],
  broom::tidy(anova(lm2))[1,],
  broom::tidy(anova(lmermod))[1,]
)
xx <- add_column(x, model = c("lm_1","lm_2","lmer"), .before = 1) %>%
  dplyr::select(model, p.value) %>%
  dplyr::mutate(p.value = signif(p.value, digits = 2))
```
]
.pull-right[
knitr::kable(xx, format = "html")
]
.footnote[Linear models also work with missing data. We can specify the pairing via `extra ~ group + ID`.]
.left-code[
```r
hist(sqrt(rlnorm(10000)), breaks = 100)
```
]
.right-plot[
]
```r
# generate 500 test results each for n = 10, 100, 1000, 5000
# rlnorm gives random draws from the log-normal distribution
x <- replicate(500, { #<<
  c(shapiro.test(sqrt(rlnorm(10)))$p.value,
    shapiro.test(sqrt(rlnorm(100)))$p.value,
    shapiro.test(sqrt(rlnorm(1000)))$p.value,
    shapiro.test(sqrt(rlnorm(5000)))$p.value)
})
rownames(x) <- c("n10","n100","n1000","n5000")
# the proportion of significant deviations
rowMeans(x < 0.05) #<<
```
.footnote[For small sample sizes (N = 10) there is not enough power to reject the null hypothesis. For large sample sizes there is enough power, but the deviation is NOT RELEVANT because of the CLT. Don't confuse "statistical significance" with "importance".]
A type II error (false negative) occurs when the null hypothesis is false but erroneously fails to be rejected.

The power of a test equals $1-\beta$.

Hence, for N = 10 and data that are the square root of log-normal draws, the power is:
.img-right[
]
.left-code[
```r
rowMeans(x < 0.05)[1] #<<
# type II error
beta <- 1 - rowMeans(x < 0.05)[1]
beta
1 - beta
```
]
.img-right[
]