class: fullscreen, inverse, top, center, text-black
background-image: url("../inst/images/test_chair.jpeg")

.font150[hypothesis testing]

knitr::opts_chunk$set(fig.width = 4.25, fig.height = 3.5, fig.retina = 3,
                      message = FALSE, warning = FALSE, cache = TRUE,
                      autodep = TRUE, hiline = TRUE)
knitr::opts_hooks$set(fig.callout = function(options) {
  if (options$fig.callout) {
    options$echo <- FALSE
    options$out.height <- "99%"
    options$fig.width <- 16
    options$fig.height <- 8
  }
  options
})
hook_source <- knitr::knit_hooks$get('source')
knitr::knit_hooks$set(source = function(x, options) {
  if (!is.null(options$hiline) && options$hiline) {
    x <- stringr::str_replace(x, "^ ?(.+)\\s?#<<", "*\\1")
  }
  hook_source(x, options)
})
options(htmltools.dir.version = FALSE, width = 90)
as_table <- function(...) knitr::kable(..., format='html', digits = 3)

library(tidyverse)

Goals


Lady tasting tea

Dr. Muriel Bristol, a colleague of Fisher's, claimed to be able to tell whether the tea or the milk was added first to a cup.

.img-right[ ]

.footnote[Lady tasting tea https://en.wikipedia.org/wiki/Lady_tasting_tea, Ronald Fisher https://en.wikipedia.org/wiki/Ronald_Fisher]

- The null hypothesis was that the lady
had no such ability.

- The experiment was to prepare 8 cups of tea: 4 with
milk added first and 4 with tea added first.


Lady tasting tea

.left-code[

truth <- c(0,1,0,1,1,0,0,1) # true cup labels, four of each kind
x <- combn(truth,m = 4) #<<
nrcor <- apply(x, 2, sum) #<<
nulldistr <- table(nrcor)
nulldistr
plot(nulldistr, xlab="nr correct")

There are 70 combinations of the elements in truth taken $m = 4$ at a time.

Count the number of successes for each combination.

Count how often 0, 1, 2, 3, or 4 successes occur. ]

.right-plot[


]


Lady tasting tea

probs <- nulldistr / sum(nulldistr) # compute probabilities
probs <- round(probs, digits = 3)
probs

Hence, at $\alpha = 0.05$ we reject the null hypothesis that she cannot recognize whether milk or tea was added first, since getting all 4 right has probability $P(x = 4) =$ `r probs[5]`.

If she had gotten only 3 right, would you accept the null hypothesis?

--

$P(x \geq 3) =$ `r probs[5]` $+$ `r probs[4]` $=$ `r probs[5] + probs[4]`.
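The same p-values follow directly from the hypergeometric distribution, without enumerating the combinations; a quick cross-check:

```r
# number of correctly identified milk-first cups is hypergeometric
dhyper(4, m = 4, n = 4, k = 4)        # P(x = 4) = 1/70
sum(dhyper(3:4, m = 4, n = 4, k = 4)) # P(x >= 3) = 17/70
```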


layout: false

Hypothesis testing - Brief version

# Plot distribution of T under null Hypothesis
x <- seq(-3,5, by=0.1)
plot(x, dnorm(x), type="l", xlim = c(-3,5))

# Specify alpha and show critical regions
alpha <- 0.05
crl <- qnorm(alpha/2)
crh <- qnorm(1 - alpha/2)
abline(h = 0)
abline(v = c(crl, crh), col=2)

text(  2.1 , 0.1, "critical region", adj=0, srt=90)
text( -2.1 , 0.3, "critical region", adj=0, srt=-90)

# Show not significant test statistic
abline(v = 1.2, col=3)
text(1.2, 0.1, expression(t[obs]), srt=90)

# Show significant test statistic
abline(v = 4, col="magenta")
text(4, 0.1, expression(t[obs]), srt=90)
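Instead of comparing $t_{obs}$ with the critical values, one can report the two-sided p-value directly; a minimal sketch for the two statistics plotted above:

```r
2 * pnorm(-abs(1.2)) # 0.23, not significant
2 * pnorm(-abs(4))   # 6.3e-05, significant
```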

.img-right[


]


exclude: true

Hypothesis testing - Long version


Testing if mean is equal to $\mu$


Mean is equal to $\mu$? Simulate data under null

.left-code[

decimalplaces <- function(x) {
    if (abs(x - round(x)) > .Machine$double.eps^0.5) {
        nchar(strsplit(sub('0+$', '', as.character(x)), ".", fixed = TRUE)[[1]][[2]])
    } else {
        return(0)
    }
}

getBreaks <- function(T0, by = 0.1){
  res <- seq(round(min(T0), digits = decimalplaces(by)) - by,
             round(max(T0), digits = decimalplaces(by)) + by,
             by = by)
  return(res)
}
# Simulating data from Null
N <- 10000; N_obs <- 4;
mu <- 0; sigma <- 1
bb <- function(y){
  x <- rnorm( N_obs, mu, sigma )
  # compute mean and sd
  data.frame(mean = mean( x ), #<<
             sd = sd(x))  #<<
}
res <- purrr::map_df(1:N, bb)
res %>% tidyr::gather() %>%
ggplot(aes(x = value)) +
  geom_histogram() +
  facet_grid(~key,scales="free_x")

]

.right-plot[


]


Mean = $\mu$? What is the distribution of T under null?

.left-code[ if $\sigma$ known

# statistic
T0 <- mu - res$mean
hist(T0, 
     breaks=getBreaks(T0, by=0.05),
     probability = T,
     main="")
x <- seq(-10,10,0.01)
lines(x, 
      # H_0 distribution
      dnorm(x,
            # sigma is known
            sd= (sigma/sqrt(N_obs))), #<<
      col=2)

$T|H_0 \sim N(0,\sigma/\sqrt{N_{obs}})$

]

.right-plot[


]


Improved test statistic T*

The t-statistic

$$T = \frac{\bar{X} - \mu}{\sigma/\sqrt{n}}$$ the Z-transformed sample mean, $\sim N(0,1)$.

The variance of the sampling distribution of the mean is the population variance divided by $n$ (given $iid$ data).

$$\sigma^2_{mean} = \sigma^2/n$$
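A quick simulation check of this identity, assuming $\sigma = 1$ and $n = 4$:

```r
set.seed(1)
n <- 4; sigma <- 1
means <- replicate(10000, mean(rnorm(n, sd = sigma)))
var(means)   # approximately sigma^2 / n = 0.25
sigma^2 / n
```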


Mean = $\mu$? What is the distribution of T*?

.left-code[ if $\sigma$ known

# improved statistic
T0 <- (mu - res$mean)/
  (sigma/sqrt(N_obs)) #<<
hist(T0, 
     breaks=getBreaks(T0, by=0.05),
     probability = T,
     main="")
x <- seq(-10,10,0.01)
# simplifies H_0 distribution
lines(x, 
      dnorm(x, 
            sd= 1), #<<
      col=2)

$T|H_0 \sim N(0,1)$ ]

.right-plot[


]


Mean = $\mu$? Unknown Variance

.left-code[ if $\sigma$ UNKNOWN

T0 <- (mu - res$mean) /
  (res$sd/sqrt(N_obs)) #<<
hist(T0,
     breaks=getBreaks(T0),
     probability = T, xlim=c(-6,6),
     ylim=c(0,0.4),main="")
x <- seq(-10,10,0.1)
lines(x, dnorm(x),#<<
      col="red")
lines(x,dt(x,df = N_obs - 1), #<<
      type="l",col="green",lwd=2)

$T|H_0 \sim t(df = N_{obs} - 1)$

]

.right-plot[


]
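The t-distribution's heavier tails matter for small samples; compare the two-sided $\alpha = 0.05$ critical values:

```r
qnorm(0.975)           # 1.96 under N(0,1)
qt(0.975, df = 4 - 1)  # 3.18 under t with N_obs - 1 = 3 df
```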


Mean = $\mu$? if sampling $x \sim Exp(1)$

.left-code[ Sampling from a skewed distribution.

$$x \sim Exp(1)$$ We know $\mu = 1/\lambda = 1$.

x <- seq(0,5,length = 100)
plot(c(0,(x)),
     c(0,dexp(x, rate = 1)),
     type = "l")
abline(h = 0)
abline(v = 1)

]

.right-plot[


]


Mean = $\mu$? $x \sim Exp(1)$ with $N_{obs} = 4$

.left-code[

N <- 10000;rate <- 1;
N_obs <- 4; mu <- 1;
bb_exp <- function(y){
  x <- rexp( N_obs, rate=rate ) #<<
  data.frame(mean = mean( x ),
             sd = sd(x)) 
  }
res <- purrr::map_df(1:N, bb_exp)
T0 <- (res$mean - mu)/ #<<
  (res$sd/sqrt(N_obs)) #<<
hist(T0, breaks=getBreaks(T0),
     probability = T, xlim=c(-12,6),
     ylim=c(0,0.4), main="")
x <- seq(-10,10,0.1)
lines(x,
      dt(x,df = N_obs - 1), #<<
      type="l",col="green",lwd=2)
abline(v = 0, col=2)

We simulate $4$ data points from an exponential distribution. Observe how the null distribution changes. ]

.right-plot[


]
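How much the skewness distorts the test can be quantified by the empirical type I error of a nominal 5% two-sided t-test; a sketch reusing T0 and N_obs from the chunk above:

```r
crit <- qt(0.975, df = N_obs - 1)
mean(abs(T0) > crit) # compare with the nominal 0.05
```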



exclude: true

Mean = $\mu$? $x \sim Exp(1)$ with $N_{obs} = 40$

.left-code[

# Simulating data from Null
N <- 10000; rate <- 1
N_obs <- 40 #<<
mu <- 1
bb_exp <- function(y){
  x <- rexp( N_obs, rate=rate )
  data.frame(mean = mean( x ), sd = sd(x)) 
}
res <- purrr::map_df(1:N, bb_exp)
T0 <- (res$mean - mu)/
  (res$sd/sqrt(N_obs))
hist(T0, breaks=getBreaks(T0),
     probability = T, xlim=c(-6,6),
     ylim=c(0,0.4), main="")
x <- seq(-10,10,0.1)
lines(x,dt(x,df = N_obs - 1),type="l",
      col="green",lwd=2)
abline(v = 0, col=2)

]

.right-plot[


]

.footnote[The t-test assumes that $x$ is generated from a normal distribution. But this time we simulate $40$ data points from an exponential distribution.]


Mean = $\mu$? $x \sim Exp(1)$ but with $N_{obs} = 100$

.left-code[

# Simulating data from Null
N <- 10000; rate <- 1; mu <- 1
N_obs <- 100 #<<
bb_exp <- function(y){
  x <- rexp( N_obs, rate=rate )
  data.frame(mean = mean( x ), sd = sd(x)) 
}
res <- purrr::map_df(1:N, bb_exp)
T0 <- (res$mean - mu)/
  (res$sd/sqrt(N_obs))
hist(T0, breaks=getBreaks(T0),
     probability = T, xlim=c(-6,6),
     ylim=c(0,0.4), main="")
x <- seq(-10,10,0.1)
lines(x,dt(x,df = N_obs - 1),type="l",
      col="green",lwd=2)
abline(v=0, col=2)

We simulate $100$ data points from an exponential distribution. Observe how the null distribution changes.

]

.right-plot[


]


Central Limit Theorem

--

--
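A compact illustration (a minimal sketch): the distribution of sample means of $Exp(1)$ data approaches $N(1, 1/\sqrt{n})$ as $n$ grows.

```r
par(mfrow = c(1, 3))
for (n in c(4, 40, 400)) {
  m <- replicate(5000, mean(rexp(n, rate = 1)))
  hist(m, breaks = 50, probability = TRUE, main = paste("n =", n))
  curve(dnorm(x, mean = 1, sd = 1/sqrt(n)), add = TRUE, col = 2)
}
```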


CLT in Proteomics?


Types of tests


Two sample t-test for equal means

$T = \frac{\bar{Y}_1 - \bar{Y}_2}{\sqrt{\frac{s_1^2}{N_1} + \frac{s_2^2}{N_2}}}$

$\upsilon = \frac{(s_1^2/N_1 + s_2^2/N_2)^2}{(s_1^2/N_1)^2/(N_1-1) + (s_2^2/N_2)^2/(N_2-1)}$
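In R this is the default behaviour of t.test (var.equal = FALSE gives the Welch test); a small sketch with simulated data:

```r
set.seed(1)
y1 <- rnorm(10); y2 <- rnorm(12, mean = 0.5)
tt <- t.test(y1, y2) # Welch two-sample t-test
tt$statistic  # T
tt$parameter  # the degrees of freedom from above
```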


Two sample randomization tests for equal means

  1. Suppose the 10 individuals in the study have been labelled as follows:

tmp <- list("Diet A" = as.integer(c(1, 2, 3, 4, 5)),
            "Diet B" = as.integer(c(6, 7, 8, 9, 10)))

library(flextable)
tmp <- data.frame(tmp)

tmp %>%
  head() %>%
  t() %>% 
  as.data.frame() %>% 
  rownames_to_column() %>% 
  flextable()

  2. Randomly re-assign the 10 individuals to the two groups.
  3. Re-calculate the test statistic for this permuted data.
  4. Repeat steps 2 and 3 to obtain $B$ sampled test statistics, denoted $T_1, \dots, T_B$ (a code sketch follows below).
  5. For a two-sided test, the estimated p-value of the observed test statistic $T_{obs}$ is:

$$\frac{1}{B} \sum^B_{i=1} I\left(|T_i| \geq |T_{obs}|\right)$$

.footnote[Steps 2-4 generate a sample from the null distribution.]
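A minimal randomization-test sketch implementing steps 2-5, assuming two numeric vectors y1 and y2 (perm_test is a hypothetical helper, not part of the package):

```r
perm_test <- function(y1, y2, B = 10000) {
  t_obs <- t.test(y1, y2)$statistic
  pooled <- c(y1, y2)
  n1 <- length(y1)
  t_perm <- replicate(B, {
    idx <- sample(length(pooled), n1) # random re-assignment
    t.test(pooled[idx], pooled[-idx])$statistic
  })
  mean(abs(t_perm) >= abs(t_obs)) # two-sided p-value estimate
}
set.seed(1)
perm_test(rnorm(5, mean = 1), rnorm(5, mean = 0))
```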


Types of error when testing hypothesis

A type I error (false positive) occurs when
the null hypothesis ($H_0$) is true, but is rejected.
The type I error rate or significance level ($\alpha$)
is the probability of rejecting the
null hypothesis given that it is true.

A type II error (false negative) occurs when
the null hypothesis is false,
but erroneously fails to be rejected.
The type II error rate is denoted by the Greek letter $\beta$
and is related to the power of a test (which equals $1-\beta$).

For a given test, the only way to reduce both error rates
is to increase the sample size, and this may not be feasible.
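power.t.test makes the trade-off concrete; a sketch with an assumed effect size of $\delta = 1$ and $\sigma = 1$:

```r
# power of a two-sample t-test at alpha = 0.05 grows with n
power.t.test(n = 5,  delta = 1, sd = 1, sig.level = 0.05)$power
power.t.test(n = 20, delta = 1, sd = 1, sig.level = 0.05)$power
```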

.img-right[ ]


Compare Tests

.pull-left[

dist1 <- function(N){rexp(N, 1)}
dist2 <- function(N){rexp(N, 10)}

xx <- p3003PBC::sample_stats(N_obs = 10,
                   dist1,
                   dist2,
                   samples = 100)
cbind( coin = table(xx$res_p[,"coin"] < 0.01),
t.test = table(xx$res_p[,"t.test"] < 0.01),
asymp.test = table(xx$res_p[,"asymp.test"] < 0.01)) -> x
rownames(x) <- c("Accept H0", "Reject H0")
knitr::kable(x, format="html")
par(mfrow=c(2,1))

old <- par(mar = c(5.1, 4.1, 1, 2.1)) # save old settings, shrink top margin

scatter.smooth(xx$res_p[,"coin"], xx$res_p[,"t.test"], log="xy", 
               xlab="log(p.value) randomization test",
               ylab="log(p.value) t.test",
               pch=16,cex=0.5, ylim=c(1e-3,1))

abline(c(0,1), col="blue")
abline(h=0.01,v=0.01, col="gray")

scatter.smooth(xx$res_p[,"coin"], xx$res_p[,"asymp.test"], log="xy", 
               xlab="log(p.value) randomization test",
               ylab="log(p.value) asymptotic test", pch=16,cex=0.5, ylim=c(1e-3,1))
abline(c(0,1), col="blue")
abline(h=0.01,v=0.01, col="gray")

]

.right-plot[


]


Compare Tests - Increasing sample size

.pull-left[

dist1 <- function(N){rexp(N, 1)}
dist2 <- function(N){rexp(N, 10)}
xx <- p3003PBC::sample_stats(N_obs = 20,
                   dist1,
                   dist2,
                   samples = 100)
cbind( coin = table(xx$res_p[,"coin"] < 0.01),
t.test = table(xx$res_p[,"t.test"] < 0.01),
asymp.test = table(xx$res_p[,"asymp.test"] < 0.01)) -> x
rownames(x) <- c("Accept H0", "Reject H0")
knitr::kable(x, format = "html")
par(mfrow=c(2,1))

old <- par(mar = c(5.1, 4.1, 1, 2.1)) # save old settings, shrink top margin

scatter.smooth(xx$res_p[,"coin"], xx$res_p[,"t.test"], log="xy", 
               xlab="log(p.value) randomization test",
               ylab="log(p.value) t.test",
               pch=16,cex=0.5, ylim=c(1e-3,1))

abline(c(0,1), col="blue")
abline(h=0.01,v=0.01, col="gray")

scatter.smooth(xx$res_p[,"coin"], xx$res_p[,"asymp.test"], log="xy", 
               xlab="log(p.value) randomization test",
               ylab="log(p.value) asymptotic test", pch=16,cex=0.5, ylim=c(1e-3,1))
abline(c(0,1), col="blue")
abline(h=0.01,v=0.01, col="gray")

]

.right-plot[


]


Compare Tests - log2 transforming

.pull-left[

dist1 <- function(N){log2(rexp(N, 1))}
dist2 <- function(N){log2(rexp(N, 10))}
xx <- p3003PBC::sample_stats(N_obs = 10,
                   dist1,
                   dist2,
                   samples = 100)
cbind( coin = table(xx$res_p[,"coin"] < 0.01),
t.test = table(xx$res_p[,"t.test"] < 0.01),
asymp.test = table(xx$res_p[,"asymp.test"] < 0.01)) -> x
rownames(x) <- c("Accept H0", "Reject H0")
knitr::kable(x, format = "html")
par(mfrow=c(2,1))

old <- par(mar = c(5.1, 4.1, 1, 2.1)) # save old settings, shrink top margin

scatter.smooth(xx$res_p[,"coin"], xx$res_p[,"t.test"], log="xy", 
               xlab="log(p.value) randomization test",
               ylab="log(p.value) t.test",
               pch=16,cex=0.5, ylim=c(1e-3,1))

abline(c(0,1), col="blue")
abline(h=0.01,v=0.01, col="gray")

scatter.smooth(xx$res_p[,"coin"], xx$res_p[,"asymp.test"], log="xy", 
               xlab="log(p.value) randomization test",
               ylab="log(p.value) asymptotic test", pch=16,cex=0.5, ylim=c(1e-3,1))
abline(c(0,1), col="blue")
abline(h=0.01,v=0.01, col="gray")

]

.right-plot[


]


exclude: true

R data.frame and the R formula interface

.left-code[

?sleep # help(sleep)
class(sleep)
head(sleep, n = 4)
table(sleep$group)
table(sleep$ID)

]

.right-code[

# model extra as function of group
a <- extra ~ group 
# model extra as function of group and ID
b <- extra ~ group + ID
c <- extra ~ group + ID + group:ID
d <- extra ~ group * ID
e <- extra ~ group * ID - group:ID
class(e)

Most functions in R work with the formula interface and data.frames:

```{r eval=FALSE}
some_function(formula, data = data_frame) # placeholders
```

]

---

# Repeated - correlated measurements

.left-code[
```r
par(mfrow=c(3,1), mar=c(4,4,0,0))
plot(extra ~ group, data = sleep) #<<
sleepwide <- tidyr::spread(sleep, group, extra)
plot( sleepwide$`1`, sleepwide$`2`) #<<
legend("topleft", legend = 
       paste("cor = ",
       round(
         cor(sleepwide$`1`, sleepwide$`2`),
         digits=2)))
sleepwide <- sleepwide %>% 
  dplyr::mutate(diff = `2`-`1`) 
boxplot(sleepwide$diff) #<<
```

]

.right-plot[


]

.footnote[To see sleep dataset documentation run ?sleep in R.]


Repeated - correlated measurements

$$t_{unpaired} = \frac{\bar{x}_1 - \bar{x}_2}{\sqrt{s^2(\frac{1}{n_1} + \frac{1}{n_2})}}$$

$$t_{paired} = \frac{\bar{d}}{\frac{s_d}{\sqrt{n}}}$$

with $\bar{d}$ the mean of the differences $d_i$, $i \in (1,\dots, n)$, where $d_i = x_{2i} - x_{1i}$ (the correlated samples in conditions $1$ and $2$).

Repeated - correlated measurements

.left-code[

test.p.values <- data.frame(
unpaired.p = 
  t.test(extra ~ group, #<<
         data = sleep, #<<
         paired = FALSE)$p.value, #<<
paired.p = 
  t.test(extra~group, #<<
         data = sleep, #<<
         paired = TRUE)$p.value, #<<
diff.p = 
  t.test(sleepwide$diff)$p.value #<<
)

# Traditional interface
with(sleep, 
     t.test(
       extra[group == 1 ],
       extra[group == 2] ))$p.value

.footnote[Top code block: two-sample t-test; middle code block: paired t-test; bottom code block: one-sample t-test on the differences. Note that the paired t-test gives the same result as the one-sample t-test of the differences.] ]

.pull-right[

knitr::kable(signif(test.p.values, digits=2), format = "html")

]


Missing data

sleepless <- datasets::sleep
sleepless$extra[c(1,4,6,12)] <- NA
sleepless$extra[1:4]
tryCatch(
  t.test(extra ~ group, data = sleepless, paired = TRUE),#<<
  error = function(e) e)

tryCatch(
  t.test(extra ~ group, data = sleepless, paired = FALSE),#<<
  error = function(e) e)

.footnote[Running the paired t-test with missing data fails: with the formula interface it cannot be specified which observations are paired.]
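One workaround sketch: make the pairing explicit by reshaping to wide format (as done for sleepwide earlier), then run a one-sample t-test on the differences; t.test drops the NA differences.

```r
wide <- tidyr::spread(sleepless, group, extra)
t.test(wide$`2` - wide$`1`)$p.value #<<
```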


Linear models

.left-code[

lm1 <- lm(extra ~ group, data = sleep) #<<
lm2 <- lm(extra ~ group + ID, data = sleep) #<<
lmermod <- 
  lmerTest::lmer(extra ~ group + (1|ID),
                 data = sleep) #<<

# collect coefficients into table
x <- bind_rows(
broom::tidy(anova(lm1))[1,],
broom::tidy(anova(lm2))[1,],
broom::tidy(anova(lmermod))[1,],
)
# format table
xx <- add_column(x, model = 
        c("lm_1","lm_2","lmer"),
        .before = 1) %>%
  dplyr::select(model, p.value) %>%
  dplyr::mutate(p.value = 
                  signif(p.value, digits=2))

]

.pull-right[

knitr::kable(xx, format="html")

]

.footnote[The same test can be performed using linear model or mixed effects linear models.]


Linear models - missing data

.left-code[

lm1 <- lm(extra ~ group, #<<
          data = sleepless) #<<
lm2 <- lm(extra ~ group + ID, #<<
          data = sleepless) #<<
lmermod <- lmerTest::lmer( #<<
  extra ~ group + (1|ID), #<<
  data = sleepless) #<<
x <- bind_rows(
broom::tidy(anova(lm1))[1,],
broom::tidy(anova(lm2))[1,],
broom::tidy(anova(lmermod))[1,],
)

xx <- add_column(x, 
        model = c("lm_1","lm_2","lmer"),
        .before = 1) %>%
  dplyr::select(model, p.value) %>%
  dplyr::mutate(p.value = 
                  signif(p.value, digits = 2))

]

.pull-right[

knitr::kable(xx, format = "html")

]

.footnote[Linear models also work with missing data. We can specify the pairing with extra ~ group + ID.]


Testing for Normality

.left-code[

hist(sqrt(rlnorm(10000)), breaks = 100)

]

.right-plot[


]


Testing for Normality

# 500 Shapiro-Wilk p-values each for n = 10, 100, 1000, 5000
# rlnorm draws from a log-normal distribution
x <- replicate(500, { #<<
  c(shapiro.test(sqrt(rlnorm(10)))$p.value,
    shapiro.test(sqrt(rlnorm(100)))$p.value,
    shapiro.test(sqrt(rlnorm(1000)))$p.value,
    shapiro.test(sqrt(rlnorm(5000)))$p.value)
})

rownames(x) <- c("n10", "n100", "n1000", "n5000")
# proportion of tests rejecting normality at 0.05
rowMeans(x < 0.05) #<<

.footnote[For small sample sizes (N = 10) there is not enough power to reject the null hypothesis. For large sample sizes there is enough power, but the deviation is NOT RELEVANT because of the CLT. Don't confuse "statistical significance" with "importance".]


Alternatively
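A common alternative to formal normality tests is a graphical check such as a normal QQ-plot; a minimal sketch:

```r
y <- sqrt(rlnorm(100))
qqnorm(y)
qqline(y, col = 2)
```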


What is the power for N = 10

A type II error (false negative) occurs when
the null hypothesis is false,
but erroneously fails to be rejected.
The power of a test equals $1-\beta$.

Hence, for N = 10 and data drawn as the square root
of a log-normal distribution, the power is:

.img-right[ ]

.left-code[

# power = proportion of rejections (H0 is false here)
rowMeans(x < 0.05)[1]
beta <- 1 - rowMeans(x < 0.05)[1] #<<
beta  # type II error rate
1 - beta  # power

]


Conclusion

.img-right[ ]


