# Slide-deck setup for a xaringan presentation.
# References:
#   xaringan:         https://github.com/yihui/xaringan
#                     (install with install.packages("xaringan"))
#   xaringan wiki:    https://github.com/yihui/xaringan/wiki
#   remark markdown:  https://github.com/gnab/remark/wiki/Markdown
#
# All session options are set in a single call: the htmltools version-suffix
# flag is switched off, console output is 110 characters wide, and numbers
# print with 4 significant digits.
options(
  htmltools.dir.version = FALSE,
  width = 110,
  digits = 4
)

Statistics

.pull-left3[

In this tutorial we will cover

]

.pull-right3[

Examples

# Quick tour of the functions covered in this tutorial, all applied to the
# ChickWeight data.

# Mean weight
mean(ChickWeight$weight)

# Standard deviation of Time
sd(ChickWeight$Time)

# T-test comparing weights from Diets 1 and 2.
# The data frame is `ChickWeight` with a capital W: R names are
# case-sensitive, so `Chickweight` fails with "object not found".
t.test(formula = weight ~ Diet,
       data = ChickWeight,
       subset = Diet %in% c(1, 2))

# Correlation test between weight and Time
cor.test(formula = ~ weight + Time,
         data = ChickWeight)

]

Two types of statistics: Descriptive and Inferential

.pull-left3[

Descriptive

Examples

| Statistic| R Function|
|------:|----:|
| Mean| mean(x)|
| Median| median(x)|
| Mode| names(which.max(table(x)))|
| Standard Deviation| sd(x)|

]

.pull-right3[

R implementation

# Standard deviation of a small numeric vector
sd(c(5, 3, 6, 3, 2, 6))

# Mean of the weight column
mean(ChickWeight$weight)

# Median of the Time column
median(ChickWeight$Time)

]


Two types of statistics: Descriptive and Inferential

.pull-left3[

Inferential

Examples

| Hypothesis Test| R Function|
|------:|----:|
| T-test| t.test()|
| Correlation Test| cor.test()|
| Chi-Square Test| chisq.test()|
| ANOVA, Post-hoc| aov(), TukeyHSD()|

]

.pull-right3[

R implementation

# One-sample, two-sided t-test of six values against a null mean of 0
t.test(
  x = c(4, 3, 6, 5, 3, 2),
  alternative = "two.sided",
  mu = 0
)

]


Inferential Statistics

.pull-left3[

Different tests require different arguments

Examples

| Hypothesis Test| Help code|
|------:|:----|
| T-test| ?t.test()|
| Correlation Test| ?cor.test()|
| Chi-Square Test| ?chisq.test()|
| ANOVA| ?aov()|

]

.pull-right3[

Always check help menus!

# Open the help page for t.test (lists its arguments and their defaults)
?t.test

# Embed an image in the slide; judging by the file name this is presumably a
# screenshot of the t.test help page (path is relative to the deck's folder)
knitr::include_graphics("images/ttesthelp_ss.png")

]


Inferential Statistics

.pull-left3[

Arguments to hypothesis tests

Ex) Arguments to t.test

| Argument| Description| Default |
|------:|----:|:---|
| x, formula, data|Vector OR a formula and data | Required |
| mu| Null hypothesis| 0 |
| alternative| Alternative Hyp| "two.sided"|

]

.pull-right3[

Specifying arguments to a hypothesis test

# 0: Won't work! Mandatory arguments not specified
#    (this call is expected to raise an error; it is shown on purpose)
t.test()

# 1: Will work and use default arguments
#    (only the data vector is supplied; mu and alternative take their defaults)
t.test(x = ChickWeight$weight)

# 1b: Same as above, with the defaults mu = 0 and
#     alternative = "two.sided" written out explicitly
t.test(x = ChickWeight$weight,
       mu = 0,
       alternative = "two.sided")

# 2: Specified arguments
#    (null hypothesis mu = 120, one-sided alternative "greater")
t.test(x = ChickWeight$weight,
       mu = 120,
       alternative = "greater")

]


Inferential Statistics

.pull-left3[

Formula

formula = y ~ a + b + ...

Means...

Model a dependent variable y as a function of a and b and ...

]

.pull-right3[

General structure of a hypothesis test and formula

# Schematic template only: my.test and my.data are placeholders for a real
# test function (e.g. t.test) and a real data frame; this is not meant to run.
my.test(formula = y ~ a + b, # Formula: model y as a function of a and b
        data = my.data,      # Dataframe in which the formula variables live
        ...                  # Additional, test-specific arguments
        )

]


Inferential Statistics

.pull-left3[

Assigning hypothesis test objects

Examples of what's in htest objects

| Element| Result|
|------:|----:|
| x$statistic| A test statistic|
| x$parameter| Degrees of freedom|
| x$p.value| The p-value|
| x$conf.int| Confidence interval|

]

.pull-right3[

What's in an htest object?

# Store a one-sample t-test (chick weights vs. a null mean of 120) so the
# result object itself can be inspected afterwards.
weight.tt <- t.test(
  x = ChickWeight$weight,
  alternative = "two.sided",
  mu = 120
)

# Which class does the stored result have?
class(weight.tt)

# Which named elements does the stored result contain?
names(weight.tt)

]


Examples with ChickWeight Data

.pull-left3[

ChickWeight
# Build `x`: the ChickWeight rows in a reproducibly shuffled order
# (fixed seed), with row names reset, then preview the first rows.
set.seed(102)
x <- ChickWeight[sample.int(nrow(ChickWeight)), ]
row.names(x) <- NULL
head(x, n = 6L)

]

.pull-right3[

# Embed the image images/cutechicken.jpg in the slide
# (path is relative to the deck's folder)
knitr::include_graphics("images/cutechicken.jpg")

]


t-tests with t.test()

.pull-left4[

ChickWeight data

ChickWeight
# Build `x`: the ChickWeight rows in a reproducibly shuffled order
# (fixed seed), with row names reset, then preview the first rows.
set.seed(102)
x <- ChickWeight[sample.int(nrow(ChickWeight)), ]
row.names(x) <- NULL
head(x, n = 6L)

]

.pull-right4[

One sample t-test

Is the mean weight of the chicks significantly different from 120?

# One-sample t-test: is the mean chick weight different from 120?
A <- t.test(
  x = ChickWeight$weight,     # vector of observed weights
  mu = 120,                   # mean under the null hypothesis
  alternative = "two.sided"   # two-sided test
)

print(A)

]


t-tests with t.test()

.pull-left4[

ChickWeight data

ChickWeight
# Build `x`: the ChickWeight rows in a reproducibly shuffled order
# (fixed seed), with row names reset, then preview the first rows.
set.seed(102)
x <- ChickWeight[sample.int(nrow(ChickWeight)), ]
row.names(x) <- NULL
head(x, n = 6L)

]

.pull-right4[

Two sample t-test

Is the mean weight of the chicks on Diet 1 different from Diet 2?

# Two-sample t-test: does mean weight differ between Diet 1 and Diet 2?
B <- t.test(
  formula = weight ~ Diet,      # weight as a function of Diet
  data = ChickWeight,           # variables are taken from ChickWeight
  subset = Diet %in% c(1, 2),   # keep only Diets 1 and 2
  alternative = "two.sided"     # two-sided test
)

print(B)

]


Correlation test with cor.test()

.pull-left4[

ChickWeight data

ChickWeight
# Build `x`: the ChickWeight rows in a reproducibly shuffled order
# (fixed seed), with row names reset, then preview the first rows.
set.seed(102)
x <- ChickWeight[sample.int(nrow(ChickWeight)), ]
row.names(x) <- NULL
head(x, n = 6L)

]

.pull-right4[

Correlation Test

Is there a correlation between weight and Time?

# Correlation test: are weight and Time correlated?
C <- cor.test(
  formula = ~ weight + Time,   # one-sided formula naming both variables
  data = ChickWeight           # variables are taken from ChickWeight
)

print(C)

]


Chi-Square test with chisq.test()

.pull-left4[

ChickWeight data

ChickWeight
# Build `x`: the ChickWeight rows in a reproducibly shuffled order
# (fixed seed), with row names reset, then preview the first rows.
set.seed(102)
x <- ChickWeight[sample.int(nrow(ChickWeight)), ]
row.names(x) <- NULL
head(x, n = 6L)

]

.pull-right4[

Chi-Square test

Are there more observations from one Diet than another?

# How many observations does each Diet have?
table(ChickWeight$Diet)

# Chi-square test on those counts, stored as D
D <- chisq.test(
  x = table(ChickWeight$Diet)
)

print(D)

]


ANOVA with aov()

.pull-left4[

ChickWeight data

ChickWeight
# Build `x`: the ChickWeight rows in a reproducibly shuffled order
# (fixed seed), with row names reset, then preview the first rows.
set.seed(102)
x <- ChickWeight[sample.int(nrow(ChickWeight)), ]
row.names(x) <- NULL
head(x, n = 6L)

]

.pull-right4[

ANOVA

Is there a difference in weights based on Diet?

# One-way ANOVA: weight modelled as a function of Diet, using ChickWeight
E <- aov(formula = weight ~ Diet, data = ChickWeight)

# Show a summary of the results (the ANOVA table)
summary(E)

]


Post-hoc tests with TukeyHSD()

.pull-left4[

ChickWeight data

ChickWeight
# Build `x`: the ChickWeight rows in a reproducibly shuffled order
# (fixed seed), with row names reset, then preview the first rows.
set.seed(102)
x <- ChickWeight[sample.int(nrow(ChickWeight)), ]
row.names(x) <- NULL
head(x, n = 6L)

]

.pull-right4[

Post Hoc

Which specific pairs of Diets differed?

# Fit the one-way ANOVA of weight on Diet (data: ChickWeight) and store it as D
D <- aov(formula = weight ~ Diet, data = ChickWeight)

# Post-hoc pairwise comparisons between Diets, based on the fitted model D
TukeyHSD(x = D)

]


Final notes

.pull-left2[

What are the arguments?

What format or class should the arguments be?

# Keep the test result in an object (test_A) instead of only printing it
test_A <- t.test(
  formula = weight ~ Diet,
  data = ChickWeight,
  subset = Diet %in% c(1, 2)
)

# List the named elements stored in the result
names(test_A)

# Pull out a single element: the test statistic
test_A$statistic

]

.pull-right2[

# Open the help page for t.test to check its arguments and their formats
?t.test

# Embed an image in the slide; judging by the file name this is presumably a
# screenshot of the t.test help page (path is relative to the deck's folder)
knitr::include_graphics("images/ttesthelp_ss.png")

Questions?

]


Statistics Practical



therbootcamp/BaselRBootcamp2017 documentation built on May 3, 2019, 10:45 p.m.