# Slide-deck setup for xaringan.
# see: https://github.com/yihui/xaringan
# install.packages("xaringan")
# see:
# https://github.com/yihui/xaringan/wiki
# https://github.com/gnab/remark/wiki/Markdown
options(htmltools.dir.version = FALSE)
options(width = 110)  # console output width used when printing results
options(digits = 4)   # significant digits used when printing numbers
.pull-left3[
How to calculate basic descriptive statistics
mean()
, median()
, sd()
, ...
How to conduct hypothesis tests and how to work with htest
objects
t.test()
, cor.test()
, aov()
, ...]
.pull-right3[
# Overview: descriptive statistics and hypothesis tests on ChickWeight.

# Mean weight
mean(ChickWeight$weight)

# Standard deviation of Time
sd(ChickWeight$Time)

# T-test comparing weights from Diets 1 and 2
# (object names are case-sensitive: the dataset is `ChickWeight`)
t.test(formula = weight ~ Diet,
       data = ChickWeight,
       subset = Diet %in% c(1, 2))

# Correlation test between weight and Time
cor.test(formula = ~ weight + Time,
         data = ChickWeight)
.pull-left3[
| Statistic| R Function|
|------:|----:|
| Mean| mean(x)
|
| Median| median(x)
|
| Mode| names(which.max(table(x)))
|
| Standard Deviation| sd(x)
|
]
.pull-right3[
# Descriptive statistics examples.
sd(c(5, 3, 6, 3, 2, 6))    # Standard deviation
mean(ChickWeight$weight)   # Mean weight
median(ChickWeight$Time)   # Median Time
]
.pull-left3[
| Hypothesis Test| R Function|
|------:|----:|
| T-test| t.test()
|
| Correlation Test| cor.test()
|
| Chi-Square Test| chisq.test()
|
| ANOVA, Post-hoc| aov(), TukeyHSD()
|
]
.pull-right3[
# One-sample, two-sided t-test of six values against a null mean of 0.
t.test(
  x = c(4, 3, 6, 5, 3, 2),
  mu = 0,
  alternative = "two.sided"
)
]
.pull-left3[
?t.test
| Hypothesis Test| Help code|
|------:|:----|
| T-test| ?t.test()
|
| Correlation Test| ?cor.test()
|
| Chi-Square Test| ?chisq.test()
|
| ANOVA| ?aov()
|
]
.pull-right3[
?t.test
# Embed the image stored at images/ttesthelp_ss.png in the slide
# (presumably a screenshot of the ?t.test help page -- confirm against the file).
knitr::include_graphics("images/ttesthelp_ss.png")
]
.pull-left3[
t.test
| Argument| Description| Default |
|------:|----:|:---|
| x
, formula, data
|Vector OR a formula and data | Required |
| mu
| Null hypothesis| 0 |
| alternative
| Alternative Hyp| "two.sided"
|
]
.pull-right3[
# How t.test() behaves with missing, default, and explicit arguments.

# 0: Won't work! Mandatory arguments not specified
# t.test()  # kept commented out: errors because `x` is missing

# 1: Will work and use default arguments
t.test(x = ChickWeight$weight)

# 1b: Same as above
t.test(x = ChickWeight$weight,
       mu = 0,
       alternative = "two.sided")

# 2: Specified arguments
t.test(x = ChickWeight$weight,
       mu = 120,
       alternative = "greater")
]
.pull-left3[
formula
argument
formula = y ~ a + b + ...
Means...
Model a dependent variable
y
as a function of a
and b
and ...
data
containing all variables in the formula, and optional subset
arguments to specify which cases in data
to include. ]
.pull-right3[
# Template (pseudo-code, not meant to be run as-is): the general shape of a
# formula-based test call. `my.test`, `my.data`, `y`, `a` and `b` are placeholders.
my.test(formula = y ~ a + b,  # Formula
        data = my.data,       # Dataframe
        ...)                  # Additional
y
is the dependent variable (e.g., age), a
and b
are independent variables
data
is a dataframe containing the variables in formula
; (y
, a
, b
)...
additional arguments specific to test
]
.pull-left3[
"htest"
which contain many values
$
operator:
| Element| Result|
|------:|----:|
| x$statistic
| A test statistic|
| x$parameter
| Degrees of freedom|
| x$p.value
| The p-value|
| x$conf.int
| Confidence interval|
]
.pull-right3[
# One-sample t-test comparing the weights of chicks to 120
weight.tt <- t.test(x = ChickWeight$weight,
                    mu = 120,
                    alternative = "two.sided")

# Class of the returned object
class(weight.tt)

# What's in the weight.tt object?
names(weight.tt)
]
.pull-left3[
ChickWeight
# Show the first rows of a reproducibly shuffled copy of ChickWeight.
set.seed(102)                                   # fix the RNG so the shuffle is repeatable
x <- ChickWeight[sample(nrow(ChickWeight)), ]   # permute the row order
rownames(x) <- NULL                             # reset row labels to 1..n
head(x)
]
.pull-right3[
# Embed the image stored at images/cutechicken.jpg in the slide.
knitr::include_graphics("images/cutechicken.jpg")
]
t.test()
.pull-left4[
ChickWeight
# Show the first rows of a reproducibly shuffled copy of ChickWeight.
set.seed(102)                                   # fix the RNG so the shuffle is repeatable
x <- ChickWeight[sample(nrow(ChickWeight)), ]   # permute the row order
rownames(x) <- NULL                             # reset row labels to 1..n
head(x)
]
.pull-right4[
Is the mean weight of the chicks significantly different from 120?
# One-sample t-test: is the mean chick weight different from 120?
A <- t.test(x = ChickWeight$weight,     # Vector of values
            alternative = "two.sided",  # Two sided test
            mu = 120)                   # Null is 120
A
]
t.test()
.pull-left4[
ChickWeight
# Show the first rows of a reproducibly shuffled copy of ChickWeight.
set.seed(102)                                   # fix the RNG so the shuffle is repeatable
x <- ChickWeight[sample(nrow(ChickWeight)), ]   # permute the row order
rownames(x) <- NULL                             # reset row labels to 1..n
head(x)
]
.pull-right4[
Is the mean weight of the chicks on Diet 1 different from Diet 2?
# Two-sample t-test: do chicks on Diet 1 and Diet 2 differ in mean weight?
B <- t.test(formula = weight ~ Diet,     # Formula
            data = ChickWeight,          # Data in ChickWeight
            subset = Diet %in% c(1, 2),  # Only Diets 1,2
            alternative = "two.sided")   # Two-sided test
B
]
cor.test()
.pull-left4[
ChickWeight
# Show the first rows of a reproducibly shuffled copy of ChickWeight.
set.seed(102)                                   # fix the RNG so the shuffle is repeatable
x <- ChickWeight[sample(nrow(ChickWeight)), ]   # permute the row order
rownames(x) <- NULL                             # reset row labels to 1..n
head(x)
]
.pull-right4[
Is there a correlation between weight and Time?
cor.test()
, formula looks like formula = ~ a + b
# Correlation test between weight and Time (one-sided formula: ~ a + b).
C <- cor.test(formula = ~ weight + Time,  # Formula
              data = ChickWeight)         # Data in ChickWeight
C
]
chisq.test()
.pull-left4[
ChickWeight
# Show the first rows of a reproducibly shuffled copy of ChickWeight.
set.seed(102)                                   # fix the RNG so the shuffle is repeatable
x <- ChickWeight[sample(nrow(ChickWeight)), ]   # permute the row order
rownames(x) <- NULL                             # reset row labels to 1..n
head(x)
]
.pull-right4[
Are there more observations from one Diet than another?
chisq.test()
, main argument should be a table of values created from the table()
function:
table(ChickWeight$Diet)                        # Table of counts
D <- chisq.test(x = table(ChickWeight$Diet))
D
]
aov()
.pull-left4[
ChickWeight
# Show the first rows of a reproducibly shuffled copy of ChickWeight.
set.seed(102)                                   # fix the RNG so the shuffle is repeatable
x <- ChickWeight[sample(nrow(ChickWeight)), ]   # permute the row order
rownames(x) <- NULL                             # reset row labels to 1..n
head(x)
]
.pull-right4[
Is there difference in weights based on Diet?
summary()
to an aov
object prints a nice table.
E <- aov(formula = weight ~ Diet,  # Formula
         data = ChickWeight)       # Data in ChickWeight
summary(E)                         # Show a summary of the results
]
TukeyHSD()
.pull-left4[
ChickWeight
# Show the first rows of a reproducibly shuffled copy of ChickWeight.
set.seed(102)                                   # fix the RNG so the shuffle is repeatable
x <- ChickWeight[sample(nrow(ChickWeight)), ]   # permute the row order
rownames(x) <- NULL                             # reset row labels to 1..n
head(x)
]
.pull-right4[
Which specific pairs of Diets differed?
TukeyHSD()
to an aov
object to get post-hoc tests.
D <- aov(formula = weight ~ Diet,  # Formula
         data = ChickWeight)       # Data in ChickWeight
TukeyHSD(D)                        # Conduct post-hoc tests on D
]
.pull-left2[
What are the arguments?
What format or class should the arguments be?
When in doubt, always look at the help files and examples at the end.
Save hypothesis tests as new objects, then apply names()
to see what elements it contains, then extract what you want with $
# Run test and save as test_A
test_A <- t.test(formula = weight ~ Diet,
                 data = ChickWeight,
                 subset = Diet %in% c(1, 2))

names(test_A)     # What is in the object?
test_A$statistic  # Ah ok! Show me the test statistic
]
.pull-right2[
?t.test
# Embed the image stored at images/ttesthelp_ss.png in the slide
# (presumably a screenshot of the ?t.test help page -- confirm against the file).
knitr::include_graphics("images/ttesthelp_ss.png")
]
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.