# Ensure that libraries are loaded.
library(tidyverse)
library(learnr)
library(gradethis)
library(knitr)
library(kableExtra)
# New packages (must be installed before taking this tutorial)

# Tutorial-wide learnr options: exercise time limit of 60 and
# gradethis::grade_learnr as the function that checks exercise submissions.
tutorial_options(exercise.timelimit = 60, exercise.checker = gradethis::grade_learnr)
# Default knitr chunk options: hide code, warnings, and messages.
knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE)
# Ensure that the data is loaded for the remainder of this tutorial.
flights4 <- UsingRTutorials::flights4

# Store the results of a t test, so the htest object is available in the
# tutorial.
result_t <- flights4 %>%
  t.test(arr_delay0 ~ origin, data = ., na.action = "na.omit")

# Ensure that the final function report_ttest() is available in the tutorial.
#
# Builds one APA-style string from the elements `parameter` (df), `statistic`
# (t), `p.value`, and `conf.int` of `result_t`.
# The class check below is left disabled, exactly as in the original chunk:
#   if (is.null(result_t) || class(result_t) != "htest") {
#     return("#### Input is not a result of t.test()! ######")
#   } else { ... }
report_ttest <- function(result_t) {
  # Round to `decimals` places and print at least that many decimals.
  fixed <- function(value, decimals = 2) {
    format(round(value, digits = decimals), nsmall = decimals)
  }

  # Pieces are built in the order in which they appear in the output.
  df_text <- fixed(result_t$parameter)
  t_text <- fixed(result_t$statistic)
  # p values that would round to 0.000 are reported as "< 0.001".
  p_text <- ifelse(
    result_t$p.value >= 0.0005,
    paste0("= ", fixed(result_t$p.value, decimals = 3)),
    "< 0.001"
  )
  ci_lower <- fixed(result_t$conf.int[1])
  ci_upper <- fixed(result_t$conf.int[2])

  paste0(
    "*t* (", df_text, ") = ", t_text,
    ", *p* ", p_text,
    ", 95%CI[", ci_lower, ", ", ci_upper, "]"
  )
}
First 1.5 hours: Course content
Second 1.5 hours: Data project
Let us quickly rehearse piping.
# Show a plot of the logarithm of arrival delays for flights to
# Atlanta, Boston, or Buffalo in January.
# Starter code: each step stores an intermediate tibble that is passed as the
# first (data) argument of the next call.
flights4_january <- filter(flights4, month == 1)
flights4_jan_atlantabostonbuffalo <- filter(flights4_january, dest %in% c("ATL", "BOS", "BUF"))
flights4_jan_ATLBOSBUF_logdelay <- mutate(flights4_jan_atlantabostonbuffalo, log_arr_delay0 = log(arr_delay0 + 1))
ggplot(flights4_jan_ATLBOSBUF_logdelay, aes(x = log_arr_delay0)) +
  geom_area(stat = "count")

# Solution: the same plot as one pipe, with both filter conditions combined
# in a single filter() call.
flights4 %>%
  filter(month == 1 & dest %in% c("ATL", "BOS", "BUF")) %>%
  mutate(log_arr_delay0 = log(arr_delay0 + 1)) %>%
  ggplot(aes(x = log_arr_delay0)) +
  geom_area(stat = "count")

# Exercise check: feedback texts for a correct and an incorrect submission.
gradethis::grade_code(
  correct = "The (input) data argument disappears in a pipe if it is the first argument. And you correctly combined the two filter steps.",
  incorrect = "Don't save the plot as a data object, send it to the screen. Perhaps you should join the two filter functions."
)
The pipe:
.
in a pipePiping:
tidyverse
functions have this characteristic. # Check help on the `t.test()` function.
# Use a dot to specify the piped-in tibble as input data.
# Starter code: the argument after the formula is left empty for the student
# to complete.
flights4 %>%
  t.test(arr_delay0 ~ origin, )

# Solution: the piped-in tibble is passed explicitly as `data = .`.
flights4 %>%
  t.test(arr_delay0 ~ origin, data = .)

# Exercise check: feedback texts for a correct and an incorrect submission.
gradethis::grade_code(
  correct = "The `data` argument is not the first argument, so it must be specified and the input data tibble must be represented by a dot. As you have done!",
  incorrect = "Did you use the data argument in the t.test() function?"
)
In a pipe, .
represents the input object. Use it if:
# Get the value of the t statistic of an independent samples t test.
# The last pipe step extracts the `statistic` element from the piped-in
# result (the dot); `$statistic` is an element name, not a function.
flights4 %>%
  t.test(arr_delay0 ~ origin, data = .) %>%
  .$statistic # not a function
We will see soon where $statistic
comes from.
A List
may store anything (your perfect cupboard?)
Examples:
A t test yields a results object, which is a list.
# Store the results of a t test.
result_t <- flights4 %>%
  t.test(arr_delay0 ~ origin, data = .)
# Typing the object name prints it.
result_t # Default print method for these results.
result_t
here, so we can inspect the results object in the Environment (if you run this code in RStudio). Function str()
shows the contents of a list.
# Show the structure (elements and attributes) of the stored t test results.
str(result_t)
Note:
htest
(bottom of the output);print()
) know what to do with the list.
We get elements from a list:
[[]]
to the list name;$
to the list name.
result_t[["conf.int"]]
# Exercise check for extracting the element with [[ ]]; no text is shown for
# a correct submission.
gradethis::grade_code(
  correct = "",
  incorrect = "Perhaps you used the element number in the list instead of the element name to extract the confidence level. That is OK."
)

# Solution: extract the same element with the $ operator.
result_t$conf.int

# Exercise check for the $ version; no text is shown for an incorrect
# submission.
gradethis::grade_code(
  correct = "`$conf.int` is shorthand for `[['conf.int']]`.",
  incorrect = ""
)
Let's go one level down in the list of t test results:
`attributes()`
function.
str(result_t[4])
# The confidence level is an attribute (`attr` in the structure overview
# presented above), so use the `attributes()` function.
attributes()

# Pull out the confidence interval from the results. Complete this code.
attributes(result_t)

# Have a look at the output of the attributes() function. How can you get the
# number of the confidence level?
attributes(result_t$conf.int)

# Solution: attributes() returns a list, so the confidence level can be
# extracted from it by name with $.
attributes(result_t$conf.int)$conf.level
# Exercise check for extracting the confidence level: feedback texts for a
# correct and an incorrect submission.
# Fixed the spelling of "dollar" in the feedback shown to the student.
gradethis::grade_code(
  correct = "We can use the dollar sign directly after the attributes() function. Isn't that nice?",
  incorrect = "If you just get the number 0.95, you are fine. Probably, you used [[]] instead of $. If you get more than just the number, you have to go down one level in the list that `attributes()` generates."
)
Instead of the attribute value, we can get the attribute name with the names()
function.
# If you want the name of an attribute, apply the `names()` function to an
# attribute that you extract with attributes().
# Starter code: the argument of attributes() is left blank for the student.
names(attributes( ))

# Solution: names of the attributes of the confidence interval element.
names(attributes(result_t$conf.int))

# Exercise check; no text is shown for a correct submission.
gradethis::grade_code(
  correct = "",
  incorrect = "Did you use [[]] instead of $?"
)
Let us practice some more.
# Build your code in steps. First, find the group means in the results.
str(result_t)

# Second, pull the mean from the results data object. You know how to get
# only the second mean (complete the code below).
result_t$estimate

# Third, round the mean with the round() function (use Help). Complete the
# code below.
round(result_t$estimate[[2]] )

# Fourth, pull the name of the second group from the results data object.
# Complete the code below.
names(result_t$estimate)[2]

# Solution: name of the second group, and its mean rounded to one decimal.
names(result_t$estimate)[2]
round(result_t$estimate[[2]], digits = 1)

# Exercise check; no text is shown for a correct submission.
gradethis::grade_code(
  correct = "",
  incorrect = "Check out the hints to this exercise. And mind the blanks in the resulting sentence."
)
The code below extracts the results of the t test in APA style.
# Concatenate the rounded elements of `result_t` into one APA-style string.
paste0( # base R function to concatenate strings
  "*t* (", round(result_t$parameter, digits = 2), ") = ", # df
  round(result_t$statistic, digits = 2), # t
  ", *p* = ", round(result_t$p.value, digits = 3), # p
  ", 95%CI[", round(result_t$conf.int[1], digits = 2), # 95%CI lower
  ", ", round(result_t$conf.int[2], digits = 2), "]" # 95%CI upper
)
The stars will turn t and p into italics in a knitted document.
ifelse()
Improving the reported p value:
# APA-style report of the t test stored in `result_t`, with an improved
# p value: format(round(...), nsmall = ...) keeps trailing zeros, and p values
# that would round to 0.000 are reported as "< 0.001".
# Fix: the ifelse() condition was inverted (it printed "= 0.000" for tiny
# p values and "< 0.001" for all others) and the p value used nsmall = 2
# although it is rounded to three decimals.
paste0(
  "*t* (", format(round(result_t$parameter, digits = 2), nsmall = 2), ") = ", # df
  format(round(result_t$statistic, digits = 2), nsmall = 2), # t
  ", *p* ",
  ifelse(result_t$p.value >= 0.0005,
    # p rounds to at least 0.001: report the exact value with three decimals.
    paste0("= ", format(round(result_t$p.value, digits = 3), nsmall = 3)),
    # p would round to 0.000: report the conventional bound instead.
    "< 0.001"
  ),
  ", 95%CI[", format(round(result_t$conf.int[1], digits = 2), nsmall = 2), # lower
  ", ", format(round(result_t$conf.int[2], digits = 2), nsmall = 2), "]" # upper
)
Functions in mathematics: y = f(x)
.
Functions in R: y <- f(x)
.
Meaning: Do something to data object x
-- f(x)
-- and store result in data object y
.
For short: Transform x
into y
.
Left-hand data object (y
):
- Does not exist: new data object created. Can be a function!
- Exists: data object overwritten.
- Not named: output to screen (console).
Step 1: Add function()
, enclose code within {
and }
, and store.
# Step 1 version of report_ttest(): the reporting code wrapped in a function.
# It takes no arguments and reads the t test results from the data object
# `result_t` outside the function.
# Returns one APA-style string (df, t, p, and the confidence interval bounds).
report_ttest <- function() {
  paste0(
    "*t* (", format(round(result_t$parameter, digits = 2), nsmall = 2), ") = ",
    format(round(result_t$statistic, digits = 2), nsmall = 2),
    ", *p* ",
    # p values below 0.0005 are reported as "< 0.001".
    ifelse(result_t$p.value >= 0.0005,
      paste0("= ", format(round(result_t$p.value, digits = 3), nsmall = 3)),
      "< 0.001"
    ),
    ", 95%CI[", format(round(result_t$conf.int[1], digits = 2), nsmall = 2),
    ", ", format(round(result_t$conf.int[2], digits = 2), nsmall = 2), "]"
  )
}
report_ttest
is the name of the new function.
Step 2: Specify user input.
The user must specify the data object containing the results of t.test()
.
result_t.
report_ttest <- function() { paste0("*t* (", format(round(result_t$parameter, digits = 2), nsmall = 2), ") = ", format(round(result_t$statistic, digits = 2), nsmall = 2), ", *p* ", ifelse(result_t$p.value >= 0.0005, paste0("= ", format(round(result_t$p.value, digits = 3), nsmall = 3)),"< 0.001"), ", 95%CI[", format(round(result_t$conf.int[1], digits = 2), nsmall = 2), ", ", format(round(result_t$conf.int[2], digits = 2), nsmall = 2), "]") }
# Solution for Step 2: the results object is now the argument `x`, and every
# reference to `result_t` in the body is replaced by `x`.
# Returns one APA-style string (df, t, p, and the confidence interval bounds).
report_ttest <- function(x) {
  paste0(
    "*t* (", format(round(x$parameter, digits = 2), nsmall = 2), ") = ",
    format(round(x$statistic, digits = 2), nsmall = 2),
    ", *p* ",
    # p values below 0.0005 are reported as "< 0.001".
    ifelse(x$p.value >= 0.0005,
      paste0("= ", format(round(x$p.value, digits = 3), nsmall = 3)),
      "< 0.001"
    ),
    ", 95%CI[", format(round(x$conf.int[1], digits = 2), nsmall = 2),
    ", ", format(round(x$conf.int[2], digits = 2), nsmall = 2), "]"
  )
}

# Exercise check; no text is shown for a correct submission.
gradethis::grade_code(
  correct = "",
  incorrect = "Don't forget to add x as an argument to the function. Replace result_t everywhere by x!"
)
Encapsulation:
Step 3: Ensure that all data object names in the body are:
x
), or
# The function is created here.
# NOTE(review): the two confidence-interval terms still read `result_t`
# instead of the argument `x`, so they are taken from outside the function.
# This looks like the deliberate flaw that Step 3 asks the reader to find --
# confirm before changing it.
report_ttest <- function(x) {
  paste0(
    "*t* (", format(round(x$parameter, digits = 2), nsmall = 2), ") = ",
    format(round(x$statistic, digits = 2), nsmall = 2),
    ", *p* ",
    ifelse(x$p.value >= 0.0005,
      paste0("= ", format(round(x$p.value, digits = 3), nsmall = 3)),
      "< 0.001"
    ),
    ", 95%CI[", format(round(result_t$conf.int[1], digits = 2), nsmall = 2),
    ", ", format(round(result_t$conf.int[2], digits = 2), nsmall = 2), "]"
  )
}

# Execute another t test: average arrival delay of carriers AA and UA.
result_t2 <- flights4 %>%
  filter(carrier %in% c("AA", "UA")) %>%
  t.test(arr_delay0 ~ carrier, data = .)

# New test results.
result_t2

# New test results with function report_ttest().
report_ttest(result_t2)
library()
command in the function for every package used in the function.
base
and stats
are automatically loaded.
paste0()
, format()
, and round()
are used, which are part of the base
package.
`...`
is a special argument for R functions:
# Demonstration of the `...` argument: all extra arguments in the call are
# passed on unchanged to paste(), and the pasted text is printed before the
# APA-style string is returned.
report_ttest <- function(x, ...) {
  # Forward everything captured by `...` to paste() and print the result.
  print(paste(...))
  paste0(
    "*t* (", format(round(x$parameter, digits = 2), nsmall = 2), ") = ",
    format(round(x$statistic, digits = 2), nsmall = 2),
    ", *p* ",
    ifelse(x$p.value >= 0.0005,
      paste0("= ", format(round(x$p.value, digits = 3), nsmall = 3)),
      "< 0.001"
    ),
    ", 95%CI[", format(round(x$conf.int[1], digits = 2), nsmall = 2),
    ", ", format(round(x$conf.int[2], digits = 2), nsmall = 2), "]"
  )
}

# Use the function.
# The four words and `sep` all end up in `...`.
report_ttest(result_t, "The", "difference", "was", "significant.", sep=' ')
# Starter code: the function has no `digits` argument yet, while the test
# call below already passes `digits = 5`.
report_ttest <- function(x) {
  paste0(
    "*t* (", format(round(x$parameter, digits = 2), nsmall = 2), ") = ",
    format(round(x$statistic, digits = 2), nsmall = 2),
    ", *p* ",
    ifelse(x$p.value >= 0.0005,
      paste0("= ", format(round(x$p.value, digits = 3), nsmall = 3)),
      "< 0.001"
    ),
    ", 95%CI[", format(round(x$conf.int[1], digits = 2), nsmall = 2),
    ", ", format(round(x$conf.int[2], digits = 2), nsmall = 2), "]"
  )
}

# Test the function.
report_ttest(result_t, digits = 5)

# Solution: `digits` (default 2) sets the number of decimals for df, t, and
# the confidence interval bounds; the p value keeps three decimals.
report_ttest <- function(x, digits = 2) {
  paste0(
    "*t* (", format(round(x$parameter, digits = digits), nsmall = digits), ") = ",
    format(round(x$statistic, digits = digits), nsmall = digits),
    ", *p* ",
    ifelse(x$p.value >= 0.0005,
      paste0("= ", format(round(x$p.value, digits = 3), nsmall = 3)),
      "< 0.001"
    ),
    ", 95%CI[", format(round(x$conf.int[1], digits = digits), nsmall = digits),
    ", ", format(round(x$conf.int[2], digits = digits), nsmall = digits), "]"
  )
}
report_ttest(result_t, digits = 5)

# Exercise check; no text is shown for a correct submission.
gradethis::grade_code(
  correct = "",
  incorrect = "You must use digits instead of the number 2 everywhere. And don't forget to specify the default number of digits in the function's argument."
)
Function report_ttest()
is meant to display t test results in APA style to the reader.
This means that the result of the function must be displayed within a sentence in the report.
This is called inline code.
Inline code:
Example R Markdown text with inline code:
There is a statistically significant difference in average delay between the two airports, `r report_ttest(result_t)`.
The R Markdown text shown above if the document is knitted:
There is a statistically significant difference in average delay between the two airports,
r report_ttest(result_t)
.
Note the italics of t and p.
Our function still has flaws: It gives errors or wrong output if the input is not a t.test()
result (class htest
).
# Run a regression.
# The result is stored so it can be used as (wrong) input for report_ttest().
result_lm <- flights4 %>%
  lm(arr_delay0 ~ origin, data = .)

# Run a regression.
result_lm <- flights4 %>%
  lm(arr_delay0 ~ origin, data = .)
# Use regression results in function report_ttest().
report_ttest(result_lm)
if () {} else {}
Let's fix it.
# Starter code: report_ttest() without any check on the kind of input it
# receives.
report_ttest <- function(x) {
  paste0(
    "*t* (", format(round(x$parameter, digits = 2), nsmall = 2), ") = ",
    format(round(x$statistic, digits = 2), nsmall = 2),
    ", *p* ",
    ifelse(x$p.value >= 0.0005,
      paste0("= ", format(round(x$p.value, digits = 3), nsmall = 3)),
      "< 0.001"
    ),
    ", 95%CI[", format(round(x$conf.int[1], digits = 2), nsmall = 2),
    ", ", format(round(x$conf.int[2], digits = 2), nsmall = 2), "]"
  )
}

# Applied to t test results.
report_ttest(result_t)
# Applied to regression results.
report_ttest(result_lm)
# Applied to empty object.
report_ttest(NULL)
# Check the use of `return()` in a function.

# The if - else flow control must be at the start of the function, because it
# makes no sense to pull results from a data object containing something else
# than a t test result.
# Skeleton: both branches are still empty.
report_ttest <- function(result_t) {
  if (class(result_t) != "htest") {
  } else {
  }
}

# Just return a message if the data object is not of class htest.
# Skeleton: the message text is still an empty string.
report_ttest <- function(result_t) {
  if (class(result_t) != "htest") {
    return("")
  } else {
  }
}

# Put the code to create the formatted results in the else{} part.
report_ttest <- function(result_t) {
  if (class(result_t) != "htest") {
    return("#### Input is not a result of t.test()! ######")
  } else {
    # add code here
  }
}
# Solution: report_ttest() with an input check.
# If the class of `x` is not "htest", a message string is returned and the
# rest of the function is skipped; otherwise the APA-style string is built
# and returned as the value of the last expression.
report_ttest <- function(x) {
  if (class(x) != "htest") {
    return("#### Input is not a result of t.test()! ######")
  } else {
    paste0(
      "*t* (", format(round(x$parameter, digits = 2), nsmall = 2), ") = ",
      format(round(x$statistic, digits = 2), nsmall = 2),
      ", *p* ",
      ifelse(x$p.value >= 0.0005,
        paste0("= ", format(round(x$p.value, digits = 3), nsmall = 3)),
        "< 0.001"
      ),
      ", 95%CI[", format(round(x$conf.int[1], digits = 2), nsmall = 2),
      ", ", format(round(x$conf.int[2], digits = 2), nsmall = 2), "]"
    )
  }
}
# Apply the function to t test results, regression results, and NULL.
report_ttest(result_t)
report_ttest(result_lm)
report_ttest(NULL)

# Exercise check: feedback texts for a correct and an incorrect submission.
gradethis::grade_code(
  correct = "A function returns the result of the last step in the code or what is marked by `return()`.",
  incorrect = "If you get the message 'non-numeric argument to mathematical function', input other than t test results is still being treated as if it contains t test results."
)
A function returns:
return()
command, which stops further execution of the function. An F test also yields results as a htest
class.
How can we ensure that report_ttest()
does not report F test results?
# report_ttest() that also rejects F test results.
#
# Argument:
#   x  A results object; expected to be the output of t.test().
# Returns:
#   A message string if `x` is not of class "htest" or if its `method`
#   element identifies an F test; otherwise one APA-style string with df, t,
#   p, and the confidence interval bounds.
#
# Fix: the message read "at.test()"; it now reads "t.test()", matching the
# message used by the earlier versions of this function.
report_ttest <- function(x) {
  # `||` short-circuits: `x$method` is only inspected for htest objects.
  if (class(x) != "htest" || x$method == "F test to compare two variances") {
    return("#### Input is not a result of t.test()! ######")
  } else {
    paste0(
      "*t* (", format(round(x$parameter, digits = 2), nsmall = 2), ") = ",
      format(round(x$statistic, digits = 2), nsmall = 2),
      ", *p* ",
      # p values below 0.0005 are reported as "< 0.001".
      ifelse(x$p.value >= 0.0005,
        paste0("= ", format(round(x$p.value, digits = 3), nsmall = 3)),
        "< 0.001"
      ),
      ", 95%CI[", format(round(x$conf.int[1], digits = 2), nsmall = 2),
      ", ", format(round(x$conf.int[2], digits = 2), nsmall = 2), "]"
    )
  }
}

# Applied to previous t test results.
report_ttest(result_t)

# Applied to F test results.
flights4 %>%
  var.test(arr_delay0 ~ origin, data = .) %>%
  report_ttest()
In the report_ttest()
function, we deal with single values instead of vectors:
||
(or) and &&
(and) instead of |
and &
. The latter are for vectorised operations, e.g., in mutate()
. ==
is vectorised. Unless you're sure that there is one value (as in class()
), use non-vectorised identical()
. Sorry, no fancy stuff in this tutorial.
It is time to finish tidying your group's project data.
Use the remaining time to design and create your data visualization.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.