library(tidyverse) library(learnr) library(shiny) library(gt) library(PPBDS.data) knitr::opts_chunk$set(echo = FALSE, message = FALSE) options(tutorial.exercise.timelimit = 60, tutorial.storage="local")
Confirm that you have the correct version of PPBDS.data installed by pressing "Run Code."
packageVersion('PPBDS.data')
The answer should be ‘0.3.2.9005’, or a higher number. If it is not, you should upgrade your installation by issuing these commands:
remove.packages('PPBDS.data') library(remotes) remotes::install_github('davidkane9/PPBDS.data')
Strictly speaking, it should not be necessary to remove a package. Just installing it again should overwrite the current version. But weird things sometimes happen, so removing first is the safest approach.
``` {r name, echo=FALSE} question_text( "Student Name:", answer(NULL, correct = TRUE), allow_retry = TRUE, incorrect = "Ok" )
## Email ``` {r email, echo=FALSE} question_text( "Email:", answer(NULL, correct = TRUE), incorrect = "Ok", try_again_button = "Modify your answer", allow_retry = TRUE )
quiz( question_text( "Give a one sentence definition of a Preceptor Table.", allow_retry = TRUE, answer(NULL, correct = TRUE), incorrect = "Ok" ) )
Retrys are not allowed on multiple choice questions, so read and answer carefully!
quiz( question("What is the Fundamental Problem of Causal Inference?", answer("It is impossible to observe multiple potential outcomes at once.", correct = TRUE, message = "Nice!"), answer("We don't know the potential outcomes for people not surveyed.", message = "Go read the section on Causal Effects"), allow_retry = FALSE ))
quiz(question_text("Explain a 'counterfactual' in a brief phrase. 'Potential outcome' might be helpful to use in your answer.", allow_retry = FALSE, answer(NULL, correct = TRUE), incorrect = "Ok") )
quiz( question_text("In one sentence, explain what a causal effect is according to the RCM?", allow_retry = FALSE, answer(NULL, correct = TRUE), incorrect = "Ok") )
quiz(question("Choose the response variable.", answer("$t$"), answer("$u$"), answer("$Y$", correct = TRUE), answer("$c$"), random_answer_order = TRUE) )
Consider both what did happen --- the factual --- and what might have happened --- the counterfactual.
quiz(question_text("In two sentences, what makes a Preceptor Table ideal, and why is this generally impossible to observe? You should use the phrase 'potential outcomes' in your answer.", allow_retry = FALSE, answer(NULL, correct = TRUE), trim = FALSE, incorrect = "Ok") )
quiz( question("If we did not have an ideal Preceptor Table, could we directly calculate the causal effect for an individual like Yao?", answer("No", correct = TRUE), answer("Yes"), allow_retry = FALSE) )
quiz( question_text("In one sentence, define what an estimand is, and give an example.", allow_retry = FALSE, answer(NULL, correct = TRUE), trim = FALSE, incorrect = "Ok"), question_checkbox( "Which of the following are examples of estimands?", answer("average treatment effect", correct = TRUE), answer("percent change", correct = TRUE), answer("44%", correct = FALSE), answer("most positive causal effect", correct = TRUE), answer("+2", correct = FALSE), allow_retry = FALSE, random_answer_order = TRUE) )
Use the following ideal Preceptor Table to answer some questions about potential estimands we might be interested in.
tibble(subject = c("Yao", "Emma", "Cassidy", "Tahmid", "Diego"), ytreat = c("11", "13", "10", "8", "6"), ycontrol = c("9", "10", "11", "10", "4"), ydiff = c("?", "?", "?", "?", "?")) %>% gt() %>% cols_label(subject = md("$$ID$$"), ytreat = md("$$Y_t(u)$$"), ycontrol = md("$$Y_c(u)$$"), ydiff = md("$$Y_t(u) - Y_c(u)$$")) %>% cols_move(columns = vars(ytreat, ycontrol), after = vars(subject)) %>% tab_style(cell_borders(sides = "right"), location = cells_body(columns = vars(subject))) %>% cols_align(align = "center", columns = TRUE) %>% cols_align(align = "left", columns = vars(subject)) %>% fmt_markdown(columns = TRUE) %>% tab_spanner(label = "$$Outcomes$$", vars(ytreat, ycontrol)) %>% tab_spanner(label = "$$Estimands$$", vars(ydiff))
quiz( question_text("What is the causal effect of the treatment on Yao (answer as +/-X)?", answer("+2", correct = TRUE), allow_retry = TRUE), question("For how many of the five is the causal effect of the treatment positive?", answer("1"), answer("2"), answer("3", correct = TRUE), answer("4"), answer("5"), allow_retry = FALSE), question_text("On whom did the treatment have the most negative causal effect?", answer("Tahmid", correct = TRUE), allow_retry = TRUE), question_text("What was the treatment effect percentage on Diego? (answer as +/-X%)", answer("+50%", correct = TRUE), allow_retry = TRUE) )
quiz( question_text("In one sentence, describe the difference between an actual and ideal Preceptor Table.", allow_retry = FALSE, answer(NULL, correct = TRUE), trim = FALSE, incorrect = "Ok" ), question_checkbox( "Which of the following are types of columns you might expect to see in a Preceptor Table", answer("ID", correct = TRUE), answer("Outcomes", correct = TRUE), answer("Covariates", correct = TRUE), answer("Estimands", correct = TRUE), answer("Predictions", correct = FALSE), answer("Observations", correct = FALSE), allow_retry = FALSE, random_answer_order = T ), question_text("What is the correct order of columns in a Preceptor Table following the ID column? (list in order from left to right starting with the ID column).", allow_retry = FALSE, answer(NULL, correct = TRUE), trim = FALSE, incorrect = "Ok" ) )
quiz( question_text("In a very short sentence or phrase, why do we need a model?", allow_retry = FALSE, answer(NULL, correct = TRUE), trim = FALSE, incorrect = "Ok" ) )
In these exercises, we will assume that the causal effect, $\tau$ (pronunced tau), is the same for everyone.
quiz( question_text("In one sentence, what is the big assumption we are making about causal effect by only using one value for tau?", allow_retry = FALSE, answer(NULL, correct = TRUE), trim = FALSE, incorrect = "Ok" ) )
Use the following actual Preceptor Table to answer questions about a single value for tau.
# First, we create a tibble with the values we want for the table tibble(subject = c("Yao", "Emma", "Cassidy", "Tahmid", "Diego"), ytreat = c("13", "11", "?", "?", "5"), ycontrol = c("?", "?", "10", "12", "?"), ydiff = c("?", "?", "?", "?", "?")) %>% # Then, we use the gt function to make it pretty gt() %>% cols_label(subject = md("$$ID$$"), ytreat = md("$$Y_t(u)$$"), ycontrol = md("$$Y_c(u)$$"), ydiff = md("$$Y_t(u) - Y_c(u)$$")) %>% cols_move(columns = vars(ytreat, ycontrol), after = vars(subject)) %>% tab_style(cell_borders(sides = "right"), location = cells_body(columns = vars(subject))) %>% cols_align(align = "center", columns = TRUE) %>% cols_align(align = "left", columns = vars(subject)) %>% tab_spanner(label = "$$Outcomes$$", vars(ytreat, ycontrol)) %>% tab_spanner(label = "$$Estimands$$", vars(ydiff)) %>% fmt_markdown(columns = TRUE)
actualPT <- tibble(subject = c("Yao", "Emma", "Cassidy", "Tahmid", "Diego"), ytreat = c(13, 11, NA, NA, 5), ycontrol = c(NA, NA, 10, 12, NA), ydiff = c(NA, NA, NA, NA, NA))
# A tibble with the values used in the above Preceptor Table is loaded as # actualPT. You can use this console for your convenience, but nothing you do in # the console will be graded. glimpse(actualPT)
quiz( question_text("Describe in one sentence/equation how you would estimate Yao's $Y_c(u)$. (Do not use actual numbers, use tau in your explanation).", allow_retry = FALSE, answer(NULL, correct = TRUE), trim = FALSE, incorrect = "Ok" ), question_text("Describe in one sentence/equation how you would estimate Tahmids's $Y_t(u)$. (Do not use actual numbers, use tau in your explanation).", allow_retry = FALSE, answer(NULL, correct = TRUE), trim = FALSE, incorrect = "Ok" ), question_text("Describe in one sentence/equation how you would estimate a single value for tau.", allow_retry = FALSE, answer(NULL, correct = TRUE), trim = FALSE, incorrect = "Ok" ) )
The same Preceptor Table and console to minimize scrolling.
# First, we create a tibble with the values we want for the table tibble(subject = c("Yao", "Emma", "Cassidy", "Tahmid", "Diego"), ytreat = c("13", "11", "?", "?", "5"), ycontrol = c("?", "?", "10", "12", "?"), ydiff = c("?", "?", "?", "?", "?")) %>% # Then, we use the gt function to make it pretty gt() %>% cols_label(subject = md("$$ID$$"), ytreat = md("$$Y_t(u)$$"), ycontrol = md("$$Y_c(u)$$"), ydiff = md("$$Y_t(u) - Y_c(u)$$")) %>% cols_move(columns = vars(ytreat, ycontrol), after = vars(subject)) %>% tab_style(cell_borders(sides = "right"), location = cells_body(columns = vars(subject))) %>% cols_align(align = "center", columns = TRUE) %>% cols_align(align = "left", columns = vars(subject)) %>% tab_spanner(label = "$$Outcomes$$", vars(ytreat, ycontrol)) %>% tab_spanner(label = "$$Estimands$$", vars(ydiff)) %>% fmt_markdown(columns = TRUE)
actualPT <- tibble(subject = c("Yao", "Emma", "Cassidy", "Tahmid", "Diego"), ytreat = c(13, 11, NA, NA, 5), ycontrol = c(NA, NA, 10, 12, NA), ydiff = c(NA, NA, NA, NA, NA))
# A tibble with the values used in the above Preceptor Table is loaded as # actualPT. You can use this console for your convenience, but nothing you do in # the console will be graded. glimpse(actualPT)
``` {r question-single-tau3} quiz( question_text("What is your estimate for a single value for tau? (answer as +/-X.XX)", answer("-1.33", correct = TRUE), allow_retry = TRUE ), question_text("What is your estimate for $Y_c(u)$ for Emma? (answer as XX.XX)", answer("12.33", correct = TRUE), allow_retry = TRUE ), question_text("What is your estimate for $Y_t(u)$ for Cassidy? (answer as X.XX)", answer("8.67", correct = TRUE), allow_retry = TRUE ) )
## Two values for tau ### As we do in the book, let's now assume that the causal effect varies across sexes. We will use two estimates for tau. (Cassidy and Emma are female, Tahmid, Diego, and Yao are male). ### Exercise 1 ```r # First, we create a tibble with the values we want for the table tibble(subject = c("Yao", "Emma", "Cassidy", "Tahmid", "Diego"), ytreat = c("13", "11", "?", "?", "5"), ycontrol = c("?", "?", "10", "12", "?"), ydiff = c("?", "?", "?", "?", "?")) %>% # Then, we use the gt function to make it pretty gt() %>% cols_label(subject = md("$$ID$$"), ytreat = md("$$Y_t(u)$$"), ycontrol = md("$$Y_c(u)$$"), ydiff = md("$$Y_t(u) - Y_c(u)$$")) %>% cols_move(columns = vars(ytreat, ycontrol), after = vars(subject)) %>% tab_style(cell_borders(sides = "right"), location = cells_body(columns = vars(subject))) %>% cols_align(align = "center", columns = TRUE) %>% cols_align(align = "left", columns = vars(subject)) %>% tab_spanner(label = "$$Outcomes$$", vars(ytreat, ycontrol)) %>% tab_spanner(label = "$$Estimands$$", vars(ydiff)) %>% fmt_markdown(columns = TRUE)
actualPT <- tibble(subject = c("Yao", "Emma", "Cassidy", "Tahmid", "Diego"), ytreat = c(13, 11, NA, NA, 5), ycontrol = c(NA, NA, 10, 12, NA), ydiff = c(NA, NA, NA, NA, NA))
# A tibble with the values used in the above Preceptor Table is loaded as # actualPT. You can use this console for your convenience, but nothing you do in # the console will be graded. actualPT
quiz( question_text("How would you calculate $\\tau_F$? Use only words and no numbers.", allow_retry = FALSE, answer(NULL, correct = TRUE), trim = FALSE, incorrect = "Ok" ), question_text("What is $\\tau_M$? (Answer as +/-X)", answer("-3", correct = TRUE), allow_retry = TRUE ), question_text("What is your new estimate for Diego's $Y_c(u)$? (Answer as X)", answer("8", correct = TRUE), allow_retry = TRUE ), question_text("In a couple of sentences, explain how we have two different estimates for Emma depending on assumptions we make.", allow_retry = FALSE, answer(NULL, correct = TRUE), trim = FALSE, incorrect = "Ok" ) )
We will no longer make any assumptions about $\tau$. Instead, we are interested in estimating the average treatment effect, both for the entire sample and for subsets of it.
tibble(subject = c("Yao", "Emma", "Cassidy", "Tahmid", "Diego"), ytreat = c("11", "13", "?", "?", "6"), ycontrol = c("?", "?", "11", "10", "?"), ydiff = c("?", "?", "?", "?", "?")) %>% gt() %>% cols_label(subject = md("$$\\mathbf{Subject}$$"), ytreat = md("$$Y_t(u)$$"), ycontrol = md("$$Y_c(u)$$"), ydiff = md("$$Y_t(u) - Y_c(u)$$")) %>% tab_style(cell_borders(sides = "right"), location = cells_body(columns = vars(subject))) %>% tab_style(cell_text(weight = "bold"), location = cells_body(columns = vars(subject))) %>% cols_align(align = "center", columns = TRUE) %>% fmt_markdown(columns = TRUE)
quiz( question("Estimate the ATE based on the data given to you in the above Preceptor Table.", answer("-0.5", correct = TRUE), answer("+1"), answer("-1.33"), answer("+2"), allow_retry = TRUE) )
quiz( question_text("Before assigning any number to our causal effect, let's work using a single causal effect for everyone called $\\tau$. What would be Yao's $Y_c(u)$. You can write tau for $\\tau$. Don't put spaces in your answer. Your answer should look like 13+tau or 10-tau.", allow_retry = TRUE, answer("11-tau", correct = TRUE) ), question_text("Using the same format, write Cassidy's $Y_t(u)$ in terms of $\\tau$ and the known value for Cassidy's $Y_c(u)$.", allow_retry = TRUE, answer("11+tau", correct = TRUE) ), question_text("What is Cassidy's outcome under treatment if we assume tau to be the ATE we calculated above, which was -0.5? Note that this answer will just be a number, without any symbol.", allow_retry = TRUE, answer("10.5", correct = TRUE) ), question_text("What is Tahmid's outcome under treatment if we assume tau to be the ATE we calculated?", allow_retry = TRUE, answer("9.5", correct = TRUE) ), question_text( "What is the treatment effect for the males in the sample?", allow_retry = TRUE, answer("-1.5", correct = TRUE) ), question_text( "What is the treatment effect for the females in the sample?", allow_retry = TRUE, answer("2", correct = TRUE) ), question_text( "What is Diego's $Y_c(u)$ if we assume his causal effect to be the estimated ATE for males", allow_retry = TRUE, answer("7.5", correct = TRUE) ), question_text( "What is Emma's $Y_c(u)$ if we assume her causal effect is the estimated ATE for females", allow_retry = TRUE, answer("11", correct = TRUE) ) )
quiz( question("Which of the following is NOT a reason why $\\widehat{ATE}$ a useful estimand?", answer("It captures a clear and useful estimator"), answer("If the treatment is randomly assigned, the estimator is unbiased"), answer("You can use to fill out Preceptor Table if you assume the treatment effect is the same for everyone"), answer("$\\widehat{ATE}$ always captures the correct sign (+ or -) of the true ATE", correct = TRUE, message = "Nice! This is untrue. Our estimates for ATE can often get even the direction of the ATE wrong."), allow_retry = TRUE ), question_text("What is the difference between $\\widehat{ATE}$ and ATE.", allow_retry = TRUE, answer(NULL, correct = TRUE), incorrect = "Ok" ), question_text("Describe the heterogeneous treatment effect for the variation between individuals.", allow_retry = TRUE, answer(NULL, correct = TRUE), incorrect = "Ok" ), question_text("Describe heterogeneous treatment effect for the variation between different sub-groups or variables.", allow_retry = TRUE, answer(NULL, correct = TRUE), incorrect = "Ok" ) )
Every problem in data science is ultimately a missing data problem. If the ideal Preceptor Table is available, we need only do some arithmetic.
quiz( question_text("In addition to not being able to observe counterfactal outcomes, what is another source of missing data in Preceptor Tables?", allow_retry = TRUE, answer(NULL, correct = TRUE), incorrect = "Ok" ), question_text("In order to apply the causal effect of a sample to the general population, our samaple must be ______ of the population.", allow_retry = TRUE, answer(NULL, correct = TRUE), incorrect = "Ok" ), question_text("Explain representativeness as a concept in sampling.", allow_retry = TRUE, answer(NULL, correct = TRUE), incorrect = "Ok" ), question_text("Describe an Infinite Preceptor Table in a few sentences. Use the word 'assumption' in your answer.", allow_retry = TRUE, answer(NULL, correct = TRUE), incorrect = "Ok" ), question_text("Is it fair to assume that the causal effect on Sam at this moment in time is the same as the causal effect would be a year from now? Explain why or why not.", allow_retry = TRUE, answer(NULL, correct = TRUE), incorrect = "Ok" ), # DK: Not sure I like this question. What is the answer? question_text("Why might Infinite Preceptor Tables include additional treatment columns?", allow_retry = TRUE, answer(NULL, correct = TRUE), incorrect = "Ok" ), # DK: How closely connected are these to the book? question_text("When you begin working on a causal question, you begin with the idea of an Infinite Preceptor Table. What three assumptions do you make to narrow this idea down to a Preceptor Table which you can work with?", allow_retry = TRUE, answer(NULL, correct = TRUE), incorrect = "Ok" ) )
## MB: This question is bad, but based directly in the chapter. Keep? quiz( question_text("Describe the two main forms of uncertainty with using ATE as a uniform measure of causal effect.", allow_retry = TRUE, answer(NULL, correct = TRUE), incorrect = "Ok" ), question("Which of the following is NOT a good reason to doubt the certainty of your ATE?", answer("The ATE wasn't calculated using an equal number of control and treatment observations", correct = TRUE), answer("The sample used to calculate the ATE is too small"), answer("The sample used to calculaate the ATE was not random"), answer("The ATE doesn't account for individual variation in each observation"), allow_retry = TRUE ), question("Which of the following would make the $\\widehat{ATE}$ of a sample closer to the true ATE of a sample? (Select all that apply)", answer("A truly random asssignment mechanism", correct = TRUE), answer("Less variance in what is being measured", correct = TRUE), answer("A larger sample", correct = TRUE), answer("Adding more Spanish speakers to the platform.", message = "Being exposed to more Spanish speakers doesn't necessarily matter if their prescence already served as an effective treatment."), allow_retry = TRUE ), question_text("Explain what 'confounding' means in the context of an assignment mechanism.", allow_retry = TRUE, answer(NULL, correct = TRUE), incorrect = "Ok" ), question_text("The trains experiment is redone so that everyone in a certain region of the train platform is treated and another region is not. Is this a confounding element? Why?", allow_retry = TRUE, answer(NULL, correct = TRUE), incorrect = "Ok" ), question("In the context of the trains dataset, which of the following describes selection bias, a key problem to avoid when establishing an assignment mechanism?", answer("The study selected a specific train station rather than measuring the effect across many"), answer("Spanish speakers do not choose which platforms to sit on randomly despite the assignment mechanism assuming so", correct = TRUE), answer("The individuals who were selected for interviews were disproportionately non-Spanish speakers."), allow_retry = TRUE ) )
quiz( question_text("You are told to redo the train experiment using block randomized assignment. You have 150 commuters and the ability to sort them into 3 different platforms. How many commuters do you put on each of the 3 platforms and how many recieve the treatment?", allow_retry = TRUE, answer(NULL, correct = TRUE), incorrect = "Ok" ), question_text("Explain in a few sentences why a researcher may want to use block randomized assignment.", allow_retry = TRUE, answer(NULL, correct = TRUE), incorrect = "Ok" ), question_text("A few of the platforms in the station are too loud for the Spanish-speakers to be heard by those around them. How might this jeopardize randomization and serve as a confounding factor?", allow_retry = TRUE, answer(NULL, correct = TRUE), incorrect = "Ok" ) )
quiz( question_text("Describe validity in a few sentences. Give an example of a mistake the researchers could make that would call the validity of the model into question.", answer(NULL, correct = TRUE), allow_retry = TRUE, incorrect = "Ok" ), question("What is external validity?", answer("The data we are collecting is a legitimate measure of what we are trying to observe"), answer("The population used in the study is representative of the population we hope to generalize the findings to", correct = TRUE), answer("It is when the findings of a study affirm what is already known in exisiting literature"), allow_retry = TRUE ), question_text("What is the Hawthorne effect?", allow_retry = TRUE, answer(NULL, correct = TRUE), incorrect = "Ok" ), question_text("A drug is being used to test weight loss, however it can only be used on subjects 3, 4, and 5, as subjects 1 and 2 have a disease that keeps the drug from being safe for them. Should all 5 subjects be used to calculate the causal effect of the weight loss drug, or just subjects 3, 4, and 5? Why?", allow_retry = TRUE, answer(NULL, correct = TRUE), incorrect = "Ok" ), question_text("In the train experiment, there were people in the experiement who waited in their cars, therefore didn't hear Spanish speakers before stepping onto the train. Should their results have been measured and included in the data? Compare this situation to the one in the previous question in your answer.", # MB: Cass answer(NULL, correct = TRUE), allow_retry = TRUE, incorrect = "Ok" ), question_text("Humility is a key step in modelling and data analysis where we acknowledge all of the things in the real world that may have messed up our experiment or predictions. Use humility and discuss one of these things in a few sentences.", # MB: CASS. examples: headphgones in couldnt hear spanish, etc allow_retry = TRUE, answer(NULL, correct = TRUE), incorrect = "Ok" ) )
Miro and Jessica are two mice being given a weight-increasing drug with a causal effect of +2 oz. Miro weighs 13 oz and Jessica weighs 4 oz prior to treatment. However, their scale only goes up to 14 oz.
tibble(subject = c("Jessica", "Miro"), ytreat = c("?", "?"), ycontrol = c("4", "13"), ydiff = c("+2", "+2")) %>% gt() %>% cols_label(subject = md("$$\\mathbf{Subject}$$"), ytreat = md("$$Y_t(u)$$"), ycontrol = md("$$Y_c(u)$$"), ydiff = md("$$Y_t(u) - Y_c(u)$$")) %>% tab_style(cell_borders(sides = "right"), location = cells_body(columns = vars(subject))) %>% tab_style(cell_text(weight = "bold"), location = cells_body(columns = vars(subject))) %>% cols_align(align = "center", columns = TRUE) %>% fmt_markdown(columns = TRUE)
quiz( question_text("What is Jessica's $Y_t(u)$ assuming $Y_t(u) - Y_c(u)$ to be his treatment effect? Leave out units in all answers for this section. Just provide the number.", answer("6", correct = TRUE), allow_retry = TRUE ), question_text("What is Miro's $Y_t(u)$ assuming $Y_t(u) - Y_c(u)$ to be his treatment effect?", answer("15", correct = TRUE), allow_retry = TRUE ), question_text("What does Miro's scale say at the end of the experiment, assuming $Y_t(u) - Y_c(u)$ to be his treatment effect?", answer("14", correct = TRUE), allow_retry = TRUE ), question("Which of the two went go off the scale after applying the treatment effect?", answer("Miro", correct = TRUE), answer("Jessica"), allow_retry = TRUE ), question_text("Write a few sentences describing censoring in statistics.", allow_retry = TRUE, answer(NULL, correct = TRUE), incorrect = "Ok" ), question_text("If many of the observations were as high as Miro's, is it the problem of the scale in the study? Why?", allow_retry = TRUE, answer(NULL, correct = TRUE), incorrect = "Ok" ) )
submission_ui
submission_server()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.