library(gradethis)
library(learnr)
library(qsslearnr)
library(tidyverse)
library(modelr)
# Route learnr exercise checking through gradethis.
tutorial_options(exercise.checker = gradethis::grade_learnr)
# Do not echo chunk source code in the rendered tutorial by default.
knitr::opts_chunk$set(echo = FALSE)
# Report title; presumably read by the submission helpers at the end of the
# tutorial -- TODO confirm in qsslearnr.
tut_reptitle <- "QSS Tidyverse Tutorial 6: Output Report"
# NOTE(review): `n` is not referenced anywhere in the visible code; confirm
# whether something outside this view still needs it.
n <- 10
# Load the `progresa` data set from the qss package.
data(progresa, package = "qss")

Extending Linear Regression

Conceptual questions

# Conceptual warm-up: two multiple-choice questions, each with exactly one
# answer marked as correct.
quiz(
  question(
    text = "How can regression models be used to draw causal inference?",
    answer("by predicting counterfactual outcomes", correct = TRUE),
    answer("by randomizing outcomes"),
    answer("by fitting a line")
  ),
  question(
    text = "Even if the average treatment effect is positive, the same treatment may not affect everyone in the positive direction. This is an example of:",
    answer("heterogenous treatment effects", correct = TRUE),
    answer("homogenous treatment effects"),
    answer("both of these")
  ),
  caption = ""
)

First regression

Today we will use a data set designed to investigate the impact of Mexico's conditional cash transfer program, Progresa, on political outcomes such as voter turnout. See Section 4.5.2 in QSS for more information on this study. We have loaded the progresa data into this tutorial session. First, run a regression of turnout (t2000) on treatment status (treatment) to get a sense for the estimated effect of receiving the program earlier versus later. Save the regression to the object fit and print that object.


# Exercise template: replace the _placeholders_ with the outcome variable,
# the predictor, and the data frame, then print the fitted model.
fit <- lm(_y_ ~ _x_, data = _dataname_)
fit
# Check: each pass_if() compares the submitted result (.result) with one
# accepted way of writing the same regression. The variants differ only in
# how the variables are referenced, and two of them attach a hint to the
# success message.
grade_result(
  pass_if(~ isTRUE(all.equal(.result, lm(t2000 ~ treatment, data = progresa)))),
  pass_if(~ isTRUE(all.equal(.result, lm(progresa$t2000 ~ progresa$treatment, data = progresa))),
          "Right! Remember that when you pass the `data` argument, you don't need to explicitly refer to the data frame in the formula."),
  pass_if(~ isTRUE(all.equal(.result, lm(progresa$t2000 ~ progresa$treatment))),
          "Good job! You might try using the `data` argument next time.")
)

Coefficients

# Fit the bivariate regression of turnout on treatment status.
fit <- lm(t2000 ~ treatment, data = progresa)
# NOTE(review): the same fit is repeated here; presumably one copy belongs to
# the exercise setup and the other to the exercise itself -- confirm against
# the tutorial's chunk layout before removing either.
fit <- lm(t2000 ~ treatment, data = progresa)

# Extract the estimated intercept and treatment coefficient.
coef(fit)
# Grade by comparing the submitted code with the solution code.
grade_code()

Add predictions and residuals to the original data set

# Fit the bivariate regression of turnout on treatment status.
fit <- lm(t2000 ~ treatment, data = progresa)

# Pass your fitted model to the add_predictions function
progresa <- progresa %>%
  add_predictions(_model_) 

# View the variables in progresa 
# Completed version of the steps above: refit the model, add its predictions
# to progresa, and inspect the resulting columns.
fit <- lm(t2000 ~ treatment, data = progresa)

progresa <- progresa %>%
  add_predictions(fit) 

glimpse(progresa)
# Grade by comparing the submitted code with the solution code.
grade_code()
# Fit the bivariate regression of turnout on treatment status.
fit <- lm(t2000 ~ treatment, data = progresa)

progresa <- progresa %>%

# View the variables in progresa 
# Completed version of the steps above: refit the model, add its residuals
# to progresa, and inspect the resulting columns.
fit <- lm(t2000 ~ treatment, data = progresa)

progresa <- progresa %>%
  add_residuals(fit)

glimpse(progresa)
# Grade by comparing the submitted code with the solution code.
grade_code()

Add another covariate

Now, add the variable avgpoverty as an additional control variable to the regression (saving it as fit_2var) and print that new model.


# Model answer: regress turnout on treatment status while controlling for
# average poverty, then print the fitted model.
fit_2var <- lm(t2000 ~ treatment + avgpoverty, data = progresa)
fit_2var
# Check: each pass_if() compares the submitted result (.result) with one
# accepted way of writing the same regression. The variants differ only in
# how the variables are referenced, and two of them attach a hint to the
# success message.
grade_result(
  pass_if(~ isTRUE(all.equal(.result, lm(t2000 ~ treatment + avgpoverty, data = progresa)))),
  pass_if(~ isTRUE(all.equal(.result, lm(progresa$t2000 ~ progresa$treatment + progresa$avgpoverty, data = progresa))),
          "Right! Remember that when you pass the `data` argument, you don't need to explicitly refer to the data frame in the formula."),
  # Compare with the fitted model itself rather than coef(...): .result is an
  # lm object, so a bare coefficient vector could never be equal to it.
  pass_if(~ isTRUE(all.equal(.result, lm(progresa$t2000 ~ progresa$treatment + progresa$avgpoverty))),
          "Good job! You might try using the `data` argument next time.")
)

Estimating interactions

Now, let's see how the effect of treatment varies by the average level of poverty. To do this, specify an interaction between treatment and avgpoverty using the : or * operators. Save this new model as fit_int and print it after running it.


# Model answer: let the treatment effect vary with average poverty by
# interacting the two variables, then print the fitted model.
fit_int <- lm(t2000 ~ treatment * avgpoverty, data = progresa)
fit_int
# Check: each pass_if() compares the submitted result (.result) with one
# accepted way of writing the interaction model (explicit `:` term, `*`
# shorthand, a mix of both, or `progresa$` references with and without the
# `data` argument).
grade_result(
  pass_if(~ isTRUE(all.equal(.result, lm(t2000 ~ treatment + avgpoverty +  treatment:avgpoverty, data = progresa)))),
  pass_if(~ isTRUE(all.equal(.result, lm(t2000 ~ treatment * avgpoverty, data = progresa)))),
  pass_if(~ isTRUE(all.equal(.result, lm(t2000 ~ treatment + avgpoverty + treatment * avgpoverty, data = progresa)))),
  pass_if(~ isTRUE(all.equal(.result, lm(progresa$t2000 ~ progresa$treatment * progresa$avgpoverty, data = progresa))),
          "Right! Remember that when you pass the `data` argument, you don't need to explicitly refer to the data frame in the formula."),
  # Compare with the fitted model itself rather than coef(...): .result is an
  # lm object, so a bare coefficient vector could never be equal to it.
  pass_if(~ isTRUE(all.equal(.result, lm(progresa$t2000 ~ progresa$treatment * progresa$avgpoverty))),
          "Good job! You might try using the `data` argument next time.")
)

Nonlinearities

Finally, let's investigate the relationship between poverty and turnout. Run a regression of turnout on avgpoverty and its squared term to account for any possible nonlinearities. Save the output as fit_sq and print it out.


# Regress turnout on average poverty and its square. I() makes `^` act as
# arithmetic exponentiation instead of a formula operator.
fit_sq <- lm(t2000 ~ avgpoverty + I(avgpoverty ^ 2), data = progresa)
fit_sq
# Grade by comparing the submitted code with the solution code.
grade_code()

Submit

# Submission section: a UI object followed by its server-side call.
# Presumably both come from qsslearnr and produce the output report --
# TODO confirm against the package.
submission_ui
submission_server()


mattblackwell/qsslearnr documentation built on Sept. 17, 2022, 6:25 p.m.