Stratification {#chapter-stratification}

::: {.rmdtip} stratify
verb: stratify; 3rd person present: stratifies; past tense: stratified; past participle: stratified; gerund or present participle: stratifying
1. arrange or classify.
2. form or arrange into strata. :::

Propensity score stratification leverages propensity scores so we can define strata (or groups) that roughly equivalent on all the observed covariates. Although it is reasonable to start with chapter \@ref(chapter-matching) on matching, stratification is an important method and even if you prefer to use a matching method, stratification will most often be used in order to evaluate balance.

Phase I: Estimate Propensity Scores (Logistic regression)

To begin let's estimate propensity scores using logistic regression with the National Supported Work Demonostration (lalonde) dataset [@Lalonde1986]. Here, we are using the final model specification used by @DehejiaWahba1999.

data(lalonde, package = 'Matching')
lalonde_formu <- treat ~ age + I(age^2) + educ + I(educ^2) + black +
    hisp + married + nodegr + re74  + I(re74^2) + re75 + I(re75^2)
lr_out <- glm(formula = lalonde_formu,
              data = lalonde,
              family = binomial(link = 'logit'))
summary(lr_out)
lalonde$lr_ps <- fitted(lr_out)

Check the distributions of propensity scores to ensure we have good overlap

ggplot(lalonde, aes(x = lr_ps, color = as.logical(treat))) + 
    geom_density() +
    scale_color_manual('Treatment', values = palette2) +
    xlab('Propensity Score')

Stratifying

Stratification using quintiles.

breaks5 <- psa::get_strata_breaks(lalonde$lr_ps)
breaks5

lalonde$lr_strata5 <- cut(x = lalonde$lr_ps, 
                          breaks = breaks5$breaks, 
                          include.lowest = TRUE, 
                          labels = breaks5$labels$strata)
table(lalonde$treat, lalonde$lr_strata5)
ggplot(lalonde, aes(x = lr_ps, color = as.logical(treat))) + 
    geom_density(aes(fill = as.logical(treat)), alpha = 0.2) +
    geom_vline(xintercept = breaks5$breaks, alpha = 0.5) +
    geom_text(data = breaks5$labels, 
              aes(x = xmid, y = 0, label = strata),
              color = 'black', vjust = 1) +
    scale_fill_manual('Treatment', values = palette2) +
    scale_color_manual('Treatment', values = palette2) +
    xlab('Propensity Score') + ylab('Density') +
    xlim(c(0, 1))
ggplot() +
    geom_vline(xintercept = breaks5$breaks) +
    geom_point(data = lalonde, aes(x = lr_ps, y = log(re78 + 1), color = as.logical(treat)), alpha = 0.5) +
    geom_text(data = breaks5$labels, aes(x = xmid, y = 0, label = strata), color = 'black', vjust = 1) +
    scale_color_manual('Treatment', values = palette2) +
    xlab('Propensity Score')

Checking Balance {#stratification-balance}

covars <- all.vars(lalonde.formu)
covars <- lalonde[,covars[-1]]
PSAgraphics::cv.bal.psa(covariates = covars, 
                        treatment = lalonde$treat,
                        propensity = lalonde$lr_ps,
                        strata = lalonde$lr_strata)
PSAgraphics::box.psa(continuous = lalonde$age, 
                     treatment = lalonde$treat, 
                     strata = lalonde$lr_strata,
                     xlab = "Strata", 
                     balance = FALSE)
PSAgraphics::cat.psa(categorical = lalonde$nodegr, 
                     treatment = lalonde$treat, 
                     strata = lalonde$lr_strata, 
                     xlab = 'Strata',
                     balance = FALSE)
PSAgraphics::cat.psa(categorical = lalonde$nodegr, 
                     treatment = lalonde$treat, 
                     strata = lalonde$lr_strata, 
                     xlab = 'Strata',
                     balance = FALSE, 
                     main = 'Covariate: nodegr')
PSAgraphics::cat.psa(categorical = lalonde$black, 
                     treatment = lalonde$treat, 
                     strata = lalonde$lr_strata, 
                     xlab = 'Strata',
                     balance = FALSE, 
                     main = 'Covariate: black')
PSAgraphics::cat.psa(categorical = lalonde$hisp, 
                     treatment = lalonde$treat, 
                     strata = lalonde$lr_strata, 
                     xlab = 'Strata',
                     balance = FALSE, 
                     main = 'Covariate: hisp')
PSAgraphics::cat.psa(categorical = lalonde$married, 
                     treatment = lalonde$treat, 
                     strata = lalonde$lr_strata, 
                     xlab = 'Strata',
                     balance = FALSE, 
                     main = 'Covariate: married')
PSAgraphics::box.psa(continuous = lalonde$age, 
                     treatment = lalonde$treat, 
                     strata = lalonde$lr_strata,
                     xlab = "Strata", 
                     balance = FALSE,
                     main = 'Covariate: age')
PSAgraphics::box.psa(continuous = lalonde$edu, 
                     treatment = lalonde$treat, 
                     strata = lalonde$lr_strata,
                     xlab = "Strata", 
                     balance = FALSE,
                     main = 'Covariate: edu')
PSAgraphics::box.psa(continuous = lalonde$re74, 
                     treatment = lalonde$treat, 
                     strata = lalonde$lr_strata,
                     xlab = "Strata", 
                     balance = FALSE,
                     main = 'Covariate: re74')
PSAgraphics::box.psa(continuous = lalonde$re75, 
                     treatment = lalonde$treat, 
                     strata = lalonde$lr_strata,
                     xlab = "Strata", 
                     balance = FALSE,
                     main = 'Covariate: re75')

Phase II: Estimate Effects

PSAgraphics::loess.psa(response = log(lalonde$re78 + 1),
                       treatment = lalonde$treat,
                       propensity = lalonde$lr_ps)
psa::loess_plot(ps = lalonde$lr_ps,
                outcome = log(lalonde$re78 + 1),
                treatment = lalonde$treat == 1,
                responseTitle = 'log(re78)',
                plot.strata = 5,
                points.treat.alpha = 0.5,
                points.control.alpha = 0.5,
                percentPoints.treat = 1,
                percentPoints.control = 1,
                se = FALSE, 
                method = 'loess')
PSAgraphics::circ.psa(response = log(lalonde$re78 + 1), 
                      treatment = lalonde$treat == 1, 
                      strata = lalonde$lr_strata5)

Phase III: Sensitivity Analysis

Now that we have established there is a statistically significant effect of the intervention after adjusting for the selection bias using propensity scores we will want to evaluate the robustness of that effect. Sensitivity analysis is one approach but it is only well defined for matching methods. In chapter \@ref(chapter-bootstrapping) we will introduce a bootstrapping method that can help test the robustness. But @Rosenbaum2012 suggest another approach to test the sensitivity is to test the null hypothesis twice. We will do that here using a classification tree approach to estimating propensity scores and strata.

Estimate Propensity Scores (classification tree)

library(tree)
tree_out <- tree::tree(lalonde_formu,
                       data = lalonde)
plot(tree_out); text(tree_out)
lalonde$tree_ps <- predict(tree_out)
table(lalonde$tree_ps, lalonde$treat, useNA = 'ifany')

lalonde$tree_strata <- predict(tree_out, type = 'where')
table(lalonde$tree_strata, lalonde$treat, useNA = 'ifany')


jbryer/psa documentation built on Nov. 17, 2023, 8:21 a.m.