knitr::opts_chunk$set(cache=TRUE, warning = FALSE, message = FALSE, echo = FALSE, fig.align = "center")
library(RACplots)
library(optmatch)
library(DOS2)
library(tidyverse)
library(ggpubr)
library(nearfar)

theme_set(theme_light())

# set some universal variables
prop_model = formula(t ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 + X9 + X10)
prog_model = formula(y ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 + X9 + X10)
k = 1

Set-up

The results that follow depict several simulated datasets. The primary generative model for these is based on @aikens2020pilot and is specified as follows:

\begin{align}
X_i &\sim \text{Normal}(0, I_{10}) \\
T_i &\sim \text{Bernoulli}\left(\frac{1}{1 + \exp(-\phi(X_i))}\right) \\
Y_i(0) &= \Psi(X_i) + \epsilon_i \\
\epsilon_i &\sim N(0, 1)
\end{align}

where $\phi(X)$ and $\Psi(X)$ represent the true propensity and prognostic score functions, given by

\begin{align}
\phi(X_i) &= c_1 X_{i1} - c_0 \\
\Psi(X_i) &= \rho X_{i1} + \sqrt{(1 - \rho^2)}\, X_{i2},
\end{align}

where $c_1$, $c_0$, and $\rho$ are constants. In particular, this form for the prognostic function guarantees that $\rho = Corr(\phi(X), \Psi(X))$.
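
To see why, note that $X_{i1}$ and $X_{i2}$ are independent with unit variance, the constant $c_0$ does not affect the correlation, and (taking $c_1 > 0$)

\begin{align}
Corr(\phi(X_i), \Psi(X_i)) = \frac{Cov\left(c_1 X_{i1},\; \rho X_{i1} + \sqrt{1-\rho^2}\, X_{i2}\right)}{SD(c_1 X_{i1})\; SD\left(\rho X_{i1} + \sqrt{1-\rho^2}\, X_{i2}\right)} = \frac{c_1 \rho}{c_1 \cdot 1} = \rho.
\end{align}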

Results

Figure 1: Assignment-Control Plots and Data Diagnostics

A first use for the assignment-control plot is as an informal data diagnostic. Researchers running observational studies often want a basic understanding of the baseline variation between treatment and control groups before they begin. Plotting standardized mean differences of the covariates between the groups (a Love plot) is a common starting place for understanding imbalances, but when there are many covariates -- some of which are irrelevant -- it can be difficult to tell by eye which imbalances should be of most concern. Histograms of the propensity score are an important diagnostic for checking that the treatment and control groups overlap in terms of their probability of treatment (a condition essential for many causal inference methodologies). However, the propensity score does not necessarily reflect all aspects of covariate balance that may be important.
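
To make these two diagnostics concrete, below is a minimal hand-rolled sketch of each. It assumes a data frame with covariates `X1`-`X10` and a binary treatment column `t`, as produced by `generate_data()` in the next chunk, and it fits the propensity score by logistic regression (a modeling choice for illustration, not a requirement). The `propensity_histogram()` helper used below produces a polished version of the overlap plot.

love_smds <- function(df) {
  # standardized mean difference for each covariate: the ingredients of a Love plot
  covs <- paste0("X", 1:10)
  sapply(covs, function(v) {
    (mean(df[[v]][df$t == 1]) - mean(df[[v]][df$t == 0])) / sd(df[[v]])
  })
}

plot_ps_overlap <- function(df) {
  # estimate the propensity score by logistic regression (prop_model is defined above)
  df$ps_hat <- predict(glm(prop_model, data = df, family = binomial),
                       type = "response")
  # overlaid histograms of estimated propensity score by treatment group
  ggplot(df, aes(x = ps_hat, fill = as.factor(t))) +
    geom_histogram(alpha = 0.5, position = "identity", bins = 30)
}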

set.seed(123)
#simulate data
df_0 <- generate_data(N = 1500, p = 10, true_mu = "3*X1-5", rho = 0, sigma = 1)
df_pos <- generate_data(N = 1500, p = 10, true_mu = "3*X1-5", rho = 0.9, sigma = 1)
df_neg <- generate_data(N = 1500, p = 10, true_mu = "3*X1-5", rho = -0.9, sigma = 1)

a <- AC_plot(data = df_0)
b <- AC_plot(data = df_pos)
c <- AC_plot(data = df_neg)

d <- propensity_histogram(data = df_0)
e <- propensity_histogram(data = df_pos)
f <- propensity_histogram(data = df_neg)

ggarrange(a, b, c, d, e, f, ncol= 3, nrow = 2, heights = c(3, 2), labels = "AUTO")
pdf("figures/Figure1.pdf",  width=8, height=5)
ggarrange(a, b, c, d, e, f, ncol= 3, nrow = 2, heights = c(3, 2), labels = "AUTO")
dev.off()

Figure 1 shows example assignment-control plots (A-C) and propensity score histograms (D-F) for three simulated observational data sets. Notably, the marginal distribution of the propensity score is identical across the three simulated scenarios, and the corresponding propensity score histograms (D-F) are qualitatively equivalent. However, panels A-C reveal that the three settings are in fact quite different. In setting A, treated individuals are no different from control individuals in terms of their prognosis, so even a naive comparison of treated and control subjects may give an unbiased treatment effect estimate under suitable conditions. In settings B and C, treated and control outcomes should not be directly compared, because doing so would give a biased estimate of the treatment effect. Moreover, the direction of the correlation between propensity and prognosis suggests the direction of that bias: if the individuals with the \textit{worst} prognosis are the \textit{least} likely to be treated (Figure 1B), a naive comparison is liable to \textit{overestimate} the effectiveness of the treatment in producing a more positive outcome. If the individuals with the \textit{worst} prognosis are the \textit{most} likely to be treated (Figure 1C), we are liable to err in the \textit{opposite} direction. Future work might consider whether this correlation indicates a tendency toward bias not only in naive comparisons, but also in adjustment or subclassification approaches in which the score models are imperfect or matchings of treated and control individuals are not exact.
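
This intuition about the direction of bias can be checked numerically with the naive difference in mean outcomes in each scenario. A minimal sketch, assuming each simulated data frame stores the observed outcome in a column `y` (as the `prog_model` formula above suggests):

naive_diff <- function(df) {
  # naive treated-minus-control difference in mean outcomes
  mean(df$y[df$t == 1]) - mean(df$y[df$t == 0])
}

# per the discussion above, expect a more positive estimate for df_pos
# (rho = 0.9, Figure 1B) and a more negative one for df_neg (rho = -0.9,
# Figure 1C), relative to df_0
sapply(list(df_0 = df_0, df_pos = df_pos, df_neg = df_neg), naive_diff)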

Figure 2: Other Notable Scenarios

n <- 1000
df_hamburger <- tibble(X1 = rnorm(n), X2 = rnorm(n), X3 = rbinom(n, 1, 0.8)) %>%
  mutate(prog = 3*X3 + X2/3,
         prop = X1 - 1) %>%
  mutate(t = as.factor(rbinom(n, 1, 1/(1 + exp(-prop)))))

df_boomerang <- tibble(X1 = rnorm(n), X2 = rnorm(n)) %>%
  mutate(prog = 4*X1/5) %>%
  mutate(prop = -prog^2 -1 + X2/2) %>%
  mutate(t = as.factor(rbinom(n, 1, 1/(1 + exp(-prop)))))
a <- AC_plot(df_hamburger, opaque_class = "none")
b <- AC_plot(df_boomerang, opaque_class = "none")

c <- propensity_histogram(data = df_hamburger)
d <- propensity_histogram(data = df_boomerang)

ggarrange(a, b, ncol= 2, nrow = 1, labels = "AUTO")
pdf("figures/Figure2.pdf",  width=5.33, height=3)
ggarrange(b, a, ncol= 2, nrow = 1, labels = "AUTO")
dev.off()

Figure 3: Assignment-Control Plots and Matching

rho <- 0.5
#simulate data
df <- generate_data(N = 2000, p = 10, true_mu = "X1/3-3", rho = rho, sigma = 1)

# mahalanobis match
mahal_dist <- DOS2::smahal(df$t, df[,1:10])
m_match <- pairmatch(mahal_dist, controls = k, df)
# Match on the true scores
oracle_prop_match <- pairmatch(t ~ prop, controls = k, df)

# mahalanobis match + caliper
mahal_caliper_dist <- addcaliper(mahal_dist, z = df$t, p = df$prop, caliper = 0.1)
mahal_jointcaliper_dist <- addcaliper(mahal_caliper_dist, z = df$t, p = df$prog, caliper = 0.1)
m_caliper_match <- pairmatch(mahal_caliper_dist, controls = k, df)
m_jointcaliper_match <- pairmatch(mahal_jointcaliper_dist, controls = k, df)
a <- AC_match_plot(df, m_match, title = "\nMahalanobis Match")
b <- AC_match_plot(df, oracle_prop_match, title = "\nPropensity Match")
c <- AC_match_plot(df, m_caliper_match, title = "Mahalanobis Match,\nPropensity Caliper")
d <- AC_match_plot(df, m_jointcaliper_match, title = "Mahalanobis Match,\nJoint Caliper")

ggarrange(a, b, c, d, ncol= 2, nrow = 2, labels = "AUTO" )
pdf("figures/Figure3.pdf",  width=6, height=6)
ggarrange(a, b, c, d, ncol= 2, nrow = 2, labels = "AUTO" )
dev.off()

Assignment-control plots can also be a useful diagnostic tool for matching studies. The four panels in Figure 3 show assignment-control visualizations of four different 1-to-1 matching schemes on the same data set: Mahalanobis distance (A), propensity score (B), Mahalanobis distance with a propensity score caliper (C), and Mahalanobis distance with both a propensity score caliper and a prognostic score caliper (D).

In Mahalanobis distance matching, all covariates are weighted equally in a statistical sense. When there is an abundance of uninformative covariates (i.e., those associated with neither the outcome nor the treatment assignment), Mahalanobis distance matching can select matches that are actually quite distant in the assignment-control space [@aikens2020pilot]. On the other hand, propensity score matching optimizes directly for matches that are nearby in terms of the variation associated with treatment (the "assignment" axis), but it is entirely agnostic to variation associated with the outcome. This can result in high variance in the estimated treatment effect [@king2016propensity]. Finally, the two caliper methods impose constraints on the matching to ensure that matches are close in terms of the propensity score (C) or both the propensity and prognostic scores (D). This visualization illustrates the potential of these methods for minimizing bias (stemming from poor propensity score balance) and variance (stemming from poor prognostic score balance) in the estimated treatment effect.
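
One way to quantify what these panels show is the average within-pair distance in each score, for each matching. A sketch, reusing `df`, the match factors from the chunk above, and the true scores `prop` and `prog` that `generate_data()` attaches to the data:

pair_dist <- function(df, pairing, score) {
  # mean absolute within-pair difference in one score column;
  # each level of the pairing factor identifies one matched pair
  df %>%
    mutate(pair = pairing) %>%
    filter(!is.na(pair)) %>%
    group_by(pair) %>%
    summarize(d = abs(diff(.data[[score]])), .groups = "drop") %>%
    summarize(mean_d = mean(d)) %>%
    pull(mean_d)
}

# e.g., the plain Mahalanobis match should look worse on the scores than
# the caliper matches, echoing panels A, C, and D
pair_dist(df, m_match, "prop")
pair_dist(df, m_caliper_match, "prop")
pair_dist(df, m_jointcaliper_match, "prog")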

Figure 4: Propensity Score Matching and Overlap

set.seed(123)

rho <- 0.85

df_overlap_poor <- generate_data(N = 1000, p = 10, true_mu = "4*X1/3 - 2", rho = rho) 

prop_match <- pairmatch(t ~ prop, controls = 1, df_overlap_poor )
m_prop <- sum(!is.na(prop_match))/2
subsample_mids_prop <- paste("1.", sample(1:m_prop, 40), sep = "")
prop_match_subsample <- ifelse(prop_match %in% subsample_mids_prop, prop_match, NA)


b <- AC_match_plot(data = df_overlap_poor, match = prop_match_subsample, title = "Propensity Score Matching,\nPoor Overlap")

rho <- 0.85

df_overlap_good <- generate_data(N = 1000, p = 10, true_mu = "2*X1/3 - 2", rho = rho) 

prop_match <- pairmatch(t ~ prop, controls = 1, df_overlap_good )
m_prop <- sum(!is.na(prop_match))/2
subsample_mids_prop <- paste("1.", sample(1:m_prop, 40), sep = "")
prop_match_subsample <- ifelse(prop_match %in% subsample_mids_prop, prop_match, NA)


a <- AC_match_plot(data = df_overlap_good, match = prop_match_subsample, title = "Propensity Score Matching,\nGood Overlap")
ggarrange(a, b, ncol = 2, nrow = 1, labels = "AUTO")
pdf("figures/Figure4.pdf",  width=6, height=3)
ggarrange(a, b, ncol= 2, nrow = 1, labels = "AUTO" )
dev.off()

Figure 5: Assignment-Control Plots and Unmeasured Confounding

There is a wide and growing variety of methods that use some formulation of propensity and prognosis to estimate a treatment effect. However, the theorems underlying the use of the propensity score and those underlying the use of the prognostic score both depend on the absence of unmeasured confounding. Violations of this assumption have ramifications for a wide variety of causal inference approaches, and assignment-control plots may be similarly misleading when unmeasured confounding is at play.

Figure 5 illustrates the behavior of assignment control plots in a scenario with unobserved confounding. We add to our data-generating set-up an unobserved confounder, $U$, such that:

\begin{align}
\phi(X_i) &= c_1 X_{i1} + \eta U - c_0, \\
\Psi(X_i) &= \rho X_{i1} + \sqrt{(1-\rho^2)}\,X_{i2} + \eta U.
\end{align}

Suppose we somehow ascertained exactly the correct relationships between the two score models and the observed covariates, so that our propensity and prognostic models are precisely $\hat{\phi}(X_i) = c_1X_{i1} - c_0$ and $\hat{\Psi}(X_i) = \rho X_{i1} + \sqrt{(1-\rho^2)}X_{i2}$, respectively. That is, the score models are exactly correct, except that they omit the unobserved confounder. Figure 5, panels A-B, depicts the assignment-control plots we might make and the matchings we might produce using these score models, $\hat{\phi}$ and $\hat{\Psi}$. Since both the plots and the matchings rely on score models that fail to capture the unobserved confounding, our propensity matches appear quite close in $\hat{\phi}$ (Figure 5A), and our Mahalanobis matches on $X_1$ and $X_2$ appear quite close in the assignment-control space defined by $\hat{\phi} \times \hat{\Psi}$ (Figure 5B).

However, panels C-D in Figure 5 show the same matches in the true assignment-control space, in which $\phi$ and $\Psi$ depend on the unobserved confounder, $U$. In each matching, pairs tend to differ from each other due to baseline variation in the unobserved confounder, which was not accounted for in the matching process. The contrast between Figures 5B and 5D most cleanly illustrates how failing to account for $U$ results in systematic error: in the true assignment-control space, one individual in each matched pair tends to have both a higher prognostic score and a higher propensity score than its partner. Since that individual is more often the treated individual than the control, estimates of treatment effect based on this matching will tend to be biased: $U$ induces systematic differences between paired individuals even after matching.

nu <- 0.3
rho <- 0.2
#simulate data
df <- generate_xSITA_data(N = 2000, p = 10, nu = nu, rho = rho, sigma = 1)

# mahalanobis match on X1 + X2
mahal_dist <- match_on(t ~ X1 + X2, method = "mahalanobis", data = df)
m_match <- pairmatch(mahal_dist, controls = k, df)
#Calculate best possible propensity and prognostic scores without knowing about confounder
naive_df <- df %>% 
    mutate(prog = rho*X1 + sqrt(1-rho^2)*X2, 
           prop = X1/3-3)

naive_prop_match <- pairmatch(t ~ prop, controls = k, naive_df)
a <- AC_match_plot(naive_df, naive_prop_match, title = "Propensity Match")
b <- AC_match_plot(naive_df, m_match, title = "Mahalanobis Match on X1, X2")

c <- AC_match_plot(df, naive_prop_match, title = "Propensity Match")
d <- AC_match_plot(df, m_match, title = "Mahalanobis Match on X1, X2")

ggarrange(a,b,c,d,  ncol= 2, nrow = 2, labels = "AUTO")
pdf("figures/Figure5.pdf",  width=5.5, height=5.5)
ggarrange(a,b,c,d,  ncol= 2, nrow = 2, labels = "AUTO")
dev.off()
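
The systematic error described above can also be made concrete numerically: within matched pairs, the treated-minus-control difference in the unmeasured confounder would center on zero if matching had balanced $U$. A sketch, assuming `generate_xSITA_data()` returns the confounder in a column `U` and codes `t` as 0/1:

# within-pair imbalance in the unobserved confounder after Mahalanobis
# matching; a mean that differs systematically from zero signals
# residual confounding
df %>%
  mutate(pair = m_match) %>%
  filter(!is.na(pair)) %>%
  group_by(pair) %>%
  summarize(dU = U[t == 1] - U[t == 0], .groups = "drop") %>%
  summarize(mean_dU = mean(dU), se_dU = sd(dU) / sqrt(n()))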

Figure 6: Randomization-Assignment-Control Plots

We take the same set-up as before:

\begin{align}
X_i &\sim \text{Normal}(0, I_{10}) \\
T_i &\sim \text{Bernoulli}\left(\frac{1}{1 + \exp(-\phi(X_i, Z_i))}\right) \\
Y_i(0) &= \Psi(X_i) + \epsilon_i \\
\epsilon_i &\sim N(0, 1)
\end{align}

where the true propensity and prognostic score functions are now extended to include an instrument $Z_i$ and an unmeasured confounder $U$:

\begin{align}
\phi(X_i, Z_i) &= c_1 X_{i1} + c_2 Z_{i} + \eta U - c_0, \\
\Psi(X_i) &= \rho X_{i1} + \sqrt{(1-\rho^2)}\,X_{i2} + \eta U.
\end{align}

where $c_2$, $c_1$, $c_0$, $\eta$, and $\rho$ are constants, and $U$ represents an unmeasured confounder with strength $\eta$. As before, $Z$ is an instrumental variable: it is unconfounded and has no effect on the outcome except via the treatment. For simplicity, we make $Z$ completely uncorrelated with the other covariates, so it is a "pure" IV. See iv_with_correlated_covariates.Rmd for a set-up that relaxes this assumption.

# Reformat a nearfar match (nf$match: a two-column matrix of matched row
# indices) into an optmatch-style vector of pair IDs, with NA for
# unmatched observations
reformat_nf <- function(nf, N){
  nf_reformatted <- nf$match %>%
    as_tibble() %>%
    mutate(match_id = paste("1.", 1:nrow(.), sep = "")) %>%
    gather("name", "index", -match_id)

  match <- tibble(index = 1:N) %>%
    left_join(nf_reformatted, by = "index") %>%
    pull(match_id)

  names(match) <- 1:N

  return(match)
}
nu <- 0.4

# setting rho_z = 0 makes Z uncorrelated with any X's
df_IV_xSITA <- generate_data_IV_xSITA(true_mu = "X1/2 + 2*Z + nu*U - 1.75", 
                                      nu = nu, rho_z = 0) %>%
  mutate(prop_naive = prop - nu * U,
         prog_naive = prog - nu * U,
         prop_naive_with_Z = prop - nu * U + 2*Z) %>%
  select(-zeta) %>%
  mutate(IV = Z)
IVrange <- diff(range(df_IV_xSITA$Z))

nf <- opt_nearfar(dta = df_IV_xSITA, trt = "t", covs = c("X1", "X2"), iv = "Z",
                  trt.type = "bin", max.time.seconds = 600)

nf_match <- reformat_nf(nf, 2000)

m <- sum(!is.na(nf_match))/2
subsample_mids <- paste("1.", sample(1:m, 30), sep = "")
nf_match_subsample <- ifelse(nf_match %in% subsample_mids, nf_match, NA)
nf_AC <- AC_match_plot(data = df_IV_xSITA, match = nf_match_subsample, is_RAC = TRUE)
nf_CR <- CR_match_plot(data = df_IV_xSITA, match = nf_match_subsample)
nf_AR <- AR_match_plot(data = df_IV_xSITA, match = nf_match_subsample)

nf_RAC <- ggarrange(nf_AC, nf_CR, nf_AR, ncol = 3, nrow = 1) %>% 
  annotate_figure(top = text_grob("Nearfar Match, Randomization-Control-Assignment-plots", size = 14))
mahal_match <- pairmatch(t ~ X1 + X2, controls = 1, df_IV_xSITA)
m_mahal <- sum(!is.na(mahal_match))/2
subsample_mids_mahal <- paste("1.", sample(1:m_mahal, 30), sep = "")
mahal_match_subsample <- ifelse(mahal_match %in% subsample_mids_mahal, mahal_match, NA)

mahal_AC <- AC_match_plot(data = df_IV_xSITA, match = mahal_match_subsample, is_RAC = TRUE)
mahal_CR <- CR_match_plot(data = df_IV_xSITA, match = mahal_match_subsample)
mahal_AR <- AR_match_plot(data = df_IV_xSITA, match = mahal_match_subsample)

mahal_RAC <- ggarrange(mahal_AC, mahal_CR, mahal_AR, ncol = 3, nrow = 1) %>% 
  annotate_figure(top = text_grob("Mahalanobis Match on X1 and X2, Randomization-Assignment-Control-plots", size = 14))
ggarrange(mahal_RAC, nf_RAC, ncol = 1, nrow = 2, labels = "AUTO")
pdf("figures/Figure6.pdf",  width=8, height=6)
ggarrange(mahal_RAC, nf_RAC, ncol = 1, nrow = 2, labels = "AUTO")
dev.off()
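
The defining feature of the near-far design, visible in the panels above, is that pairs are "near" on the covariates but "far" on the instrument. A sketch quantifying the latter, reusing `df_IV_xSITA`, `nf_match`, and `mahal_match` from the chunks above:

iv_separation <- function(df, pairing) {
  # mean absolute within-pair difference on the instrument Z
  df %>%
    mutate(pair = pairing) %>%
    filter(!is.na(pair)) %>%
    group_by(pair) %>%
    summarize(dZ = abs(diff(Z)), .groups = "drop") %>%
    summarize(mean_dZ = mean(dZ)) %>%
    pull(mean_dZ)
}

# the nearfar match deliberately pushes pairs apart on Z, while the
# Mahalanobis match ignores Z entirely
iv_separation(df_IV_xSITA, nf_match)
iv_separation(df_IV_xSITA, mahal_match)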

Figures 7-8: Applied Example

See /analyses/CABG/CABG_women.Rmd

References


