knitr::opts_chunk$set(echo = TRUE)

$\texttt{gpbalancer}$: an R Package for Optimally Balanced Gaussian Process Propensity Score Estimation

Reference forthcoming...

Installing gpbalancer

The package devtools is required to install this R package from this Github repository. Install this package first if it is not already installed.

install.packages('devtools', dependencies = TRUE)

Once that package has been installed, use the following to install gpbalancer

devtools::install_github('bvegetabile/gpbalancer')

Load the package to begin analysis!

library('gpbalancer')

Example using gpbalancer

Simulating data

Provided below is a simple simulation to explore the effectiveness of the optimally balanced Gaussian process propensity score. Consider a continuous covariate $X$ which is used to assign treatment, for $i \in 1, \dots, 1000$ let,

[ X_i \sim N(0,1) ]

Additionally, let the true propensity score be defined as follows,

[ e(X_i) = \mbox{Pr}(T_i = 1 | X_i) = 0.8 \times \Phi(2 * X_i) + 0.1 ]

where $\Phi(\cdot)$ is the cumulative distribution of the Normal Distribution. Finally, we simulate treatment assignment such that,

[ T_i | X_i \sim Bernoulli( \ e(X_i) \ ) ]

Additionally, we will consider the following potential outcomes

[ Y_i^{T=1} = X_i^2 + 2 + \epsilon_{i,\mathcal{T}} ]

[ Y_i^{T=0} = X_i + \epsilon_{i,\mathcal{C}} ]

where $\epsilon_{i,G} \sim N(0, 0.25^2)$ for $G \in {\mathcal{T},\mathcal{C}}$. The observed outcome will be,

[ Y_i^{obs} = I(T_i=1)Y_i^{T=1} + I(T=0)Y_i^{T=0} ]

set.seed(2017)
n_obs <- 1000
pretreatment_cov <- rnorm(n_obs)
prop_score <- 0.8 * pnorm(2*pretreatment_cov) + 0.1
treatment_assignment <- rbinom(n_obs, size = 1, prob = prop_score)
ta_logical <- as.logical(treatment_assignment)
y_t <- pretreatment_cov^2 + 2 + rnorm(n_obs, sd=0.25)
y_c <- pretreatment_cov + rnorm(n_obs, sd=0.25)
y_obs <- treatment_assignment * y_t + (1-treatment_assignment) * y_c

t_col <- rgb(0.5,0,0.5,0.5)
c_col <- rgb(0,0.5,0,0.5)

Visualizing the Propensity Score & Covariate Imbalance

The true propensity score and observed treatment assignments are shown below.

par(mfrow=c(1,2))
plot(pretreatment_cov, prop_score, 
     xlim=range(pretreatment_cov), 
     ylim=c(0,1),
     pch=19, col=rgb(0,0,0,0.5),
     xlab='Pretreatment Covariate',
     ylab='Probability of Treatment',
     main='Propensity Score & Observed Treatment Assignments')
points(pretreatment_cov[ta_logical],
       treatment_assignment[ta_logical],
       pch=19, col=t_col)
points(pretreatment_cov[!ta_logical],
       treatment_assignment[!ta_logical],
       pch=19, col=c_col)
abline(h=c(0,1), lty=3)
legend('topleft', c('Treated', 'Control'), pch=19, col=c(t_col, c_col), bg='white')

plot(density(pretreatment_cov[!ta_logical], adjust = 1.5),
     xlim=range(pretreatment_cov), 
     type='l', lwd=3, col=c_col,
     xlab='Pretreatment Covariate',
     ylab='Density',
     main='Covariate Distributions Conditional Upon Treatment Type')
lines(density(pretreatment_cov[ta_logical], adjust = 1.5),
     lwd=3, col=t_col)
abline(h=c(0,1), lty=3)
legend('topleft', c('Treated', 'Control'), lty=1, lwd=3, col=c(t_col, c_col), bg='white')
knitr::kable(gpbalancer::bal_table(data.frame("Pretreatment Covariate" = pretreatment_cov), 1, ta_logical))

Visualizing the Potential Outcomes & Observed Responses

Below are visualizations of the expectations of the potential response functions and the observed responses for the sample data.

exes <- seq(min(pretreatment_cov), 
            max(pretreatment_cov), 
            0.01)
meanYT <- exes^2 + 2
meanYC <- exes
ylims <- range(cbind(meanYT, meanYC))

plot(exes, meanYT,
     type='l', lwd=3, col = t_col,
     xlim=range(pretreatment_cov), ylim=ylims,
     xlab='Pretreatment Covariate',
     ylab='Response',
     main='Expectation of Potential Response Functions\nAnd Observed Response Values')
lines(exes, meanYC, lwd=3, col = c_col)
points(pretreatment_cov[ta_logical], y_obs[ta_logical], pch=19, col=t_col)
points(pretreatment_cov[!ta_logical], y_obs[!ta_logical], pch=19, col=c_col)
legend('topleft', 
       c(expression(E(Y^{T==1})), 
         expression(E(Y^{T==0})),
         expression(paste('Treated Group, ', Y^{obs})), 
         expression(paste('Control Group, ', Y^{obs}))), 
       lty=c(1,1,NA,NA), lwd=c(3,3,NA,NA), pch=c(NA, NA, 19, 19), col=c(t_col, c_col), bg='white')

Under these settings if we are attempting to model $\tau = E(Y^{T=1} - Y^{T=0})$ without adjustment (i.e. using $E(Y^{obs} | T=1) - E(Y^{obs} | T=0))$, then the bias would be approximately 0.43.

hat_tau <- mean(y_obs[ta_logical]) - mean(y_obs[!ta_logical])
original_bias <- (hat_tau-3)
message('Orignal Bias: ', round(original_bias,3))

Estimating the Propensity Score

Using the above simulated data, we can now estimate the propensity score. The reported time is in seconds:

est_propscore <- gpbalancer::gpbal(X = as.matrix(pretreatment_cov), 
                                   y = treatment_assignment, 
                                   cov_function = gpbalancer::sqexp_poly,
                                   init_theta = c(1, 1),
                                   verbose = T)

Visualizing the Estimated Propensity Score

plot(pretreatment_cov, prop_score, 
     xlim=range(pretreatment_cov), 
     ylim=c(0,1),
     pch=19, col=rgb(0,0,0,0.5),
     xlab='Pretreatment Covariate',
     ylab='Probability of Treatment',
     main='Comparing True vs. Estimated Propensity Score')
points(pretreatment_cov,
       est_propscore$ps,
       pch=19, col=rgb(0,0,0.5,0.5))
abline(h=c(0,1), lty=3)
legend('topleft', c('True Propensity Score', 'Estimated Propensity Score'), pch=19, col=c(rgb(0,0,0,0.5), rgb(0,0,0.5,0.5)), bg='white')

Balance Adjusting for the True Propensity Score

true_wts <- ifelse(ta_logical, 1/prop_score, 1/(1-prop_score))
knitr::kable(gpbalancer::bal_table(data.frame("Pretreatment Covariate" = pretreatment_cov), 1, ta_logical, true_wts))

Balance Adjusting for the Estimated Optimally Balanced Gaussian Process Propensity Score

est_wts <- ifelse(ta_logical, 1/est_propscore$ps, 1/(1-est_propscore$ps))
knitr::kable(gpbalancer::bal_table(data.frame("Pretreatment Covariate" = pretreatment_cov), 1, ta_logical, est_wts))

Evaluating the Performance of the Optimally Balanced Gaussian Process Propensity Score

We now compare the method when there is no adjustment on the propensity score and when there is adjustment by the true propensity score. To compare these three scenarios the bias, the percent reduction in bias compared with no adjustment, and the mean squared error of the estimators are provided.

make_wts <- function(ta, ps){
  wts <- data.frame(t=(ta/ps) / sum(ta/ps),
                    c=((1-ta)/(1-ps)) / sum((1-ta)/(1-ps)))
  wts <- ifelse(ta==1, wts$t, wts$c)
  return(wts)
}

lm_ps <- function(Y, X, wts, true_val = NULL){
  W <- diag(wts)
  invXtWX <- solve(t(X) %*% W %*% X)
  betas <- invXtWX %*% t(X) %*% W %*% Y

  Yhat <- X %*% betas
  resids <- Y - Yhat
  sighat <- as.double(sum(resids^2) / (length(Y) - 1))

  varmat <- sighat * invXtWX %*% t(X) %*% W %*% W %*% X %*% invXtWX

  std_errs <- sqrt(diag(varmat))

  low_int <- betas - 1.96 * std_errs
  upp_int <- betas + 1.96 * std_errs

  res <- cbind(betas, std_errs, low_int, upp_int)
  colnames(res) <- c('coef', 'stderrs', 'low95', 'upp95')

  if(!is.null(true_val)){
    cover_test <- res[2,3] < true_val & res[2,4] > true_val
    return(list('ests' = data.frame(res),
                'covers' = cover_test))
  } else{
    return(list('ests' = res))
  }
}

true_wts <- make_wts(treatment_assignment, prop_score)
est_wts <- make_wts(treatment_assignment, est_propscore$ps)
Xdes <- cbind(1, treatment_assignment)
noadj <- lm_ps(y_obs, Xdes, wts = rep(1/n_obs, n_obs), 3)
truadj <- lm_ps(y_obs, Xdes, true_wts, 3)
estadj <- lm_ps(y_obs, Xdes, est_wts, 3)

mat_res <- matrix(NA, nrow = 3, ncol = 7)
mat_res[1, 1:4] = unlist(noadj[[1]][2,])
mat_res[2, 1:4] <- unlist(truadj[[1]][2,])
mat_res[3, 1:4] <- unlist(estadj[[1]][2,])
mat_res[ , 5] <- mat_res[,1] - 3
mat_res[2:3, 6] <- 100*(1 - abs(mat_res[2:3, 5]) / abs(rep(mat_res[1,5],2)))
mat_res[,7] <- mat_res[,5]^2 + mat_res[,2]^2
rownames(mat_res) <- c('No Adjustment', 'True Propensity Score', 'Opt Bal GP Prop Score')
colnames(mat_res) <- c('Est. ATE', 'S.E.', 'Lower95', 'Upper95', 'Bias', '% Red. in Abs. Bias', 'MSE')
options(knitr.kable.NA = '-')
knitr::kable(mat_res, digits = 3, align = rep('c', 7)) 


bvegetabile/gpbalancer documentation built on May 22, 2019, 1:34 p.m.