require(Matching) require(mice) data(lalonde, package='Matching')
Tr <- lalonde$treat Y <- lalonde$re78 X <- lalonde[,c('age','educ','black','hisp','married','nodegr','re74','re75')] lalonde.glm <- glm(treat ~ ., family=binomial, data=cbind(treat=Tr, X)) summary(lalonde.glm)
Create a copy of the covariates to simulate missing at random (mar
) and not missing at random (nmar
).
lalonde.mar <- X lalonde.nmar <- X missing.rate <- .2 # What percent of rows will have missing data missing.cols <- c('nodegr', 're75') # The columns we will add missing values to # Vectors indiciating which rows are treatment and control. treat.rows <- which(lalonde$treat == 1) control.rows <- which(lalonde$treat == 0)
Add missingness to the existing data. For the not missing at random data treatment units will have twice as many missing values as the control group.
set.seed(2112) for(i in missing.cols) { lalonde.mar[sample(nrow(lalonde), nrow(lalonde) * missing.rate), i] <- NA lalonde.nmar[sample(treat.rows, length(treat.rows) * missing.rate * 2), i] <- NA lalonde.nmar[sample(control.rows, length(control.rows) * missing.rate), i] <- NA }
The proportion of missing values for the first covariate
prop.table(table(is.na(lalonde.mar[,missing.cols[1]]), lalonde$treat, useNA='ifany')) prop.table(table(is.na(lalonde.nmar[,missing.cols[1]]), lalonde$treat, useNA='ifany'))
Create a shadow matrix. This is a logical vector where each cell is TRUE if the value is missing in the original data frame.
shadow.matrix.mar <- as.data.frame(is.na(lalonde.mar)) shadow.matrix.nmar <- as.data.frame(is.na(lalonde.nmar))
Change the column names to include "_miss" in their name.
names(shadow.matrix.mar) <- names(shadow.matrix.nmar) <- paste0(names(shadow.matrix.mar), '_miss')
Impute the missing values using the mice package
set.seed(2112) mice.mar <- mice(lalonde.mar, m=1) mice.nmar <- mice(lalonde.nmar, m=1)
Get the imputed data set.
complete.mar <- complete(mice.mar) complete.nmar <- complete(mice.nmar)
Estimate the propensity scores using logistic regression.
lalonde.mar.glm <- glm(treat~., data=cbind(treat=Tr, complete.mar, shadow.matrix.mar)) lalonde.nmar.glm <- glm(treat~., data=cbind(treat=Tr, complete.nmar, shadow.matrix.nmar))
We see that the two indicator columns from the shadow matrix are statistically significant predictors suggesting that the data is not missing at random.
summary(lalonde.mar.glm) summary(lalonde.nmar.glm)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.