Advanced_functionality.R
In stratamatch: Stratification and Matching for Large Observational Data Sets

## ---- include = FALSE---------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  warning = TRUE,
  message = TRUE,
  fig.align = "center",
  fig.height = 5,
  fig.width = 6
)

## -----------------------------------------------------------------------------
library(stratamatch)
library(dplyr)
set.seed(123)
mydata <- make_sample_data(n = 5000)

## -----------------------------------------------------------------------------
a.strat1 <- auto_stratify(mydata, "treat",
                         prognosis = outcome ~ X1 + X2,
                         group_by_covariates = c("C1", "B2"), size = 500)

## -----------------------------------------------------------------------------
mysplit <- split_pilot_set(mydata, "treat", group_by_covariates = c("C1", "B2"))

## -----------------------------------------------------------------------------
a.strat2 <- auto_stratify(mysplit$analysis_set, "treat",
                          prognosis = outcome ~ X1 + X2,
                          pilot_sample = mysplit$pilot_set, size = 500)

## -----------------------------------------------------------------------------
library(glmnet)

# fit model on pilot set
x_pilot <- model.matrix(outcome ~ X1 + X2 + B1 + B2 + C1,
                        data = mysplit$pilot_set)

y_pilot <- mysplit$pilot_set %>%
  dplyr::select(outcome) %>%
  as.matrix()

cvlasso <- cv.glmnet(x_pilot, y_pilot, family = "binomial")

## -----------------------------------------------------------------------------
coef(cvlasso)

## -----------------------------------------------------------------------------
# estimate scores on analysis set
x_analysis <- model.matrix(outcome ~ X1 + X2 + B1 + B2 + C1,
                        data = mysplit$analysis_set)

lasso_scores <- predict(cvlasso, newx = x_analysis, s = "lambda.min", type = "response")

# pass the scores to auto_stratify
a.strat_lasso <- auto_stratify(data = mysplit$analysis_set,
                             treat = "treat",
                             outcome = "outcome",
                             prognosis = lasso_scores,
                             pilot_sample = mysplit$pilot_set,
                             size = 500)

## -----------------------------------------------------------------------------
cvenet <- cv.glmnet(x_pilot, y_pilot, family = "binomial", alpha = 0.2)

enet_scores <- predict(cvenet, newx = x_analysis, s = "lambda.min", type = "response")

# pass the scores to auto_stratify
a.strat_enet <- auto_stratify(data = mysplit$analysis_set,
                             treat = "treat",
                             outcome = "outcome",
                             prognosis = enet_scores,
                             pilot_sample = mysplit$pilot_set,
                             size = 500)

## -----------------------------------------------------------------------------
library(randomForest)
forest <- randomForest(as.factor(outcome) ~ X1 + X2 + B1 + B2, data = mysplit$pilot_set)

## -----------------------------------------------------------------------------
forest_scores <- predict(forest,
                         newdata = mysplit$analysis_set,
                         type = "prob")[,1]

a.strat_forest <- auto_stratify(data = mysplit$analysis_set,
                             treat = "treat",
                             outcome = "outcome",
                             prognosis = forest_scores,
                             pilot_sample = mysplit$pilot_set,
                             size = 500)

## ---- include=FALSE-----------------------------------------------------------
if (!requireNamespace("optmatch", quietly = TRUE)) knitr::opts_chunk$set(eval = FALSE)

## -----------------------------------------------------------------------------
mahalmatch <- strata_match(a.strat2, model = treat ~ X1 + X2 + B1 + B2,
                           method = "mahal", k = 2)

summary(mahalmatch)

## -----------------------------------------------------------------------------
fullmahalmatch <- strata_match(a.strat2, model = treat ~ X1 + X2 + B1 + B2,
                           method = "mahal", k = "full")

summary(fullmahalmatch)

## -----------------------------------------------------------------------------
library(optmatch)

# mahalanobis distance matrix for within-strata matching
mahaldist <- match_on(treat ~ X1 + X2 + B1 + B2, 
                      within = exactMatch(treat ~ stratum,
                                          data = a.strat2$analysis_set),
                      data = a.strat2$analysis_set)

# add propensity score caliper
propdist <- match_on(glm(treat ~ X1 + X2 + B1 + B2,
                         family = "binomial",
                         data = a.strat2$analysis_set))

mahalcaliper <- mahaldist + caliper(propdist, width = 1)

mahalcaliper_match <- pairmatch(mahalcaliper, data = a.strat2$analysis_set)

summary(mahalcaliper_match)

Any scripts or data that you put into this service are public.

stratamatch documentation built on March 31, 2022, 9:07 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

stratamatch
Stratification and Matching for Large Observational Data Sets

inst/doc/Advanced_functionality.R
In stratamatch: Stratification and Matching for Large Observational Data Sets

Try the stratamatch package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

stratamatch Stratification and Matching for Large Observational Data Sets

inst/doc/Advanced_functionality.R In stratamatch: Stratification and Matching for Large Observational Data Sets

Try the stratamatch package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

stratamatch
Stratification and Matching for Large Observational Data Sets

inst/doc/Advanced_functionality.R
In stratamatch: Stratification and Matching for Large Observational Data Sets