comparison.R
In couplr: Optimal Pairing and Matching via Linear Assignment

## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.width = 8,
  fig.height = 5
)
library(couplr)
library(dplyr)
library(ggplot2)

## -----------------------------------------------------------------------------
set.seed(42)

# Treatment group: younger, more educated, higher prior earnings
treatment <- tibble(

  id = 1:200,
  age = rnorm(200, mean = 35, sd = 8),
  education = rnorm(200, mean = 14, sd = 2),
  prior_earnings = rnorm(200, mean = 40000, sd = 12000),
  employed = rbinom(200, 1, 0.7),
  group = "treatment"
)

# Control group: older, less educated, lower prior earnings (selection bias)
control <- tibble(
  id = 201:700,
  age = rnorm(500, mean = 45, sd = 12),
  education = rnorm(500, mean = 12, sd = 3),
  prior_earnings = rnorm(500, mean = 32000, sd = 15000),
  employed = rbinom(500, 1, 0.5),
  group = "control"
)

# Combine for packages that expect single data frame
combined <- bind_rows(treatment, control) %>%
  mutate(treated = as.integer(group == "treatment"))

cat("Treatment units:", nrow(treatment), "\n")
cat("Control units:", nrow(control), "\n")

# Baseline imbalance
vars <- c("age", "education", "prior_earnings", "employed")
for (v in vars) {
  diff <- mean(treatment[[v]]) - mean(control[[v]])
  pooled_sd <- sqrt((var(treatment[[v]]) + var(control[[v]])) / 2)
  std_diff <- diff / pooled_sd
  cat(sprintf("%s: std diff = %.3f\n", v, std_diff))
}

## ----eval = requireNamespace("MatchIt", quietly = TRUE)-----------------------
# if (requireNamespace("MatchIt", quietly = TRUE)) {
#   library(MatchIt)
# 
#   # MatchIt: Propensity score matching (default)
#   m_ps <- matchit(
#     treated ~ age + education + prior_earnings + employed,
#     data = combined,
#     method = "nearest",
#     distance = "glm"  # Propensity score via logistic regression
#   )
# 
#   cat("MatchIt (propensity score, nearest neighbor):\n")
#   cat("  Matched pairs:", sum(m_ps$weights[combined$treated == 1] > 0), "\n")
# 
#   # Extract matched data
#   matched_ps <- match.data(m_ps)
# }

## -----------------------------------------------------------------------------
# couplr: Direct covariate matching
result_couplr <- match_couples(
  left = treatment,
  right = control,
  vars = c("age", "education", "prior_earnings", "employed"),
  auto_scale = TRUE,
  scale = "robust"
)

cat("\ncouplr (direct covariate matching):\n")
cat("  Matched pairs:", result_couplr$info$n_matched, "\n")
cat("  Mean distance:", round(mean(result_couplr$pairs$distance), 4), "\n")

## ----eval = requireNamespace("MatchIt", quietly = TRUE), fig.width=9, fig.height=5, fig.alt="Side-by-side comparison of covariate balance achieved by MatchIt and couplr, showing standardized mean differences for age, education, prior_earnings, and employed"----
# if (requireNamespace("MatchIt", quietly = TRUE)) {
#   # MatchIt balance
#   matched_treated_ps <- matched_ps %>% filter(treated == 1)
#   matched_control_ps <- matched_ps %>% filter(treated == 0)
# 
#   matchit_balance <- tibble(
#     variable = vars,
#     std_diff = sapply(vars, function(v) {
#       diff <- mean(matched_treated_ps[[v]]) - mean(matched_control_ps[[v]])
#       pooled_sd <- sqrt((var(matched_treated_ps[[v]]) + var(matched_control_ps[[v]])) / 2)
#       diff / pooled_sd
#     }),
#     method = "MatchIt"
#   )
# 
#   # couplr balance
#   couplr_balance <- balance_diagnostics(
#     result_couplr, treatment, control, vars
#   )
# 
#   couplr_balance_df <- couplr_balance$var_stats %>%
#     select(variable, std_diff) %>%
#     mutate(method = "couplr")
# 
#   # Combine and plot
#   balance_comparison <- bind_rows(matchit_balance, couplr_balance_df)
# 
#   ggplot(balance_comparison, aes(x = variable, y = abs(std_diff), fill = method)) +
#     geom_col(position = "dodge") +
#     geom_hline(yintercept = 0.1, linetype = "dashed", color = "#93c54b") +
#     geom_hline(yintercept = 0.25, linetype = "dashed", color = "#f47c3c") +
#     labs(
#       title = "Covariate Balance: MatchIt vs couplr",
#       subtitle = "Green line = 0.1 (excellent), Orange line = 0.25 (acceptable)",
#       x = "Variable",
#       y = "|Standardized Difference|",
#       fill = "Method"
#     ) +
#     scale_fill_manual(values = c("MatchIt" = "#29abe0", "couplr" = "#93c54b")) +
#     theme_minimal() +
#     theme(
#       plot.background = element_rect(fill = "transparent", color = NA),
#       panel.background = element_rect(fill = "transparent", color = NA),
#       legend.position = "bottom"
#     )
# }

## ----eval = requireNamespace("optmatch", quietly = TRUE)----------------------
# if (requireNamespace("optmatch", quietly = TRUE)) {
#   library(optmatch)
# 
#   # Create distance matrix
#   dist_mat <- match_on(
#     treated ~ age + education + prior_earnings + employed,
#     data = combined,
#     method = "mahalanobis"
#   )
# 
#   # Optimal pair matching
#   m_opt <- pairmatch(dist_mat, data = combined)
# 
#   n_matched <- sum(!is.na(m_opt)) / 2
#   cat("optmatch (optimal pair matching):\n")
#   cat("  Matched pairs:", n_matched, "\n")
# }

## -----------------------------------------------------------------------------
# couplr with Mahalanobis-like scaling
result_couplr_maha <- match_couples(
  left = treatment,
  right = control,
  vars = vars,
  auto_scale = TRUE,
  scale = "standardize"  # Similar to Mahalanobis diagonal
)

cat("\ncouplr (optimal pair matching):\n")
cat("  Matched pairs:", result_couplr_maha$info$n_matched, "\n")

## ----eval = requireNamespace("optmatch", quietly = TRUE)----------------------
# if (requireNamespace("optmatch", quietly = TRUE)) {
#   # Compare total distances
#   # (Note: Direct comparison is complex due to different distance scaling)
#   cat("\nBoth packages find globally optimal one-to-one assignments.\n")
#   cat("Total distance differences arise from distance metric choices.\n")
# }

## ----eval = requireNamespace("designmatch", quietly = TRUE)-------------------
# if (requireNamespace("designmatch", quietly = TRUE)) {
#   library(designmatch)
# 
#   # Prepare data for designmatch
#   t_ind <- combined$treated
# 
#   # Distance matrix (Mahalanobis)
#   X <- as.matrix(combined[, vars])
#   dist_mat_dm <- distmat(t_ind, X)
# 
#   # Balance constraints: mean differences
#   mom <- list(
#     covs = X,
#     tols = rep(0.1, ncol(X))  # Tolerance for standardized difference
#   )
# 
#   # Solve with balance constraints
#   tryCatch({
#     m_dm <- bmatch(
#       t_ind = t_ind,
#       dist_mat = dist_mat_dm,
#       mom = mom,
#       solver = list(name = "glpk", approximate = 1)
#     )
# 
#     cat("designmatch (balance-constrained matching):\n")
#     cat("  Matched pairs:", sum(m_dm$t_id > 0), "\n")
#   }, error = function(e) {
#     cat("designmatch: Balance constraints may be infeasible\n")
#     cat("  Try relaxing tolerances or reducing constraint count\n")
#   })
# }

## -----------------------------------------------------------------------------
# couplr: Optimize distance, then check balance
result_couplr_dm <- match_couples(
  left = treatment,
  right = control,
  vars = vars,
  auto_scale = TRUE
  # No caliper - let algorithm find optimal matches
)

balance_dm <- balance_diagnostics(result_couplr_dm, treatment, control, vars)

cat("\ncouplr (distance-minimizing):\n")
cat("  Matched pairs:", result_couplr_dm$info$n_matched, "\n")
cat("  Mean |std diff|:", round(balance_dm$overall$mean_abs_std_diff, 4), "\n")
cat("  Max |std diff|:", round(balance_dm$overall$max_abs_std_diff, 4), "\n")

## ----eval = requireNamespace("Matching", quietly = TRUE)----------------------
# if (requireNamespace("Matching", quietly = TRUE)) {
#   library(Matching)
# 
#   # Genetic matching (finds optimal weights)
#   X <- as.matrix(combined[, vars])
# 
#   set.seed(123)
#   m_gen <- Match(
#     Tr = combined$treated,
#     X = X,
#     M = 1,  # 1:1 matching
#     estimand = "ATT",
#     replace = FALSE
#   )
# 
#   cat("Matching package (multivariate matching):\n")
#   cat("  Matched pairs:", length(m_gen$index.treated), "\n")
# }

## ----eval = requireNamespace("Matching", quietly = TRUE)----------------------
# if (requireNamespace("Matching", quietly = TRUE)) {
#   # Check balance from Matching package
#   mb <- MatchBalance(
#     treated ~ age + education + prior_earnings + employed,
#     data = combined,
#     match.out = m_gen,
#     nboots = 0
#   )
# }

## ----eval = FALSE-------------------------------------------------------------
# # Stage 1: couplr for initial matching
# matched <- match_couples(
#   left = treatment_data,
#   right = control_data,
#   vars = covariates,
#   auto_scale = TRUE
# )
# 
# # Stage 2: Check balance
# balance <- balance_diagnostics(matched, treatment_data, control_data, covariates)
# 
# # Stage 3: If balance insufficient, consider alternatives
# if (balance$overall$max_abs_std_diff > 0.25) {
#   # Try MatchIt with propensity scores
#   library(MatchIt)
#   combined <- bind_rows(treatment_data, control_data)
#   m_ps <- matchit(treated ~ ., data = combined, method = "full")
# }
# 
# # Stage 4: Analysis on matched data
# matched_data <- join_matched(matched, treatment_data, control_data)
# model <- lm(outcome ~ treatment, data = matched_data)

## ----lalonde-style------------------------------------------------------------
set.seed(1986)

# NSW treatment group (randomized) - smaller sample for CRAN
nsw_treat <- tibble(
  id = 1:100,
  age = pmax(17, rnorm(100, 25, 7)),
  education = pmax(0, pmin(16, rnorm(100, 10, 2))),
  black = rbinom(100, 1, 0.84),
  hispanic = rbinom(100, 1, 0.06),
  married = rbinom(100, 1, 0.19),
  nodegree = rbinom(100, 1, 0.71),
  re74 = pmax(0, rnorm(100, 2100, 5000)),
  re75 = pmax(0, rnorm(100, 1500, 3500)),
  group = "treatment"
)

# CPS comparison group (observational - very different!)
# Reduced from 15,815 to 500 for CRAN timing compliance
cps_control <- tibble(
  id = 101:600,
  age = pmax(17, rnorm(500, 33, 11)),
  education = pmax(0, pmin(16, rnorm(500, 12, 3))),
  black = rbinom(500, 1, 0.07),
  hispanic = rbinom(500, 1, 0.07),
  married = rbinom(500, 1, 0.71),
  nodegree = rbinom(500, 1, 0.30),
  re74 = pmax(0, rnorm(500, 14000, 9000)),
  re75 = pmax(0, rnorm(500, 13500, 9000)),
  group = "control"
)

cat("NSW treatment:", nrow(nsw_treat), "individuals\n")
cat("CPS control:", nrow(cps_control), "individuals\n")
cat("(Note: Real CPS has ~16,000 controls; reduced here for vignette timing)\n")

# Baseline imbalance is severe
vars_lalonde <- c("age", "education", "black", "hispanic", "married",
                  "nodegree", "re74", "re75")
cat("\nBaseline standardized differences:\n")
for (v in vars_lalonde) {
  t_mean <- mean(nsw_treat[[v]])
  c_mean <- mean(cps_control[[v]])
  pooled_sd <- sqrt((var(nsw_treat[[v]]) + var(cps_control[[v]])) / 2)
  std_diff <- (t_mean - c_mean) / pooled_sd
  cat(sprintf("  %s: %.2f\n", v, std_diff))
}

## ----lalonde-matching---------------------------------------------------------
# Greedy matching (fast for large control pools)
result_lalonde <- greedy_couples(
  left = nsw_treat,
  right = cps_control,
  vars = vars_lalonde,
  strategy = "pq",       # Priority queue - efficient for large pools
  auto_scale = TRUE,
  scale = "robust"
)

cat("Matched", result_lalonde$info$n_matched, "of", nrow(nsw_treat), "treatment units\n")
cat("Mean distance:", round(mean(result_lalonde$pairs$distance), 4), "\n")

## ----lalonde-balance, fig.width=9, fig.height=5, fig.alt="Balance plot for Lalonde-style matching showing improvement in standardized differences"----
balance_lalonde <- balance_diagnostics(
  result_lalonde, nsw_treat, cps_control, vars_lalonde
)

# Before/after comparison
before_df <- tibble(
  variable = vars_lalonde,
  std_diff = sapply(vars_lalonde, function(v) {
    t_mean <- mean(nsw_treat[[v]])
    c_mean <- mean(cps_control[[v]])
    pooled_sd <- sqrt((var(nsw_treat[[v]]) + var(cps_control[[v]])) / 2)
    (t_mean - c_mean) / pooled_sd
  }),
  stage = "Before"
)

after_df <- balance_lalonde$var_stats %>%
  select(variable, std_diff) %>%
  mutate(stage = "After")

balance_plot_df <- bind_rows(before_df, after_df) %>%
  mutate(stage = factor(stage, levels = c("Before", "After")))

ggplot(balance_plot_df, aes(x = reorder(variable, abs(std_diff)),
                             y = std_diff, fill = stage)) +
  geom_col(position = "dodge") +
  geom_hline(yintercept = c(-0.1, 0.1), linetype = "dashed", color = "#93c54b") +
  geom_hline(yintercept = c(-0.25, 0.25), linetype = "dashed", color = "#f47c3c") +
  coord_flip() +
  labs(
    title = "Covariate Balance: Before vs After Matching",
    subtitle = "Lalonde-style job training evaluation",
    x = "",
    y = "Standardized Difference",
    fill = ""
  ) +
  scale_fill_manual(values = c("Before" = "#d9534f", "After" = "#93c54b")) +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    plot.title = element_text(face = "bold")
  )

## ----lalonde-summary----------------------------------------------------------
cat("Balance summary:\n")
cat("  Mean |std diff| before:",
    round(mean(abs(before_df$std_diff)), 3), "\n")
cat("  Mean |std diff| after:",
    round(balance_lalonde$overall$mean_abs_std_diff, 3), "\n")
cat("  Max |std diff| after:",
    round(balance_lalonde$overall$max_abs_std_diff, 3), "\n")

if (balance_lalonde$overall$max_abs_std_diff < 0.25) {
  cat("\n✓ All variables within acceptable balance threshold (0.25)\n")
} else {
  cat("\n⚠ Some variables exceed 0.25 threshold - consider calipers or blocking\n")
}
Any scripts or data that you put into this service are public.
couplr documentation built on Jan. 20, 2026, 5:07 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
couplr
Optimal Pairing and Matching via Linear Assignment

inst/doc/comparison.R
In couplr: Optimal Pairing and Matching via Linear Assignment

Try the couplr package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

couplr Optimal Pairing and Matching via Linear Assignment

inst/doc/comparison.R In couplr: Optimal Pairing and Matching via Linear Assignment

Try the couplr package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

couplr
Optimal Pairing and Matching via Linear Assignment

inst/doc/comparison.R
In couplr: Optimal Pairing and Matching via Linear Assignment