Nothing
## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>",
fig.width = 7,
fig.height = 5
)
library(couplr)
library(dplyr)
## ----first-match--------------------------------------------------------------
library(couplr)
library(dplyr)
# Create example data: treatment and control groups
set.seed(123)
treatment <- tibble(
id = 1:50,
age = rnorm(50, mean = 45, sd = 10),
income = rnorm(50, mean = 55000, sd = 12000)
)
control <- tibble(
id = 1:80,
age = rnorm(80, mean = 50, sd = 12),
income = rnorm(80, mean = 48000, sd = 15000)
)
# Match on age and income
result <- match_couples(
left = treatment,
right = control,
vars = c("age", "income"),
auto_scale = TRUE
)
# View matched pairs
head(result$pairs)
## ----output-explained---------------------------------------------------------
# Quick overview with summary()
summary(result)
# Or access specific info
result$info$n_matched
## ----scaling-demo-------------------------------------------------------------
# BAD: Without scaling, income dominates
result_unscaled <- match_couples(
treatment, control,
vars = c("age", "income"),
auto_scale = FALSE
)
# GOOD: With scaling, both variables contribute equally
result_scaled <- match_couples(
treatment, control,
vars = c("age", "income"),
auto_scale = TRUE
)
# Compare mean distances
cat("Unscaled mean distance:", round(mean(result_unscaled$pairs$distance), 1), "\n")
cat("Scaled mean distance:", round(mean(result_scaled$pairs$distance), 3), "\n")
## ----balance-check------------------------------------------------------------
# Get the matched observations
matched_treatment <- treatment[result$pairs$left_id, ]
matched_control <- control[result$pairs$right_id, ]
# Compare means before and after matching
cat("BEFORE matching:\n")
cat(" Age difference:", round(mean(treatment$age) - mean(control$age), 1), "years\n")
cat(" Income difference: $", round(mean(treatment$income) - mean(control$income), 0), "\n\n")
cat("AFTER matching:\n")
cat(" Age difference:", round(mean(matched_treatment$age) - mean(matched_control$age), 1), "years\n")
cat(" Income difference: $", round(mean(matched_treatment$income) - mean(matched_control$income), 0), "\n")
## ----plot-result, fig.width=6, fig.height=4, fig.alt="Histogram showing distribution of match distances, with most matches having low distances near zero"----
plot(result)
## ----greedy-example-----------------------------------------------------------
# Create larger datasets
set.seed(456)
large_treatment <- tibble(
id = 1:2000,
age = rnorm(2000, 45, 10),
income = rnorm(2000, 55000, 12000)
)
large_control <- tibble(
id = 1:3000,
age = rnorm(3000, 50, 12),
income = rnorm(3000, 48000, 15000)
)
# Fast greedy matching
result_greedy <- greedy_couples(
large_treatment, large_control,
vars = c("age", "income"),
auto_scale = TRUE,
strategy = "row_best" # fastest strategy
)
cat("Matched", result_greedy$info$n_matched, "pairs\n")
cat("Mean distance:", round(mean(result_greedy$pairs$distance), 3), "\n")
## ----caliper-example----------------------------------------------------------
# Allow any match
result_loose <- match_couples(
treatment, control,
vars = c("age", "income"),
auto_scale = TRUE
)
# Only allow close matches
result_strict <- match_couples(
treatment, control,
vars = c("age", "income"),
auto_scale = TRUE,
max_distance = 0.5 # reject pairs more different than this
)
cat("Without caliper:", result_loose$info$n_matched, "pairs\n")
cat("With caliper:", result_strict$info$n_matched, "pairs\n")
## ----blocking-example---------------------------------------------------------
# Data from multiple hospital sites
set.seed(321)
treated <- tibble(
id = 1:60,
site = rep(c("Hospital A", "Hospital B", "Hospital C"), each = 20),
age = rnorm(60, 55, 10),
severity = rnorm(60, 5, 2)
)
controls <- tibble(
id = 1:90,
site = rep(c("Hospital A", "Hospital B", "Hospital C"), each = 30),
age = rnorm(90, 52, 12),
severity = rnorm(90, 4.5, 2.5)
)
# Step 1: Create blocks by hospital site
blocks <- matchmaker(
left = treated,
right = controls,
block_type = "group",
block_by = "site"
)
# Step 2: Match within each block
result_blocked <- match_couples(
left = blocks$left,
right = blocks$right,
vars = c("age", "severity"),
block_id = "block_id",
auto_scale = TRUE
)
# Verify: matches stay within their block
result_blocked$pairs |> count(block_id)
## ----complete-example---------------------------------------------------------
# 1. Prepare your data
set.seed(789)
patients_treated <- tibble(
patient_id = paste0("T", 1:100),
age = rnorm(100, 62, 8),
bmi = rnorm(100, 28, 4),
smoker = sample(0:1, 100, replace = TRUE, prob = c(0.6, 0.4))
)
patients_control <- tibble(
patient_id = paste0("C", 1:200),
age = rnorm(200, 58, 10),
bmi = rnorm(200, 26, 5),
smoker = sample(0:1, 200, replace = TRUE, prob = c(0.7, 0.3))
)
# 2. Match on clinical variables
matched <- match_couples(
left = patients_treated,
right = patients_control,
vars = c("age", "bmi", "smoker"),
auto_scale = TRUE
)
# 3. Check how many matched
cat("Treated patients:", nrow(patients_treated), "\n")
cat("Successfully matched:", matched$info$n_matched, "\n")
cat("Match rate:", round(100 * matched$info$n_matched / nrow(patients_treated), 1), "%\n")
# 4. Extract matched samples for analysis
treated_matched <- patients_treated[matched$pairs$left_id, ]
control_matched <- patients_control[matched$pairs$right_id, ]
# 5. Verify balance
cat("\nBalance check (difference in means):\n")
cat(" Age:", round(mean(treated_matched$age) - mean(control_matched$age), 2), "\n")
cat(" BMI:", round(mean(treated_matched$bmi) - mean(control_matched$bmi), 2), "\n")
cat(" Smoker %:", round(100*(mean(treated_matched$smoker) - mean(control_matched$smoker)), 1), "\n")
## ----lap-solve-basic----------------------------------------------------------
# Cost matrix: 3 workers × 3 tasks
cost <- matrix(c(
4, 2, 5,
3, 3, 6,
7, 5, 4
), nrow = 3, byrow = TRUE)
result <- lap_solve(cost)
print(result)
## ----forbidden----------------------------------------------------------------
cost_forbidden <- matrix(c(
4, 2, NA, # Row 1 cannot go to column 3
Inf, 3, 6, # Row 2 cannot go to column 1
7, 5, 4
), nrow = 3, byrow = TRUE)
lap_solve(cost_forbidden)
## ----maximize-----------------------------------------------------------------
preferences <- matrix(c(
8, 5, 3,
4, 7, 6,
2, 4, 9
), nrow = 3, byrow = TRUE)
lap_solve(preferences, maximize = TRUE)
## ----grouped-lap--------------------------------------------------------------
# Weekly nurse-shift scheduling: solve each day separately
schedule <- tibble(
day = rep(c("Mon", "Tue", "Wed"), each = 9),
nurse = rep(rep(1:3, each = 3), 3),
shift = rep(1:3, 9),
cost = c(4,2,5, 3,3,6, 7,5,4, # Monday costs
5,3,4, 2,4,5, 6,4,3, # Tuesday costs
3,4,5, 4,2,6, 5,5,4) # Wednesday costs
)
# Solve all three days at once
schedule |>
group_by(day) |>
lap_solve(nurse, shift, cost)
## ----kbest--------------------------------------------------------------------
cost <- matrix(c(1, 2, 3, 4, 3, 2, 5, 4, 1), nrow = 3, byrow = TRUE)
kbest <- lap_solve_kbest(cost, k = 3)
print(kbest)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.