Nothing
# ==============================================================================
# Tests for matching layer
# ==============================================================================
test_that("matchmaker creates blocks correctly", {
left <- data.frame(
id = 1:10,
region = rep(c("A", "B"), each = 5),
x = rnorm(10)
)
right <- data.frame(
id = 11:20,
region = rep(c("A", "B"), each = 5),
x = rnorm(10)
)
blocks <- matchmaker(left, right, block_type = "group", block_by = "region")
expect_s3_class(blocks, "matchmaker_result")
expect_true("block_id" %in% names(blocks$left))
expect_true("block_id" %in% names(blocks$right))
expect_equal(nrow(blocks$block_summary), 2)
expect_equal(blocks$block_summary$n_left, c(5, 5))
expect_equal(blocks$block_summary$n_right, c(5, 5))
})
test_that("matchmaker filters small blocks", {
left <- data.frame(
id = 1:8,
region = c(rep("A", 5), rep("B", 2), "C"),
x = rnorm(8)
)
right <- data.frame(
id = 11:18,
region = c(rep("A", 5), rep("B", 2), "C"),
x = rnorm(8)
)
blocks <- matchmaker(
left, right,
block_type = "group",
block_by = "region",
min_left = 3,
min_right = 3
)
expect_equal(nrow(blocks$block_summary), 1)
expect_equal(blocks$block_summary$block_id, "A")
expect_equal(blocks$info$n_blocks_dropped, 2)
})
test_that("match_couples works with simple input", {
set.seed(123)
left <- data.frame(id = 1:5, x = c(1, 2, 3, 4, 5))
right <- data.frame(id = 6:10, x = c(1.1, 2.1, 3.1, 4.1, 5.1))
result <- match_couples(left, right, vars = "x")
expect_s3_class(result, "matching_result")
expect_equal(nrow(result$pairs), 5)
expect_true(all(result$pairs$distance < 0.2))
})
test_that("match_couples respects max_distance", {
left <- data.frame(id = 1:3, x = c(1, 2, 3))
right <- data.frame(id = 4:6, x = c(1.1, 5, 3.1))
result <- match_couples(left, right, vars = "x", max_distance = 0.5)
expect_equal(nrow(result$pairs), 2)
expect_true(all(result$pairs$distance <= 0.5))
})
test_that("match_couples respects calipers", {
left <- data.frame(
id = 1:3,
x = c(1, 2, 3),
y = c(10, 20, 30)
)
right <- data.frame(
id = 4:6,
x = c(1.1, 2.1, 3.1),
y = c(10.5, 25, 30.5)
)
result <- match_couples(
left, right,
vars = c("x", "y"),
calipers = list(y = 2)
)
# Should exclude middle pair where y differs by 5
expect_equal(nrow(result$pairs), 2)
})
test_that("match_couples works with blocking", {
left <- data.frame(
id = 1:6,
block_id = rep(c("A", "B"), each = 3),
x = c(1, 2, 3, 10, 20, 30)
)
right <- data.frame(
id = 7:12,
block_id = rep(c("A", "B"), each = 3),
x = c(1.1, 2.1, 3.1, 10.1, 20.1, 30.1)
)
result <- match_couples(left, right, vars = "x")
expect_equal(nrow(result$pairs), 6)
expect_true("block_id" %in% names(result$pairs))
expect_equal(sum(result$pairs$block_id == "A"), 3)
expect_equal(sum(result$pairs$block_id == "B"), 3)
})
test_that("match_couples handles rectangular inputs", {
left <- data.frame(id = 1:3, x = c(1, 2, 3))
right <- data.frame(id = 4:8, x = c(1.1, 2.1, 3.1, 4, 5))
result <- match_couples(left, right, vars = "x")
expect_equal(nrow(result$pairs), 3)
expect_equal(length(result$unmatched$left), 0)
expect_equal(length(result$unmatched$right), 2)
})
test_that("validate_matching_inputs catches errors", {
expect_error(
validate_matching_inputs(list(), data.frame(x = 1)),
"left must be a data frame"
)
expect_error(
validate_matching_inputs(data.frame(), data.frame(x = 1)),
"left must have at least one row"
)
expect_error(
validate_matching_inputs(
data.frame(x = 1),
data.frame(y = 1),
vars = "x"
),
"right.*missing.*x"
)
})
test_that("distance metrics work correctly", {
left <- matrix(c(0, 0), ncol = 2)
right <- matrix(c(3, 4), ncol = 2)
# Euclidean: sqrt(3^2 + 4^2) = 5
d_euclidean <- compute_distance_matrix(left, right, "euclidean")
expect_equal(d_euclidean[1, 1], 5)
# Manhattan: |3| + |4| = 7
d_manhattan <- compute_distance_matrix(left, right, "manhattan")
expect_equal(d_manhattan[1, 1], 7)
# Squared Euclidean: 3^2 + 4^2 = 25
d_sq <- compute_distance_matrix(left, right, "squared_euclidean")
expect_equal(d_sq[1, 1], 25)
})
test_that("scaling works correctly", {
left <- matrix(c(1, 2, 3, 4), ncol = 2)
right <- matrix(c(5, 6, 7, 8), ncol = 2)
scaled <- apply_scaling(left, right, "standardize")
# Check that combined data has mean ~ 0, sd ~ 1
combined_scaled <- rbind(scaled$left, scaled$right)
expect_equal(colMeans(combined_scaled), c(0, 0), tolerance = 1e-10)
expect_equal(apply(combined_scaled, 2, sd), c(1, 1), tolerance = 1e-10)
})
# ==============================================================================
# Tests for greedy_couples()
# ==============================================================================
test_that("greedy_couples works with simple input", {
set.seed(123)
left <- data.frame(id = 1:5, x = c(1, 2, 3, 4, 5))
right <- data.frame(id = 6:10, x = c(1.1, 2.1, 3.1, 4.1, 5.1))
result <- greedy_couples(left, right, vars = "x")
expect_s3_class(result, "matching_result")
expect_equal(result$info$method, "greedy")
expect_equal(nrow(result$pairs), 5)
expect_true(all(result$pairs$distance < 0.2))
})
test_that("greedy_couples respects constraints", {
left <- data.frame(id = 1:3, x = c(1, 2, 3))
right <- data.frame(id = 4:6, x = c(1.1, 5, 3.1))
result <- greedy_couples(left, right, vars = "x", max_distance = 0.5)
expect_equal(nrow(result$pairs), 2)
expect_true(all(result$pairs$distance <= 0.5))
})
test_that("greedy_couples works with blocking", {
left <- data.frame(
id = 1:6,
block_id = rep(c("A", "B"), each = 3),
x = c(1, 2, 3, 10, 20, 30)
)
right <- data.frame(
id = 7:12,
block_id = rep(c("A", "B"), each = 3),
x = c(1.1, 2.1, 3.1, 10.1, 20.1, 30.1)
)
result <- greedy_couples(left, right, vars = "x")
expect_equal(nrow(result$pairs), 6)
expect_true("block_id" %in% names(result$pairs))
expect_equal(result$info$n_blocks, 2)
})
test_that("greedy strategies produce valid matchings", {
set.seed(456)
left <- data.frame(id = 1:20, x = rnorm(20))
right <- data.frame(id = 21:40, x = rnorm(20))
result_row <- greedy_couples(left, right, vars = "x", strategy = "row_best")
result_sorted <- greedy_couples(left, right, vars = "x", strategy = "sorted")
result_pq <- greedy_couples(left, right, vars = "x", strategy = "pq")
# All should produce valid matchings
expect_equal(nrow(result_row$pairs), 20)
expect_equal(nrow(result_sorted$pairs), 20)
expect_equal(nrow(result_pq$pairs), 20)
# No duplicate assignments
expect_equal(length(unique(result_row$pairs$left_id)), 20)
expect_equal(length(unique(result_row$pairs$right_id)), 20)
})
test_that("greedy is faster than optimal (smoke test)", {
skip_if_not(requireNamespace("microbenchmark", quietly = TRUE))
set.seed(789)
left <- data.frame(id = 1:50, x = rnorm(50), y = rnorm(50))
right <- data.frame(id = 51:100, x = rnorm(50), y = rnorm(50))
# Just test that both complete successfully
expect_no_error({
result_opt <- match_couples(left, right, vars = c("x", "y"))
result_greedy <- greedy_couples(left, right, vars = c("x", "y"))
})
# Greedy should produce valid but possibly suboptimal matching
expect_gte(result_greedy$info$total_distance, result_opt$info$total_distance)
})
# ==============================================================================
# Tests for preprocessing functions
# ==============================================================================
test_that("check_variable_health detects constant variables", {
left <- data.frame(
x = rep(5, 10),
y = rnorm(10)
)
right <- data.frame(
x = rep(5, 10),
y = rnorm(10)
)
health <- check_variable_health(left, right, c("x", "y"))
expect_s3_class(health, "variable_health")
expect_true("x" %in% health$exclude_vars)
expect_false("y" %in% health$exclude_vars)
expect_true(any(sapply(health$issues, function(i) i$type == "constant")))
})
test_that("check_variable_health detects all-NA variables", {
left <- data.frame(
x = rep(NA_real_, 10),
y = rnorm(10)
)
right <- data.frame(
x = rep(NA_real_, 10),
y = rnorm(10)
)
health <- check_variable_health(left, right, c("x", "y"))
expect_true("x" %in% health$exclude_vars)
expect_true(any(sapply(health$issues, function(i) i$type == "all_na")))
})
test_that("check_variable_health detects high missingness", {
left <- data.frame(
x = c(rep(NA_real_, 7), rnorm(3))
)
right <- data.frame(
x = c(rep(NA_real_, 7), rnorm(3))
)
health <- check_variable_health(left, right, "x")
expect_true(any(sapply(health$issues, function(i) i$type == "high_missingness")))
})
test_that("check_variable_health detects skewness", {
set.seed(42)
# Create highly skewed data
skewed_data <- c(rnorm(45, mean = 0, sd = 1), rnorm(5, mean = 20, sd = 1))
left <- data.frame(x = skewed_data[1:25])
right <- data.frame(x = skewed_data[26:50])
health <- check_variable_health(left, right, "x")
# Should detect skewness
expect_true(any(sapply(health$issues, function(i) i$type == "skewed")))
})
test_that("suggest_scaling returns appropriate methods", {
# Similar scales - should suggest standardize or none
left <- data.frame(x = rnorm(50, 10, 2), y = rnorm(50, 20, 3))
right <- data.frame(x = rnorm(50, 10, 2), y = rnorm(50, 20, 3))
method <- suggest_scaling(left, right, c("x", "y"))
expect_true(method %in% c("standardize", "robust", "none"))
# Very different scales - should suggest standardize
left2 <- data.frame(x = rnorm(50, 10, 2), y = rnorm(50, 1000, 200))
right2 <- data.frame(x = rnorm(50, 10, 2), y = rnorm(50, 1000, 200))
method2 <- suggest_scaling(left2, right2, c("x", "y"))
expect_equal(method2, "standardize")
})
test_that("preprocess_matching_vars excludes problematic variables", {
left <- data.frame(
const = rep(5, 10),
good = rnorm(10),
all_na = rep(NA_real_, 10)
)
right <- data.frame(
const = rep(5, 10),
good = rnorm(10),
all_na = rep(NA_real_, 10)
)
preproc <- preprocess_matching_vars(
left, right,
vars = c("const", "good", "all_na"),
auto_scale = TRUE,
remove_problematic = TRUE,
verbose = FALSE
)
expect_s3_class(preproc, "preprocessing_result")
expect_equal(preproc$vars, "good")
expect_setequal(preproc$excluded_vars, c("const", "all_na"))
})
test_that("preprocess_matching_vars suggests scaling method", {
left <- data.frame(x = rnorm(50, 100, 10), y = rnorm(50, 0.5, 0.1))
right <- data.frame(x = rnorm(50, 100, 10), y = rnorm(50, 0.5, 0.1))
preproc <- preprocess_matching_vars(
left, right,
vars = c("x", "y"),
auto_scale = TRUE,
scale_method = "auto",
verbose = FALSE
)
expect_true(preproc$scaling_method %in% c("standardize", "robust", "range"))
})
test_that("match_couples with auto_scale works correctly", {
set.seed(123)
left <- data.frame(
id = 1:10,
const = rep(1, 10), # Should be excluded
x = rnorm(10, 100, 10),
y = rnorm(10, 0.5, 0.1)
)
right <- data.frame(
id = 11:20,
const = rep(1, 10), # Should be excluded
x = rnorm(10, 100, 10),
y = rnorm(10, 0.5, 0.1)
)
# Suppress warnings about excluded variables
result <- suppressWarnings({
match_couples(
left, right,
vars = c("const", "x", "y"),
auto_scale = TRUE
)
})
expect_s3_class(result, "matching_result")
expect_equal(nrow(result$pairs), 10)
})
test_that("greedy_couples with auto_scale works correctly", {
set.seed(456)
left <- data.frame(
id = 1:10,
x = rnorm(10, 100, 10),
y = rnorm(10, 0.5, 0.1)
)
right <- data.frame(
id = 11:20,
x = rnorm(10, 100, 10),
y = rnorm(10, 0.5, 0.1)
)
result <- greedy_couples(
left, right,
vars = c("x", "y"),
auto_scale = TRUE
)
expect_s3_class(result, "matching_result")
expect_equal(result$info$method, "greedy")
})
# ==============================================================================
# Tests for balance diagnostics
# ==============================================================================
test_that("standardized_difference calculates correctly", {
x1 <- c(10, 12, 14, 16, 18)
x2 <- c(11, 13, 15, 17, 19)
std_diff <- standardized_difference(x1, x2)
# Should be close to -1 / sqrt(var) for shift of 1
expect_true(is.numeric(std_diff))
expect_false(is.na(std_diff))
expect_true(abs(std_diff) < 1) # Should be small for similar distributions
})
test_that("standardized_difference handles edge cases", {
# Empty vectors
expect_true(is.na(standardized_difference(numeric(0), c(1, 2, 3))))
# Constant values (SD = 0)
expect_equal(standardized_difference(c(5, 5, 5), c(5, 5, 5)), 0)
# With NAs
x1 <- c(1, 2, 3, NA, 5)
x2 <- c(2, 3, 4, 5, NA)
std_diff <- standardized_difference(x1, x2)
expect_false(is.na(std_diff))
})
test_that("balance_diagnostics works with simple matching", {
set.seed(123)
left <- data.frame(
id = 1:20,
age = rnorm(20, 50, 10),
income = rnorm(20, 50000, 10000)
)
right <- data.frame(
id = 21:50,
age = rnorm(30, 50, 10),
income = rnorm(30, 50000, 10000)
)
result <- match_couples(left, right, vars = c("age", "income"))
balance <- balance_diagnostics(result, left, right, vars = c("age", "income"))
expect_s3_class(balance, "balance_diagnostics")
expect_equal(nrow(balance$var_stats), 2)
expect_true("age" %in% balance$var_stats$variable)
expect_true("income" %in% balance$var_stats$variable)
expect_equal(balance$n_matched, 20)
expect_true(is.numeric(balance$overall$mean_abs_std_diff))
})
test_that("balance_diagnostics computes correct statistics", {
set.seed(456)
# Create data with known properties
left <- data.frame(
id = 1:30,
x = rep(10, 30) # Constant value
)
right <- data.frame(
id = 31:60,
x = rep(10, 30) # Same constant value
)
result <- suppressWarnings(match_couples(left, right, vars = "x"))
balance <- balance_diagnostics(result, left, right, vars = "x")
# With identical values, standardized difference should be 0
expect_equal(balance$var_stats$std_diff[1], 0)
expect_equal(balance$var_stats$mean_diff[1], 0)
})
test_that("balance_diagnostics detects imbalance", {
set.seed(789)
# Create data with deliberate imbalance
left <- data.frame(
id = 1:20,
age = rnorm(20, 30, 5) # Younger group
)
right <- data.frame(
id = 21:50,
age = rnorm(30, 50, 5) # Older group
)
result <- match_couples(left, right, vars = "age")
balance <- balance_diagnostics(result, left, right, vars = "age")
# Should detect large standardized difference
expect_true(abs(balance$var_stats$std_diff[1]) > 0.1)
expect_true(balance$overall$mean_abs_std_diff > 0.1)
})
test_that("balance_diagnostics works with blocking", {
set.seed(321)
left <- data.frame(
id = 1:40,
group = rep(c("A", "B"), each = 20),
age = rnorm(40, 50, 10)
)
right <- data.frame(
id = 41:100,
group = rep(c("A", "B"), each = 30),
age = rnorm(60, 50, 10)
)
blocks <- matchmaker(left, right, block_type = "group", block_by = "group")
result <- match_couples(blocks$left, blocks$right, vars = "age")
balance <- balance_diagnostics(result, blocks$left, blocks$right, vars = "age")
expect_true(balance$has_blocks)
expect_false(is.null(balance$block_stats))
expect_equal(nrow(balance$block_stats), 2) # Two blocks
expect_true("quality" %in% names(balance$block_stats))
})
test_that("balance_diagnostics handles unmatched units", {
set.seed(654)
# Create unbalanced data (more right than left)
left <- data.frame(
id = 1:10,
x = rnorm(10, 0, 1)
)
right <- data.frame(
id = 11:40,
x = rnorm(30, 0, 1)
)
result <- match_couples(left, right, vars = "x")
balance <- balance_diagnostics(result, left, right, vars = "x")
expect_equal(balance$n_matched, 10)
expect_equal(balance$n_unmatched_left, 0)
expect_equal(balance$n_unmatched_right, 20)
})
test_that("balance_table formats output correctly", {
set.seed(987)
left <- data.frame(
id = 1:20,
age = rnorm(20, 50, 10),
income = rnorm(20, 50000, 10000)
)
right <- data.frame(
id = 21:50,
age = rnorm(30, 50, 10),
income = rnorm(30, 50000, 10000)
)
result <- match_couples(left, right, vars = c("age", "income"))
balance <- balance_diagnostics(result, left, right, vars = c("age", "income"))
tbl <- balance_table(balance)
expect_s3_class(tbl, "tbl_df")
expect_equal(nrow(tbl), 2)
expect_true("Variable" %in% names(tbl))
expect_true("Std Diff" %in% names(tbl))
expect_true("Mean Diff" %in% names(tbl))
})
test_that("balance_diagnostics print method works", {
set.seed(111)
left <- data.frame(
id = 1:15,
x = rnorm(15, 0, 1)
)
right <- data.frame(
id = 16:40,
x = rnorm(25, 0, 1)
)
result <- match_couples(left, right, vars = "x")
balance <- balance_diagnostics(result, left, right, vars = "x")
# Should print without error
expect_output(print(balance), "Balance Diagnostics")
expect_output(print(balance), "Matched pairs")
expect_output(print(balance), "Overall Balance")
})
test_that("balance_diagnostics validates inputs", {
left <- data.frame(id = 1:10, x = rnorm(10))
right <- data.frame(id = 11:20, x = rnorm(10))
result <- match_couples(left, right, vars = "x")
# Wrong result type
expect_error(
balance_diagnostics("not a result", left, right, vars = "x"),
"matching_result"
)
# Missing ID column
expect_error(
balance_diagnostics(result, left, right, vars = "x", left_id = "missing"),
"not found"
)
# Missing variable
expect_error(
balance_diagnostics(result, left, right, vars = "missing_var"),
"not found"
)
})
# ==============================================================================
# Tests for join_matched()
# ==============================================================================
test_that("join_matched works with basic matching", {
left <- data.frame(
id = 1:5,
treatment = 1,
age = c(25, 30, 35, 40, 45),
income = c(45000, 52000, 48000, 61000, 55000)
)
right <- data.frame(
id = 6:10,
treatment = 0,
age = c(24, 29, 36, 41, 44),
income = c(46000, 51500, 47500, 60000, 54000)
)
result <- match_couples(left, right, vars = c("age", "income"))
joined <- join_matched(result, left, right)
# Check structure
expect_s3_class(joined, "tbl_df")
expect_equal(nrow(joined), nrow(result$pairs))
# Check required columns
expect_true("pair_id" %in% names(joined))
expect_true("left_id" %in% names(joined))
expect_true("right_id" %in% names(joined))
expect_true("distance" %in% names(joined))
# Check suffixes applied
expect_true("treatment_left" %in% names(joined))
expect_true("treatment_right" %in% names(joined))
expect_true("age_left" %in% names(joined))
expect_true("age_right" %in% names(joined))
expect_true("income_left" %in% names(joined))
expect_true("income_right" %in% names(joined))
# Check values (IDs are type-converted to match original data)
expect_type(joined$left_id, typeof(left$id))
expect_type(joined$right_id, typeof(right$id))
expect_equal(nrow(joined), nrow(result$pairs))
expect_equal(joined$distance, result$pairs$distance)
})
test_that("join_matched handles custom suffixes", {
left <- data.frame(
id = 1:3,
treatment = 1,
age = c(25, 30, 35)
)
right <- data.frame(
id = 4:6,
treatment = 0,
age = c(24, 29, 36)
)
result <- match_couples(left, right, vars = "age")
joined <- join_matched(
result, left, right,
suffix = c("_treated", "_control")
)
expect_true("treatment_treated" %in% names(joined))
expect_true("treatment_control" %in% names(joined))
expect_true("age_treated" %in% names(joined))
expect_true("age_control" %in% names(joined))
})
test_that("join_matched handles variable selection", {
left <- data.frame(
id = 1:3,
treatment = 1,
age = c(25, 30, 35),
income = c(45000, 52000, 48000),
extra = c("a", "b", "c")
)
right <- data.frame(
id = 4:6,
treatment = 0,
age = c(24, 29, 36),
income = c(46000, 51500, 47500),
extra = c("x", "y", "z")
)
result <- match_couples(left, right, vars = c("age", "income"))
# Select specific variables
joined <- join_matched(
result, left, right,
left_vars = c("treatment", "age"),
right_vars = "age"
)
expect_true("treatment_left" %in% names(joined))
expect_true("age_left" %in% names(joined))
expect_true("age_right" %in% names(joined))
expect_false("income_left" %in% names(joined))
expect_false("income_right" %in% names(joined))
expect_false("extra_left" %in% names(joined))
expect_false("extra_right" %in% names(joined))
})
test_that("join_matched works with blocking", {
left <- data.frame(
id = 1:10,
region = rep(c("A", "B"), each = 5),
age = c(25, 30, 35, 40, 45, 26, 31, 36, 41, 46)
)
right <- data.frame(
id = 11:20,
region = rep(c("A", "B"), each = 5),
age = c(24, 29, 36, 41, 44, 25, 30, 37, 42, 45)
)
blocks <- matchmaker(left, right, block_type = "group", block_by = "region")
result <- match_couples(blocks$left, blocks$right, vars = "age")
joined <- join_matched(result, blocks$left, blocks$right)
expect_true("block_id" %in% names(joined))
expect_equal(length(unique(joined$block_id)), 2)
})
test_that("join_matched handles include options", {
left <- data.frame(id = 1:3, x = c(1, 2, 3))
right <- data.frame(id = 4:6, x = c(1.1, 2.1, 3.1))
result <- match_couples(left, right, vars = "x")
# Without distance
joined1 <- join_matched(result, left, right, include_distance = FALSE)
expect_false("distance" %in% names(joined1))
# Without pair_id
joined2 <- join_matched(result, left, right, include_pair_id = FALSE)
expect_false("pair_id" %in% names(joined2))
# Without both
joined3 <- join_matched(
result, left, right,
include_distance = FALSE,
include_pair_id = FALSE
)
expect_false("distance" %in% names(joined3))
expect_false("pair_id" %in% names(joined3))
})
test_that("join_matched handles custom ID columns", {
# Create data with custom ID columns
left <- data.frame(
patient_id = paste0("P", 1:3),
age = c(25, 30, 35)
)
right <- data.frame(
control_id = paste0("C", 4:6),
age = c(24, 29, 36)
)
# For this test, we'll use standard "id" column for matching
# but demonstrate custom ID specification in join_matched
left$id <- 1:3
right$id <- 4:6
result <- match_couples(
left, right,
vars = "age"
)
# join_matched can still select custom ID columns from the original data
joined <- join_matched(
result, left, right,
left_vars = c("patient_id", "age"),
right_vars = c("control_id", "age")
)
# Verify structure
expect_true("left_id" %in% names(joined))
expect_true("right_id" %in% names(joined))
expect_true("patient_id_left" %in% names(joined))
expect_true("control_id_right" %in% names(joined))
# Verify patient_id and control_id were joined correctly
expect_equal(joined$patient_id_left, paste0("P", joined$left_id))
expect_equal(joined$control_id_right, paste0("C", joined$right_id))
})
test_that("join_matched validates inputs", {
left <- data.frame(id = 1:3, x = c(1, 2, 3))
right <- data.frame(id = 4:6, x = c(1.1, 2.1, 3.1))
result <- match_couples(left, right, vars = "x")
# Not a matching_result
expect_error(
join_matched(list(), left, right),
"matching_result object"
)
# Not data frames
expect_error(
join_matched(result, as.matrix(left), right),
"data frames"
)
# Missing ID column
expect_error(
join_matched(result, left, right, left_id = "missing"),
"not found in left"
)
expect_error(
join_matched(result, left, right, right_id = "missing"),
"not found in right"
)
# Wrong suffix length
expect_error(
join_matched(result, left, right, suffix = "_only_one"),
"length 2"
)
# Missing variables
expect_error(
join_matched(result, left, right, left_vars = "missing"),
"not found in left"
)
expect_error(
join_matched(result, left, right, right_vars = "missing"),
"not found in right"
)
})
test_that("join_matched handles empty matches", {
left <- data.frame(id = 1:3, x = c(1, 2, 3))
right <- data.frame(id = 4:6, x = c(100, 200, 300))
# With strict caliper, should throw error (no valid pairs)
expect_error(
suppressWarnings(match_couples(left, right, vars = "x", max_distance = 1)),
"No valid pairs"
)
})
test_that("join_matched works with greedy matching", {
left <- data.frame(
id = 1:5,
treatment = 1,
age = c(25, 30, 35, 40, 45)
)
right <- data.frame(
id = 6:10,
treatment = 0,
age = c(24, 29, 36, 41, 44)
)
result <- greedy_couples(left, right, vars = "age", strategy = "sorted")
joined <- join_matched(result, left, right)
expect_s3_class(joined, "tbl_df")
expect_equal(nrow(joined), nrow(result$pairs))
expect_true(all(c("treatment_left", "treatment_right") %in% names(joined)))
})
test_that("augment.matching_result works", {
left <- data.frame(id = 1:3, x = c(1, 2, 3))
right <- data.frame(id = 4:6, x = c(1.1, 2.1, 3.1))
result <- match_couples(left, right, vars = "x")
# Call augment with explicit namespace
augmented <- couplr::augment(result, left, right)
# Should be identical to join_matched with defaults
joined <- join_matched(result, left, right)
expect_equal(augmented, joined)
# Can pass additional arguments
augmented2 <- couplr::augment(
result, left, right,
suffix = c("_a", "_b"),
include_distance = FALSE
)
expect_true("x_a" %in% names(augmented2))
expect_true("x_b" %in% names(augmented2))
expect_false("distance" %in% names(augmented2))
})
test_that("join_matched column order is sensible", {
left <- data.frame(
id = 1:3,
treatment = 1,
age = c(25, 30, 35)
)
right <- data.frame(
id = 4:6,
treatment = 0,
age = c(24, 29, 36)
)
result <- match_couples(left, right, vars = "age")
joined <- join_matched(result, left, right)
# Check order: pair_id, left_id, right_id, distance, then variables
col_names <- names(joined)
expect_equal(col_names[1], "pair_id")
expect_equal(col_names[2], "left_id")
expect_equal(col_names[3], "right_id")
expect_equal(col_names[4], "distance")
})
# ==============================================================================
# Distance Caching Tests (Step 4)
# ==============================================================================
test_that("compute_distances creates valid distance object", {
left <- data.frame(
id = 1:5,
x = c(1, 2, 3, 4, 5),
y = c(2, 4, 6, 8, 10)
)
right <- data.frame(
id = 6:10,
x = c(1.1, 2.1, 3.1, 4.1, 5.1),
y = c(2.2, 4.2, 6.2, 8.2, 10.2)
)
dist_obj <- compute_distances(left, right, vars = c("x", "y"))
# Check class
expect_s3_class(dist_obj, "distance_object")
expect_true(is_distance_object(dist_obj))
# Check structure
expect_true("cost_matrix" %in% names(dist_obj))
expect_true("left_ids" %in% names(dist_obj))
expect_true("right_ids" %in% names(dist_obj))
expect_true("metadata" %in% names(dist_obj))
expect_true("original_left" %in% names(dist_obj))
expect_true("original_right" %in% names(dist_obj))
# Check dimensions
expect_equal(nrow(dist_obj$cost_matrix), 5)
expect_equal(ncol(dist_obj$cost_matrix), 5)
expect_equal(length(dist_obj$left_ids), 5)
expect_equal(length(dist_obj$right_ids), 5)
# Check metadata
expect_equal(dist_obj$metadata$vars, c("x", "y"))
expect_equal(dist_obj$metadata$distance, "euclidean")
expect_equal(dist_obj$metadata$n_left, 5)
expect_equal(dist_obj$metadata$n_right, 5)
# Check original datasets stored
expect_equal(nrow(dist_obj$original_left), 5)
expect_equal(nrow(dist_obj$original_right), 5)
})
test_that("compute_distances with auto_scale works", {
set.seed(123)
left <- data.frame(
id = 1:20,
age = rnorm(20, mean = 50, sd = 10),
income = rnorm(20, mean = 50000, sd = 15000)
)
right <- data.frame(
id = 21:50,
age = rnorm(30, mean = 52, sd = 10),
income = rnorm(30, mean = 52000, sd = 15000)
)
# Should work with auto_scale
dist_obj <- compute_distances(
left, right,
vars = c("age", "income"),
auto_scale = TRUE
)
# Should have applied auto_scale
expect_true(dist_obj$metadata$auto_scale)
expect_s3_class(dist_obj, "distance_object")
})
test_that("compute_distances with scaling works", {
left <- data.frame(
id = 1:5,
x = c(1, 2, 3, 4, 5),
y = c(10, 20, 30, 40, 50)
)
right <- data.frame(
id = 6:10,
x = c(1.5, 2.5, 3.5, 4.5, 5.5),
y = c(15, 25, 35, 45, 55)
)
dist_standardize <- compute_distances(left, right, vars = c("x", "y"), scale = "standardize")
dist_range <- compute_distances(left, right, vars = c("x", "y"), scale = "range")
dist_none <- compute_distances(left, right, vars = c("x", "y"), scale = FALSE)
# All should create valid distance objects
expect_s3_class(dist_standardize, "distance_object")
expect_s3_class(dist_range, "distance_object")
expect_s3_class(dist_none, "distance_object")
# Distances should differ based on scaling
expect_false(identical(dist_standardize$cost_matrix, dist_range$cost_matrix))
expect_false(identical(dist_standardize$cost_matrix, dist_none$cost_matrix))
# Metadata should reflect scaling choice
expect_equal(dist_standardize$metadata$scale, "standardize")
expect_equal(dist_range$metadata$scale, "range")
expect_equal(dist_none$metadata$scale, FALSE)
})
test_that("compute_distances validates inputs", {
left <- data.frame(id = 1:5, x = 1:5)
right <- data.frame(id = 6:10, x = 6:10)
# Not data frames
expect_error(
compute_distances(list(), right, vars = "x"),
"must be data frames"
)
# Missing ID columns
expect_error(
compute_distances(left, right, vars = "x", left_id = "missing"),
"not found in left dataset"
)
# Missing variables
expect_error(
compute_distances(left, right, vars = c("x", "z")),
"not found in both datasets"
)
# Duplicate IDs
left_dup <- data.frame(id = c(1, 1, 2, 3, 4), x = 1:5)
expect_error(
compute_distances(left_dup, right, vars = "x"),
"Duplicate IDs"
)
})
test_that("match_couples works with distance object", {
left <- data.frame(
id = 1:5,
x = c(1, 2, 3, 4, 5),
y = c(2, 4, 6, 8, 10)
)
right <- data.frame(
id = 6:10,
x = c(1.1, 2.1, 3.1, 4.1, 5.1),
y = c(2.2, 4.2, 6.2, 8.2, 10.2)
)
# Create distance object
dist_obj <- compute_distances(left, right, vars = c("x", "y"), scale = "standardize")
# Match using distance object
result <- match_couples(dist_obj)
# Should return valid matching_result
expect_s3_class(result, "matching_result")
expect_true("pairs" %in% names(result))
expect_true("info" %in% names(result))
# Should have matched all 5 pairs
expect_equal(nrow(result$pairs), 5)
# Pairs should have expected columns
expect_true("left_id" %in% names(result$pairs))
expect_true("right_id" %in% names(result$pairs))
expect_true("distance" %in% names(result$pairs))
})
test_that("greedy_couples works with distance object", {
left <- data.frame(
id = 1:5,
x = c(1, 2, 3, 4, 5)
)
right <- data.frame(
id = 6:10,
x = c(1.5, 2.5, 3.5, 4.5, 5.5)
)
# Create distance object
dist_obj <- compute_distances(left, right, vars = "x")
# Greedy matching using distance object
result <- greedy_couples(dist_obj, strategy = "sorted")
# Should return valid matching_result
expect_s3_class(result, "matching_result")
expect_equal(nrow(result$pairs), 5)
})
test_that("distance object reusable across multiple matches", {
left <- data.frame(
id = 1:10,
x = rnorm(10),
y = rnorm(10)
)
right <- data.frame(
id = 11:30,
x = rnorm(20),
y = rnorm(20)
)
# Compute distances once
dist_obj <- compute_distances(left, right, vars = c("x", "y"), scale = "standardize")
# Use for multiple matching strategies
result1 <- match_couples(dist_obj, max_distance = 0.5)
result2 <- match_couples(dist_obj, max_distance = 1.0)
result3 <- match_couples(dist_obj, max_distance = 2.0)
result4 <- greedy_couples(dist_obj, strategy = "sorted")
# All should be valid
expect_s3_class(result1, "matching_result")
expect_s3_class(result2, "matching_result")
expect_s3_class(result3, "matching_result")
expect_s3_class(result4, "matching_result")
# Different max_distance should give different results
expect_true(nrow(result1$pairs) <= nrow(result2$pairs))
expect_true(nrow(result2$pairs) <= nrow(result3$pairs))
})
test_that("update_constraints creates new distance object", {
left <- data.frame(id = 1:5, x = 1:5)
right <- data.frame(id = 6:10, x = c(1.5, 2.5, 3.5, 4.5, 5.5))
dist_obj <- compute_distances(left, right, vars = "x")
# Update constraints
constrained <- update_constraints(dist_obj, max_distance = 1.0)
# Should be a new object
expect_s3_class(constrained, "distance_object")
# Should have constraint metadata
expect_true("constraints_applied" %in% names(constrained$metadata))
expect_equal(constrained$metadata$constraints_applied$max_distance, 1.0)
# Original should be unchanged
expect_false("constraints_applied" %in% names(dist_obj$metadata))
# Constrained should have more Inf values
n_inf_original <- sum(is.infinite(dist_obj$cost_matrix))
n_inf_constrained <- sum(is.infinite(constrained$cost_matrix))
expect_true(n_inf_constrained >= n_inf_original)
})
test_that("update_constraints validates input", {
left <- data.frame(id = 1:5, x = 1:5)
right <- data.frame(id = 6:10, x = 6:10)
# Not a distance object
expect_error(
update_constraints(list(), max_distance = 1.0),
"must be a distance_object"
)
})
test_that("distance object with blocking stores block info", {
left <- data.frame(
id = 1:10,
block = rep(c("A", "B"), each = 5),
x = rnorm(10)
)
right <- data.frame(
id = 11:20,
block = rep(c("A", "B"), each = 5),
x = rnorm(10)
)
expect_message(
dist_obj <- compute_distances(left, right, vars = "x", block_id = "block"),
"Block information stored"
)
expect_equal(dist_obj$block_id, "block")
})
test_that("print.distance_object works", {
left <- data.frame(id = 1:5, x = 1:5, y = 2:6)
right <- data.frame(id = 6:10, x = 1:5, y = 2:6)
dist_obj <- compute_distances(left, right, vars = c("x", "y"), scale = "standardize")
# Should not error
expect_output(print(dist_obj), "Distance Object")
expect_output(print(dist_obj), "Left units")
expect_output(print(dist_obj), "Variables")
expect_output(print(dist_obj), "Distance Summary")
})
test_that("summary.distance_object works", {
left <- data.frame(id = 1:5, x = 1:5)
right <- data.frame(id = 6:10, x = 6:10)
dist_obj <- compute_distances(left, right, vars = "x")
# Should not error
expect_output(summary(dist_obj), "Distance Object Summary")
expect_output(summary(dist_obj), "Quantiles")
expect_output(summary(dist_obj), "Sparsity")
})
test_that("distance object works with join_matched", {
left <- data.frame(
id = 1:5,
treatment = 1,
x = 1:5,
y = 2:6
)
right <- data.frame(
id = 6:10,
treatment = 0,
x = 1:5,
y = 2:6
)
# Create distance object and match
dist_obj <- compute_distances(left, right, vars = c("x", "y"))
result <- suppressWarnings(match_couples(dist_obj))
# Join should work seamlessly
joined <- join_matched(result, left, right)
expect_s3_class(joined, "tbl_df")
expect_equal(nrow(joined), nrow(result$pairs))
expect_true("treatment_left" %in% names(joined))
expect_true("treatment_right" %in% names(joined))
})
test_that("distance object works with balance_diagnostics", {
set.seed(456)
left <- data.frame(
id = 1:10,
x = rnorm(10, mean = 5, sd = 2),
y = rnorm(10, mean = 10, sd = 3)
)
right <- data.frame(
id = 11:30,
x = rnorm(20, mean = 5.5, sd = 2),
y = rnorm(20, mean = 10.5, sd = 3)
)
# Create distance object and match
dist_obj <- compute_distances(left, right, vars = c("x", "y"), scale = "standardize")
result <- match_couples(dist_obj)
# Balance diagnostics should work
balance <- balance_diagnostics(result, left, right, vars = c("x", "y"))
expect_s3_class(balance, "balance_diagnostics")
expect_true("var_stats" %in% names(balance))
expect_true("overall" %in% names(balance))
})
test_that("distance object backwards compatible", {
left <- data.frame(id = 1:5, x = 1:5)
right <- data.frame(id = 6:10, x = 6:10)
# Old way (still works)
result_old <- match_couples(left, right, vars = "x", distance = "euclidean")
# New way
dist_obj <- compute_distances(left, right, vars = "x", distance = "euclidean")
result_new <- match_couples(dist_obj)
# Should produce identical results
expect_equal(nrow(result_old$pairs), nrow(result_new$pairs))
expect_equal(result_old$info$n_matched, result_new$info$n_matched)
# Distances should be the same
expect_equal(
sort(result_old$pairs$distance),
sort(result_new$pairs$distance)
)
})
test_that("is_distance_object works correctly", {
left <- data.frame(id = 1:5, x = 1:5)
right <- data.frame(id = 6:10, x = 6:10)
dist_obj <- compute_distances(left, right, vars = "x")
expect_true(is_distance_object(dist_obj))
expect_false(is_distance_object(list()))
expect_false(is_distance_object(data.frame()))
expect_false(is_distance_object(NULL))
})
test_that("distance object with custom ID columns", {
left <- data.frame(
patient_id = 101:105,
x = 1:5
)
right <- data.frame(
control_id = 201:205,
x = 1:5
)
dist_obj <- compute_distances(
left, right,
vars = "x",
left_id = "patient_id",
right_id = "control_id"
)
expect_equal(dist_obj$metadata$left_id, "patient_id")
expect_equal(dist_obj$metadata$right_id, "control_id")
expect_equal(dist_obj$left_ids, as.character(101:105))
expect_equal(dist_obj$right_ids, as.character(201:205))
# Should work with match_couples
result <- suppressWarnings(match_couples(dist_obj))
expect_s3_class(result, "matching_result")
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.