troubleshooting.R
In couplr: Optimal Pairing and Matching via Linear Assignment

## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.width = 7,
  fig.height = 5,
  error = TRUE
)
library(couplr)
library(dplyr)

## ----error=TRUE---------------------------------------------------------------
try({
# This fails: all assignments have Inf cost
cost <- matrix(c(1, Inf, Inf, Inf, Inf, Inf, Inf, 2, 3), nrow = 3, byrow = TRUE)
result <- lap_solve(cost)
})

## -----------------------------------------------------------------------------
# Check for infeasible rows
check_feasibility <- function(cost_matrix) {
  finite_per_row <- rowSums(is.finite(cost_matrix))
  infeasible_rows <- which(finite_per_row == 0)

  if (length(infeasible_rows) > 0) {
    cat("Infeasible rows (no finite costs):", infeasible_rows, "\n")
    return(FALSE)
  }

  finite_per_col <- colSums(is.finite(cost_matrix))
  infeasible_cols <- which(finite_per_col == 0)

  if (length(infeasible_cols) > 0) {
    cat("Infeasible columns (no finite costs):", infeasible_cols, "\n")
    return(FALSE)
  }

  cat("Problem appears feasible\n")
  return(TRUE)
}

# Check the problematic matrix
cost <- matrix(c(1, Inf, Inf, Inf, Inf, Inf, Inf, 2, 3), nrow = 3, byrow = TRUE)
check_feasibility(cost)

## -----------------------------------------------------------------------------
# Remove rows with no valid assignments
cost <- matrix(c(1, Inf, Inf, Inf, Inf, Inf, Inf, 2, 3), nrow = 3, byrow = TRUE)
valid_rows <- rowSums(is.finite(cost)) > 0
cost_valid <- cost[valid_rows, , drop = FALSE]

if (nrow(cost_valid) > 0) {
  result <- lap_solve(cost_valid)
  cat("Matched", nrow(result), "of", nrow(cost), "rows\n")
}

## -----------------------------------------------------------------------------
# Replace Inf with high (but finite) penalty
cost_with_fallback <- cost
cost_with_fallback[!is.finite(cost_with_fallback)] <- 1e6  # Large penalty

result <- lap_solve(cost_with_fallback)
print(result)

## ----fig.width=7, fig.height=4, fig.alt="Density plot showing overlap between treatment and control groups on the age variable"----
# Simulate poor overlap scenario
set.seed(123)
left <- tibble(id = 1:50, age = rnorm(50, mean = 25, sd = 3))
right <- tibble(id = 1:50, age = rnorm(50, mean = 55, sd = 3))

# Visualize overlap
library(ggplot2)
combined <- bind_rows(
  left %>% mutate(group = "Left"),
  right %>% mutate(group = "Right")
)

ggplot(combined, aes(x = age, fill = group)) +
  geom_density(alpha = 0.5) +
  labs(title = "Poor Covariate Overlap",
       subtitle = "No overlap means no good matches possible") +
  theme_minimal() +
  theme(plot.background = element_rect(fill = "transparent", color = NA),
        panel.background = element_rect(fill = "transparent", color = NA))

## -----------------------------------------------------------------------------
# Example of poor balance
set.seed(456)
left <- tibble(
  id = 1:100,
  age = rnorm(100, 30, 5),
  income = rnorm(100, 80000, 20000)  # Very different from right
)
right <- tibble(
  id = 1:100,
  age = rnorm(100, 32, 5),
  income = rnorm(100, 40000, 10000)  # Very different income
)

result <- match_couples(left, right, vars = c("age", "income"), auto_scale = TRUE)
balance <- balance_diagnostics(result, left, right, vars = c("age", "income"))
print(balance)

## ----eval=FALSE---------------------------------------------------------------
# # Include additional relevant variables
# result <- match_couples(
#   left, right,
#   vars = c("age", "income", "education", "region"),  # More variables
#   auto_scale = TRUE
# )

## -----------------------------------------------------------------------------
result_strict <- match_couples(
  left, right,
  vars = c("age", "income"),
  max_distance = 0.3,  # Stricter caliper
  auto_scale = TRUE
)

cat("Original matches:", result$info$n_matched, "\n")
cat("With caliper:", result_strict$info$n_matched, "\n")

balance_strict <- balance_diagnostics(result_strict, left, right, vars = c("age", "income"))
print(balance_strict)

## ----eval=FALSE---------------------------------------------------------------
# # Block on income tertiles to ensure exact balance
# left$income_cat <- cut(left$income, breaks = 3, labels = c("low", "mid", "high"))
# right$income_cat <- cut(right$income, breaks = 3, labels = c("low", "mid", "high"))
# 
# blocks <- matchmaker(left, right, block_type = "group", block_by = "income_cat")
# result_blocked <- match_couples(
#   blocks$left, blocks$right,
#   vars = c("age"),  # Match on age within income blocks
#   block_id = "block_id"
# )

## -----------------------------------------------------------------------------
# Compare scaling methods
for (scale_method in c("robust", "standardize", "range")) {
  res <- match_couples(left, right, vars = c("age", "income"),
                       auto_scale = TRUE, scale = scale_method)
  bal <- balance_diagnostics(res, left, right, vars = c("age", "income"))
  cat(scale_method, "- max |std_diff|:",
      round(bal$overall$max_abs_std_diff, 3), "\n")
}

## -----------------------------------------------------------------------------
# Estimate runtime
estimate_runtime <- function(n, seconds_per_billion = 1) {
  ops <- n^3
  time_sec <- ops / 1e9 * seconds_per_billion

  if (time_sec < 60) {
    sprintf("%.1f seconds", time_sec)
  } else if (time_sec < 3600) {
    sprintf("%.1f minutes", time_sec / 60)
  } else {
    sprintf("%.1f hours", time_sec / 3600)
  }
}

cat("Estimated runtime for optimal matching:\n")
for (n in c(100, 500, 1000, 3000, 5000, 10000)) {
  cat(sprintf("  n = %5d: %s\n", n, estimate_runtime(n)))
}

## -----------------------------------------------------------------------------
set.seed(789)
n <- 500
large_left <- tibble(id = 1:n, x1 = rnorm(n), x2 = rnorm(n))
large_right <- tibble(id = 1:n, x1 = rnorm(n), x2 = rnorm(n))

# Greedy is much faster
time_greedy <- system.time({
  result_greedy <- greedy_couples(
    large_left, large_right,
    vars = c("x1", "x2"),
    strategy = "row_best"
  )
})

cat("Greedy matching (n=500):", round(time_greedy["elapsed"], 2), "seconds\n")
cat("Quality (mean distance):", round(mean(result_greedy$pairs$distance), 4), "\n")

## ----eval=FALSE---------------------------------------------------------------
# # Create clusters to match within
# blocks <- matchmaker(
#   large_left, large_right,
#   block_type = "cluster",
#   block_vars = c("x1", "x2"),
#   n_blocks = 10  # 10 blocks of ~200 each
# )
# 
# # Match within blocks (10 x O(200^3) << O(2000^3))
# result_blocked <- match_couples(
#   blocks$left, blocks$right,
#   vars = c("x1", "x2"),
#   block_id = "block_id"
# )

## ----eval=FALSE---------------------------------------------------------------
# # For n > 1000, auction algorithm often faster
# result <- match_couples(
#   large_left, large_right,
#   vars = c("x1", "x2"),
#   method = "auction"
# )
# 
# # For sparse problems (many forbidden pairs)
# result <- match_couples(
#   left, right,
#   vars = vars,
#   max_distance = 0.5,  # Creates sparsity
#   method = "sap"       # Sparse algorithm
# )

## -----------------------------------------------------------------------------
# Compute once, reuse multiple times
dist_cache <- compute_distances(
  large_left, large_right,
  vars = c("x1", "x2"),
  scale = "robust"
)

# Fast: reuse cached distances
result1 <- match_couples(dist_cache, max_distance = 0.3)
result2 <- match_couples(dist_cache, max_distance = 0.5)
result3 <- match_couples(dist_cache, max_distance = 1.0)

## -----------------------------------------------------------------------------
# Memory requirements
memory_needed <- function(n) {
  bytes <- 8 * n^2
  if (bytes < 1e6) {
    sprintf("%.1f KB", bytes / 1e3)
  } else if (bytes < 1e9) {
    sprintf("%.1f MB", bytes / 1e6)
  } else {
    sprintf("%.1f GB", bytes / 1e9)
  }
}

cat("Memory for full distance matrix:\n")
for (n in c(1000, 5000, 10000, 20000, 50000)) {
  cat(sprintf("  n = %5d: %s\n", n, memory_needed(n)))
}

## ----eval=FALSE---------------------------------------------------------------
# # Greedy computes distances on-the-fly
# result <- greedy_couples(
#   left, right,
#   vars = covariates,
#   strategy = "row_best"  # Most memory-efficient
# )

## ----eval=FALSE---------------------------------------------------------------
# # Each block is much smaller
# blocks <- matchmaker(left, right, block_type = "cluster", n_blocks = 20)
# result <- match_couples(blocks$left, blocks$right, vars = vars, block_id = "block_id")

## ----eval=FALSE---------------------------------------------------------------
# # Caliper excludes distant pairs (sparse representation)
# result <- match_couples(
#   left, right,
#   vars = covariates,
#   max_distance = 0.5,
#   method = "sap"  # Sparse-optimized algorithm
# )

## ----eval=FALSE---------------------------------------------------------------
# # Increase to 16 GB (if available)
# memory.limit(size = 16000)

## -----------------------------------------------------------------------------
cost <- matrix(c(1, 2, 2, 2, 1, 2, 2, 2, 1), nrow = 3, byrow = TRUE)

result_jv <- lap_solve(cost, method = "jv")
result_hungarian <- lap_solve(cost, method = "hungarian")

cat("JV assignment:       ", result_jv$target, "\n")
cat("Hungarian assignment:", result_hungarian$target, "\n")
cat("JV total cost:       ", get_total_cost(result_jv), "\n")
cat("Hungarian total cost:", get_total_cost(result_hungarian), "\n")

## -----------------------------------------------------------------------------
# Check for ties
check_ties <- function(cost_matrix) {
  n <- nrow(cost_matrix)
  # Check if diagonal dominates (trivial ties)
  diag_costs <- diag(cost_matrix)
  if (length(unique(diag_costs)) < n) {
    cat("Tied costs on diagonal - multiple optima likely\n")
  }

  # Check cost uniqueness
  unique_costs <- length(unique(as.vector(cost_matrix)))
  total_entries <- length(cost_matrix)
  if (unique_costs < total_entries * 0.5) {
    cat("Many repeated costs - ties possible\n")
  }
}

check_ties(cost)

## -----------------------------------------------------------------------------
# Total cost should be identical
stopifnot(get_total_cost(result_jv) == get_total_cost(result_hungarian))
cat("Both methods found optimal solutions (same total cost)\n")

## -----------------------------------------------------------------------------
# Hungarian has consistent tie-breaking
result <- lap_solve(cost, method = "hungarian")

## -----------------------------------------------------------------------------
set.seed(42)
cost_perturbed <- cost + matrix(rnorm(9, 0, 1e-10), 3, 3)
result <- lap_solve(cost_perturbed)

## ----error=TRUE---------------------------------------------------------------
try({
left <- tibble(id = 1:5, age = c(25, 30, NA, 35, 40))
right <- tibble(id = 1:5, age = c(28, 32, 33, 36, 42))

# This may fail or give unexpected results
result <- match_couples(left, right, vars = "age")
})

## -----------------------------------------------------------------------------
left_clean <- left %>% filter(!is.na(age))
right_clean <- right %>% filter(!is.na(age))

result <- match_couples(left_clean, right_clean, vars = "age")
cat("Matched", result$info$n_matched, "pairs (excluded 1 left unit with NA)\n")

## -----------------------------------------------------------------------------
# Simple mean imputation
left_imputed <- left %>%
  mutate(age = if_else(is.na(age), mean(age, na.rm = TRUE), age))

result <- match_couples(left_imputed, right, vars = "age")
cat("Matched", result$info$n_matched, "pairs (imputed 1 NA with mean)\n")

## -----------------------------------------------------------------------------
health <- preprocess_matching_vars(
  left, right,
  vars = "age"
)
print(health)

## ----eval=FALSE---------------------------------------------------------------
# # In R:
# remove.packages("couplr")
# install.packages("couplr")
# 
# # Or from GitHub:
# devtools::install_github("gcol33/couplr", force = TRUE)

## ----eval=FALSE---------------------------------------------------------------
# devtools::clean_dll()
# devtools::load_all()

## -----------------------------------------------------------------------------
# Check if Rtools is properly configured
Sys.which("make")
Sys.which("g++")

## -----------------------------------------------------------------------------
cost <- matrix(c(1, NA, 3, 4), 2, 2)
cost[is.na(cost)] <- Inf  # Mark as forbidden
result <- lap_solve(cost)

## -----------------------------------------------------------------------------
# Always verify data before matching
stopifnot(nrow(left) > 0, nrow(right) > 0)

## ----eval=FALSE---------------------------------------------------------------
# # Minimal example template
# library(couplr)
# 
# # Minimal data that reproduces the issue
# set.seed(123)
# left <- tibble(id = 1:10, x = rnorm(10))
# right <- tibble(id = 1:10, x = rnorm(10))
# 
# # Code that causes the error
# result <- match_couples(left, right, vars = "x")
# 
# # Expected vs actual behavior
# # Expected: ...
# # Actual: [error message]
# 
# # Session info
# sessionInfo()
Any scripts or data that you put into this service are public.
couplr documentation built on Jan. 20, 2026, 5:07 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
couplr
Optimal Pairing and Matching via Linear Assignment

inst/doc/troubleshooting.R
In couplr: Optimal Pairing and Matching via Linear Assignment

Try the couplr package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

couplr Optimal Pairing and Matching via Linear Assignment

inst/doc/troubleshooting.R In couplr: Optimal Pairing and Matching via Linear Assignment

Try the couplr package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

couplr
Optimal Pairing and Matching via Linear Assignment

inst/doc/troubleshooting.R
In couplr: Optimal Pairing and Matching via Linear Assignment