test-matching-preprocessing-coverage.R
In couplr: Optimal Pairing and Matching via Linear Assignment

# ==============================================================================
# Additional tests for matching preprocessing functions to increase coverage
# ==============================================================================

# ------------------------------------------------------------------------------
# check_variable_health tests
# ------------------------------------------------------------------------------

test_that("check_variable_health errors with no variables", {
  left <- data.frame(x = 1:5)
  right <- data.frame(x = 1:5)

  expect_error(
    couplr:::check_variable_health(left, right, vars = character(0)),
    "No variables provided"
  )
})

test_that("check_variable_health detects all-NA variable", {
  left <- data.frame(x = c(NA, NA, NA), y = 1:3)
  right <- data.frame(x = c(NA, NA, NA), y = 4:6)

  result <- couplr:::check_variable_health(left, right, vars = c("x", "y"))

  expect_true("x" %in% result$exclude_vars)
  expect_true(any(sapply(result$issues, function(i) i$type == "all_na")))
  expect_true(any(grepl("all values are NA", result$warnings)))
})

test_that("check_variable_health detects constant variable", {
  left <- data.frame(x = rep(5, 5), y = 1:5)
  right <- data.frame(x = rep(5, 5), y = 6:10)

  result <- couplr:::check_variable_health(left, right, vars = c("x", "y"))

  expect_true("x" %in% result$exclude_vars)
  expect_true(any(sapply(result$issues, function(i) i$type == "constant")))
})

test_that("check_variable_health detects low variance", {
  left <- data.frame(x = c(1, 1, 1, 1.00000001), y = 1:4)
  right <- data.frame(x = c(1, 1, 1, 1), y = 5:8)

  result <- couplr:::check_variable_health(left, right, vars = c("x", "y"),
                                            low_variance_threshold = 1e-6)

  # x should trigger low variance warning
  issues <- result$issues
  low_var_issues <- Filter(function(i) i$type == "low_variance", issues)
  expect_true(length(low_var_issues) > 0 || "x" %in% result$exclude_vars)
})

test_that("check_variable_health detects high missingness", {
  left <- data.frame(x = c(NA, NA, NA, NA, 5), y = 1:5)
  right <- data.frame(x = c(NA, NA, NA, NA, 5), y = 6:10)

  result <- couplr:::check_variable_health(left, right, vars = c("x", "y"),
                                            high_missingness_threshold = 0.5)

  issues <- result$issues
  miss_issues <- Filter(function(i) i$type == "high_missingness", issues)
  expect_true(length(miss_issues) > 0)
})

test_that("check_variable_health detects extreme skewness", {
  # Create highly skewed data
  left <- data.frame(x = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 100), y = 1:10)
  right <- data.frame(x = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 100), y = 11:20)

  result <- couplr:::check_variable_health(left, right, vars = c("x", "y"))

  issues <- result$issues
  skew_issues <- Filter(function(i) i$type == "skewed", issues)
  expect_true(length(skew_issues) > 0)
})

test_that("check_variable_health returns proper structure", {
  left <- data.frame(x = 1:5, y = 6:10)
  right <- data.frame(x = 11:15, y = 16:20)

  result <- couplr:::check_variable_health(left, right, vars = c("x", "y"))

  expect_s3_class(result, "variable_health")
  expect_true("summary" %in% names(result))
  expect_true("issues" %in% names(result))
  expect_true("exclude_vars" %in% names(result))
  expect_true("warnings" %in% names(result))

  expect_true("variable" %in% names(result$summary))
  expect_true("mean" %in% names(result$summary))
  expect_true("sd" %in% names(result$summary))
})

test_that("check_variable_health handles multiple issues on same variable", {
  # Variable with both high missingness and skewness
  set.seed(123)
  vals <- c(rep(NA, 6), rep(1, 3), 1000)
  left <- data.frame(x = vals[1:5], y = 1:5)
  right <- data.frame(x = vals[6:10], y = 6:10)

  result <- couplr:::check_variable_health(left, right, vars = c("x", "y"),
                                            high_missingness_threshold = 0.3)

  # Should have multiple issues
  expect_true(length(result$issues) >= 1)
})

# ------------------------------------------------------------------------------
# suggest_scaling tests
# ------------------------------------------------------------------------------

test_that("suggest_scaling returns 'none' for empty vars", {
  left <- data.frame(x = 1:5)
  right <- data.frame(x = 1:5)

  result <- couplr:::suggest_scaling(left, right, vars = character(0))

  expect_equal(result, "none")
})

test_that("suggest_scaling suggests robust for skewed data", {
  # Create highly skewed data
  left <- data.frame(x = c(1, 1, 1, 1, 1, 100))
  right <- data.frame(x = c(1, 1, 1, 1, 1, 100))

  result <- couplr:::suggest_scaling(left, right, vars = "x")

  expect_true(result %in% c("robust", "standardize", "range", "none"))
})

test_that("suggest_scaling suggests standardize for normal data", {
  set.seed(123)
  left <- data.frame(x = rnorm(100), y = rnorm(100))
  right <- data.frame(x = rnorm(100), y = rnorm(100))

  result <- couplr:::suggest_scaling(left, right, vars = c("x", "y"))

  expect_true(result %in% c("standardize", "range", "robust", "none"))
})

# ------------------------------------------------------------------------------
# preprocess_matching_vars tests
# ------------------------------------------------------------------------------

test_that("preprocess_matching_vars works with default settings", {
  left <- data.frame(x = 1:5, y = 6:10)
  right <- data.frame(x = 11:15, y = 16:20)

  result <- preprocess_matching_vars(left, right, vars = c("x", "y"))

  expect_true("vars" %in% names(result))
  expect_true("scaling_method" %in% names(result))
})

test_that("preprocess_matching_vars removes problematic vars", {
  left <- data.frame(const = rep(5, 5), good = 1:5)
  right <- data.frame(const = rep(5, 5), good = 6:10)

  result <- preprocess_matching_vars(left, right, vars = c("const", "good"),
                                      check_health = TRUE,
                                      remove_problematic = TRUE,
                                      verbose = FALSE)

  expect_true("good" %in% result$vars)
  expect_false("const" %in% result$vars)
})

test_that("preprocess_matching_vars keeps problematic vars when requested", {
  left <- data.frame(const = rep(5, 5), good = 1:5)
  right <- data.frame(const = rep(5, 5), good = 6:10)

  result <- preprocess_matching_vars(left, right, vars = c("const", "good"),
                                      check_health = TRUE,
                                      remove_problematic = FALSE,
                                      verbose = FALSE)

  expect_true("const" %in% result$vars)
  expect_true("good" %in% result$vars)
})

test_that("preprocess_matching_vars auto-selects scaling", {
  set.seed(123)
  left <- data.frame(x = rnorm(50), y = rnorm(50) + 100)
  right <- data.frame(x = rnorm(50), y = rnorm(50) + 100)

  result <- preprocess_matching_vars(left, right, vars = c("x", "y"),
                                      auto_scale = TRUE,
                                      scale_method = "auto",
                                      verbose = FALSE)

  expect_true(result$scaling_method %in% c("standardize", "range", "robust", "none"))
})

test_that("preprocess_matching_vars respects explicit scale_method", {
  left <- data.frame(x = 1:5)
  right <- data.frame(x = 6:10)

  result <- preprocess_matching_vars(left, right, vars = "x",
                                      auto_scale = FALSE,
                                      scale_method = "robust",
                                      verbose = FALSE)

  expect_equal(result$scaling_method, "robust")
})

test_that("preprocess_matching_vars returns health info", {
  left <- data.frame(x = 1:5, y = 6:10)
  right <- data.frame(x = 11:15, y = 16:20)

  result <- preprocess_matching_vars(left, right, vars = c("x", "y"),
                                      check_health = TRUE,
                                      verbose = FALSE)

  expect_true("health" %in% names(result))
})

test_that("preprocess_matching_vars handles verbose output", {
  left <- data.frame(x = 1:5)
  right <- data.frame(x = 6:10)

  output <- capture.output({
    result <- preprocess_matching_vars(left, right, vars = "x",
                                        check_health = TRUE,
                                        auto_scale = TRUE,
                                        verbose = TRUE)
  })

  expect_true(length(output) > 0 || is.list(result))
})

# ------------------------------------------------------------------------------
# print.variable_health tests
# ------------------------------------------------------------------------------

test_that("print.variable_health works", {
  left <- data.frame(x = 1:5, y = c(NA, NA, 3, 4, 5))
  right <- data.frame(x = 6:10, y = c(6, NA, 8, 9, 10))

  health <- couplr:::check_variable_health(left, right, vars = c("x", "y"))

  output <- capture.output(print(health))

  expect_true(any(grepl("Variable Health", output)))
})

# ------------------------------------------------------------------------------
# print.preprocessing_result tests
# ------------------------------------------------------------------------------

test_that("print.preprocessing_result works", {
  left <- data.frame(x = 1:5)
  right <- data.frame(x = 6:10)

  result <- preprocess_matching_vars(left, right, vars = "x",
                                      verbose = FALSE)

  output <- capture.output(print(result))

  expect_true(length(output) >= 1)
})

# ------------------------------------------------------------------------------
# Edge cases
# ------------------------------------------------------------------------------

test_that("check_variable_health handles single row data", {
  left <- data.frame(x = 1)
  right <- data.frame(x = 2)

  result <- couplr:::check_variable_health(left, right, vars = "x")

  expect_s3_class(result, "variable_health")
})

test_that("check_variable_health handles many variables", {
  n <- 20
  left <- as.data.frame(matrix(rnorm(n * 10), nrow = 10))
  right <- as.data.frame(matrix(rnorm(n * 10), nrow = 10))

  result <- couplr:::check_variable_health(left, right, vars = names(left))

  expect_equal(nrow(result$summary), n)
})

test_that("preprocess_matching_vars errors when all vars are problematic", {
  left <- data.frame(x = rep(1, 5), y = rep(2, 5))
  right <- data.frame(x = rep(1, 5), y = rep(2, 5))

  # When all variables are excluded due to health issues, an error is thrown
  expect_error(
    preprocess_matching_vars(left, right, vars = c("x", "y"),
                              check_health = TRUE,
                              remove_problematic = TRUE,
                              verbose = FALSE),
    "All variables were excluded"
  )
})

Any scripts or data that you put into this service are public.

couplr documentation built on Jan. 20, 2026, 5:07 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

couplr
Optimal Pairing and Matching via Linear Assignment

tests/testthat/test-matching-preprocessing-coverage.R
In couplr: Optimal Pairing and Matching via Linear Assignment

Try the couplr package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

couplr Optimal Pairing and Matching via Linear Assignment

tests/testthat/test-matching-preprocessing-coverage.R In couplr: Optimal Pairing and Matching via Linear Assignment

Try the couplr package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

couplr
Optimal Pairing and Matching via Linear Assignment

tests/testthat/test-matching-preprocessing-coverage.R
In couplr: Optimal Pairing and Matching via Linear Assignment