tests/testthat/test-calculate_distances.R

test_that("Euclidean distance works and returns symmetric matrix", {
  x <- matrix(c(1, 2, 3, 4,
                2, 3, 4, 5), nrow = 2, byrow = TRUE)
  d <- calculate_distances(x, method = "euclidean", output_format = "matrix")

  expect_true(is.matrix(d))
  expect_equal(nrow(d), nrow(x))
  expect_equal(ncol(d), nrow(x))
  expect_equal(unname(diag(d)), rep(0, nrow(x)))
  expect_equal(d, t(d))  # symmetry
})

test_that("Manhattan distance matches manual calculation", {
  x <- matrix(c(1, 2, 3,
                2, 3, 4), nrow = 2, byrow = TRUE)
  d <- calculate_distances(x, method = "manhattan", output_format = "matrix")

  manual <- abs(1-2) + abs(2-3) + abs(3-4)
  expect_equal(d[1, 2], manual)
})

test_that("Minkowski requires parameter p", {
  x <- matrix(1:6, nrow = 2)

  expect_error(calculate_distances(x, method = "minkowski"))
  expect_error(calculate_distances(x, method = "manhattan", p = 3))

  d <- calculate_distances(x, method = "minkowski", p = 3)
  expect_true(inherits(d, "dist") || is.matrix(d))
})

test_that("Categorical distance works with matching_coefficient", {
  x <- data.frame(a = c("a", "b", "a"), b = c("x", "x", "y"))
  d <- calculate_distances(x, method = "matching_coefficient", output_format = "matrix")

  expect_true(all(d >= 0 & d <= 1))
})

test_that("Binary Jaccard distance returns warning on NaN cases", {
  x <- data.frame(v1 = c(0, 0), v2 = c(0, 0))

  expect_warning(
    d <- calculate_distances(x, method = "jaccard", output_format = "matrix"),
    "NaN"
  )
  expect_true(any(is.nan(d)))
})

test_that("Dice and Hamming distances produce valid outputs", {
  x <- data.frame(v1 = c(0, 1, 1), v2 = c(1, 0, 1))

  d_dice <- calculate_distances(x, method = "dice", output_format = "matrix")
  d_hamming <- calculate_distances(x, method = "hamming", output_format = "matrix")

  expect_true(all(d_dice >= 0 & d_dice <= 1, na.rm = TRUE))
  expect_true(all(d_hamming >= 0 & d_hamming <= 1, na.rm = TRUE))
})

test_that("Cosine and correlation return distances with expected warnings", {
  x <- matrix(rnorm(50), nrow = 5)

  expect_warning(
    d_cos <- calculate_distances(x, method = "cosine"),
    "does not guarantee Euclidean"
  )
  expect_warning(
    d_corr <- calculate_distances(x, method = "correlation"),
    "does not guarantee Euclidean"
  )

  expect_true(all(d_cos >= 0, na.rm = TRUE))
  expect_true(all(d_corr >= 0, na.rm = TRUE))
})

test_that("Mahalanobis distance runs without error", {
  set.seed(123)
  x <- matrix(rnorm(20), ncol = 4)
  d <- calculate_distances(x, method = "mahalanobis", output_format = "matrix")

  expect_true(all(d >= 0))
})

test_that("Gower distance works with auto detection and expected warning", {
  data("Data_HC_contamination", package = "dbrobust")
  df <- Data_HC_contamination

  expect_warning(
    d <- calculate_distances(df, method = "gower", output_format = "matrix"),
    "may not satisfy the Euclidean property"
  )

  expect_true(is.matrix(d))
  expect_equal(nrow(d), nrow(df))
})

test_that("Gower distance works with manual variable selection and expected warning", {
  data("Data_HC_contamination", package = "dbrobust")
  df <- Data_HC_contamination

  cont <- 1:4
  bin <- 8:9
  cat <- 5:7

  expect_warning(
    d_manual <- calculate_distances(df, method = "gower",
                                    continuous_cols = cont,
                                    binary_cols = bin,
                                    categorical_cols = cat,
                                    output_format = "matrix"),
    "may not satisfy the Euclidean property"
  )

  expect_true(is.matrix(d_manual))
  expect_equal(nrow(d_manual), nrow(df))
})

test_that("Invalid method throws error", {
  x <- matrix(1:6, nrow = 2)
  expect_error(calculate_distances(x, method = "invalid_method"))
})

test_that("Similarity output works with transformation options", {
  x <- data.frame(v1 = c(0, 1), v2 = c(1, 1))

  d1 <- calculate_distances(x, method = "jaccard", output_format = "similarity",
                            similarity_transform = "linear")
  d2 <- calculate_distances(x, method = "jaccard", output_format = "similarity",
                            similarity_transform = "sqrt")

  expect_true(all(d1 >= 0 & d1 <= 1, na.rm = TRUE))
  expect_true(all(d2 >= 0 & d2 <= 1, na.rm = TRUE))
})

test_that("Squared option squares distances but not similarities", {
  x <- matrix(c(1,2,3, 2,3,4), nrow = 2, byrow = TRUE)

  d1 <- calculate_distances(x, method = "euclidean", squared = FALSE, output_format = "matrix")
  d2 <- calculate_distances(x, method = "euclidean", squared = TRUE, output_format = "matrix")

  expect_equal(d2[1,2], d1[1,2]^2)

  sim <- calculate_distances(data.frame(v1=c(0,1), v2=c(1,1)),
                             method = "jaccard",
                             output_format = "similarity",
                             squared = TRUE)

  expect_false(any(sim > 1))  # squared not applied to similarities
})

Try the dbrobust package in your browser

Any scripts or data that you put into this service are public.

dbrobust documentation built on Nov. 5, 2025, 6:24 p.m.