tests/testthat/test_search.R

# ==============================================================================
# distances -- R package with tools for distance metrics
# https://github.com/fsavje/distances
#
# Copyright (C) 2017  Fredrik Savje -- http://fredriksavje.com
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see http://www.gnu.org/licenses/
# ==============================================================================

library(distances)
context("search.R")


my_data_points <- data.frame(x = c(-0.337583089273209, -1.37648631398233, 1.20172247127349, -1.14105566632672, 0.896351538950015, -1.65678250088529, -0.702935129174973, 0.436019296689133, 0.946791912272624, -0.16316317143057),
                             y = c(-1.36917010106676, -0.448426125058238, 0.607836056473607, -0.146778291857306, -0.360125338032073, -0.37738942655005, 0.43107849723969, -2.02715324025675, 1.12082703897338, 0.788935416029541))
my_distances <- distances(my_data_points)
my_distances_withID <- distances(data.frame(my_data_points, my_ids = letters[1:10]),
                                 id_variable = "my_ids")


# ==============================================================================
# max_distance_search
# ==============================================================================

replica_max_distance_search <- function(distances,
                                        query_indices = NULL,
                                        search_indices = NULL) {
  if (is.null(query_indices)) query_indices <- 1:length(distances)
  if (is.null(search_indices)) search_indices <- 1:length(distances)

  apply(as.matrix(distances)[query_indices, search_indices],
        1, function(x) { search_indices[rev(order(x))[1]] })
}

test_that("`max_distance_search` returns correct output", {
  expect_identical(max_distance_search(my_distances),
                   replica_max_distance_search(my_distances))
  expect_identical(max_distance_search(my_distances, 1:10),
                   replica_max_distance_search(my_distances, 1:10))
  expect_identical(max_distance_search(my_distances, 4:8),
                   replica_max_distance_search(my_distances, 4:8))
  expect_identical(max_distance_search(my_distances, NULL, 1:10),
                   replica_max_distance_search(my_distances, NULL, 1:10))
  expect_identical(max_distance_search(my_distances, NULL, 4:8),
                   replica_max_distance_search(my_distances, NULL, 4:8))
  expect_identical(max_distance_search(my_distances_withID),
                   replica_max_distance_search(my_distances_withID))
  expect_identical(max_distance_search(my_distances_withID, 1:10),
                   replica_max_distance_search(my_distances_withID, 1:10))
  expect_identical(max_distance_search(my_distances_withID, 4:8),
                   replica_max_distance_search(my_distances_withID, 4:8))
  expect_identical(max_distance_search(my_distances_withID, NULL, 1:10),
                   replica_max_distance_search(my_distances_withID, NULL, 1:10))
  expect_identical(max_distance_search(my_distances_withID, NULL, 4:8),
                   replica_max_distance_search(my_distances_withID, NULL, 4:8))
  expect_identical(max_distance_search(my_distances, 1:10, 1:10),
                   replica_max_distance_search(my_distances, 1:10, 1:10))
  expect_identical(max_distance_search(my_distances, 4:8, 1:10),
                   replica_max_distance_search(my_distances, 4:8, 1:10))
  expect_identical(max_distance_search(my_distances_withID, 1:10, 1:10),
                   replica_max_distance_search(my_distances_withID, 1:10, 1:10))
  expect_identical(max_distance_search(my_distances_withID, 4:8, 1:10),
                   replica_max_distance_search(my_distances_withID, 4:8, 1:10))
  expect_identical(max_distance_search(my_distances, 1:10, 1:7),
                   replica_max_distance_search(my_distances, 1:10, 1:7))
  expect_identical(max_distance_search(my_distances, 4:8, 1:7),
                   replica_max_distance_search(my_distances, 4:8, 1:7))
  expect_identical(max_distance_search(my_distances_withID, 1:10, 1:7),
                   replica_max_distance_search(my_distances_withID, 1:10, 1:7))
  expect_identical(max_distance_search(my_distances_withID, 4:8, 1:7),
                   replica_max_distance_search(my_distances_withID, 4:8, 1:7))
})


# ==============================================================================
# nearest_neighbor_search
# ==============================================================================

replica_nearest_neighbor_search <- function(distances,
                                            k,
                                            query_indices = NULL,
                                            search_indices = NULL,
                                            radius = NULL) {
  if (is.null(query_indices)) query_indices <- 1:length(distances)
  if (is.null(search_indices)) search_indices <- 1:length(distances)

  ans <- apply(as.matrix(distances)[query_indices, search_indices],
               1, function(x) {
                 if (is.null(radius) || x[order(x)[k]] <= radius) { search_indices[order(x)[1:k]] } else { rep(NA, k) }
               })

  if (!is.matrix(ans)) ans <- matrix(ans, ncol = length(ans), dimnames = list(NULL, names(ans)))
  ans
}

test_that("`nearest_neighbor_search` returns correct output", {
  expect_identical(nearest_neighbor_search(my_distances, 1L),
                   replica_nearest_neighbor_search(my_distances, 1L))
  expect_identical(nearest_neighbor_search(my_distances, 3L, 1:10),
                   replica_nearest_neighbor_search(my_distances, 3L, 1:10))
  expect_identical(nearest_neighbor_search(my_distances, 2L, 4:8),
                   replica_nearest_neighbor_search(my_distances, 2L, 4:8))
  expect_identical(nearest_neighbor_search(my_distances, 3L, NULL, 1:10),
                   replica_nearest_neighbor_search(my_distances, 3L, NULL, 1:10))
  expect_identical(nearest_neighbor_search(my_distances, 2L, NULL, 4:8),
                   replica_nearest_neighbor_search(my_distances, 2L, NULL, 4:8))
  expect_identical(nearest_neighbor_search(my_distances_withID, 1L),
                   replica_nearest_neighbor_search(my_distances_withID, 1L))
  expect_identical(nearest_neighbor_search(my_distances_withID, 3L, 1:10),
                   replica_nearest_neighbor_search(my_distances_withID, 3L, 1:10))
  expect_identical(nearest_neighbor_search(my_distances_withID, 2L, 4:8),
                   replica_nearest_neighbor_search(my_distances_withID, 2L, 4:8))
  expect_identical(nearest_neighbor_search(my_distances_withID, 3L, NULL, 1:10),
                   replica_nearest_neighbor_search(my_distances_withID, 3L, NULL, 1:10))
  expect_identical(nearest_neighbor_search(my_distances_withID, 1L, NULL, 4:8),
                   replica_nearest_neighbor_search(my_distances_withID, 1L, NULL, 4:8))
  expect_identical(nearest_neighbor_search(my_distances, 3L, 1:10, 1:10),
                   replica_nearest_neighbor_search(my_distances, 3L, 1:10, 1:10))
  expect_identical(nearest_neighbor_search(my_distances, 3L, 4:8, 1:10),
                   replica_nearest_neighbor_search(my_distances, 3L, 4:8, 1:10))
  expect_identical(nearest_neighbor_search(my_distances_withID, 3L, 1:10, 1:10),
                   replica_nearest_neighbor_search(my_distances_withID, 3L, 1:10, 1:10))
  expect_identical(nearest_neighbor_search(my_distances_withID, 1L, 4:8, 1:10),
                   replica_nearest_neighbor_search(my_distances_withID, 1L, 4:8, 1:10))
  expect_identical(nearest_neighbor_search(my_distances, 3L, 1:10, 1:7),
                   replica_nearest_neighbor_search(my_distances, 3L, 1:10, 1:7))
  expect_identical(nearest_neighbor_search(my_distances, 2L, 4:8, 1:7),
                   replica_nearest_neighbor_search(my_distances, 2L, 4:8, 1:7))
  expect_identical(nearest_neighbor_search(my_distances_withID, 2L, 1:10, 1:7),
                   replica_nearest_neighbor_search(my_distances_withID, 2L, 1:10, 1:7))
  expect_identical(nearest_neighbor_search(my_distances_withID, 3L, 4:8, 1:7),
                   replica_nearest_neighbor_search(my_distances_withID, 3L, 4:8, 1:7))

  expect_identical(nearest_neighbor_search(my_distances, 1L, radius = 1),
                   replica_nearest_neighbor_search(my_distances, 1L, radius = 1))
  expect_identical(nearest_neighbor_search(my_distances, 3L, 1:10, radius = 1),
                   replica_nearest_neighbor_search(my_distances, 3L, 1:10, radius = 1))
  expect_identical(nearest_neighbor_search(my_distances, 2L, 4:8, radius = 1),
                   replica_nearest_neighbor_search(my_distances, 2L, 4:8, radius = 1))
  expect_identical(nearest_neighbor_search(my_distances, 3L, NULL, 1:10, radius = 1),
                   replica_nearest_neighbor_search(my_distances, 3L, NULL, 1:10, radius = 1))
  expect_identical(nearest_neighbor_search(my_distances, 2L, NULL, 4:8, radius = 1),
                   replica_nearest_neighbor_search(my_distances, 2L, NULL, 4:8, radius = 1))
  expect_identical(nearest_neighbor_search(my_distances_withID, 1L, radius = 1),
                   replica_nearest_neighbor_search(my_distances_withID, 1L, radius = 1))
  expect_identical(nearest_neighbor_search(my_distances_withID, 3L, 1:10, radius = 1),
                   replica_nearest_neighbor_search(my_distances_withID, 3L, 1:10, radius = 1))
  expect_identical(nearest_neighbor_search(my_distances_withID, 2L, 4:8, radius = 1),
                   replica_nearest_neighbor_search(my_distances_withID, 2L, 4:8, radius = 1))
  expect_identical(nearest_neighbor_search(my_distances_withID, 3L, NULL, 1:10, radius = 1),
                   replica_nearest_neighbor_search(my_distances_withID, 3L, NULL, 1:10, radius = 1))
  expect_identical(nearest_neighbor_search(my_distances_withID, 1L, NULL, 4:8, radius = 1),
                   replica_nearest_neighbor_search(my_distances_withID, 1L, NULL, 4:8, radius = 1))
  expect_identical(nearest_neighbor_search(my_distances, 3L, 1:10, 1:10, radius = 1),
                   replica_nearest_neighbor_search(my_distances, 3L, 1:10, 1:10, radius = 1))
  expect_identical(nearest_neighbor_search(my_distances, 3L, 4:8, 1:10, radius = 1),
                   replica_nearest_neighbor_search(my_distances, 3L, 4:8, 1:10, radius = 1))
  expect_identical(nearest_neighbor_search(my_distances_withID, 3L, 1:10, 1:10, radius = 1),
                   replica_nearest_neighbor_search(my_distances_withID, 3L, 1:10, 1:10, radius = 1))
  expect_identical(nearest_neighbor_search(my_distances_withID, 1L, 4:8, 1:10, radius = 1),
                   replica_nearest_neighbor_search(my_distances_withID, 1L, 4:8, 1:10, radius = 1))
  expect_identical(nearest_neighbor_search(my_distances, 3L, 1:10, 1:7, radius = 1),
                   replica_nearest_neighbor_search(my_distances, 3L, 1:10, 1:7, radius = 1))
  expect_identical(nearest_neighbor_search(my_distances, 2L, 4:8, 1:7, radius = 1),
                   replica_nearest_neighbor_search(my_distances, 2L, 4:8, 1:7, radius = 1))
  expect_identical(nearest_neighbor_search(my_distances_withID, 2L, 1:10, 1:7, radius = 1),
                   replica_nearest_neighbor_search(my_distances_withID, 2L, 1:10, 1:7, radius = 1))
  expect_identical(nearest_neighbor_search(my_distances_withID, 3L, 4:8, 1:7, radius = 1),
                   replica_nearest_neighbor_search(my_distances_withID, 3L, 4:8, 1:7, radius = 1))
})

Try the distances package in your browser

Any scripts or data that you put into this service are public.

distances documentation built on Feb. 16, 2023, 7:59 p.m.