context('distance')
set.seed(62)
thedf <- data.frame(
ID=rep(LETTERS[1:3], length.out=10),
x=sample(10),
y=sample(10),
extra1=sample(letters, size=10),
extra2=sample(letters, size=10),
extra3=sample(10)
)
res1 <- threshold_distance(thedf, threshold=3, as_dataframe=FALSE, cols=c('x', 'y'))
res2 <- threshold_distance(thedf, threshold=3, as_dataframe=TRUE, cols=c('x', 'y'))
res1_extras <- threshold_distance(thedf, threshold=3, as_dataframe=FALSE, extra_columns=c('extra1', 'extra2'))
res2_extras <- threshold_distance(thedf, threshold=3, as_dataframe=TRUE, extra_columns=c('extra1', 'extra2'))
res1_extras_nocheck <- threshold_distance(thedf, threshold=3, as_dataframe=FALSE, extra_columns=c('extra1', 'extra2'), check_id=FALSE)
res2_extras_nocheck <- threshold_distance(thedf, threshold=3, as_dataframe=TRUE, extra_columns=c('extra1', 'extra2'), check_id=FALSE)
test_that("Correct type is returned", {
expect_is(res1, 'list')
expect_is(res2, 'data.frame')
expect_is(res1_extras, 'list')
expect_is(res2_extras, 'data.frame')
expect_is(res1_extras_nocheck, 'list')
expect_is(res2_extras_nocheck, 'data.frame')
})
test_that("Input is restricted", {
expect_error(threshold_distance(as.list(thedf), threshold=3, as_dataframe=FALSE))
})
test_that("Outcome is the right size", {
expect_equal(length(res1), 7)
expect_equal(length(res1_extras), 11)
expect_equal(length(res1_extras_nocheck), 11)
expect_equal(dim(res2), c(5, 5))
expect_equal(dim(res2_extras), c(5, 9))
expect_equal(dim(res2_extras_nocheck), c(7, 9))
})
test_that("Output is correct", {
input <- data.frame(
ID = c("A", "B", "C", "A"),
x = c(0, 1, 1, 0),
y = c(0, 1, 3, 2)
)
actual <- threshold_distance(input, 3, as_dataframe = TRUE)
expected <- data.frame(
i = c(1, 4, 4, 2),
j = c(2, 2, 3, 3),
distance = c(
1.4142135623731,
1.4142135623731,
1.4142135623731,
2
),
ID_1 = c("A", "A", "A", "B"),
ID_2 = c("B", "B", "C", "C")
)
expect_equal(data.frame(actual), data.frame(expected))
})
test_that("Points on a map work as expected", {
input <- data.frame(
ID = c("A", "B", "C", "A"),
lat = c(40.668034, 40.66853, 40.66903, 40.66853),
lng = c(-73.971291, -73.97079, -73.97079, -73.971291)
)
expected <- data.frame(
i = c(1, 4, 2, 2),
j = c(2, 3, 4, 3),
ID_1 = c("A", "A", "B", "B"),
ID_2 = c("B", "C", "A", "C")
)
actual <- threshold_distance(input, threshold = 100, cols = c("lat", "lng"), distance_type = "haversine", as_dataframe = TRUE)[, c("i", "j", "ID_1", "ID_2")]
expect_equal(data.frame(actual), data.frame(expected))
})
test_that("Distancing new points to old points works as expected (data.frame)", {
# old data
left_df <- data.frame(
x = c(0, 1, 1, 0),
y = c(0, 1, 3, 2)
)
# new data
right_df <- data.table::data.table(
x = c(0, 0, 2),
y = c(1, 3, 2)
)
expected <- data.table::data.table(
i = c(1, 4, 2, 4, 3, 2, 3),
j = c(1, 1, 1, 2, 2, 3, 3),
distance = c(1, 1, 1, 1, 1, sqrt(2), sqrt(2))
)
attr(expected, 'kept') <- 7
attr(expected, 'skipped') <- 5
actual <- threshold_distance2(left_df, right_df, 1.5, as_dataframe = TRUE)
expect_equal(actual[order(i, j), c('i', 'j', 'distance')], expected[order(i, j)])
})
test_that("Distancing new points to old points works as expected (list)", {
# old data
left_df <- data.table::data.table(
x = c(0, 1, 1, 0),
y = c(0, 1, 3, 2)
)
# new data
right_df <- data.table::data.table(
x = c(0, 0, 2),
y = c(1, 3, 2)
)
expected <- list(
i = c(1, 4, 2, 4, 3, 2, 3),
j = c(1, 1, 1, 2, 2, 3, 3),
distance = c(1, 1, 1, 1, 1, sqrt(2), sqrt(2)),
kept = 7,
skipped = 5
)
actual <- threshold_distance2(left_df, right_df, 1.5)
expect_equal(actual, expected)
})
test_that("Distancing new points on a map to old points works as expected (data.frame)", {
left_df <- data.frame(
lat = c(40.668034, 40.66853, 40.66903, 40.66853),
lng = c(-73.971291, -73.97079, -73.97079, -73.971291)
)
right_df <- data.frame(
lat = c(40.668366, 40.66903, 40.668698),
lng = c(-73.971291, -73.971291, 40.669036)
)
expected <- data.table::data.table(
i = c(1, 2, 4, 3, 2, 4, 3),
j = c(1, 1, 1, 1, 2, 2, 2)
)
attr(expected, 'kept') <- 7
attr(expected, 'skipped') <- 5
actual <- threshold_distance2(left_df, right_df,
threshold = 100,
cols = c("lat", "lng"),
as_dataframe = TRUE,
distance_type = "haversine") |>
(\(df) df[, c("i", "j")])()
expect_equal(actual[order(i, j)], expected[order(i, j)])
})
test_that("Distancing new points on a map to old points works as expected (data.frame) accounting for ID", {
left_df <- data.frame(
lat = c(40.668034, 40.66853, 40.66903, 40.66853),
lng = c(-73.971291, -73.97079, -73.97079, -73.971291),
id = c(1, 1, 2, 3)
)
right_df <- data.frame(
lat = c(40.668366, 40.66903, 40.668698),
lng = c(-73.971291, -73.971291, 40.669036),
id = c(1, 2, 4)
)
expected <- data.table::data.table(
i = c(1, 2, 4, 3, 2, 4, 3),
j = c(1, 1, 1, 1, 2, 2, 2),
id_1 = c(1, 1, 3, 2, 1, 3, 2),
id_2 = c(1, 1, 1, 1, 2, 2, 2),
key=c('id_1', 'id_2')
)
actual_check_id <- threshold_distance2(left_df, right_df,
threshold = 100,
cols = c("lat", "lng"),
as_dataframe = TRUE,
id_col='id',
check_id=TRUE,
distance_type = "haversine") |>
(\(df) df[, c("i", "j", "id_1", "id_2")])()
actual_no_id <- threshold_distance2(left_df, right_df,
threshold = 100,
cols = c("lat", "lng"),
as_dataframe = TRUE,
id_col='id',
check_id=FALSE,
distance_type = "haversine") |>
(\(df) df[, c("i", "j", "id_1", "id_2")])()
attr(expected, 'kept') <- 4
attr(expected, 'skipped') <- 8
expect_equal(actual_check_id[order(i, j)], expected[order(i, j)][id_1 != id_2, ])
attr(expected, 'kept') <- 7
attr(expected, 'skipped') <- 5
expect_equal(actual_no_id[order(i, j)], expected[order(i, j)])
})
# TODO: add tests for extra_columns
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.