Nothing
test_that("kNN", {
set.seed(665544)
n <- 1000
x <- cbind(
x = runif(10, 0, 10) + rnorm(n, sd = 0.2),
y = runif(10, 0, 10) + rnorm(n, sd = 0.2),
z = runif(10, 0, 10) + rnorm(n, sd = 0.2)
)
## no duplicates first! All distances should be unique
x <- x[!duplicated(x),]
rownames(x) <- paste0("Object_", 1:nrow(x))
k <- 5L
nn <- kNN(x, k=k, sort = TRUE)
## check dimensions
expect_identical(nn$k, k)
expect_identical(dim(nn$dist), c(nrow(x), k))
expect_identical(dim(nn$id), c(nrow(x), k))
## check visually
#plot(x)
#points(x[nn$id[1,],], col="red", lwd=5)
#points(x[nn$id[2,],], col="green", lwd=5)
## compare with kNN found using distances
nn_d <- kNN(dist(x), k, sort = TRUE)
## check visually
#plot(x)
#points(x[nn_d$id[1,],], col="red", lwd=5)
#points(x[nn_d$id[2,],], col="green", lwd=5)
### will agree since we use sorting
expect_equal(nn, nn_d)
## calculate dist internally
nn_d2 <- kNN(x, k, search = "dist", sort = TRUE)
expect_equal(nn, nn_d2)
## without sorting
nn2 <- kNN(x, k=k, sort = FALSE)
expect_equal(t(apply(nn$id, MARGIN = 1, sort)),
t(apply(nn2$id, MARGIN = 1, sort)))
## search options
nn_linear <- kNN(x, k=k, search = "linear", sort = TRUE)
expect_equal(nn, nn_linear)
## split options
for(so in c("STD", "MIDPT", "FAIR", "SL_FAIR")) {
nn3 <- kNN(x, k=k, splitRule = so, sort = TRUE)
expect_equal(nn, nn3)
}
## bucket size
for (bs in c(5, 10, 15, 100)) {
nn3 <- kNN(x, k=k, bucketSize = bs, sort = TRUE)
expect_equal(nn, nn3)
}
## the order is not stable with matching distances which means that the
## k-NN are not stable. We add 100 copied points to check if self match
## filtering and sort works
x <- rbind(x, x[sample(1:nrow(x), 100),])
rownames(x) <- paste0("Object_", 1:nrow(x))
k <- 5L
nn <- kNN(x, k=k, sort = TRUE)
## compare with manually found NNs
nn_d <- kNN(x, k=k, search = "dist", sort = TRUE)
expect_equal(nn$dist, nn_d$dist)
## This is expected to fail: because the ids are not stable for matching distances
## expect_equal(nn$id, nn_d$id)
## FIXME: write some code to check this!
## missing values, but distances are fine
x_na <- x
x_na[c(1, 3, 5), 1] <- NA
expect_error(kNN(x_na, k = 3), regexp = "NA")
res_d1 <- kNN(x_na, k = 3, search = "dist")
res_d2 <- kNN(dist(x_na), k = 3)
expect_equal(res_d1, res_d2)
## introduce NAs into dist
x_na[c(1, 3, 5),] <- NA
expect_error(kNN(x_na, k = 3), regexp = "NA")
expect_error(kNN(x_na, k = 3, search = "dist"), regexp = "NA")
expect_error(kNN(dist(x_na), k = 3), regexp = "NA")
## inf
x_inf <- x
x_inf[c(1, 3, 5), 2] <- Inf
kNN(x_inf, k = 3)
kNN(x_inf, k = 3, search = "dist")
kNN(dist(x_inf), k = 3)
## sort and kNN to reduce k
nn10 <- kNN(x, k = 10)
#nn10 <- kNN(x, k = 10, sort = FALSE)
## knn now returns sorted lists
#expect_equal(nn10$sort, FALSE)
expect_error(kNN(nn10, k = 11))
nn5 <- kNN(nn10, k = 5)
expect_true(nn5$sort)
expect_identical(ncol(nn5$id), 5L)
expect_identical(ncol(nn5$dist), 5L)
## test with simple data
x <- data.frame(x=1:10, row.names = LETTERS[1:10])
nn <- kNN(x, k = 5)
expect_identical(unname(nn$id[1, ]), 2:6)
expect_identical(unname(nn$id[5, ]), c(4L, 6L, 3L, 7L, 2L))
expect_identical(unname(nn$id[10, ]), 9:5)
## test kNN with query
x <- data.frame(x=1:10, row.names = LETTERS[1:10])
nn <- kNN(x[1:8, , drop=FALSE], x[9:10, , drop = FALSE], k = 5)
expect_identical(nrow(nn$id), 2L)
expect_identical(unname(nn$id[1, ]), 8:4)
expect_identical(unname(nn$id[2, ]), 8:4)
expect_error(kNN(dist(x[1:8, , drop=FALSE]), x[9:10, , drop = FALSE], k = 5))
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.