Nothing
# tests for universal functions (umap_universal.R)
source("synthetic.R")
source("train_test.R")
# ############################################################################
# Tests for exact nearest neighbors extraction
test_that("k nearest neighbors complains when k is too large or too small", {
# da is a distance matrix (not necessarily metric)
da <- matrix(0, ncol=5, nrow=5)
expect_error(knn.from.dist(da, 6))
expect_error(knn.from.dist(da, 0))
expect_silent(knn.from.dist(da, 3))
})
test_that("k nearest neighbors information", {
# da is a distance matrix (not necessarily metric)
da <- matrix(0, ncol=5, nrow=5)
# object 1 - nearest items are 2,3
da[1,2] <- da[2,1] <- 1
da[1,3] <- da[3,1] <- 2
da[1,4] <- da[4,1] <- 3
da[1,5] <- da[5,1] <- 4
# object 2 - nearest items are 1,4
da[2,3] <- da[3,2] <- 10
da[2,4] <- da[4,2] <- 2
da[2,5] <- da[5,2] <- 11
# object 3 - nearest items are 1,4
da[3,4] <- da[4,3] <- 3
da[3,5] <- da[5,3] <- 12
# object 4 - nearest items are 5,2
da[4,5] <- da[5,4] <- 1
da.dist <- as.dist(da)
# compute knn from matrix and from dist
result <- knn.from.dist(da, 3)
resultd <- knn.from.dist(da.dist, 3)
# the results should be numerically equal, but (dist) introduces rownames
# check for numerical equality only (not attributes)
expect_equal(result, resultd, check.attributes=FALSE)
# check content of the matrices
# these matrices constructed manually by visual inspection of da
expected.indexes <- matrix(c(1,2,3, 2,1,4, 3,1,4, 4,5,2, 5,4,1),
byrow=TRUE, nrow=5, ncol=3)
expected.distances <- matrix(c(0,1,2, 0,1,2, 0,2,3, 0,1,2, 0,1,4),
byrow=TRUE, nrow=5, ncol=3)
expect_equal(result$indexes, expected.indexes)
expect_equal(result$distances, expected.distances)
})
# ############################################################################
# Tests for well-formed output
test_that("knn.from.data should preserve rownames", {
result <- knn.from.data(t(syn0), 3, mdEuclidean)
expect_is(result, "umap.knn")
expect_equal(rownames(syn0), rownames(result$indexes))
expect_equal(rownames(syn0), rownames(result$distances))
})
test_that("knn.from.data should preserve rownames", {
syn0dist <- as.matrix(syn0.dist)
result <- knn.from.dist(syn0dist, 3)
expect_is(result, "umap.knn")
expect_equal(rownames(syn0), rownames(result$indexes))
expect_equal(rownames(syn0), rownames(result$distances))
})
test_that("knn.info (brute force) should preserve rownames", {
conf <- umap.defaults
conf$n_neighbors <- 4
conf$metric.function <- mdEuclidean
result <- knn.info(syn0, conf, brute.force=TRUE)
expected.rownames <- c(rownames(result$indexes), rownames(result$distances))
result.rownames <- c(rownames(syn0), rownames(syn0))
expect_equal(result.rownames, expected.rownames)
})
test_that("knn.info (from data) should preserve rownames", {
conf <- umap.defaults
conf$n_neighbors <- 4
conf$metric.function <- mdEuclidean
result <- knn.info(syn0, conf, brute.force=FALSE)
expected.rownames <- c(rownames(result$indexes), rownames(result$distances))
result.rownames <- c(rownames(syn0), rownames(syn0))
expect_is(result, "umap.knn")
expect_equal(result.rownames, expected.rownames)
})
# ############################################################################
# Tests for approximate nearest neighbors extraction
test_that("knn from data complains when k is too large or too small", {
# da is a dummy matrix
da <- matrix(0, ncol=5, nrow=5)
expect_error(knn.from.data(da, 6, mdEuclidean))
expect_error(knn.from.data(da, 0, mdEuclidean))
expect_silent(knn.from.data(da, 3, mdEuclidean))
})
test_that("knn from data complains about k (large dataset)", {
# da is a dummy, but large, matrix
da <- matrix(0, ncol=5, nrow=5000)
expect_error(knn.from.data(da, 6000, mdEuclidean))
expect_error(knn.from.data(da, 0, mdEuclidean))
expect_error(knn.from.data(da, -0.5, mdEuclidean))
})
test_that("knn from data complains about subsampling (large dataset)", {
# da is a dummy, but large, matrix
da <- matrix(0, nrow=20, ncol=300)
expect_error(knn.from.data(da, 5, mdEuclidean, subsample.k=NA),
"subsample")
expect_error(knn.from.data(da, 5, mdEuclidean, subsample.k=-0.2),
"subsample")
expect_error(knn.from.data(da, 5, mdEuclidean, subsample.k=600),
"subsample")
expect_silent(knn.from.data(da, 6, mdEuclidean, subsample.k=0.3))
})
test_that("knn returns a reasonable set of data", {
# dd is a matrix with points on a plane
dd <- matrix(0, ncol=2, nrow=10)
# cluster 1 with points near (1,1)
dd[1,] <- c(1, 1)
dd[2,] <- c(1, 2)
dd[3,] <- c(2, 1)
# cluster with points near (10, 10)
dd[4,] <- c(10,10)
dd[5,] <- c(10,11)
dd[6,] <- c(12,12)
# cluster with points near (20, 20)
dd[7,] <- c(20,20)
dd[8,] <- c(20,21)
dd[9,] <- c(21,20)
dd[10,] <- c(21,21)
result <- knn.from.data(t(dd), 3, mdEuclidean)
expect_equal(dim(result$indexes), c(10, 3))
expect_equal(dim(result$distances), c(10, 3))
})
test_that("knn for large dataset queries a small number of distances", {
dlarge <- matrix(0, ncol=2, nrow=300)
dlarge[,1] <- runif(nrow(dlarge), -2, 2)
dlarge[,2] <- runif(nrow(dlarge), -2, 2)
result.dist <- knn.from.dist(dist(dlarge), 4)
result <- knn.from.data.reps(dlarge, 4, mdEuclidean, subsample=4, reps=1)
result.reps <- knn.from.data.reps(dlarge, 4, mdEuclidean, subsample=4, reps=2)
# all self distances are zero
expect_equal(sum(result$distances[,1]), 0)
# distance to nearest neighbor should be small
expect_lt(mean(result$distances[,2]), 0.4)
# distances in two-reps are smaller
totdist1 <- sum(apply(result$distances, 1, sum))
totdist2 <- sum(apply(result.reps$distances, 2, sum))
expect_lt(totdist2, totdist1)
})
# ############################################################################
# Tests with degenerate neighbors
test_that("knn works with degenerate neighbors", {
# syn1 has so many same-location points, that first nearest neighbor
# should always be at distance 0
result <- knn.from.data(t(syn1[, 1:2]), 4, mdEuclidean)
expect_lt(mean(result$distances[,2]), 0.01)
})
# ############################################################################
# Tests with spectators
test_that("knn from data links spectators to primary data", {
# create merged dataset with observations in columns
dT <- cbind(t(i.train), t(i.test))
num.primary <- nrow(i.train)
result <- knn.from.data(dT, 6, mdEuclidean, fix.observations=num.primary)
# result should have indexes and distance components
expect_true(all(c("indexes", "distances") %in% names(result)))
# indexes should show all observation link to themselves
expect_equivalent(result$indexes[,1], seq_len(nrow(result$indexes)))
# indexes to further neighbors should only link to primary data
expect_lt(max(result$indexes[, 2:6]), num.primary+1)
})
train.size <- nrow(i.train)/3
test.size <- nrow(i.test)/3
test_that("spectator.info by brute force links spectators to primary data", {
conf <- umap.prep.config(umap.defaults, random_state=123)
conf$n_neighbors <- 5
result <- spectator.knn.info(i.test, i.train, conf, brute.force=TRUE)
# check output sizes
expect_equal(dim(result$indexes), c(nrow(i.test), conf$n_neighbors))
expect_equal(dim(result$distances), c(nrow(i.test), conf$n_neighbors))
expect_equal(rownames(result$distances), rownames(i.test))
expect_equal(rownames(result$indexes), rownames(i.test))
# ditances of first neighbor must be larger than zero
expect_equal(mean(result$distances[,1]), 0)
# entries in i.test should connect to items in i4,
# which are more numerous than i.test
indexes <- result$indexes[, 2:ncol(result$indexes)]
expect_gt(max(indexes), nrow(i.test))
# the first 5 items in i.test must be neighbors to the
# first 15 indexes in i4, etc.
expect_lt(mean(indexes[1:test.size,]), train.size)
expect_lt(mean(indexes[test.size+(1:test.size),]), 2*train.size)
expect_gt(mean(indexes[2*test.size+(1:test.size),]), 2*train.size)
})
test_that("spectator.info by stochastic method links to primary data", {
conf <- umap.prep.config(umap.defaults, random_state=123)
conf$n_neighbors <- 5
result <- spectator.knn.info(i.test, i.train, conf, brute.force=FALSE)
# check output sizes
expect_equal(dim(result$indexes), c(nrow(i.test), conf$n_neighbors))
expect_equal(dim(result$distances), c(nrow(i.test), conf$n_neighbors))
expect_equal(rownames(result$distances), rownames(i.test))
expect_equal(rownames(result$indexes), rownames(i.test))
# ditances of first neighbor must be larger than zero
expect_equal(mean(result$distances[,1]), 0)
# entries in i.test should connect to items in i4,
# which are more numerous than i.test
indexes <- result$indexes[, 2:ncol(result$indexes)]
expect_gt(max(indexes), nrow(i.test))
# the first 5 items in i.test must be neighbors to the
# first 15 indexes in i4, etc.
expect_lt(mean(indexes[1:test.size,]), train.size)
expect_lt(mean(indexes[test.size+(1:test.size),]), 2*train.size)
expect_gt(mean(indexes[2*test.size+(1:test.size),]), 2*train.size)
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.