context("distance/cosine similarity measures")
test_that("measures work properly", {
r_dist = function(x, y, what = c("cos_sim", "pdist"), p = NULL) {
what = match.arg(what)
if (is.null(p)) p = 2
res = matrix(NA, nrow = NROW(x), ncol = NROW(y))
for (i in 1:NROW(x)) {
for (j in 1:NROW(y)) {
rx = x[i, ]
ry = y[j, ]
res[i, j] = if (what == "cos_sim") {
sum(rx * ry) / sqrt(sum(rx^2)) / sqrt(sum(ry^2))
} else {
if (p == 1) {
sum(abs(rx - ry))
} else {
sum((rx - ry)^p)^(1/p)
}
}
}
}
return(res)
}
for (rr in 1:5) {
n = 20
k = 4
A = matrix(runif(n * k) + 1, ncol = k)
B = matrix(runif((n - 3) * k) + 1, ncol = k)
# cosine similarity
res1 = row_cos_sim(A, B)
res2 = frow_cos_sim(A, B)
r_res = r_dist(A, B, "cos_sim")
expect_equal(res1, res2)
expect_equal(res1, r_res)
# Euclidean distance
res3 = row_pdist(A, B, 2L)
res4 = frow_euc_dist(A, B)
res5 = r_dist(A, B, "pdist", p = 2)
expect_equal(res3, res4)
expect_equal(res3, res5)
# Manhattan distance
res6 = row_pdist(A, B, 1)
res7 = r_dist(A, B, "pdist", p = 1)
expect_equal(res6, res7)
# Minkowski distance
k = max(rpois(1, 2), 1)
res8 = row_pdist(A, B, k)
res9 = r_dist(A, B, "pdist", p = k)
# skip if r-code results in NaNs
if (sum(is.na(res9)) == 0)
expect_equal(res8, res9)
# compare with built-in distance function
res10 = as.matrix(stats::dist(A))
res11 = row_pdist(A, A, 2)
expect_equivalent(res10, res11)
res12 = as.matrix(stats::dist(A, method = "minkowski", p = k))
res13 = row_pdist(A, A, k)
expect_equivalent(res12, res13)
# check double-centering
A_c = double_center(A)
expect_equal(rowSums(A_c), rep(0, nrow(A)))
expect_equal(colSums(A_c), rep(0, ncol(A)))
}
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.