context("test pairwise functions")
vec_with_na <- seq(12)
vec_with_na[3] <- NA
test_df <- data.frame(
row=rep(paste("row", seq(4)), each=3),
col=rep(paste("col", seq(3)), 4) ,
val=seq(12),
with_na=vec_with_na)
test_df <- test_df %>% rename(`ro w`=row, `co l`=col) #TODO when val column has space there are errors
set.seed(0)
test_df$rand <- vapply(seq(nrow(test_df)), function(x){
if(x <= 6) {
runif(1, min=-0.1, max=0.1)
} else {
10+runif(1, min=-0.1, max=0.1)
}
}, FUN.VALUE=1)
test_that("test do_cosine_sim.kv with NA value", {
loadNamespace("dplyr")
result <- test_df %>%
do_cosine_sim.kv(`ro w`, `co l`, with_na)
expect_equal(nrow(result), 12)
expect_equal( typeof(result[[1]]), "character")
expect_equal( typeof(result[[2]]), "character")
})
test_that("test do_cosine_sim.kv", {
loadNamespace("dplyr")
df <- test_df
# test with a column name with a space
colnames(df)[1] <- "ro w"
result <- (
df %>%
do_cosine_sim.kv(`ro w`, `co l`, val)
)
# row1 and row2 pair result
expect_equal(colnames(result), c("ro w.x", "ro w.y", "value"))
expect_equal(result[1, "value"][[1]], (1*4+2*5+3*6)/sqrt(1^2+2^2+3^2)/sqrt(4^2+5^2+6^2))
})
test_that("test do_cosine_sim.kv without val", {
loadNamespace("dplyr")
test_df <- data.frame(
subject = paste0("subject", rep(4-seq(3), each=3)),
key = paste0("key", c(rep(3-seq(2), 4), 1)))
result <- (
test_df %>%
do_cosine_sim.kv( subject, key)
)
expect_equal(result[[3]][1:2], c(1, 4/5))
})
test_that("test sparse_cast with duplicate", {
test_df <- data.frame(
rowname = rep(c("row1", "row02", "row3"), each=3),
colname = c("col1", "col1", "col5", "col02", "col3", "col1", "col02", "col4", "col5"),
val = seq(9),
stringsAsFactors = FALSE
)
result <- (
test_df %>%
do_cosine_sim.kv(rowname, colname, val, fun.aggregate=min)
)
expect_equal(result[1, 3][[1]], (1*6)/sqrt(1^2+3^2)/sqrt(4^2+5^2+6^2))
})
test_that("test do_cosine_sim.kv with NA value", {
loadNamespace("dplyr")
result <- (
test_df %>%
do_cosine_sim.kv(`ro w`, `co l`, with_na)
)
expect_equal(nrow(result), 12)
expect_equal( typeof(result[[1]]), "character")
expect_equal( typeof(result[[2]]), "character")
})
test_that("test do_cosine_sim.kv diag TRUE", {
loadNamespace("dplyr")
result <- (
test_df %>%
do_cosine_sim.kv(`ro w`, `co l`, val, diag=TRUE)
)
expect_equal(nrow(result), 16)
})
test_that("test do_cosine_sim.kv with distinct", {
loadNamespace("dplyr")
result <- (
test_df %>%
do_cosine_sim.kv(`ro w`, `co l`, val, distinct=TRUE)
)
expect_equal(nrow(result), 6)
expect_equal( typeof(result[[1]]), "character")
expect_equal( typeof(result[[2]]), "character")
})
test_that("test do_cosine_sim.kv method cosine diag TRUE", {
loadNamespace("dplyr")
result <- (
test_df %>%
do_cosine_sim.kv(`ro w`, `co l`, val, diag=TRUE)
)
expect_equal(nrow(result), 16)
expect_equal(result[[3]][[1]], 1)
})
test_that("test do_cosine_sim.kv diag TRUE", {
loadNamespace("dplyr")
result <- (
test_df %>%
do_cosine_sim.kv(`ro w`, `co l`, val, diag=TRUE)
)
expect_equal(nrow(result), 16)
})
test_that("test do_cosine_sim.kv for grouped data frame as subject error", {
data <- data.frame(group=rep(c(1,2,3), each=6),
row = rep(c(1, 1, 2, 2, 3,3), 3),
col = rep(c(1,2), 9),
val = rep(0, 18))
expect_error({
ret <- data %>%
dplyr::group_by(group) %>%
do_cosine_sim.kv(group, col, val)
}, "group is a grouping column\\. ungroup\\(\\) may be necessary before this operation\\.")
})
test_that("test do_dist.kv", {
loadNamespace("dplyr")
result <- test_df %>%
do_dist.kv(`ro w`, `co l`, val, diag=TRUE)
expect_equal(nrow(result), 16)
expect_equal(result[[3]][1], 0)
})
test_that("test do_dist with cmd_scale", {
loadNamespace("dplyr")
test_df <- data.frame(
row=rep(paste("row", seq(4)), each=6),
col=rep(paste("col", seq(6)), 4) ,
val=seq(24)
)
result_kv <- test_df %>%
do_dist(skv = c("row", "col", "val"), diag=TRUE, cmdscale_k = 3)
# it seems result_kv can be 3 or 2 dimensional with this data,
# since axis3 gets near 0.
#
# 3 dimensional result (got on Mac).
#
# # A tibble: 4 x 4
# row axis1 axis2 axis3
# <chr> <dbl> <dbl> <dbl>
# 1 row 1 -22.0 0.000000613 0
# 2 row 2 -7.35 -0.000000167 0.0000000359
# 3 row 3 7.35 0.000000167 0.000000111
# 4 row 4 22.0 0.000000501 -0.0000000250
#
# 2 dimensional result (got on Linux).
#
# # A tibble: 4 x 3
# row axis1 axis2
# <chr> <dbl> <dbl>
# 1 row 1 -22.0 0.000000613
# 2 row 2 -7.35 -0.000000167
# 3 row 3 7.35 0.000000167
# 4 row 4 22.0 0.000000501
result_cols <- test_df %>%
tidyr::spread(col, val) %>% dplyr::select(-row) %>%
do_dist(dplyr::everything(), diag=TRUE, cmdscale_k = 3)
expect_true(ncol(result_kv) %in% c(4,3))
expect_equal(ncol(result_cols), 4)
# this expectation used to be like following, but added abs to allow results with flipped sign,
# which is also correct. For some reason this happens on windows 32bit.
# expect_equal(result_kv[[2]], c(-22.045408, -7.348469, 7.348469, 22.045408), tolerance=0.01)
expect_equal(abs(result_kv[[2]]), c(22.045408, 7.348469, 7.348469, 22.045408), tolerance=0.01)
})
test_that("test do_dist with cmd_scale with normalize", {
loadNamespace("dplyr")
test_df <- data.frame(
row=rep(paste("row", seq(4)), each=6),
col=rep(paste("col", seq(6)), 4) ,
val=c(seq(7),seq(7),seq(10))
)
result_kv <- test_df %>%
do_dist(skv = c("row", "col", "val"), diag=TRUE, cmdscale_k = 3, normalize=TRUE)
expect_equal(ncol(result_kv), 4)
expect_equal(result_kv[[2]], c(-0.124,-1.16, -1.61, 2.89), tolerance=0.01)
})
test_that("test do_dist.kv diag TRUE", {
loadNamespace("dplyr")
result <- (
test_df %>%
do_dist.kv(`ro w`, `co l`, val, diag=TRUE)
)
expect_equal(nrow(result), 16)
expect_equal(result[[3]][1], 0)
})
test_that("test do_dist.kv without val", {
loadNamespace("dplyr")
test_df <- data.frame(
subject = paste0("subject", rep(4-seq(3), each=3)),
key = paste0("key", c(rep(3-seq(2), 4), 1)))
result <- (
test_df %>%
do_dist.kv(subject, key)
)
expect_equal(result[[3]][1:2], c(0, sqrt(2)))
})
test_that("test do_dist without val", {
loadNamespace("dplyr")
test_df <- data.frame(
subject = paste0("subject", rep(4-seq(3), each=3)),
key = paste0("key", c(rep(3-seq(2), 4), 1)))
result <- (
test_df %>%
do_dist( skv = c("subject", "key") )
)
expect_equal(result[[3]][1:2], c(0, sqrt(2)))
})
test_that("test do_dist.cols", {
loadNamespace("dplyr")
test_df <- data.frame(var1=c(1,2,2,2), var2=c(2,1,1,1), var3=c(0,0,2,3))
result <- (
test_df %>%
do_dist.cols(dplyr::starts_with("var"))
)
expect_equal(sum(!is.na(result$value)), 6) # Elements other than diagonal.
})
test_that("test do_dist.cols with only lower triangle", {
loadNamespace("dplyr")
test_df <- data.frame(var1=c(1,2,2,2), var2=c(2,1,1,1), var3=c(0,0,2,3))
result <- (
test_df %>%
do_dist.cols(dplyr::starts_with("var"), distinct=TRUE)
)
expect_equal(sum(!is.na(result$value)), 3) # Only lower triangle elements.
})
test_that("test do_dist.kv for grouped data frame as subject error", {
data <- data.frame(group=rep(c(1,2,3), each=6),
row = rep(c(1, 1, 2, 2, 3,3), 3),
col = rep(c(1,2), 9),
val = rep(0, 18))
expect_error({
ret <- data %>%
dplyr::group_by(group) %>%
do_dist.kv(group, col, val)
}, "group is a grouping column\\. ungroup\\(\\) may be necessary before this operation\\.")
})
test_that("do_dist with NA values", {
loadNamespace("reshape2")
nrow <- 10
ncol <- 20
vec <- rnorm(nrow * ncol)
mat <- matrix(vec, nrow = nrow)
melt_mat <- reshape2::melt(mat)
# test column name with space
colnames(melt_mat)[[2]] <- "Var 2"
ret <- do_dist(melt_mat, skv = c("Var 2", "Var1", "value"), diag = TRUE)
dist_ret <- as.matrix(dist(t(mat)))
melt_ret <- reshape2::melt(dist_ret)
for(i in seq(ncol)){
for(j in seq(ncol)){
mat_answer <- dist_ret[i, j]
df_answer <- ret[ret[[1]] == i & ret[[2]] == j, 3][[1]]
expect_equal(mat_answer, df_answer)
}
}
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.