Nothing
# ---- cooccurrence() tests ----
# === Shared test data ===
# Three transactions: {A, B, C}, {B, C}, {A, C}
# Co-occurrence matrix (raw counts):
# A B C
# A 2 1 2
# B 1 2 2
# C 2 2 3
# Frequencies: A=2, B=2, C=3, N=3
.co_test_list <- list(c("A", "B", "C"), c("B", "C"), c("A", "C"))
.co_expected_raw <- matrix(
c(2, 1, 2,
1, 2, 2,
2, 2, 3),
nrow = 3, byrow = TRUE,
dimnames = list(c("A", "B", "C"), c("A", "B", "C"))
)
# ========================================
# 1. Input format tests
# ========================================
test_that("list input produces correct co-occurrence matrix", {
net <- cooccurrence(.co_test_list)
expect_s3_class(net, "netobject")
expect_equal(net$weights, .co_expected_raw)
expect_false(net$directed)
expect_equal(net$method, "co_occurrence_fn")
})
test_that("delimited input produces correct co-occurrence matrix", {
df <- data.frame(
id = 1:3,
items = c("A; B; C", "B; C", "A; C"),
stringsAsFactors = FALSE
)
net <- cooccurrence(df, field = "items", sep = ";")
expect_equal(net$weights, .co_expected_raw)
})
test_that("multi-column delimited input pools across columns", {
df <- data.frame(
col1 = c("A; B", "B", "A"),
col2 = c("C", "C", "C"),
stringsAsFactors = FALSE
)
net <- cooccurrence(df, field = c("col1", "col2"), sep = ";")
expect_equal(net$weights, .co_expected_raw)
})
test_that("long/bipartite input produces correct co-occurrence matrix", {
df <- data.frame(
doc = c(1, 1, 1, 2, 2, 3, 3),
item = c("A", "B", "C", "B", "C", "A", "C"),
stringsAsFactors = FALSE
)
net <- cooccurrence(df, field = "item", by = "doc")
expect_equal(net$weights, .co_expected_raw)
})
test_that("binary matrix input produces correct co-occurrence matrix", {
bin <- matrix(
c(1, 1, 1,
0, 1, 1,
1, 0, 1),
nrow = 3, byrow = TRUE,
dimnames = list(NULL, c("A", "B", "C"))
)
net <- cooccurrence(bin)
expect_equal(net$weights, .co_expected_raw)
})
test_that("binary data.frame input produces correct co-occurrence matrix", {
df <- data.frame(A = c(1, 0, 1), B = c(1, 1, 0), C = c(1, 1, 1))
net <- cooccurrence(df)
expect_equal(net$weights, .co_expected_raw)
})
test_that("wide sequence input produces correct co-occurrence matrix", {
df <- data.frame(
V1 = c("A", "B", "A"),
V2 = c("B", "C", "C"),
V3 = c("C", NA, NA),
stringsAsFactors = FALSE
)
net <- cooccurrence(df)
expect_equal(net$weights, .co_expected_raw)
})
# ========================================
# 2. All formats produce identical results
# ========================================
test_that("all 6 formats produce the same co-occurrence matrix", {
net_list <- cooccurrence(.co_test_list)
df_del <- data.frame(items = c("A;B;C", "B;C", "A;C"),
stringsAsFactors = FALSE)
net_del <- cooccurrence(df_del, field = "items", sep = ";")
df_multi <- data.frame(c1 = c("A;B", "B", "A"),
c2 = c("C", "C", "C"),
stringsAsFactors = FALSE)
net_multi <- cooccurrence(df_multi, field = c("c1", "c2"), sep = ";")
df_long <- data.frame(
doc = c(1, 1, 1, 2, 2, 3, 3),
item = c("A", "B", "C", "B", "C", "A", "C"),
stringsAsFactors = FALSE
)
net_long <- cooccurrence(df_long, field = "item", by = "doc")
bin <- matrix(c(1, 1, 1, 0, 1, 1, 1, 0, 1), nrow = 3, byrow = TRUE,
dimnames = list(NULL, c("A", "B", "C")))
net_bin <- cooccurrence(bin)
df_wide <- data.frame(V1 = c("A", "B", "A"), V2 = c("B", "C", "C"),
V3 = c("C", NA, NA), stringsAsFactors = FALSE)
net_wide <- cooccurrence(df_wide)
expect_equal(net_del$weights, net_list$weights)
expect_equal(net_multi$weights, net_list$weights)
expect_equal(net_long$weights, net_list$weights)
expect_equal(net_bin$weights, net_list$weights)
expect_equal(net_wide$weights, net_list$weights)
})
# ========================================
# 3. Similarity tests
# ========================================
test_that("similarity = 'none' returns raw counts", {
net <- cooccurrence(.co_test_list, similarity = "none")
expect_equal(net$weights, .co_expected_raw)
})
test_that("similarity = 'jaccard' is correct", {
net <- cooccurrence(.co_test_list, similarity = "jaccard")
W <- net$weights
# Jaccard(A,B) = C[A,B] / (freq_A + freq_B - C[A,B]) = 1 / (2+2-1) = 1/3
expect_equal(W["A", "B"], 1 / 3)
# Jaccard(A,C) = 2 / (2+3-2) = 2/3
expect_equal(W["A", "C"], 2 / 3)
# Jaccard(B,C) = 2 / (2+3-2) = 2/3
expect_equal(W["B", "C"], 2 / 3)
# Diagonal: Jaccard(A,A) = 2 / (2+2-2) = 1
expect_equal(W["A", "A"], 1)
# Symmetric
expect_equal(W["A", "B"], W["B", "A"])
})
test_that("similarity = 'cosine' is correct", {
net <- cooccurrence(.co_test_list, similarity = "cosine")
W <- net$weights
# Cosine(A,B) = 1 / sqrt(2*2) = 0.5
expect_equal(W["A", "B"], 0.5)
# Cosine(A,C) = 2 / sqrt(2*3) = 2/sqrt(6)
expect_equal(W["A", "C"], 2 / sqrt(6))
# Diagonal: Cosine(A,A) = 2 / sqrt(2*2) = 1
expect_equal(W["A", "A"], 1)
})
test_that("similarity = 'inclusion' is correct", {
net <- cooccurrence(.co_test_list, similarity = "inclusion")
W <- net$weights
# Inclusion(A,B) = 1 / min(2,2) = 0.5
expect_equal(W["A", "B"], 0.5)
# Inclusion(A,C) = 2 / min(2,3) = 1
expect_equal(W["A", "C"], 1)
# Inclusion(B,C) = 2 / min(2,3) = 1
expect_equal(W["B", "C"], 1)
})
test_that("similarity = 'association' is correct", {
net <- cooccurrence(.co_test_list, similarity = "association")
W <- net$weights
# Association(A,B) = 1 / (2*2) = 0.25
expect_equal(W["A", "B"], 0.25)
# Association(A,C) = 2 / (2*3) = 1/3
expect_equal(W["A", "C"], 1 / 3)
# Association(B,C) = 2 / (2*3) = 1/3
expect_equal(W["B", "C"], 1 / 3)
})
test_that("similarity = 'dice' is correct", {
net <- cooccurrence(.co_test_list, similarity = "dice")
W <- net$weights
# Dice(A,B) = 2*1 / (2+2) = 0.5
expect_equal(W["A", "B"], 0.5)
# Dice(A,C) = 2*2 / (2+3) = 0.8
expect_equal(W["A", "C"], 0.8)
# Dice(A,A) = 2*2 / (2+2) = 1
expect_equal(W["A", "A"], 1)
})
test_that("similarity = 'equivalence' is correct", {
net <- cooccurrence(.co_test_list, similarity = "equivalence")
W <- net$weights
# Equivalence(A,B) = 1^2 / (2*2) = 0.25
expect_equal(W["A", "B"], 0.25)
# Equivalence(A,C) = 2^2 / (2*3) = 4/6 = 2/3
expect_equal(W["A", "C"], 2 / 3)
# Equivalence(A,A) = 2^2 / (2*2) = 1
expect_equal(W["A", "A"], 1)
})
test_that("similarity = 'relative' row-normalizes correctly", {
net <- cooccurrence(.co_test_list, similarity = "relative")
W <- net$weights
# Each row sums to 1
expect_equal(rowSums(W), c(A = 1, B = 1, C = 1))
# A row: 2/5, 1/5, 2/5
expect_equal(W["A", ], c(A = 2 / 5, B = 1 / 5, C = 2 / 5))
})
# ========================================
# 4. Parameters: threshold, min_occur, diagonal, top_n
# ========================================
test_that("threshold filters low-weight edges after normalization", {
# Raw counts: A-B = 1, A-C = 2, B-C = 2
# With similarity = "none", threshold = 2 drops A-B
net <- cooccurrence(.co_test_list, threshold = 2)
W <- net$weights
expect_equal(W["A", "B"], 0)
expect_equal(W["B", "A"], 0)
expect_equal(W["A", "C"], 2)
expect_equal(W["B", "C"], 2)
})
test_that("threshold applies after normalization", {
# Jaccard: A-B = 1/3, A-C = 2/3, B-C = 2/3
net <- cooccurrence(.co_test_list, similarity = "jaccard", threshold = 0.5)
W <- net$weights
# A-B = 1/3 < 0.5 → zeroed
expect_equal(W["A", "B"], 0)
# A-C = 2/3 >= 0.5 → kept
expect_equal(W["A", "C"], 2 / 3)
})
test_that("min_occur drops infrequent entities", {
# A appears in 2 transactions, B in 2, C in 3
# min_occur = 3 keeps only C → but single-item transactions → error
# min_occur = 2 keeps A, B, C (all >= 2)
trans <- list(c("A", "B", "C"), c("B", "C"), c("A", "C"), c("D"))
net <- cooccurrence(trans, min_occur = 2L)
expect_false("D" %in% colnames(net$weights))
expect_true(all(c("A", "B", "C") %in% colnames(net$weights)))
})
test_that("min_occur = 1 keeps all entities (default)", {
trans <- list(c("A", "B"), c("B", "C"), c("D"))
net <- cooccurrence(trans, min_occur = 1L)
expect_true("D" %in% colnames(net$weights))
})
test_that("diagonal = FALSE zeros the diagonal", {
net <- cooccurrence(.co_test_list, diagonal = FALSE)
expect_equal(diag(net$weights), c(A = 0, B = 0, C = 0))
expect_equal(net$weights["A", "B"], 1)
})
test_that("diagonal = FALSE + similarity works", {
net <- cooccurrence(.co_test_list, diagonal = FALSE, similarity = "cosine")
expect_equal(diag(net$weights), c(A = 0, B = 0, C = 0))
# freq still computed from column sums of B (not diagonal)
expect_equal(net$weights["A", "B"], 0.5)
})
test_that("top_n keeps only top N edges", {
# 3 unique off-diagonal pairs: A-B=1, A-C=2, B-C=2
net <- cooccurrence(.co_test_list, diagonal = FALSE, top_n = 2L)
W <- net$weights
# A-B (weight=1) should be zeroed; A-C and B-C (weight=2) kept
expect_equal(W["A", "B"], 0)
expect_equal(W["A", "C"], 2)
expect_equal(W["B", "C"], 2)
})
# ========================================
# 5. netobject integration
# ========================================
test_that("co_occurrence returns valid netobject structure", {
net <- cooccurrence(.co_test_list)
expect_s3_class(net, "netobject")
expect_s3_class(net, "cograph_network")
expect_false(net$directed)
expect_equal(net$n_nodes, 3L)
expect_true(net$n_edges > 0L)
expect_equal(nrow(net$nodes), 3L)
expect_true(all(c("id", "label", "name") %in% names(net$nodes)))
expect_true(all(c("from", "to", "weight") %in% names(net$edges)))
})
test_that("co_occurrence params are stored correctly", {
net <- cooccurrence(.co_test_list, similarity = "jaccard", threshold = 0.1)
expect_equal(net$params$similarity, "jaccard")
expect_equal(net$params$threshold, 0.1)
expect_equal(net$params$n_transactions, 3L)
expect_equal(net$params$n_items, 3L)
})
# ========================================
# 6. Edge cases
# ========================================
test_that("single-item transactions produce zero off-diagonal", {
trans <- list(c("A"), c("B"), c("C"))
net <- cooccurrence(trans)
W <- net$weights
expect_equal(W["A", "B"], 0)
expect_equal(W["B", "C"], 0)
expect_equal(diag(W), c(A = 1, B = 1, C = 1))
})
test_that("all-identical transactions give expected counts", {
trans <- list(c("X", "Y"), c("X", "Y"), c("X", "Y"))
net <- cooccurrence(trans)
expect_equal(net$weights["X", "Y"], 3)
expect_equal(net$weights["X", "X"], 3)
})
test_that("duplicate items within a transaction are de-duplicated", {
trans <- list(c("A", "A", "B"), c("B", "B", "C"))
net <- cooccurrence(trans)
expect_equal(net$weights["A", "A"], 1)
expect_equal(net$weights["A", "B"], 1)
})
test_that("empty strings and NAs are removed from transactions", {
trans <- list(c("A", "", NA, "B"), c("B", "C"))
net <- cooccurrence(trans)
expect_true("A" %in% colnames(net$weights))
expect_true("B" %in% colnames(net$weights))
})
test_that("delimited with extra whitespace is handled", {
df <- data.frame(kw = c(" A ; B ; C ", " B;C "), stringsAsFactors = FALSE)
net <- cooccurrence(df, field = "kw", sep = ";")
expect_equal(sort(colnames(net$weights)), c("A", "B", "C"))
expect_equal(net$weights["A", "B"], 1)
})
test_that("error on empty input", {
expect_error(cooccurrence(list()), "No non-empty transactions")
expect_error(cooccurrence(list(character(0), character(0))),
"No non-empty transactions")
})
test_that("error on invalid format", {
expect_error(cooccurrence("not_a_valid_input"), "Cannot detect input format")
})
test_that("large number of items works", {
items <- paste0("item_", seq_len(50))
trans <- lapply(seq_len(20), function(i) sample(items, 10))
net <- cooccurrence(trans)
expect_equal(ncol(net$weights), length(unique(unlist(trans))))
expect_true(isSymmetric(net$weights))
})
test_that("binary matrix without colnames gets auto-named", {
bin <- matrix(c(1, 0, 1, 1), nrow = 2)
net <- cooccurrence(bin)
expect_equal(colnames(net$weights), c("V1", "V2"))
})
test_that("wide sequence with void markers removes them", {
df <- data.frame(V1 = c("A", "%"), V2 = c("B", "*"),
stringsAsFactors = FALSE)
net <- cooccurrence(df)
expect_equal(net$params$n_transactions, 1L)
})
test_that("min_occur filters before co-occurrence computation", {
# D appears only once → dropped with min_occur = 2
trans <- list(c("A", "B"), c("A", "B"), c("A", "D"))
net <- cooccurrence(trans, min_occur = 2L)
expect_false("D" %in% colnames(net$weights))
expect_equal(net$weights["A", "B"], 2)
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.