library(testthat)
library(data.table)
library(topicanalysis)
testthat::context("cooccurrences")
test_that(
"cooccurrences plausibility checks",
{
data(BE_lda)
dt <- cooccurrences(BE_lda, k = 3L)
top <- topicmodels::topics(BE_lda, k = 3L)
# Ensure that cooccurrence counts are correct
for (i in 1L:10L){
a <- dt[i,][["a"]]
b <- dt[i,][["b"]]
expect_equal(
dt[i,][["count_coi"]],
table(apply(top, 2, function(x) (a %in% x) && (b %in% x)))[["TRUE"]]
)
}
# The first test shall ensure that counts for a->b and b->a, i.e. for the
# reverse order of the topic are identical
g <- igraph::graph_from_data_frame(
d = data.frame(
from = dt[["a"]],
to = dt[["b"]],
n = dt[["count_coi"]],
chisquare = dt[["chisquare"]]
),
directed = TRUE
)
m <- igraph::get.adjacency(g, sparse = FALSE, attr = "n")
expect_true(isSymmetric(m))
# Do the same for the chisquare test, knowing that there is a potential
# rounding error
m <- igraph::get.adjacency(g, sparse = FALSE, attr = "chisquare")
expect_true(isSymmetric(m, tol = 0.05))
# Now, ensure that the count of the number of cooccurrences and the
# occurrences external to the context sum up to the total
dt_n <- data.table(n = as.matrix(table(as.vector(top)))[,1])
dt_n[, "topic" := 1L:nrow(dt_n)]
setkeyv(dt_n, cols = "topic")
setkeyv(dt, cols = "b")
dt2 <- dt[dt_n]
expect_true(all(dt[["count_coi"]] + dt[["count_ref"]] == dt2[["n"]]))
}
)
test_that(
"check cooccurrences results against docs()-method (without renumbering)",
{
data(BE_lda)
BE <- Topicanalysis$new(BE_lda)
BE_coocs <- cooccurrences(BE_lda, k = 5L)
for (i in 1:5){
expect_equal(
BE_coocs[i][["count_coi"]],
length(BE$docs(x = BE_coocs[i][["a"]], y = BE_coocs[i][["b"]], n = 5L))
)
}
}
)
test_that(
"check cooccurrences results against docs()-method (with renumbering, without subsetting)",
{
data(BE_lda)
top <- topicmodels::topics(BE_lda, k = 5L)
benchmark <- table(apply(top, 2, function(x) all(any(1:5 %in% x), any(6:10 %in% x))))[["TRUE"]]
BE <- cooccurrences(BE_lda, k = 5L)
BE <- Topicanalysis$new(BE_lda)
n_docs_method <- length(BE$docs(x = 1:5, y = 6:10, n = 5L))
expect_identical(benchmark, n_docs_method)
renumber_vec <- 1L:BE_lda@k
renumber_vec[1:5] <- 1L
renumber_vec[6:10] <- 6L
coocs <- cooccurrences(BE_lda, k = 5L, renumber = renumber_vec)
expect_equal(coocs[a == 1][b == 6][["count_coi"]], n_docs_method)
}
)
test_that(
"check that using list as input for argument renumber for cooccurrences method works",
{
data(BE_lda, BE_labels)
renumber_li <- list(
school = grep("Grundschule", BE_labels),
cummunity = grep("Gemeindeentwicklung", BE_labels),
traffic = grep("Verkehrsmittel", BE_labels)
)
dt <- cooccurrences(BE_lda, k = 3L, renumber = renumber_li)
a_n <- unique(dt[a == grep("Grundschule", BE_labels)[1]][["a_total"]])
TA <- Topicanalysis$new(BE_lda)
a_n2 <- length(TA$docs(x = renumber_li[["school"]], n = 3L))
expect_identical(a_n, a_n2)
}
)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.