# Nothing
# Attach polmineR; the tests below call its functions unqualified.
library(polmineR)
# Make the corpora referenced by the tests available. Presumably use()
# activates corpora shipped inside the named package (here: those of polmineR
# itself, and the REUTERS corpus from RcppCWB) -- TODO confirm against the
# polmineR documentation.
use("polmineR")
use(pkg = "RcppCWB", corpus = "REUTERS")
# Label the tests that follow with the "pmi" context.
# NOTE(review): context() is reported as deprecated in testthat's 3rd
# edition -- confirm before relying on it.
testthat::context("pmi")
test_that(
  "check calculation of pointwise mutual information",
  {
    # Cooccurrence statistics for the query "oil", scored with method "pmi".
    cooc <- cooccurrences("REUTERS", query = "oil", method = "pmi")

    # Denominator shared by all relative frequencies below: the "ref" and
    # "coi" sizes reported for the object plus its count().
    sizes <- size(cooc)
    total <- sizes[["ref"]] + sizes[["coi"]] + count(cooc)

    # Recompute the score by hand: log2 of the joint share divided by the
    # product of the two marginal shares.
    share_joint <- cooc[["count_coi"]] / total
    share_query <- count(cooc) / total
    share_token <- cooc[["count_partition"]] / total
    pmi_by_hand <- log2(share_joint / (share_query * share_token))

    # The "pmi" column must agree with the manual value within 1e-3.
    expect_equal(cooc[["pmi"]], pmi_by_hand, tolerance = 1e-3)
  }
)
test_that(
  "identity of phrase detection of decode-workflow and Cooccurrences workflow",
  {
    # Workflow 1: decode the corpus to a data.table, build 2-grams on the
    # "word" attribute, then score them with pmi() against the word counts
    # of the same corpus.
    decoded <- decode(
      corpus("GERMAPARLMINI"),
      p_attribute = "word",
      s_attribute = character(),
      to = "data.table",
      verbose = FALSE
    )
    bigrams <- ngrams(decoded, n = 2L, p_attribute = "word")
    via_decode <- pmi(
      bigrams,
      observed = count("GERMAPARLMINI", p_attribute = "word")
    )

    # Workflow 2: Cooccurrences() with left = 0L and right = 1L, decoded and
    # scored with pmi(). Presumably this yields the same adjacent word pairs
    # as the 2-grams above -- that equivalence is what this test checks.
    cooc <- Cooccurrences(
      "GERMAPARLMINI",
      p_attribute = "word",
      left = 0L,
      right = 1L,
      verbose = FALSE
    )
    via_cooc <- pmi(decode(cooc))

    # Restrict both results to pairs counted exactly five times and sort the
    # "stat" tables by their two word columns so rows line up.
    stat_decode <- data.table::setorderv(
      slot(subset(via_decode, ngram_count == 5L), "stat"),
      cols = c("word_1", "word_2")
    )
    stat_cooc <- data.table::setorderv(
      slot(subset(via_cooc, ab_count == 5L), "stat"),
      cols = c("a_word", "b_word")
    )

    # Same number of pairs, and the same first and second words in order.
    expect_identical(nrow(stat_decode), nrow(stat_cooc))
    expect_identical(stat_decode[["word_1"]], stat_cooc[["a_word"]])
    expect_identical(stat_decode[["word_2"]], stat_cooc[["b_word"]])
  }
)
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.