# Tests for phrase() and its helper functions (as.list, print, is.phrase).
# NOTE(review): context() is deprecated in testthat 3rd edition -- confirm
# which edition this package targets before removing it.
context("test phrase() function")
# phrase() applied to a character vector: each space-separated string is
# expected to become one character vector of its individual words.
test_that("test phrase for character", {
  # Three-word, two-word, and single-word inputs.
  inputs <- c("capital gains tax", "one two", "three")
  expected <- list(c("capital", "gains", "tax"), c("one", "two"), "three")
  expect_equivalent(phrase(inputs), expected)

  # Strings without any space yield one single-element entry each.
  expect_equivalent(phrase(letters), as.list(letters))
})
# phrase() applied to a dictionary: every dictionary value is expected to be
# split into its words. The expected values below are lower-cased relative to
# the dictionary input, and glob patterns ("feder*", "gov*") are kept as-is.
#
# This test previously appeared twice, verbatim, under the same description;
# the redundant copy has been removed so the behaviour is asserted once.
test_that("test phrase for dictionaries", {
  dict <- dictionary(list(
    country = c("United States"),
    institution = c("Congress", "feder* gov*")
  ))

  # expect_equivalent() ignores attributes, so only the word vectors are
  # compared, not any names carried over from the dictionary keys.
  expect_equivalent(
    phrase(dict),
    list(c("united", "states"), c("congress"), c("feder*", "gov*"))
  )
})
# phrase() applied to a collocations object: only the detected multi-word
# sequences are expected back, with their original case.
test_that("test phrase for collocations", {
  docs <- c("United States", "Congress", "federal government")
  colls <- textstat_collocations(
    tokens(docs),
    min_count = 1,
    tolower = FALSE
  )

  # The single-word document "Congress" is absent from the expected phrases.
  expected <- list(c("United", "States"), c("federal", "government"))
  expect_equivalent(phrase(colls), expected)
})
# phrase() applied to tokens objects, both plain and after bigram formation.
test_that("test phrase for tokens", {
  docs <- c("United States", "Congress", "federal government")

  # Plain tokens: one entry per document, holding that document's tokens.
  expected_plain <- list(
    c("United", "States"),
    "Congress",
    c("federal", "government")
  )
  expect_equivalent(phrase(tokens(docs)), expected_plain)

  # Space-concatenated bigrams: the one-word document has no bigram, so its
  # entry is expected to be an empty character vector.
  bigrams <- tokens_ngrams(tokens(docs), n = 2, concatenator = " ")
  expected_bigrams <- list(
    "United States",
    character(0),
    "federal government"
  )
  expect_equivalent(phrase(bigrams), expected_bigrams)
})
# Helper methods around phrase objects: list conversion, printing, and the
# is.phrase() predicate.
test_that("helper functions for phrase() work", {
  word_lists <- list(c("capital", "gains", "tax"), c("one", "two"), "three")
  p <- phrase(c("capital gains tax", "one two", "three"))

  # Converting back to a list must give exactly the plain list of words.
  as_plain_list <- quanteda.core:::as.list.phrases(p)
  expect_identical(as_plain_list, word_lists)

  # Printing must emit some output (no particular pattern is required).
  expect_output(print(p))

  # A bare list is not a phrase object; the result of phrase() is.
  expect_identical(is.phrase(word_lists), FALSE)
  expect_identical(is.phrase(p), TRUE)
})
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.