test_that("textstat_summary method works", {
  skip_on_os("solaris")

  # Fixtures: documents 10-15 of quanteda's inaugural corpus, held as a corpus,
  # as lower-cased tokens, and as a dfm built from those tokens.
  inaug_corp <- quanteda::data_corpus_inaugural[10:15]
  inaug_toks <- quanteda::tokens_tolower(quanteda::tokens(inaug_corp))
  inaug_dfm <- quanteda::dfm(inaug_toks)

  # Column layout expected from the summary regardless of the input class.
  expected_cols <- c(
    "document", "chars", "sents", "tokens", "types",
    "puncts", "numbers", "symbols", "urls",
    "tags", "emojis"
  )

  # First element of quanteda's internal removal patterns for the given flags.
  removal_pattern <- function(...) {
    quanteda:::removals_regex(...)[[1]]
  }

  # corpus ----
  corp_stats <- textstat_summary(inaug_corp)

  # Sentence counts are compared against a sentence-level tokenisation.
  sentence_toks <- quanteda::tokens(inaug_corp, what = "sentence")
  expect_equal(corp_stats$sents, unname(quanteda::ntoken(sentence_toks)))
  expect_equal(names(corp_stats), expected_cols)

  # tokens ----
  toks_stats <- textstat_summary(inaug_toks)

  punct_toks <- quanteda::tokens_select(
    inaug_toks,
    removal_pattern(punct = TRUE),
    valuetype = "regex"
  )
  expect_equal(toks_stats$puncts, unname(quanteda::ntoken(punct_toks)))

  number_toks <- quanteda::tokens_select(
    inaug_toks,
    removal_pattern(numbers = TRUE),
    valuetype = "regex"
  )
  expect_equal(toks_stats$numbers, unname(quanteda::ntoken(number_toks)))

  expect_equal(toks_stats$tokens, unname(quanteda::ntoken(inaug_toks)))
  expect_equal(toks_stats$types, unname(quanteda::ntype(inaug_toks)))

  # For tokens input the sentence count is expected to be NA in every row.
  expect_equal(toks_stats$sents, rep(NA, quanteda::ndoc(inaug_toks)))
  expect_equal(names(toks_stats), expected_cols)

  # dfm ----
  dfm_stats <- textstat_summary(inaug_dfm)

  punct_dfm <- quanteda::dfm_select(
    inaug_dfm,
    removal_pattern(punct = TRUE),
    valuetype = "regex"
  )
  expect_equal(dfm_stats$puncts, unname(quanteda::ntoken(punct_dfm)))

  number_dfm <- quanteda::dfm_select(
    inaug_dfm,
    removal_pattern(numbers = TRUE),
    valuetype = "regex"
  )
  expect_equal(dfm_stats$numbers, unname(quanteda::ntoken(number_dfm)))

  expect_equal(dfm_stats$tokens, unname(quanteda::ntoken(inaug_dfm)))
  expect_equal(dfm_stats$types, unname(quanteda::ntype(inaug_dfm)))

  # For dfm input the sentence count is likewise expected to be NA in every row.
  expect_equal(dfm_stats$sents, rep(NA, quanteda::ndoc(inaug_dfm)))
  expect_equal(names(dfm_stats), expected_cols)
})
test_that("textstat_summary counts hashtag and emoji correctly", {
  skip_on_os("solaris")

  # Two short documents: the first mixes currency symbols, words and emoji;
  # the second contains a mention, a hashtag and a URL.
  docs <- c(
    "£ € 👏 Rock on❗🎸",
    "Hi @qi #quanteda https://quanteda.io"
  )
  stats <- textstat_summary(quanteda::tokens(docs))

  # Expected per-document counts, in document order.
  expect_identical(stats$tokens, c(7L, 4L))
  expect_identical(stats$tags, c(0L, 2L))
  expect_identical(stats$emojis, c(3L, 0L))
  expect_identical(stats$urls, c(0L, 1L))
})
# This test has its own description so it can be told apart from the
# hashtag/emoji test in reporter output and when filtering by description.
test_that("textstat_summary counts emoji correctly (pending #64)", {
  # Skipped unconditionally for now; the code below is kept so the test can be
  # re-enabled by removing this line.
  skip("Until we fix #64")

  docs <- c(
    "£ € 👏 Rock on❗ 💪️🎸",
    "Hi @qi #quanteda https://quanteda.io"
  )
  stats <- textstat_summary(quanteda::tokens(docs))

  # The first document is expected to yield four emoji, the second none.
  expect_identical(stats$emojis, c(4L, 0L))
})
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.