Nothing
# Attach polmineR, the package whose ngrams() method is tested below.
library(polmineR)
# Request the "REUTERS" corpus from the RcppCWB package; every test in this
# file refers to that corpus by name.
# NOTE(review): presumably this registers the sample corpus shipped with
# RcppCWB for the current session -- confirm against the polmineR docs.
use(pkg = "RcppCWB", corpus = "REUTERS")
# Label the tests in this file as belonging to the "ngrams-method" context.
# NOTE(review): context() is deprecated in testthat 3rd edition; confirm
# which edition this package targets before removing it.
testthat::context("ngrams-method")
test_that("ngrams", {
  # The stopword list comes from the optional 'tm' package; skip cleanly
  # instead of erroring when it is not installed.
  skip_if_not_installed("tm")

  # Check that the bigram counts reported by ngrams() for `x` agree with the
  # counts obtained by querying `x` for the same two-token sequence.
  #
  # x:   object accepted by both ngrams() and count() (here: a corpus or a
  #      partition).
  # top: maximum number of most frequent bigrams to verify.
  expect_top_bigrams_match_count <- function(x, top = 5L) {
    bigrams <- data.table::as.data.table(ngrams(x, n = 2))
    # Most frequent bigrams first.
    data.table::setorderv(bigrams, cols = "count", order = -1L)
    # Drop bigrams whose first token is an English stopword.
    bigrams <- bigrams[!bigrams[["word_1"]] %in% tm::stopwords("en")]

    # Guard against a vacuous pass when nothing survives the filter.
    expect_gt(nrow(bigrams), 0L)

    # seq_len() + min() never indexes past the last row, so no "NA" "NA"
    # query is generated when fewer than `top` bigrams are available.
    for (i in seq_len(min(top, nrow(bigrams)))) {
      query <- sprintf(
        '"%s" "%s"',
        bigrams[["word_1"]][i],
        bigrams[["word_2"]][i]
      )
      hits <- count(x, query = query)[["count"]]
      expect_equal(hits, bigrams[["count"]][i])
    }
  }

  # Whole corpus.
  expect_top_bigrams_match_count(corpus("REUTERS"))

  # Partition of the corpus selected via the 'places' attribute.
  saudi_arabia <- partition(
    "REUTERS",
    places = "saudi-arabia",
    regex = TRUE
  )
  expect_top_bigrams_match_count(saudi_arabia)
})
test_that("ngrams - character", {
  reuters <- corpus("REUTERS")

  # n-grams of length 3 computed with char = "".
  # NOTE(review): presumably char = "" switches ngrams() to character-level
  # n-grams -- confirm against the polmineR documentation.
  trigrams <- ngrams(reuters, n = 3, char = "")

  # Number of tokens in the "word" token stream that match "oil".
  tokens <- get_token_stream(reuters, p_attribute = "word")
  n_oil_tokens <- sum(grepl("oil", tokens))

  # The count stored for the "oil" entry must be exactly that number.
  expect_identical(n_oil_tokens, trigrams["oil", ][["count"]])
})
test_that("ngrams - character, bundle", {
  # Route 1: split the corpus by the "id" attribute and compute the n-grams
  # for the resulting collection in one call.
  by_id <- split(corpus("REUTERS"), s_attribute = "id")
  bundle_trigrams <- ngrams(by_id, n = 3L, char = "")

  # Route 2: subset the corpus to id "127" only and compute its n-grams.
  doc_127 <- subset(corpus("REUTERS"), id == "127")
  single_trigrams <- ngrams(doc_127, n = 3L, char = "")

  # The "127" element of the first result must carry the same statistics
  # table as the directly computed one.
  expect_identical(bundle_trigrams[["127"]]@stat, single_trigrams@stat)
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.