Nothing
test_that("tokens works for strange spaces (#796)", {
  # Input mixes a tab, a newline, a non-breaking space (U+00A0) and a
  # variation selector (U+FE0F) with ordinary spaces and punctuation.
  txt <- "space tab\t newline\n non-breakingspace\u00A0, variationselector16 \uFE0F."

  # Case 1: keep both punctuation and separators.
  toks <- tokens(
    txt,
    what = "word1",
    remove_punct = FALSE,
    remove_separators = FALSE
  )
  expect_identical(ntoken(toks), c(text1 = 15L))
  tail_with_punct <- as.character(toks)[13:15]
  expect_identical(tail_with_punct, c("variationselector16", " ", "."))

  # Case 2: drop punctuation but keep separators. The count is taken by
  # calling ntoken() directly on the character input, as in the original.
  n_without_punct <- ntoken(
    txt,
    remove_punct = TRUE,
    remove_separators = FALSE,
    what = "word1"
  )
  expect_identical(n_without_punct, c(text1 = 13L))

  toks_without_punct <- tokens(
    txt,
    remove_punct = TRUE,
    remove_separators = FALSE,
    what = "word1"
  )
  tail_without_punct <- as.character(toks_without_punct)[12:13]
  expect_identical(tail_without_punct, c("variationselector16", " "))
})
test_that("output is correct for word1", {
  # Runs the verbose "word1" tokenizer on the bundled corpus; called afresh
  # for every expectation so each expect_message() observes a full run.
  tokenize_verbosely <- function() {
    tokens(
      data_char_ukimmig2010,
      what = "word1",
      split_hyphens = FALSE,
      verbose = TRUE
    )
  }

  # Regular expressions that must each match a message emitted by the run.
  expected_messages <- c(
    "preserving hyphens",
    "Finished constructing tokens from 9 documents",
    "^Creating a tokens object from a character input"
  )

  for (pattern in expected_messages) {
    expect_message(tokenize_verbosely(), pattern)
  }
})
test_that("symbols and punctuation are handled separately (#1445)", {
  txt <- "£ € 👏 Rock on❗ 💪️🎸"

  # With symbols retained, toggling remove_punct must not change the tokens
  # produced by the given tokenizer for this text.
  expect_punct_removal_is_noop <- function(tokenizer) {
    punct_removed <- tokens(
      txt,
      what = tokenizer,
      remove_symbols = FALSE,
      remove_punct = TRUE
    )
    punct_kept <- tokens(
      txt,
      what = tokenizer,
      remove_symbols = FALSE,
      remove_punct = FALSE
    )
    expect_identical(as.character(punct_removed), as.character(punct_kept))
  }

  expect_punct_removal_is_noop("word1")
  expect_punct_removal_is_noop("fasterword")
  expect_punct_removal_is_noop("fastestword")
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.