# Nothing
test_that("tokens_split works", {
  toks <- tokens("a-a b+b B*B cc DD ee", what = "fastestword")

  # Split `toks` with the given arguments and return a plain list, so each
  # expectation below only states the arguments and the expected tokens.
  split_as_list <- function(...) {
    as.list(tokens_split(toks, ...))
  }

  # Literal "-" separator, kept and then dropped.
  expect_equal(
    split_as_list(separator = "-", remove_separator = FALSE),
    list(text1 = c("a", "-", "a", "b+b", "B*B", "cc", "DD", "ee"))
  )
  expect_equal(
    split_as_list(separator = "-", remove_separator = TRUE),
    list(text1 = c("a", "a", "b+b", "B*B", "cc", "DD", "ee"))
  )

  # Literal "+" separator, kept and then dropped.
  expect_equal(
    split_as_list(separator = "+", remove_separator = FALSE),
    list(text1 = c("a-a", "b", "+", "b", "B*B", "cc", "DD", "ee"))
  )
  expect_equal(
    split_as_list(separator = "+", remove_separator = TRUE),
    list(text1 = c("a-a", "b", "b", "B*B", "cc", "DD", "ee"))
  )

  # Regex separators: the punctuation class is expected to split on "-" and
  # "*" but leave "b+b" alone, while the symbol class is expected to split
  # only on "+".
  expect_equal(
    split_as_list(
      separator = "\\p{P}",
      valuetype = "regex",
      remove_separator = FALSE
    ),
    list(text1 = c("a", "-", "a", "b+b", "B", "*", "B", "cc", "DD", "ee"))
  )
  expect_equal(
    split_as_list(
      separator = "\\p{S}",
      valuetype = "regex",
      remove_separator = FALSE
    ),
    list(text1 = c("a-a", "b", "+", "b", "B*B", "cc", "DD", "ee"))
  )
})
test_that("tokens_split error when more than one separator is given", {
  toks <- tokens("a-a b+b B*B cc DD ee", what = "fastestword")

  # A separator vector of length two must be rejected with this message.
  two_separators <- c("-", "+")
  expect_error(
    tokens_split(toks, two_separators),
    "The length of separator must be 1"
  )
})
test_that("tokens_split works in the same way as tokens", {
  txt <- "US-EU low-carbon agreement"

  # Route 1: let tokens() split the hyphenated words itself.
  via_tokens <- tokens(txt, split_hyphens = TRUE)

  # Route 2: tokenise with "fastestword", then split on punctuation while
  # keeping the separators as tokens.
  unsplit <- tokens(txt, what = "fastestword")
  via_split <- tokens_split(
    unsplit,
    "\\p{P}",
    valuetype = "regex",
    remove_separator = FALSE
  )

  # Both routes are expected to yield the same token list.
  expect_equal(as.list(via_tokens), as.list(via_split))
})
test_that("tokens_split works fully when matching entire tokens to separator pattern", {
  # Per the test name, the case of interest is a token that consists solely
  # of the separator; the input is tokenised with split_hyphens = TRUE.
  toks <- tokens("five-star rating", split_hyphens = TRUE)

  # Split on "-" with the given matching mode and return a character vector.
  split_chr <- function(valuetype, remove_separator) {
    as.character(
      tokens_split(
        toks,
        separator = "-",
        valuetype = valuetype,
        remove_separator = remove_separator
      )
    )
  }

  # Keeping the separator leaves the hyphen in place as a token.
  expect_identical(
    split_chr("regex", remove_separator = FALSE),
    c("five", "-", "star", "rating")
  )

  # Removing the separator drops the hyphen for both regex and fixed matching.
  expect_identical(
    split_chr("regex", remove_separator = TRUE),
    c("five", "star", "rating")
  )
  expect_identical(
    split_chr("fixed", remove_separator = TRUE),
    c("five", "star", "rating")
  )
})
test_that("apply_if argument is working", {
  # Two documents, each tagged with a topic used to select where to split.
  dat <- data.frame(
    text = c("US-EU low-carbon agreement", "five-star hotel"),
    topic = c("environment", "travel")
  )
  toks <- tokens(corpus(dat))

  # Split only the "environment" document; the other must stay untouched.
  is_environment <- toks$topic == "environment"
  toks1 <- tokens_split(toks, separator = "-", apply_if = is_environment)
  expected_environment <- list(
    text1 = c("US", "EU", "low", "carbon", "agreement"),
    text2 = c("five-star", "hotel")
  )
  expect_identical(as.list(toks1), expected_environment)

  # Split only the "travel" document; the other must stay untouched.
  is_travel <- toks$topic == "travel"
  toks2 <- tokens_split(toks, separator = "-", apply_if = is_travel)
  expected_travel <- list(
    text1 = c("US-EU", "low-carbon", "agreement"),
    text2 = c("five", "star", "hotel")
  )
  expect_identical(as.list(toks2), expected_travel)
})
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.