test_that("tolower works", {
  # Two sentences, each containing an upper-case token (NATO, G7).
  sentences <- c("According to NATO", "There is G7 meeting")
  lowered <- c("according to nato", "there is g7 meeting")
  # Message expected whenever the second argument is not of length one.
  len_msg <- "The length of keep_acronyms must be 1"

  # With a single argument, every character comes back in lower case.
  expect_equal(char_tolower(sentences), lowered)

  # A zero-length and a length-two second argument must both be rejected.
  expect_error(char_tolower(sentences, logical()), len_msg)
  expect_error(char_tolower(sentences, c(TRUE, FALSE)), len_msg)
})
test_that("char_tolower/char_toupper works", {
  txt <- c("According to NATO", "There is G7 meeting")

  # Only the first sentence is exercised: once lower-cased, once upper-cased.
  expect_equal(char_tolower(txt[1]), "according to nato")
  expect_equal(char_toupper(txt[1]), "ACCORDING TO NATO")
})
test_that("char_tolower keeps acronyms", {
  sentences <- c("According to NATO", "There is G7 meeting")
  # "NATO" and "G7" are expected to survive untouched; the rest is lower-cased.
  acronyms_kept <- c("according to NATO", "there is G7 meeting")

  result <- char_tolower(sentences, keep_acronyms = TRUE)
  expect_equal(result, acronyms_kept)
})
test_that("tokens_tolower/tokens_toupper works", {
  sentences <- c("According to NATO", "There is G7 meeting")
  toks <- tokens(sentences)
  # Message expected whenever the second argument is not of length one.
  len_msg <- "The length of keep_acronyms must be 1"

  # Expected per-document token lists for each conversion.
  all_lower <- list(
    text1 = c("according", "to", "nato"),
    text2 = c("there", "is", "g7", "meeting")
  )
  lower_with_acronyms <- list(
    text1 = c("according", "to", "NATO"),
    text2 = c("there", "is", "G7", "meeting")
  )
  all_upper <- list(
    text1 = c("ACCORDING", "TO", "NATO"),
    text2 = c("THERE", "IS", "G7", "MEETING")
  )

  expect_equal(as.list(tokens_tolower(toks)), all_lower)
  expect_equal(
    as.list(tokens_tolower(toks, keep_acronyms = TRUE)),
    lower_with_acronyms
  )
  expect_equal(as.list(tokens_toupper(toks)), all_upper)

  # A zero-length and a length-two second argument must both be rejected.
  expect_error(tokens_tolower(toks, logical()), len_msg)
  expect_error(tokens_tolower(toks, c(TRUE, FALSE)), len_msg)
})
test_that("dfm_tolower/dfm_toupper works", {
  txt <- c("According to NATO", "There is G7 meeting")
  # Build the dfm with tolower = FALSE so the original casing is still
  # present in the feature names before the functions under test run.
  dfmat <- dfm(tokens(txt), tolower = FALSE)
  # Message expected whenever the second argument is not of length one.
  len_msg <- "The length of keep_acronyms must be 1"

  expect_equal(
    featnames(dfm_tolower(dfmat)),
    c("according", "to", "nato", "there", "is", "g7", "meeting")
  )
  expect_equal(
    featnames(dfm_tolower(dfmat, keep_acronyms = TRUE)),
    c("according", "to", "NATO", "there", "is", "G7", "meeting")
  )
  expect_equal(
    featnames(dfm_toupper(dfmat)),
    c("ACCORDING", "TO", "NATO", "THERE", "IS", "G7", "MEETING")
  )

  # A zero-length and a length-two second argument must both be rejected.
  expect_error(dfm_tolower(dfmat, logical()), len_msg)
  expect_error(dfm_tolower(dfmat, c(TRUE, FALSE)), len_msg)
})
test_that("set encoding when no gap or duplication is found, #1387", {
  # One text mixing a Cyrillic word, a Latin word with non-ASCII letters,
  # and a plain ASCII word.
  lowered <- tokens_tolower(tokens("привет tschüß bye"))
  type_encodings <- Encoding(types(lowered))

  # The two non-ASCII types should be marked UTF-8; the ASCII one "unknown".
  expect_equal(type_encodings, c("UTF-8", "UTF-8", "unknown"))
})
test_that("works with empty objects (#2142)", {
  # A dfm built from a 0 x 0 matrix and a tokens object built from an
  # empty list.
  empty_dfm <- as.dfm(matrix(nrow = 0, ncol = 0))
  empty_toks <- as.tokens(list())
  # Both conversions should yield a zero-length character vector.
  nothing <- character()

  expect_identical(types(tokens_tolower(empty_toks)), nothing)
  expect_identical(types(tokens_toupper(empty_toks)), nothing)
  expect_identical(featnames(dfm_tolower(empty_dfm)), nothing)
  expect_identical(featnames(dfm_toupper(empty_dfm)), nothing)
})
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.