# tests/testthat/test-casechanges.R

test_that("tolower works", {
    docs <- c("According to NATO", "There is G7 meeting")

    # By default every character is lower-cased, acronyms included.
    lowered <- char_tolower(docs)
    expect_equal(lowered, c("according to nato", "there is g7 meeting"))

    # The second argument must have length one: both a zero-length and a
    # length-two logical vector are rejected with the same message.
    length_msg <- "The length of keep_acronyms must be 1"
    expect_error(char_tolower(docs, logical()), length_msg)
    expect_error(char_tolower(docs, c(TRUE, FALSE)), length_msg)
})

test_that("char_tolower/char_toUpper works", {
    docs <- c("According to NATO", "There is G7 meeting")

    # Only the first document is used: a single string in, a single string out.
    first_doc <- docs[1]
    expect_equal(char_tolower(first_doc), "according to nato")
    expect_equal(char_toupper(first_doc), "ACCORDING TO NATO")
})

test_that("char_tolower keeps acronyms", {
    docs <- c("According to NATO", "There is G7 meeting")

    # With keep_acronyms = TRUE, "NATO" and "G7" stay in upper case while
    # the remaining words are lower-cased.
    kept <- char_tolower(docs, keep_acronyms = TRUE)
    expected <- c("according to NATO", "there is G7 meeting")
    expect_equal(kept, expected)
})

test_that("tokens_tolower/tokens_toupper works", {
    docs <- c("According to NATO", "There is G7 meeting")
    doc_toks <- tokens(docs)

    # Plain lower-casing: every token, acronyms included, is lower-cased.
    all_lower <- list(text1 = c("according", "to", "nato"),
                      text2 = c("there", "is", "g7", "meeting"))
    expect_equal(as.list(tokens_tolower(doc_toks)), all_lower)

    # keep_acronyms = TRUE leaves "NATO" and "G7" untouched.
    lower_with_acronyms <- list(text1 = c("according", "to", "NATO"),
                                text2 = c("there", "is", "G7", "meeting"))
    expect_equal(as.list(tokens_tolower(doc_toks, keep_acronyms = TRUE)),
                 lower_with_acronyms)

    # Upper-casing converts every token.
    all_upper <- list(text1 = c("ACCORDING", "TO", "NATO"),
                      text2 = c("THERE", "IS", "G7", "MEETING"))
    expect_equal(as.list(tokens_toupper(doc_toks)), all_upper)

    # The second argument must have length one.
    length_msg <- "The length of keep_acronyms must be 1"
    expect_error(tokens_tolower(doc_toks, logical()), length_msg)
    expect_error(tokens_tolower(doc_toks, c(TRUE, FALSE)), length_msg)
})

# The description previously duplicated the tokens test above even though this
# block exercises dfm_tolower()/dfm_toupper(); a unique, accurate description
# keeps failure reports unambiguous.
test_that("dfm_tolower/dfm_toupper works", {
    txt <- c("According to NATO", "There is G7 meeting")
    # Build the dfm with tolower = FALSE so the original casing is still
    # present in the feature names when the case functions are applied.
    dfmat <- dfm(tokens(txt), tolower = FALSE)

    # Plain lower-casing converts every feature name, acronyms included.
    expect_equal(featnames(dfm_tolower(dfmat)),
                 c("according", "to", "nato", "there", "is", "g7", "meeting"))
    # keep_acronyms = TRUE leaves "NATO" and "G7" untouched.
    expect_equal(featnames(dfm_tolower(dfmat, keep_acronyms = TRUE)),
                 c("according", "to", "NATO", "there", "is", "G7", "meeting"))
    # Upper-casing converts every feature name.
    expect_equal(featnames(dfm_toupper(dfmat)),
                 c("ACCORDING", "TO", "NATO", "THERE", "IS", "G7", "MEETING"))

    # The second argument must have length one: zero-length and length-two
    # logical vectors are both rejected.
    expect_error(dfm_tolower(dfmat, logical()),
                 "The length of keep_acronyms must be 1")
    expect_error(dfm_tolower(dfmat, c(TRUE, FALSE)),
                 "The length of keep_acronyms must be 1")
})

test_that("set encoding when no gap or duplication is found, #1387", {
    # Three types: Cyrillic, Latin with non-ASCII characters, and pure ASCII.
    lowered <- tokens_tolower(tokens("привет tschüß bye"))

    # The two non-ASCII types are expected to be marked as UTF-8; the
    # ASCII-only type is expected to report "unknown".
    type_encodings <- Encoding(types(lowered))
    expect_equal(type_encodings, c("UTF-8", "UTF-8", "unknown"))
})

test_that("works with empty objects (#2142)", {

    # Zero-document tokens object and a 0 x 0 dfm.
    empty_dfm <- as.dfm(matrix(nrow = 0, ncol = 0))
    empty_toks <- as.tokens(list())

    # Case conversion of an empty object must yield an empty character
    # vector of types / feature names.
    no_names <- character()

    expect_identical(types(tokens_tolower(empty_toks)), no_names)
    expect_identical(types(tokens_toupper(empty_toks)), no_names)

    expect_identical(featnames(dfm_tolower(empty_dfm)), no_names)
    expect_identical(featnames(dfm_toupper(empty_dfm)), no_names)

})
# quanteda/quanteda documentation built on March 20, 2024, 2:11 p.m.