tests/testthat/test-dfm_replace.R

test_that("test dfm_replace", {
    txt <- c(doc1 = "aa bb BB cc DD ee",
             doc2 = "aa bb cc DD ee")
    dfmt <- dfm(tokens(txt), tolower = FALSE)
    
    # case-insensitive
    expect_equal(featnames(dfm_replace(dfmt, c('aa', 'bb'), c('a', 'b'), case_insensitive = TRUE)),
                 c("a", "b", "cc", "DD", "ee"))
    
    # case-sensitive
    expect_equal(featnames(dfm_replace(dfmt, c('aa', 'bb'), c('a', 'b'), case_insensitive = FALSE)),
                 c("a", "b", "BB", "cc", "DD", "ee"))
    
    # duplicated types in from
    expect_equal(featnames(dfm_replace(dfmt, c('aa', 'aa'), c('a', 'aaa'), case_insensitive = FALSE)),
                 c("a", "bb", "BB", "cc", "DD", "ee"))
    
    # equivalent to dfm conversion method
    feat <- featnames(dfmt)
    expect_equal(dfm_replace(dfmt, feat, char_toupper(feat), case_insensitive = FALSE),
                 dfm_toupper(dfmt))
    
    # error when lenfths of from and to are different
    expect_error(dfm_replace(dfmt, c('aa', 'bb'), c('a')),
                 "The length of pattern and replacement must be the same")
    
    expect_error(dfm_replace(dfmt, c(1, 2), c(10, 20)),
                 "The type of pattern must be character")

    # does nothing when input vector is zero length
    expect_equal(dfm_replace(dfmt, character(), character()),
                 dfmt)
    
})

Try the quanteda package in your browser

Any scripts or data that you put into this service are public.

quanteda documentation built on May 29, 2024, 10 a.m.