# tests/testthat/test-dfm_replace.R

test_that("test dfm_replace", {
    # Fixture: case is preserved (tolower = FALSE), so "bb" and "BB" are
    # separate features in the dfm.
    docs <- c(doc1 = "aa bb BB cc DD ee",
              doc2 = "aa bb cc DD ee")
    dfmt <- dfm(tokens(docs), tolower = FALSE)
    from <- c("aa", "bb")
    to <- c("a", "b")

    # Case-insensitive matching: "BB" is replaced along with "bb".
    feat_insens <- featnames(dfm_replace(dfmt, from, to, case_insensitive = TRUE))
    expect_equal(feat_insens, c("a", "b", "cc", "DD", "ee"))

    # Case-sensitive matching: "BB" is left untouched.
    feat_sens <- featnames(dfm_replace(dfmt, from, to, case_insensitive = FALSE))
    expect_equal(feat_sens, c("a", "b", "BB", "cc", "DD", "ee"))

    # Duplicated entries in the pattern: the expected result keeps only the
    # first replacement ("a"), not the second ("aaa").
    feat_dup <- featnames(
        dfm_replace(dfmt, c("aa", "aa"), c("a", "aaa"), case_insensitive = FALSE)
    )
    expect_equal(feat_dup, c("a", "bb", "BB", "cc", "DD", "ee"))

    # Replacing every feature by its upper-case form should give the same
    # object as dfm_toupper().
    feat <- featnames(dfmt)
    dfmt_upper <- dfm_replace(dfmt, feat, char_toupper(feat), case_insensitive = FALSE)
    expect_equal(dfmt_upper, dfm_toupper(dfmt))

    # Error when the lengths of pattern and replacement differ.
    expect_error(
        dfm_replace(dfmt, c("aa", "bb"), c("a")),
        "The length of pattern and replacement must be the same"
    )

    # Error when the pattern is not a character vector.
    expect_error(
        dfm_replace(dfmt, c(1, 2), c(10, 20)),
        "The type of pattern must be character"
    )

    # Zero-length pattern and replacement leave the dfm unchanged.
    dfmt_noop <- dfm_replace(dfmt, character(), character())
    expect_equal(dfmt_noop, dfmt)
})

test_that("dfm_replace() verbose works", {
    # Three documents over the features a, b, c, d.
    toks <- tokens(c("a a b c d", "a a b c", "b c c d"))
    dfmat <- dfm(toks)

    # Mapping both "a" and "c" to "X" merges two features into one, so the
    # verbose message should report 4 features going down to 3.
    msg <- "dfm_replace() changed from 4 features (3 documents) to 3 features (3 documents)"

    # The call stays inside expect_message() so the message is captured;
    # fixed = TRUE matches the parentheses in the message literally.
    expect_message(
        dfm_replace(dfmat, c("a", "c"), c("X", "X"), verbose = TRUE),
        msg,
        fixed = TRUE
    )
})

# Try the quanteda package in your browser
#
# Any scripts or data that you put into this service are public.
#
# quanteda documentation built on June 8, 2025, 9:41 p.m.