# tests/testthat/test-dfm_sample.R

# Checks the document counts returned by dfm_sample() for explicit `size`
# values, and the errors raised for an oversized sample or a non-dfm input.
test_that("dfm_sample works as expected", {
    # A dfm with 10 documents to sample from.
    toks <- tokens(data_corpus_inaugural[1:10])
    dfmat <- dfm(toks, verbose = FALSE)

    # Requesting fewer documents than are available.
    samp_small <- dfm_sample(dfmat, size = 5)
    expect_equal(ndoc(samp_small), 5)

    # Requesting more documents than are available, with replace = TRUE.
    samp_large <- dfm_sample(dfmat, size = 15, replace = TRUE)
    expect_equal(ndoc(samp_large), 15)

    # The same kind of oversized request without replace = TRUE is an error.
    expect_error(
        dfm_sample(dfmat, size = 20),
        "size cannot exceed the number of items"
    )

    # A corpus is not a dfm; the message is matched literally (fixed = TRUE)
    # because it contains parentheses.
    expect_error(
        dfm_sample(data_corpus_inaugural[1:10]),
        "dfm_sample() only works on dfm objects.",
        fixed = TRUE
    )
})

# Pins the exact matrix produced by dfm_sample() when `size` is left at its
# default and documents are drawn with replacement under a fixed seed.
test_that("dfm_sample default size arguments work as expected", {
    # Select the RNG behaviour of R 3.5.3 so the seeded draw below yields the
    # same result on newer R versions; any warning from the switch is silenced.
    # NOTE(review): the RNG setting is not restored afterwards, so it remains
    # in effect for code that runs later in the same session -- confirm that
    # this is intended before changing it.
    suppressWarnings(RNGversion("3.5.3"))
    dfmat <- dfm(tokens(c("a b c c d", "a a c c d d d")))

    # Expected result for seed 100: the counts of the first document
    # ("a b c c d" -> a = 1, b = 1, c = 2, d = 1) appear in both rows, with
    # document names "text1.1" and "text1.2".
    mat1 <- matrix(rep(c(1, 1, 2, 1), 2), byrow = TRUE, nrow = 2,
                   dimnames = list(docs = c("text1.1", "text1.2"),
                                   features = letters[1:4]))

    set.seed(100)
    expect_identical(as.matrix(dfm_sample(dfmat, replace = TRUE)), mat1)
})

# Try the quanteda package in your browser
#
# Any scripts or data that you put into this service are public.
#
# quanteda documentation built on May 31, 2023, 8:28 p.m.