tests/testthat/test-bootstrap.R

test_that("bootstrap_dfm works with character and corpus objects", {
    txt <- c(textone = "This is a sentence.  Another sentence.  Yet another.",
             texttwo = "Premiere phrase.  Deuxieme phrase.",
             textthree = "Sentence three is really short.")
    corp <- corpus(txt,
                   docvars = data.frame(country = c("UK", "USA", "UK"), 
                                        year = c(1990, 2000, 2005)))
    toks <- tokens(corp)
    dfmt <- dfm(toks)
    
    set.seed(10)
    bs <- bootstrap_dfm(dfmt, n = 10)
    expect_equal(bs[[1]], dfmt)
    
    # are feature names of resamples identical?
    expect_identical(
        featnames(bs[[1]]),
        featnames(bs[[2]])
    )

    # are document names of resamples identical?
    expect_identical(
        docnames(bs[[1]]),
        docnames(bs[[2]])
    )
    
    expect_error(bootstrap_dfm(dfmt, n = -1), 
                 "The value of n must be between 0 and Inf")
})

test_that("bootstrap_dfm works as planned with dfm", {
    txt <- c(textone = "This is a sentence.  Another sentence.  Yet another.",
             texttwo = "Premiere phrase.  Deuxieme phrase.")
    corp <- corpus(txt, 
                   docvars = data.frame(country = c("UK", "USA"), year = c(1990, 2000)))
    dfmt <- dfm(tokens(corpus_reshape(corp, to = "sentences")))
    
    set.seed(10)
    bs <- bootstrap_dfm(dfmt, n = 3, verbose = FALSE)
    expect_equivalent(bs[[1]], 
                      dfm_group(dfmt))
    
    # are feature names of resamples identical?
    expect_identical(
        featnames(bs[[1]]),
        featnames(bs[[2]])
    )
    # are document names of resamples identical?
    expect_identical(
        docnames(bs[[1]]),
        docnames(bs[[2]])
    )
})

test_that("verbose messages work", {
    txt <- c(textone = "This is a sentence.  Another sentence.  Yet another.",
             texttwo = "Premiere phrase.  Deuxieme phrase.",
             textthree = "Sentence three is really short.")
    toks <- tokens(txt)
    dfmt <- dfm(toks)
    expect_message(
        bootstrap_dfm(dfmt, n = 1, verbose = TRUE),
        "resampling and forming dfms: 0"
    )
})

Try the quanteda package in your browser

Any scripts or data that you put into this service are public.

quanteda documentation built on May 29, 2024, 10 a.m.