# tests/testthat/test-extractor.R

# Four small named documents used as shared input for the tests below.
# doc1 and doc2 contain embedded newlines (doc2 also a tab); doc4 contains
# escaped double quotes.
txt <- c(
    doc1 = "This is a sample text.\nIt has three lines.\nThe third line.",
    doc2 = "one\ntwo\tpart two\nthree\nfour.",
    doc3 = "A single sentence.",
    doc4 = "A sentence with \"escaped quotes\"."
)

# One row per document: an integer column, a two-level factor column and a
# column built from the first four lowercase letters.
dat <- data.frame(
    var_numeric = 10:13,
    var_factor = factor(rep(c("A", "B"), times = 2)),
    var_char = head(letters, 4)
)
# Shared objects for the extractor tests: a corpus built from `txt` with `dat`
# attached as docvars, the tokens derived from that corpus, and the dfm
# derived from those tokens.
corp <- corpus(x = txt, docvars = dat)
toks <- tokens(x = corp)
dfmt <- dfm(x = toks)

test_that("extractor for corpus works", {

    # Document ids of the full corpus, used as the reference for every check.
    ids_all <- docid(corp)

    # Selecting two distinct documents keeps their names; by default the
    # docid factor is expected to have its unused levels dropped.
    distinct <- c(1, 3)
    expect_equal(docnames(corp[distinct]), c("doc1", "doc3"))
    expect_equal(docid(corp[distinct]), droplevels(ids_all[distinct]))
    # With FALSE as the second argument the docid is expected to match the
    # plain slice of the original factor (levels not dropped).
    # NOTE(review): presumably this argument is `drop_docid` - confirm.
    expect_equal(docid(corp[distinct, FALSE]), ids_all[distinct])

    # Selecting the same document twice yields suffixed document names.
    repeated <- c(1, 1)
    expect_equal(docnames(corp[repeated]), c("doc1.1", "doc1.2"))
    expect_equal(docid(corp[repeated]), droplevels(ids_all[repeated]))
    expect_equal(docid(corp[repeated, FALSE]), ids_all[repeated])

})

test_that("extractor for tokens works", {

    # Document ids of the full tokens object, used as the reference below.
    ids_all <- docid(toks)

    # Selecting two distinct documents keeps their names; by default the
    # docid factor is expected to have its unused levels dropped.
    distinct <- c(1, 3)
    expect_equal(docnames(toks[distinct]), c("doc1", "doc3"))
    expect_equal(docid(toks[distinct]), droplevels(ids_all[distinct]))
    # With FALSE as the second argument the docid is expected to match the
    # plain slice of the original factor (levels not dropped).
    # NOTE(review): presumably this argument is `drop_docid` - confirm.
    expect_equal(docid(toks[distinct, FALSE]), ids_all[distinct])

    # Selecting the same document twice yields suffixed document names.
    repeated <- c(1, 1)
    expect_equal(docnames(toks[repeated]), c("doc1.1", "doc1.2"))
    expect_equal(docid(toks[repeated]), droplevels(ids_all[repeated]))
    expect_equal(docid(toks[repeated, FALSE]), ids_all[repeated])

})

test_that("extractor for dfm works", {

    # Document ids of the full dfm, used as the reference below.
    ids_all <- docid(dfmt)

    # Row-selecting two distinct documents keeps their names; by default the
    # docid factor is expected to have its unused levels dropped.
    distinct <- c(1, 3)
    expect_equal(docnames(dfmt[distinct, ]), c("doc1", "doc3"))
    expect_equal(docid(dfmt[distinct, ]), droplevels(ids_all[distinct]))
    # With FALSE as the third argument (column index left empty) the docid is
    # expected to match the plain slice of the original factor.
    # NOTE(review): presumably this argument is `drop_docid` - confirm.
    expect_equal(docid(dfmt[distinct, , FALSE]), ids_all[distinct])

    # Row-selecting the same document twice yields suffixed document names.
    # NOTE(review): unlike the corpus and tokens tests, there is no
    # default-drop docid check for the duplicated selection here - confirm
    # whether that omission is intentional.
    repeated <- c(1, 1)
    expect_equal(docnames(dfmt[repeated, ]), c("doc1.1", "doc1.2"))
    expect_equal(docid(dfmt[repeated, , FALSE]), ids_all[repeated])

})

# Try the quanteda package in your browser
#
# Any scripts or data that you put into this service are public.
#
# quanteda documentation built on May 31, 2023, 8:28 p.m.