# tests/testthat/test-empty-objects.R

# Zero-document corpora: construction from an empty character vector and from
# a zero-row data.frame, retention of docvars, the printed summary line,
# concatenation with c(), and the class returned by the corpus functions.
test_that("empty corpus works", {

  # empty character vector plus a zero-row docvars data.frame
  corp1 <- corpus(character(), docvars = data.frame(var1 = integer()))
  expect_equal(ndoc(corp1), 0)
  expect_equal(docvars(corp1),
               data.frame(var1 = integer()))
  # fixed = TRUE: match the summary line literally, so the trailing "." is a
  # full stop rather than a regex wildcard
  expect_output(print(corp1),
                "Corpus consisting of 0 documents and 1 docvar.",
                fixed = TRUE)

  # zero-row data.frame input; var2 is expected to be the only docvar
  corp2 <- corpus(data.frame(text = character(), var2 = numeric()))
  expect_equal(ndoc(corp2), 0)
  expect_equal(docvars(corp2),
               data.frame(var2 = numeric()))
  expect_output(print(corp2),
                "Corpus consisting of 0 documents and 1 docvar.",
                fixed = TRUE)

  # combining two empty corpora is expected to keep the docvars of both
  corp3 <- c(corp1, corp2)
  expect_equal(ndoc(corp3), 0)
  expect_equal(docvars(corp3),
               data.frame(var1 = integer(), var2 = numeric()))
  expect_output(print(corp3),
                "Corpus consisting of 0 documents and 2 docvars.",
                fixed = TRUE)

  # works with corpus methods
  expect_s3_class(corpus(corp3), "corpus")
  expect_s3_class(corpus_subset(corp3), "corpus")
  expect_s3_class(corpus_segment(corp3), "corpus")
  expect_s3_class(corpus_reshape(corp3), "corpus")
  expect_s3_class(corpus_trim(corp3), "corpus")
  expect_s3_class(corpus_group(corp3), "corpus")
  expect_s3_class(corpus_sample(corp3), "corpus")
})

# Zero-document tokens objects: the printed summary line, concatenation with
# c(), and the class returned by the tokens functions, kwic() and fcm().
test_that("empty tokens works", {

  # two empty corpora, each carrying a different docvar
  corp1 <- corpus(character(), docvars = data.frame(var1 = integer()))
  corp2 <- corpus(data.frame(text = character(), var2 = numeric()))

  # fixed = TRUE: match the summary line literally, so the trailing "." is a
  # full stop rather than a regex wildcard
  toks1 <- tokens(corp1)
  expect_output(print(toks1),
                "Tokens consisting of 0 documents and 1 docvar.",
                fixed = TRUE)

  toks2 <- tokens(corp2)
  expect_output(print(toks2),
                "Tokens consisting of 0 documents and 1 docvar.",
                fixed = TRUE)

  # combining two empty tokens objects is expected to keep the docvars of both
  toks3 <- c(toks1, toks2)
  expect_equal(ndoc(toks3), 0)
  expect_equal(docvars(toks3),
               data.frame(var1 = integer(), var2 = numeric()))
  expect_output(print(toks3),
                "Tokens consisting of 0 documents and 2 docvars.",
                fixed = TRUE)

  # works with tokens methods
  expect_s3_class(tokens(toks3), "tokens")
  expect_s3_class(tokens_subset(toks3), "tokens")
  expect_s3_class(tokens_select(toks3, stopwords("en")), "tokens")
  expect_s3_class(tokens_select(toks3, stopwords("en"), padding = TRUE), "tokens")
  expect_s3_class(tokens_lookup(toks3, data_dictionary_LSD2015), "tokens")
  expect_s3_class(tokens_segment(toks3, pattern = "."), "tokens")
  expect_s3_class(tokens_group(toks3), "tokens")
  expect_s3_class(tokens_sample(toks3), "tokens")
  expect_s3_class(tokens_wordstem(toks3), "tokens")
  expect_s3_class(as.tokens(list()), "tokens")
  expect_s3_class(kwic(toks3, stopwords("en")), "kwic")
  expect_s4_class(fcm(toks3), "fcm")
  # an empty tokens object is expected to have no types
  expect_equal(types(toks3), character())
})

# Zero-document document-feature matrices: the printed header, row-binding
# with rbind(), and the class returned by the dfm functions and fcm().
test_that("empty DFM works", {

  # two empty corpora, each carrying a different docvar, taken through
  # tokens() and then dfm()
  corp_a <- corpus(character(), docvars = data.frame(var1 = integer()))
  corp_b <- corpus(data.frame(text = character(), var2 = numeric()))
  dfmat_a <- dfm(tokens(corp_a))
  dfmat_b <- dfm(tokens(corp_b))

  # expected print headers; matched with fixed = TRUE because they contain
  # regex metacharacters such as "(" and "."
  header_one_docvar <- "Document-feature matrix of: 0 documents, 0 features (0.00% sparse) and 1 docvar."
  header_two_docvars <- "Document-feature matrix of: 0 documents, 0 features (0.00% sparse) and 2 docvars."

  expect_output(print(dfmat_a), header_one_docvar, fixed = TRUE)
  expect_output(print(dfmat_b), header_one_docvar, fixed = TRUE)

  # row-binding two empty dfms is expected to keep the docvars of both
  dfmat_ab <- rbind(dfmat_a, dfmat_b)
  expect_equal(ndoc(dfmat_ab), 0)
  expect_equal(
    docvars(dfmat_ab),
    data.frame(var1 = integer(), var2 = numeric())
  )
  expect_output(print(dfmat_ab), header_two_docvars, fixed = TRUE)

  # works with dfm methods
  expect_s4_class(dfm(dfmat_ab), "dfm")
  expect_s4_class(dfm_subset(dfmat_ab), "dfm")
  expect_s4_class(dfm_select(dfmat_ab, stopwords("en")), "dfm")
  expect_s4_class(dfm_select(dfmat_ab, stopwords("en"), padding = TRUE), "dfm")
  expect_s4_class(dfm_lookup(dfmat_ab, data_dictionary_LSD2015), "dfm")
  expect_s4_class(dfm_group(dfmat_ab), "dfm")
  expect_s4_class(dfm_sample(dfmat_ab), "dfm")
  expect_s4_class(dfm_wordstem(dfmat_ab), "dfm")
  expect_s4_class(dfm_sort(dfmat_ab), "dfm")
  expect_s4_class(dfm_weight(dfmat_ab), "dfm")

  # coercion from 0 x 0 base and Matrix matrices
  expect_s4_class(as.dfm(matrix(nrow = 0, ncol = 0)), "dfm")
  expect_s4_class(as.dfm(Matrix::Matrix(nrow = 0, ncol = 0)), "dfm")

  expect_s4_class(fcm(dfmat_ab), "fcm")

  # no features: empty names and an empty named frequency vector
  expect_equal(featnames(dfmat_ab), character())
  expect_equal(featfreq(dfmat_ab), structure(numeric(), names = character()))
})

# Indexing a corpus, tokens object or dfm with 0 should give an object that
# reports zero documents.
test_that("subsetting zero documents works", {
  # the same two documents at each of the three levels
  docs_corp <- corpus(c("a b c", "d e f"))
  docs_toks <- tokens(docs_corp)
  docs_dfmat <- dfm(docs_toks)

  expect_equal(ndoc(docs_corp[0]), 0)
  expect_equal(ndoc(docs_toks[0]), 0)
  # the dfm is indexed by row, so the column index is left empty
  expect_equal(ndoc(docs_dfmat[0, ]), 0)
})

# Try the quanteda package in your browser
#
# Any scripts or data that you put into this service are public.
#
# quanteda documentation built on May 31, 2023, 8:28 p.m.