tests/testthat/test-attributes.R

test_that("unit attributes are set correctly", {
    txt <- c(d1 = "Sentence one.  Second sentence is this one!\n
                   Here is the third sentence.",
             d2 = "Only sentence of doc2?  No there is another.")
    corp <- corpus(txt, docvars = data.frame(title = c("doc1", "doc2")))
    
    corp_para <- corpus_reshape(corp, "paragraphs")
    expect_equal(attr(corp_para, "meta")$object$unit, "paragraphs")
    
    corp_sent <- corpus_reshape(corp, "sentences")
    expect_equal(attr(corp_sent, "meta")$object$unit, "sentences")
    
    corp_doc <- corpus_reshape(corp_sent, "documents")
    expect_equal(attr(corp_doc, "meta")$object$unit, "documents")
    
    corp_seg <- corpus_segment(corp, "\\p{Sterm}", valuetype = "regex")
    expect_equal(attr(corp_seg, "meta")$object$unit, "segments")
    
    toks <- tokens(corp)
    toks_seg <- tokens_segment(toks, "\\p{Sterm}", valuetype = "regex")
    expect_equal(attr(toks_seg, "meta")$object$unit, "segments")
    
    toks_chunk <- tokens_chunk(toks, 2)
    expect_equal(attr(toks_chunk, "meta")$object$unit, "segments")
    
    expect_equal(dfm(tokens(corp_sent))@meta$object$unit, "sentences")
    expect_equal(dfm(tokens(corp_para))@meta$object$unit, "paragraphs")
    expect_equal(dfm(toks_chunk)@meta$object$unit, "segments")
    expect_equal(fcm(dfm(toks_chunk))@meta$object$unit, "segments")
    expect_equal(fcm(toks_chunk)@meta$object$unit, "segments")
})

test_that("set_concatenator and get_concatenator are working", {
    
    txt <- c(d1 = "Sentence one.  Second sentence is this one!\n
                   Here is the third sentence.",
             d2 = "Only sentence of doc2?  No there is another.")
    toks <- tokens(txt, concatenator = "+")
    expect_equal(
        quanteda:::get_concatenator(toks),
        "+"
    )
    quanteda:::set_concatenator(toks) <- "-"
    expect_equal(
        quanteda:::get_concatenator(toks),
        "-"
    )
    expect_error(
        quanteda:::set_concatenator(toks) <- c("-", "-"),
        "concatenator value must be a single character"
    )
    expect_error(
        quanteda:::set_concatenator(toks) <- 123,
        "concatenator value must be a single character"
    )
})

Try the quanteda package in your browser

Any scripts or data that you put into this service are public.

quanteda documentation built on May 29, 2024, 10 a.m.