# tests/testthat/test-spacyr-methods.R

test_that("test quanteda methods for spacy_parsed objects", {
    # load() restores the stored fixture object `data_spacy_parsed` into the
    # environment of this test; the path is relative to the test directory.
    load("../data/data_spacy_parsed.rda")
    parsed <- data_spacy_parsed

    # Document-level accessors: the fixture is expected to hold exactly one
    # document, named "text1".
    expect_identical(ndoc(parsed), 1L)
    expect_identical(docnames(parsed), "text1")

    # Token-level counts: seven tokens and seven types are expected, both
    # returned as integer vectors named by document.
    expect_identical(ntoken(parsed), c(text1 = 7L))
    expect_identical(ntype(parsed), c(text1 = 7L))
})

test_that("test as.tokens works for spacy_parsed objects", {
    # load() restores the stored fixture object `data_spacy_parsed` into the
    # environment of this test; the path is relative to the test directory.
    load("../data/data_spacy_parsed.rda")

    # Default conversion: surface forms only.
    toks_plain <- as.list(as.tokens(data_spacy_parsed))
    want_plain <- list(
        text1 = c("And", "now", "for", "something", "completely", "different", ".")
    )
    expect_equal(toks_plain, want_plain)

    # include_pos = "pos": each token is expected as "<token>/<pos>".
    toks_pos <- as.list(as.tokens(data_spacy_parsed, include_pos = "pos"))
    want_pos <- list(
        text1 = c("And/CCONJ", "now/ADV", "for/ADP", "something/NOUN",
                  "completely/ADV", "different/ADJ", "./PUNCT")
    )
    expect_equal(toks_pos, want_pos)

    # include_pos = "tag": each token is expected as "<token>/<tag>".
    toks_tag <- as.list(as.tokens(data_spacy_parsed, include_pos = "tag"))
    want_tag <- list(
        text1 = c("And/CC", "now/RB", "for/IN", "something/NN",
                  "completely/RB", "different/JJ", "./.")
    )
    expect_equal(toks_tag, want_tag)

    # use_lemma = TRUE: lemmas replace the surface forms ("And" -> "and").
    toks_lemma <- as.list(as.tokens(data_spacy_parsed, use_lemma = TRUE))
    want_lemma <- list(
        text1 = c("and", "now", "for", "something", "completely", "different", ".")
    )
    expect_equal(toks_lemma, want_lemma)
})

test_that("spacy_parse/tokenize work", {
    # This skip() is unconditional, so nothing below it runs at present
    # (including the skip_if_not_installed() guard). Remove it to exercise
    # the test on a machine where spacyr is set up.
    skip("requires spacyr installation to work")
    skip_if_not_installed("spacyr")

    library("spacyr")
    spacy_initialize()
    corp <- corpus(c(doc1 = "This is Sparta!",
                     doc2 = "This is the 2nd document."))

    # Tokenizing a corpus is expected to give a list of character vectors
    # named by document.
    expect_identical(
        spacy_tokenize(corp),
        list(doc1 = c("This", "is", "Sparta", "!"),
             doc2 = c("This", "is", "the", "2nd", "document", "."))
    )

    sp <- spacy_parse(corp, pos = TRUE, tag = FALSE, lemma = TRUE,
                      entity = TRUE, dependency = FALSE, nounphrase = FALSE)

    # expect_s3_class() replaces expect_is(), which is deprecated in the
    # third edition of testthat.
    expect_s3_class(sp, "data.frame")

    # 10 matches the 4 + 6 tokens expected from the two documents above
    # (presumably one row per token).
    expect_equal(nrow(sp), 10)

    # Columns expected for the options requested in the spacy_parse() call.
    expect_identical(
        names(sp),
        c("doc_id", "sentence_id", "token_id", "token",
          "lemma", "pos", "entity")
    )
    spacy_finalize()
})
# quanteda/quanteda documentation built on April 15, 2024, 7:59 a.m.