test_that("test quanteda methods for spacy_parsed objects", {
  # Bring the stored `data_spacy_parsed` fixture into scope.
  load("../data/data_spacy_parsed.rda")

  # The fixture is expected to contain one document, "text1", whose type
  # count and token count are both seven.
  seven_for_text1 <- c(text1 = 7L)

  expect_identical(docnames(data_spacy_parsed), "text1")
  expect_identical(ndoc(data_spacy_parsed), 1L)
  expect_identical(ntype(data_spacy_parsed), seven_for_text1)
  expect_identical(ntoken(data_spacy_parsed), seven_for_text1)
})
test_that("test as.tokens works for spacy_parsed objects", {
  # Bring the stored `data_spacy_parsed` fixture into scope.
  load("../data/data_spacy_parsed.rda")

  # Default conversion: plain surface forms.
  toks_plain <- as.list(as.tokens(data_spacy_parsed))
  expect_equal(
    toks_plain,
    list(text1 = c("And", "now", "for", "something", "completely", "different", "."))
  )

  # include_pos = "pos": each token is expected to carry a "/<pos>" suffix.
  toks_pos <- as.list(as.tokens(data_spacy_parsed, include_pos = "pos"))
  expect_equal(
    toks_pos,
    list(text1 = c("And/CCONJ", "now/ADV", "for/ADP", "something/NOUN",
                   "completely/ADV", "different/ADJ", "./PUNCT"))
  )

  # include_pos = "tag": each token is expected to carry a "/<tag>" suffix.
  toks_tag <- as.list(as.tokens(data_spacy_parsed, include_pos = "tag"))
  expect_equal(
    toks_tag,
    list(text1 = c("And/CC", "now/RB", "for/IN", "something/NN",
                   "completely/RB", "different/JJ", "./."))
  )

  # use_lemma = TRUE: lemmas are returned in place of the surface forms
  # (note the lower-cased "and").
  toks_lemma <- as.list(as.tokens(data_spacy_parsed, use_lemma = TRUE))
  expect_equal(
    toks_lemma,
    list(text1 = c("and", "now", "for", "something", "completely", "different", "."))
  )
})
test_that("spacy_parse/tokenize work", {
  # Unconditional skip: the rest of this test only runs if this line is
  # removed (e.g. on a machine with a working spacyr installation).
  skip("requires spacyr installation to work")
  skip_if_not_installed("spacyr")
  library("spacyr")

  spacy_initialize()
  # Register the shutdown immediately so the session started above is
  # finalized even when a later call in this test raises an error.
  on.exit(spacy_finalize(), add = TRUE)

  corp <- corpus(c(doc1 = "This is Sparta!",
                   doc2 = "This is the 2nd document."))

  # Tokenizing a corpus should give one character vector per document,
  # named by document.
  expect_identical(
    spacy_tokenize(corp),
    list(doc1 = c("This", "is", "Sparta", "!"),
         doc2 = c("This", "is", "the", "2nd", "document", "."))
  )

  sp <- spacy_parse(corp, pos = TRUE, tag = FALSE, lemma = TRUE,
                    entity = TRUE, dependency = FALSE, nounphrase = FALSE)

  # expect_s3_class() replaces the deprecated expect_is().
  expect_s3_class(sp, "data.frame")
  # One row per token: 4 tokens in doc1 plus 6 in doc2.
  expect_equal(nrow(sp), 10)
  # Only the columns requested above, after the id/token columns.
  expect_identical(
    names(sp),
    c("doc_id", "sentence_id", "token_id", "token", "lemma", "pos", "entity")
  )
})
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.