# Tests for spacy_parse(): whitespace and quote handling, consistency of the
# 'entity' option with initialization, additional attributes, and
# data.frame / tibble input.
context("test spacy_parse")
# Load shared test helpers. try_spacy_initialize() is used by every test
# below but is not defined in this file -- presumably it comes from utils.R
# (TODO confirm).
source("utils.R")
# Texts with real control characters and texts with the backslash sequences
# spelled out as characters are both expected to parse to an 11 x 9 result.
test_that("spacy_parse handles newlines and tabs ok", {
  skip_on_cran()
  # skip_on_appveyor()
  skip_on_os("solaris")
  try_spacy_initialize()

  # texts containing an actual newline and an actual tab
  literal_ws <- c(doc1 = "Sentence one.\nSentence two.",
                  doc2 = "Sentence\tthree.")
  parsed_literal <- spacy_parse(literal_ws, dependency = TRUE)
  expect_equal(dim(parsed_literal), c(11, 9))

  # the same texts with backslash-n / backslash-t written out as characters
  escaped_ws <- c(doc1 = "Sentence one.\\nSentence two.",
                  doc2 = "Sentence\\tthree.")
  parsed_escaped <- spacy_parse(escaped_ws, dependency = TRUE)
  expect_equal(dim(parsed_escaped), c(11, 9))

  ## multiple tagsets
  # requesting only `pos` or only `tag` yields exactly that extra column
  pos_only <- spacy_parse(escaped_ws, pos = TRUE, tag = FALSE, entity = FALSE)
  expect_equal(
    names(pos_only),
    c("doc_id", "sentence_id", "token_id", "token", "lemma", "pos")
  )
  tag_only <- spacy_parse(escaped_ws, pos = FALSE, tag = TRUE, entity = FALSE)
  expect_equal(
    names(tag_only),
    c("doc_id", "sentence_id", "token_id", "token", "lemma", "tag")
  )
  # no row may have identical values in the two tagsets
  expect_false(any(pos_only$pos == tag_only$tag))

  expect_silent(spacy_finalize())
})
# Double and single quotes, both plain and preceded by a literal backslash,
# must go through spacy_parse() and produce the expected dimensions.
test_that("spacy_parse handles quotes ok", {
  skip_on_cran()
  # skip_on_appveyor()
  skip_on_os("solaris")
  try_spacy_initialize()

  # plain quote characters inside the text
  plain_quotes <- c(doc1 = "Sentence \"quoted\" one.",
                    doc2 = "Sentence \'quoted\' two.")
  parsed_plain <- spacy_parse(plain_quotes, dependency = TRUE)
  expect_true("dep_rel" %in% names(parsed_plain))

  # double quotes preceded by a literal backslash
  backslash_double <- c(doc1 = "Sentence \\\"quoted\\\" one.")
  parsed_double <- spacy_parse(backslash_double, dependency = TRUE)
  expect_equal(dim(parsed_double), c(6, 9))

  # single quotes preceded by a literal backslash
  backslash_single <- c(doc1 = "Second sentence \\\'quoted\\\' example.")
  parsed_single <- spacy_parse(backslash_single, dependency = TRUE)
  expect_equal(dim(parsed_single), c(7, 9))

  # both kinds across two documents
  backslash_both <- c(doc1 = "Sentence \\\"quoted\\\" one.",
                      doc2 = "Sentence \\\'quoted\\\' two.")
  parsed_both <- spacy_parse(backslash_both, dependency = TRUE)
  expect_equal(dim(parsed_both), c(12, 9))

  expect_silent(spacy_finalize())
})
# Parsing with entity = FALSE should give the same result whether the session
# was started by try_spacy_initialize() or by spacy_initialize(entity = FALSE).
test_that("spacy_parse returns the same regardless of the value of 'entity' option at the initialization", {
  skip_on_cran()
  # skip_on_appveyor()
  skip_on_os("solaris")
  try_spacy_initialize()

  sample_txt <- c(doc1 = "Sentence one.\nSentence two.",
                  doc2 = "Sentence\tthree.")

  # first pass: session as set up by the helper
  parsed_helper_init <- spacy_parse(sample_txt, entity = FALSE)
  expect_silent(spacy_finalize())

  # second pass: session explicitly initialized without entity recognition
  expect_message(spacy_initialize(entity = FALSE), "successfully")
  parsed_no_entity_init <- spacy_parse(sample_txt, entity = FALSE)

  expect_equal(parsed_no_entity_init, parsed_helper_init)

  expect_silent(spacy_finalize())
})
# Asking for entities from a session initialized with entity = FALSE must
# produce a message; with entity = TRUE at initialization it must be silent.
test_that("spacy_parse returns the message if 'entity' option is not consistent with the initialization options", {
  skip_on_cran()
  # skip_on_appveyor()
  skip_on_os("solaris")
  try_spacy_initialize()

  sample_txt <- c(doc1 = "Sentence one.\nSentence two.",
                  doc2 = "Sentence\tthree.")

  # restart without entity recognition, then request entities anyway
  expect_silent(spacy_finalize())
  expect_message(spacy_initialize(entity = FALSE), "successfully|already")
  expect_message(
    spacy_parse(sample_txt, entity = TRUE),
    "entity == TRUE is ignored"
  )

  # restart with entity recognition: the same request is now silent
  expect_silent(spacy_finalize())
  expect_message(spacy_initialize(entity = TRUE), "successfully")
  expect_silent(parsed_with_entity <- spacy_parse(sample_txt, entity = TRUE))

  expect_silent(spacy_finalize())
})
# additional_attributes should add one column per requested attribute and
# raise an error for an attribute name that does not exist.
test_that("spacy_parse additional_attributes option works as intended", {
  skip_on_cran()
  # skip_on_appveyor()
  skip_on_os("solaris")
  try_spacy_initialize()
  expect_message(spacy_initialize(), "successfully|already")

  gatsby_txt <- c(
    doc1 = "I would have accepted without question the information that Gatsby sprang from the swamps of Louisiana or from the lower East Side of New York.",
    doc2 = "I graduated from New Haven in 1915, just a quarter of a century after my father, and a little later I participated in that delayed Teutonic migration known as the Great War."
  )

  # a single extra attribute
  with_title <- spacy_parse(gatsby_txt, additional_attributes = "is_title")
  expect_true("is_title" %in% names(with_title))
  expect_equal(sum(with_title$is_title), 14)

  # two extra attributes at once
  with_title_num <- spacy_parse(
    gatsby_txt,
    additional_attributes = c("is_title", "like_num")
  )
  expect_true("like_num" %in% names(with_title_num))
  is_year_token <- with_title_num$token == "1915"
  expect_equal(with_title_num[is_year_token, "like_num"], TRUE)

  # an unknown attribute name is an error
  expect_error(
    spacy_parse(gatsby_txt, additional_attributes = c("random_attribute_name")),
    "object has no attribute"
  )

  expect_silent(spacy_finalize())
})
# A data.frame with doc_id and text columns is accepted; one whose id column
# is named differently is rejected with the TIF-standard error.
test_that("spacy_parse can handle data.frame properly", {
  skip_on_cran()
  # skip_on_appveyor()
  skip_on_os("solaris")
  try_spacy_initialize()
  expect_message(spacy_initialize(), "successfully|already")

  doc_ids <- paste0("doc", seq_len(2))
  doc_texts <- c("Sentence one.\nSentence two.",
                 "Sentence\tthree.")

  # columns named doc_id / text
  tif_df <- data.frame(doc_id = doc_ids,
                       text = doc_texts,
                       stringsAsFactors = FALSE)
  expect_equal(dim(spacy_parse(tif_df)), c(11, 7))

  # id column called `id` instead of `doc_id`
  non_tif_df <- data.frame(id = doc_ids,
                           text = doc_texts,
                           stringsAsFactors = FALSE)
  expect_error(
    spacy_parse(non_tif_df),
    "input data.frame does not conform to the TIF standard"
  )

  expect_silent(spacy_finalize())
})
# A tibble with a numeric doc_id column and an all-digit text must be parsed
# like any other input, with doc_id compared here as character.
# see https://stackoverflow.com/questions/55518184/how-to-feed-a-tibble-to-spacyr
test_that("spacy_parse handles numbers/tibbles correctly", {
  skip_on_cran()
  skip_on_os("solaris")
  # Check the optional dependency before try_spacy_initialize() so that a skip
  # cannot leave behind whatever that helper set up.
  skip_if_not_installed("tibble")
  try_spacy_initialize()

  bogustib <- tibble::tibble(doc_id = c(1, 2, 3),
                             text = c("bug", "one love", "838383838"))
  # expect_equivalent() ignores attributes, so only the column values of the
  # returned object are compared against the plain data.frame.
  expect_equivalent(
    spacy_parse(bogustib)[, c("doc_id", "sentence_id", "token_id", "token", "pos")],
    data.frame(
      doc_id = as.character(c(1, 2, 2, 3)),
      sentence_id = rep(1, 4),
      token_id = c(1, 1, 2, 1),
      token = c("bug", "one", "love", "838383838"),
      pos = c("NOUN", "NUM", "NOUN", "NUM"),
      stringsAsFactors = FALSE)
  )

  # Finalize as every other test in this file does, so the session is not
  # left running after the last test.
  expect_silent(spacy_finalize())
})
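# NOTE: the two lines below are web-page boilerplate captured along with this
# file; they are not part of the tests and are kept only as comments.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.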