# Tests for entity extraction and entity consolidation.
context("testing entity functions")
# Load shared test helpers from utils.R (presumably where
# try_spacy_initialize() is defined -- TODO confirm).
source("utils.R")
# Checks the entity texts returned by spacy_extract_entity() when the
# requested output is a data.frame.
test_that("spacy_extract_entity data.frame works", {
  skip_on_cran()
  # skip_on_appveyor()
  skip_on_os("solaris")
  try_spacy_initialize()
  expect_message(spacy_initialize(), "successfully|already")

  # Two named documents used as the extraction input.
  gatsby_docs <- c(
    doc1 = "I would have accepted without question the information that Gatsby sprang from the swamps of Louisiana or from the lower East Side of New York.",
    doc2 = "I graduated from New Haven in 1915, just a quarter of a century after my father, and a little later I participated in that delayed Teutonic migration known as the Great War."
  )
  expected_text <- c(
    "Gatsby", "Louisiana", "East Side", "New York", "New Haven",
    "1915", "just a quarter of a century", "Teutonic", "the Great War"
  )

  found <- spacy_extract_entity(gatsby_docs, output = "data.frame")
  expect_equal(found$text, expected_text)

  # The test stops here: nothing after skip() is executed.
  skip("behaviour changed in spaCy")
  expected_types <- c(
    "PERSON", "GPE", "LOC", "GPE", "GPE", "DATE", "CARDINAL", "NORP", "EVENT"
  )
  expect_equal(found$ent_type, expected_types)
  expect_silent(spacy_finalize())
})
# Checks the "nothing found" path of spacy_extract_entity(): for a text in
# which no entity is detected, the call is expected to emit a "No entity"
# message and to return NULL.
#
# The description previously said "no noun-phrase", which did not match what
# the test exercises (entity extraction); it now says "no entity".
test_that("spacy_extract_entity data.frame works properly when there is no entity", {
  skip_on_cran()
  # skip_on_appveyor()
  skip_on_os("solaris")
  try_spacy_initialize()
  expect_message(spacy_initialize(), "successfully|already")

  # A sentence for which the test expects no entities to be found.
  txt1 <- c(doc1 = "He told me all this very much later, but I've put it down here with the idea of exploding those wild rumors about his antecedents, which weren t even faintly true.")

  expect_message(
    spacy_extract_entity(txt1, output = "data.frame"),
    "No entity"
  )
  # expect_null() states the intent directly and replaces the
  # expect_equivalent(x, NULL) form, which is deprecated in testthat 3e.
  expect_null(
    spacy_extract_entity(txt1, output = "data.frame")
  )
})
# Checks spacy_extract_entity() with list output: one character vector of
# entity texts per input document, named by document.
test_that("spacy_extract_entity list works", {
  skip_on_cran()
  # skip_on_appveyor()
  skip_on_os("solaris")
  try_spacy_initialize()
  expect_message(spacy_initialize(), "successfully|already")

  gatsby_docs <- c(
    doc1 = "I would have accepted without question the information that Gatsby sprang from the swamps of Louisiana or from the lower East Side of New York.",
    doc2 = "I graduated from New Haven in 1915, just a quarter of a century after my father, and a little later I participated in that delayed Teutonic migration known as the Great War."
  )
  expected_by_doc <- list(
    doc1 = c("Gatsby", "Louisiana", "East Side", "New York"),
    doc2 = c(
      "New Haven", "1915", "just a quarter of a century",
      "Teutonic", "the Great War"
    )
  )

  found <- spacy_extract_entity(gatsby_docs, output = "list")
  expect_equal(found, expected_by_doc)
})
# Cross-checks the two output formats of spacy_extract_entity(): the same
# entity texts in the same order, and the same number of entities per document.
test_that("spacy_extract_entity data.frame and list returns the same entity", {
  skip_on_cran()
  # skip_on_appveyor()
  skip_on_os("solaris")
  try_spacy_initialize()
  expect_message(spacy_initialize(), "successfully|already")

  docs <- c(
    doc1 = "I would have accepted without question the information that Gatsby sprang from the swamps of Louisiana or from the lower East Side of New York.",
    doc2 = "It was a matter of chance that I should have rented a house in one of the strangest communities in North America."
  )
  as_df <- spacy_extract_entity(docs, output = "data.frame")
  as_list <- spacy_extract_entity(docs, output = "list")

  # Flattening the per-document list must reproduce the data.frame text column.
  flattened <- unname(unlist(as_list))
  expect_equal(as_df$text, flattened)

  # Entities per document: list element lengths vs. rows per doc_id.
  count_from_list <- lengths(as_list, use.names = FALSE)
  count_from_df <- as.integer(table(as_df$doc_id))
  expect_identical(count_from_list, count_from_df)
})
# Checks that spacy_extract_entity() gives the same result for a named
# character vector and for a data.frame with doc_id and text columns.
test_that("spacy_extract_entity.data.frame() works", {
  skip_on_cran()
  # skip_on_appveyor()
  skip_on_os("solaris")
  try_spacy_initialize()
  expect_message(spacy_initialize(), "successfully|already")

  docs <- c(
    doc1 = "I would have accepted without question the information that Gatsby sprang from the swamps of Louisiana or from the lower East Side of New York.",
    doc2 = "It was a matter of chance that I should have rented a house in one of the strangest communities in North America."
  )
  # Same texts in data.frame form, with ids "doc1" and "doc2".
  docs_df <- data.frame(
    doc_id = paste0("doc", seq_along(docs)),
    text = docs,
    stringsAsFactors = FALSE
  )

  from_character <- spacy_extract_entity(docs)
  from_data_frame <- spacy_extract_entity(docs_df)
  expect_equal(from_character, from_data_frame)
})
# Checks the `type` argument of spacy_extract_entity() ("named", "extended",
# "all") for both data.frame and list output.
test_that("spacy_extract_entity type option works", {
  skip_on_cran()
  # skip_on_appveyor()
  skip_on_os("solaris")
  try_spacy_initialize()
  expect_message(spacy_initialize(), "successfully|already")

  gatsby_docs <- c(
    doc1 = "I would have accepted without question the information that Gatsby sprang from the swamps of Louisiana or from the lower East Side of New York.",
    doc2 = "I graduated from New Haven in 1915, just a quarter of a century after my father, and a little later I participated in that delayed Teutonic migration known as the Great War."
  )

  # Local wrappers around the two call shapes used below; each use performs a
  # fresh extraction.
  entities_df <- function(kind) {
    spacy_extract_entity(gatsby_docs, output = "data.frame", type = kind)
  }
  entities_chr <- function(kind) {
    unname(unlist(spacy_extract_entity(gatsby_docs, output = "list", type = kind)))
  }

  # Row counts per type.
  expect_equal(nrow(entities_df("named")), 7)
  expect_equal(nrow(entities_df("extended")), 2)
  expect_equal(nrow(entities_df("all")), 9)

  # Entity texts per type, from the list output.
  expect_equal(
    entities_chr("named"),
    c(
      "Gatsby", "Louisiana", "East Side", "New York", "New Haven",
      "Teutonic", "the Great War"
    )
  )
  expect_equal(
    entities_chr("extended"),
    c("1915", "just a quarter of a century")
  )

  # Both output formats must agree for the named entities.
  expect_equal(entities_df("named")$text, entities_chr("named"))
})
# Checks entity_extract() on the output of spacy_parse(): entity text and
# type, the `type` filter, and the error raised when the parse has no entities.
test_that("getting named entities from spacy_parsed object works", {
  skip_on_cran()
  # skip_on_appveyor()
  skip_on_os("solaris")
  try_spacy_initialize()
  expect_message(spacy_initialize(), "successfully|already")

  # First input: two documents parsed with entity recognition on.
  news_docs <- c(
    doc1 = "The United States elected President Donald Trump, from New York.",
    doc2 = "New buildings on the New York skyline."
  )
  with_entities <- spacy_parse(news_docs, entity = TRUE)
  extracted <- entity_extract(with_entities, concatenator = " ")
  expect_equal(
    extracted$entity,
    c("The United States", "Donald Trump", "New York", "New York")
  )
  expect_equal(extracted$entity_type, c("GPE", "PERSON", "GPE", "GPE"))

  # Second input: doc2 now ends with a month name, expected to be the only
  # "extended" entity.
  news_docs <- c(
    doc1 = "The United States elected President Donald Trump, from New York.",
    doc2 = "New buildings on the New York skyline appeared in January."
  )
  with_entities <- spacy_parse(news_docs, entity = TRUE)
  extended_types <- entity_extract(with_entities, type = "extended")$entity_type
  expect_equal(extended_types, "DATE")
  named_types <- entity_extract(with_entities, type = "named")$entity_type
  expect_equal(named_types, c("GPE", "PERSON", "GPE", "GPE"))

  # A parse made with entity = FALSE must be rejected.
  without_entities <- spacy_parse(news_docs, entity = FALSE)
  expect_error(
    entity_extract(without_entities),
    "no entities in parsed object"
  )

  expect_silent(spacy_finalize())
})
# Checks that entity_extract() on a parse and spacy_extract_entity() on the
# raw text give the same entity texts.
test_that("compare entity_extract(spacy_parse()) and spacy_extract_entity()", {
  skip_on_cran()
  # skip_on_appveyor()
  skip_on_os("solaris")
  try_spacy_initialize()
  expect_message(spacy_initialize(), "successfully|already")

  nlp_docs <- c(
    doc1 = "The history of natural language processing generally started in the 1950s, although work can be found from earlier periods.",
    doc2 = "In 1950, Alan Turing published an article titled Intelligence which proposed what is now called the Turing test as a criterion of intelligence."
  )

  # Route 1: parse, then extract every entity with a space as concatenator.
  via_parse <- entity_extract(
    spacy_parse(nlp_docs, entity = TRUE),
    concatenator = " ",
    type = "all"
  )
  # Route 2: extract directly from the raw text.
  via_direct <- spacy_extract_entity(nlp_docs, output = "data.frame")

  expect_equal(via_parse$entity, via_direct$text)
  expect_silent(spacy_finalize())
})
# Checks entity_consolidate(): merged entity tokens, renumbered token ids,
# handling of extra parse columns, and the error for a parse without entities.
test_that("entity consolidation works", {
  skip_on_cran()
  # skip_on_appveyor()
  skip_on_os("solaris")
  # Start from a fresh session so that initialization reports "successfully".
  try(spacy_finalize(), silent = TRUE)
  expect_message(spacy_initialize(), "successfully")

  news_docs <- c(
    doc1 = "The United States elected President Donald Trump, from New York.",
    doc2 = "New buildings on the New York skyline appeared in January."
  )
  merged_positions <- c(1, 4)

  # Entities only: multi-word entities become single tokens.
  parsed_entities <- spacy_parse(news_docs, entity = TRUE)
  expect_equal(
    entity_consolidate(parsed_entities)$token[merged_positions],
    c("The_United_States", "Donald_Trump")
  )
  expect_equal(
    entity_consolidate(parsed_entities, concatenator = " ")$token[merged_positions],
    c("The United States", "Donald Trump")
  )
  # Token ids are expected to restart at 1 in each document after merging.
  expect_equal(
    entity_consolidate(parsed_entities)$token_id,
    c(1:8, 1:10)
  )

  # With noun phrases in the parse: same merged tokens, and no nounphrase
  # column in the result.
  parsed_nounphrase <- spacy_parse(news_docs, entity = TRUE, nounphrase = TRUE)
  expect_equal(
    entity_consolidate(parsed_nounphrase)$token[merged_positions],
    c("The_United_States", "Donald_Trump")
  )
  expect_false("nounphrase" %in% names(entity_consolidate(parsed_nounphrase)))

  # With pos and tag: consolidated entities are tagged "ENTITY".
  parsed_tagged <- spacy_parse(news_docs, entity = TRUE, pos = TRUE, tag = TRUE)
  expect_equal(
    entity_consolidate(parsed_tagged)$tag[c(1, 4, 17)],
    rep("ENTITY", 3)
  )
  # Compared case-insensitively: the English model apparently no longer
  # lowercases lemmas.
  lemma_rows <- c(1, 4, 16)
  expect_equal(
    tolower(entity_consolidate(parsed_tagged)$lemma[lemma_rows]),
    tolower(entity_consolidate(parsed_tagged)$token[lemma_rows])
  )

  # With dependency columns: they are dropped, with a note to the user.
  parsed_dependency <- spacy_parse(news_docs, entity = TRUE, dependency = TRUE)
  expect_false("dep_rel" %in% names(entity_consolidate(parsed_dependency)))
  expect_message(
    entity_consolidate(parsed_dependency),
    "Note: removing head_token_id, dep_rel"
  )

  # A parse made with entity = FALSE must be rejected.
  parsed_plain <- spacy_parse(news_docs, entity = FALSE)
  expect_error(
    entity_consolidate(parsed_plain),
    "no entities in parsed object"
  )

  expect_silent(spacy_finalize())
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.