# Nothing
# Label the tests in this file as belonging to the noun-phrase functions.
context("testing nounphrase functions")
# Load shared test helpers; presumably this is where try_spacy_initialize(),
# used by every test below, is defined -- TODO confirm against utils.R.
source("utils.R")
# Checks spacy_extract_nounphrases() with output = "data.frame" on two named
# documents, and checks the errors raised for duplicated or missing document
# names.
test_that("spacy_extract_nounphrases data.frame works", {
  skip_on_cran()
  # skip_on_appveyor()
  skip_on_os("solaris")
  try_spacy_initialize()
  expect_message(spacy_initialize(), "successfully|already")

  txt1 <- c(
    doc1 = "The history of natural language processing generally started in the 1950s, although work can be found from earlier periods.",
    doc2 = "In 1950, Alan Turing published an article titled Intelligence which proposed what is now called the Turing test as a criterion of intelligence."
  )
  noun_phrases <- spacy_extract_nounphrases(txt1, output = "data.frame")

  # These two checks are about the document names supplied by the caller, not
  # about which phrases spaCy returns, so they run before the skip() below
  # rather than being disabled together with the output comparisons.
  expect_error(
    spacy_extract_nounphrases(c(doc1 = "Hello", doc1 = "world"), output = "data.frame"),
    "duplicated"
  )
  expect_error(
    spacy_extract_nounphrases(c(doc1 = "Hello", "world"), output = "data.frame"),
    "missing"
  )

  # Everything below is disabled: the expected phrases no longer match what
  # spaCy produces.
  skip("behaviour changed in spaCy")
  expect_equal(
    noun_phrases$text,
    c("The history", "natural language processing", "the 1950s",
      "work", "earlier periods", "Alan Turing", "an article", "what",
      "a criterion", "intelligence")
  )
  expect_equal(
    noun_phrases$root_text,
    c("history", "processing", "1950s", "work", "periods", "Turing",
      "article", "what", "criterion", "intelligence")
  )
  # Not reached while the skip() above is in place (same as before).
  expect_silent(spacy_finalize())
})
# For a one-word input, the extractor is expected to emit a "No noun phrase"
# message and to return something equivalent to NULL.
test_that("spacy_extract_nounphrases data.frame works properly when there is no noun-phrase", {
  skip_on_cran()
  # skip_on_appveyor()
  skip_on_os("solaris")
  try_spacy_initialize()
  expect_message(spacy_initialize(), "successfully|already")

  single_word <- c(doc1 = "Hello")

  # First call: only the message is checked.
  expect_message(
    spacy_extract_nounphrases(single_word, output = "data.frame"),
    "No noun phrase"
  )

  # Second call: only the returned value is checked.
  extracted <- spacy_extract_nounphrases(single_word, output = "data.frame")
  expect_equivalent(extracted, NULL)
})
# A data.frame with doc_id/text columns must give the same result as the
# equivalent named character vector; a data.frame whose id column is named
# differently must be rejected with the TIF error message.
test_that("spacy_extract_nounphrases works with a TIF formatted data.frame", {
  skip_on_cran()
  # skip_on_appveyor()
  skip_on_os("solaris")
  try_spacy_initialize()
  # The explicit spacy_initialize() check is left commented out here to
  # increase coverage:
  # expect_message(spacy_initialize(), "successfully|already")

  docs <- c(
    doc1 = "The history of natural language processing generally started in the 1950s, although work can be found from earlier periods.",
    doc2 = "In 1950, Alan Turing published an article titled Intelligence which proposed what is now called the Turing test as a criterion of intelligence."
  )

  # Well-formed input: columns doc_id and text.
  tif_df <- data.frame(doc_id = names(docs), text = docs, stringsAsFactors = FALSE)
  from_df <- spacy_extract_nounphrases(tif_df, output = "data.frame")
  from_chr <- spacy_extract_nounphrases(docs, output = "data.frame")
  expect_equal(from_df, from_chr)

  # Malformed input: the id column is called doc_name instead of doc_id.
  bad_df <- data.frame(doc_name = names(docs), text = docs, stringsAsFactors = FALSE)
  expect_error(
    spacy_extract_nounphrases(bad_df, output = "data.frame"),
    "input data.frame does not conform to the TIF standard"
  )

  expect_silent(spacy_finalize())
})
# Checks spacy_extract_nounphrases() on a single unnamed string: the document
# id is expected to be "text1", and the phrase texts and root texts are
# compared against fixed expectations.
test_that("spacy_extract_nounphrases with atomic text string", {
  skip_on_cran()
  # skip_on_appveyor()
  skip_on_os("solaris")
  try_spacy_initialize()
  expect_message(spacy_initialize(), "successfully|already")

  txt1 <- "The history of natural language processing generally started in the 1950s, although work can be found from earlier periods."
  noun_phrases <- spacy_extract_nounphrases(txt1, output = "data.frame")

  # expect_equal() replaces expect_true(all.equal(...)): all.equal() returns a
  # character description rather than FALSE on mismatch, so wrapping it in
  # expect_true() hides the actual difference in the failure report.
  expect_equal(noun_phrases$doc_id[1], "text1")

  expect_equal(
    noun_phrases$text,
    c("The history", "natural language processing", "the 1950s",
      "work", "earlier periods")
  )
  expect_equal(
    noun_phrases$root_text,
    c("history", "processing", "1950s", "work", "periods")
  )
  expect_silent(spacy_finalize())
})
# Exercises spacy_extract_nounphrases() with output = "list". The comparison
# against the fixed per-document expectation is currently skipped.
test_that("spacy_extract_nounphrases list works", {
  skip_on_cran()
  # skip_on_appveyor()
  skip_on_os("solaris")
  try_spacy_initialize()
  expect_message(spacy_initialize(), "successfully|already")

  docs <- c(
    doc1 = "The history of natural language processing generally started in the 1950s, although work can be found from earlier periods.",
    doc2 = "In 1950, Alan Turing published an article titled Intelligence which proposed what is now called the Turing test as a criterion of intelligence."
  )
  noun_phrases <- spacy_extract_nounphrases(docs, output = "list")

  # Nothing below this line runs while the skip is in place.
  skip("behaviour changed in spaCy")

  expected_phrases <- list(
    doc1 = c("The history", "natural language processing", "the 1950s", "work", "earlier periods"),
    doc2 = c("Alan Turing", "an article", "what", "a criterion", "intelligence")
  )
  expect_equal(noun_phrases, expected_phrases)
  expect_silent(spacy_finalize())
})
# Cross-checks the two output formats of spacy_extract_nounphrases(): the
# flattened list must equal the data.frame's text column, and the number of
# phrases per document must agree between the two.
test_that("spacy_extract_nounphrases data.frame and list returns the same nounphrases", {
  skip_on_cran()
  # skip_on_appveyor()
  skip_on_os("solaris")
  try_spacy_initialize()
  expect_message(spacy_initialize(), "successfully|already")

  txt1 <- c(
    doc1 = "The history of natural language processing generally started in the 1950s, although work can be found from earlier periods.",
    doc2 = "In 1950, Alan Turing published an article titled Intelligence which proposed what is now called the Turing test as a criterion of intelligence."
  )
  noun_phrases_dataframe <- spacy_extract_nounphrases(txt1, output = "data.frame")
  noun_phrases_list <- spacy_extract_nounphrases(txt1, output = "list")

  expect_equal(
    noun_phrases_dataframe$text,
    unname(unlist(noun_phrases_list))
  )

  # lengths() always returns an integer vector, whereas sapply(x, length)
  # changes its return type on empty input.
  expect_equal(
    unname(lengths(noun_phrases_list)),
    as.vector(unclass(unname(table(noun_phrases_dataframe$doc_id))))
  )
  expect_silent(spacy_finalize())
})
# With default arguments, passing a doc_id/text data.frame must give the same
# result as passing the named character vector it was built from.
test_that("spacy_extract_nounphrases.data.frame() works", {
  skip_on_cran()
  # skip_on_appveyor()
  skip_on_os("solaris")
  try_spacy_initialize()
  expect_message(spacy_initialize(), "successfully|already")

  docs <- c(
    doc1 = "The history of natural language processing generally started in the 1950s, although work can be found from earlier periods.",
    doc2 = "In 1950, Alan Turing published an article titled Intelligence which proposed what is now called the Turing test as a criterion of intelligence."
  )
  docs_df <- data.frame(
    doc_id = paste0("doc", 1:2),
    text = docs,
    stringsAsFactors = FALSE
  )

  from_chr <- spacy_extract_nounphrases(docs)
  from_df <- spacy_extract_nounphrases(docs_df)
  expect_equal(from_chr, from_df)
})
# spacy_parse(nounphrase = TRUE) is expected to add "nounphrase" and
# "whitespace" columns. The count of "beg" markers is currently skipped.
test_that("spacy_parse nounphrase = TRUE works", {
  skip_on_cran()
  # skip_on_appveyor()
  skip_on_os("solaris")
  try_spacy_initialize()
  expect_message(spacy_initialize(), "successfully|already")

  docs <- c(
    doc1 = "The history of natural language processing generally started in the 1950s, although work can be found from earlier periods.",
    doc2 = "In 1950, Alan Turing published an article titled Intelligence which proposed what is now called the Turing test as a criterion of intelligence."
  )
  parsed <- spacy_parse(docs, nounphrase = TRUE)

  expect_true("nounphrase" %in% names(parsed))
  expect_true("whitespace" %in% names(parsed))

  # Nothing below this line runs while the skip is in place.
  skip("behaviour changed in spaCy")

  n_phrase_starts <- sum(grepl("beg", parsed$nounphrase))
  expect_identical(n_phrase_starts, 10L)
  expect_silent(spacy_finalize())
})
# When no document contains a noun phrase, spacy_parse(nounphrase = TRUE) is
# expected to emit a "No noun phrase" message and to return a result with
# neither a "nounphrase" nor a "whitespace" column.
test_that("spacy_parse nounphrase = TRUE return message when there is no nounphrase", {
  skip_on_cran()
  # skip_on_appveyor()
  skip_on_os("solaris")
  try_spacy_initialize()
  expect_message(spacy_initialize(), "successfully|already")

  greetings <- c(doc1 = "hello", doc2 = "hello")

  expect_message(
    spacy_parse(greetings, nounphrase = TRUE),
    "No noun phrase"
  )

  # Each column check parses the input afresh, as in the message check above.
  cols_first <- names(spacy_parse(greetings, nounphrase = TRUE))
  expect_false("nounphrase" %in% cols_first)

  cols_second <- names(spacy_parse(greetings, nounphrase = TRUE))
  expect_false("whitespace" %in% cols_second)
})
# nounphrase_extract() must run silently on a parse made with
# nounphrase = TRUE and must raise an error on a parse made with
# nounphrase = FALSE.
test_that("nounphrase_extract() on parsed object works", {
  skip_on_cran()
  # skip_on_appveyor()
  skip_on_os("solaris")
  try_spacy_initialize()
  expect_message(spacy_initialize(), "successfully|already")

  docs <- c(
    doc1 = "The history of natural language processing generally started in the 1950s, although work can be found from earlier periods.",
    doc2 = "In 1950, Alan Turing published an article titled Intelligence which proposed what is now called the Turing test as a criterion of intelligence."
  )

  # Parse that carries noun-phrase information.
  with_np <- spacy_parse(docs, nounphrase = TRUE)
  expect_silent(nounphrase_extract(with_np))

  # Parse that does not.
  without_np <- spacy_parse(docs, nounphrase = FALSE)
  expect_error(
    nounphrase_extract(without_np),
    "no nounphrases in parsed object"
  )
})
# The two routes to noun phrases -- nounphrase_extract() on a parsed object and
# spacy_extract_nounphrases() on the raw text -- must agree on both the phrase
# texts and the root tokens.
test_that("compare nounphrase_extract(spacy_parse()) and spacy_extract_nounphrases()", {
  skip_on_cran()
  # skip_on_appveyor()
  skip_on_os("solaris")
  try_spacy_initialize()
  expect_message(spacy_initialize(), "successfully|already")

  docs <- c(
    doc1 = "The history of natural language processing generally started in the 1950s, although work can be found from earlier periods.",
    doc2 = "In 1950, Alan Turing published an article titled Intelligence which proposed what is now called the Turing test as a criterion of intelligence."
  )

  parsed <- spacy_parse(docs, nounphrase = TRUE)
  via_parse <- nounphrase_extract(parsed, concatenator = " ")
  via_extract <- spacy_extract_nounphrases(docs, output = "data.frame")

  # Phrase texts.
  expect_equal(via_parse$nounphrase, via_extract$text)

  # Root tokens: rows of the parse whose nounphrase tag contains "root".
  root_rows <- grep("root", parsed$nounphrase)
  expect_equal(parsed$token[root_rows], via_extract$root_text)

  expect_silent(spacy_finalize())
})
# Exercises nounphrase_consolidate(): token joining with the default and a
# custom concatenator runs first; everything after the skip() (token ids,
# dropped columns, pos/tag/lemma values, the dependency message) is disabled.
test_that("nounphrase consolidation works", {
  skip_on_cran()
  # skip_on_appveyor()
  skip_on_os("solaris")
  try_spacy_initialize()
  expect_message(spacy_initialize(), "successfully|already")

  docs <- c(
    doc1 = "The history of natural language processing generally started in the 1950s, although work can be found from earlier periods.",
    doc2 = "In 1950, Alan Turing published an article titled Intelligence which proposed what is now called the Turing test as a criterion of intelligence."
  )

  # --- parse with entities and noun phrases --------------------------------
  parsed_entity <- spacy_parse(docs, entity = TRUE, nounphrase = TRUE)
  probe_rows <- c(1, 3, 15)

  joined_default <- nounphrase_consolidate(parsed_entity)
  expect_equal(
    joined_default$token[probe_rows],
    c("The_history", "natural_language_processing", "earlier_periods")
  )

  joined_space <- nounphrase_consolidate(parsed_entity, concatenator = " ")
  expect_equal(
    joined_space$token[probe_rows],
    c("The history", "natural language processing", "earlier periods")
  )

  # Nothing below this line runs while the skip is in place.
  skip("behaviour changed in spaCy")

  expect_equal(
    nounphrase_consolidate(parsed_entity)$token_id,
    c(1:16, 1:22)
  )
  expect_identical(
    c("nounphrase", "whitespace", "entity") %in% names(nounphrase_consolidate(parsed_entity)),
    rep(FALSE, 3)
  )

  # --- parse with pos and tag ----------------------------------------------
  parsed_pos <- spacy_parse(docs, nounphrase = TRUE, pos = TRUE, tag = TRUE)
  expect_equal(
    nounphrase_consolidate(parsed_pos)$pos[c(1, 3, 7, 10)],
    rep("nounphrase", 4)
  )
  expect_equal(
    nounphrase_consolidate(parsed_pos)$tag[c(1, 3, 7, 10)],
    rep("nounphrase", 4)
  )
  expect_equal(
    nounphrase_consolidate(parsed_pos)$lemma[c(1, 3, 7)],
    c("the_history", "natural_language_processing", "the_1950")
  )

  # --- parse with dependency information -----------------------------------
  parsed_dep <- spacy_parse(docs, nounphrase = TRUE, dependency = TRUE)
  expect_message(
    nounphrase_consolidate(parsed_dep),
    "emoving head_token_id, dep_rel for nounphrases"
  )

  expect_silent(spacy_finalize())
})
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.