library(data.table)
testthat::context("Adding annotation layer to tokenized data")

# Round-trip check on pre-tokenized data: the tokens of `reuters_dt` are
# glued back into one whitespace-separated string per document, pushed
# through a pipeline configured with the "tokenize, ssplit" annotators, and
# the resulting table must contain exactly the same tokens as the input.
test_that("Workflow #1 explained in the vignette", {
  # One sub-table per document, grouped by the values of `doc_id`.
  docs_by_id <- split(reuters_dt, f = reuters_dt[["doc_id"]])

  # Collapse the `word` column of a document into a single string, using
  # exactly one space between consecutive tokens.
  join_tokens <- function(doc) paste(doc[["word"]], collapse = " ")
  whitespace_docs <- lapply(docs_by_id, join_tokens)

  # NOTE(review): "tokenize.whitespace" presumably tells the tokenizer to
  # split on whitespace only, which is what lets the original tokenization
  # survive the round trip -- confirm against the CoreNLP documentation.
  corenlp_props <- list(
    annotators = "tokenize, ssplit",
    tokenize.whitespace = "true"
  )
  pipeline <- StanfordCoreNLP$new(
    properties = corenlp_props,
    output_format = "conll"
  )

  # The return value of `annotate()` is not used; the annotated data is read
  # back from the AnnotationList object itself afterwards.
  annotations <- AnnotationList$new(whitespace_docs)
  pipeline$annotate(annotations)
  annotated_dt <- annotations$as.data.table()

  # Same number of tokens before and after annotation.
  expect_identical(nrow(reuters_dt), nrow(annotated_dt))

  # Same tokens, in the same order.
  expect_identical(reuters_dt[["word"]], annotated_dt[["word"]])
})
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.