# Label the tests in this file as the "Tokenizer" group.
# NOTE(review): context() is deprecated in testthat 3rd edition; confirm which
# edition this package uses before removing it.
context("Tokenizer")
test_that("Parser works for large strings", {
  # One sentence of 13 words, repeated 100 times -> 1300 expected tokens.
  sentence <- "I want to see the lovely, handier version of sing or singing dogs"
  long_text <- paste(rep(sentence, 100), collapse = " ")
  expected_tokens <- 1300

  # The same input is parsed ten times in a row.
  # NOTE(review): presumably the repetition is meant to surface intermittent
  # tokenizer failures on large input; confirm the intent.
  for (run in seq_len(10)) {
    # Default dictionary.
    default_tokens <- hunspell_parse(long_text)[[1]]
    expect_length(default_tokens, expected_tokens)

    # The token count is expected to be the same with these two dictionaries.
    ru_tokens <- hunspell_parse(long_text, dict = "ru_RU")[[1]]
    expect_length(ru_tokens, expected_tokens)

    aot_tokens <- hunspell_parse(long_text, dict = "russian-aot")[[1]]
    expect_length(aot_tokens, expected_tokens)
  }
})
# Contractions and possessives are the most challenging to check
test_that("Test English apostrophe", {
  # Phrases containing contractions and singular/plural possessives.
  text <- c(
    "let's, don't, couldn't, it's, she's",
    "the lawyer's fee",
    "the child's toy",
    "children's toys",
    "Tom Jones's first album",
    "anyone's guess",
    "excessive lawyers' fees",
    "the twins' parents",
    "the student teachers' supervisor",
    "the boys' baseball team"
  )

  # Do not split at the apostrophe: expected number of tokens for each
  # element of `text`, in the same order.
  len <- c(5, 3, 3, 2, 4, 2, 3, 3, 4, 4)

  # Count the tokens per phrase for a given dictionary. vapply() is used
  # instead of sapply() so the result is always an integer vector, whatever
  # the parser returns.
  count_tokens <- function(dict) {
    vapply(hunspell_parse(text, dict = dict), length, integer(1))
  }
  expect_equal(count_tokens("en_US"), len)
  expect_equal(count_tokens("en_GB"), len)

  # Do not find any typos: each phrase is checked on its own against both
  # dictionaries and must yield an empty result.
  for (phrase in text) {
    expect_length(hunspell(phrase, dict = "en_GB")[[1]], 0)
    expect_length(hunspell(phrase, dict = "en_US")[[1]], 0)
  }
})
test_that("Quotes around words are ignored", {
  quoted_text <- "The 'hunspell' library"
  expected_tokens <- c("The", "hunspell", "library")
  english_dicts <- c("en_US", "en_GB")

  # Parsing must return the bare words, without the surrounding single quotes.
  for (dict_name in english_dicts) {
    tokens <- hunspell_parse(quoted_text, dict = dict_name)[[1]]
    expect_equal(tokens, expected_tokens)
  }

  # hunspell() is expected to return exactly the unquoted word "hunspell".
  for (dict_name in english_dicts) {
    reported <- hunspell(quoted_text, dict = dict_name)[[1]]
    expect_equal(reported, "hunspell")
  }
})
# (Web page boilerplate captured with this file; not part of the test suite.)
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.