context("test utils")
test_that("message_select works as expected", {
expect_message(
quanteda.core:::message_select("remove", 10, 5, 0, 0),
"removed 10 features"
)
expect_message(
quanteda.core:::message_select("remove", 10, 5, 0, 0),
" and 5 documents"
)
expect_message(
quanteda.core:::message_select("remove", 1, 5, 0, 0),
"removed 1 feature"
)
expect_message(
quanteda.core:::message_select("remove", 5, 1, 0, 0),
" and 1 document$"
)
expect_message(
quanteda.core:::message_select("remove", 0, 0, 0, 0),
"removed 0 features$"
)
expect_message(
quanteda.core:::message_select("select", 1000, 1000000, 0, 0),
"removed 1,000 features"
)
expect_message(
quanteda.core:::message_select("select", 1000, 1000000, 0, 0),
" and 1,000,000 documents$"
)
expect_message(
quanteda.core:::message_select("remove", 10, 5, 2, 3),
"2 features"
)
expect_message(
quanteda.core:::message_select("remove", 10, 5, 2, 3),
"3 documents"
)
expect_message(
quanteda.core:::message_select("remove", 1, 5, 0, 1),
", padded"
)
expect_message(
quanteda.core:::message_select("remove", 1, 5, 0, 1),
"1 document"
)
expect_message(
quanteda.core:::message_select("remove", 5, 1, 1, 0),
"1 feature"
)
})
test_that("pipes work", {
expect_true(!"package:magrittr" %in% search())
expect_equal(
tokens(char_tolower("A B C")),
tokens("A B C") %>% tokens_tolower()
)
})
test_that("pattern2list is working with collocations", {
txt <- c(". . . . a b c . . a b c . . . c d e",
"a b . . a b . . a b . . a b . a b",
"b c d . . b c . b c . . . b c")
toks <- tokens(txt)
type <- types(toks)
col <- textstat_collocations(toks, size = 2:3)
ids <- quanteda.core:::pattern2list(col, type, 'fixed', TRUE)
expect_equivalent(col$collocation,
vapply(ids, function(x, y) paste0(y[x], collapse = " "), character(1), type))
expect_equal(names(ids), col$collocation)
})
test_that("pattern2list is working with a list", {
type <- letters
pat <- c('a', 'a b', 'c d', 'e f g')
ids <- quanteda.core:::pattern2list(phrase(pat), type, 'fixed', FALSE)
expect_equal(names(ids), pat)
})
test_that("pattern2list is working with a dictionary", {
type <- c("a", "ab", "b", "bb", "a a", "a b")
dict <- dictionary(list(key1 = c("a*", "b*"), key2 = c("a a*", "a b*")))
ids <- quanteda.core:::pattern2list(dict, type, "glob", FALSE)
expect_true(all(names(dict) %in% names(ids)))
})
test_that("pattern2list is working with empty patterns", {
col <- data.frame()
class(col) <- c("collocations", "data.frame")
pat <- list()
expect_silent(quanteda.core:::pattern2list(col, types(toks), "fixed", TRUE))
expect_silent(quanteda.core:::pattern2list(pat, types(toks), "fixed", TRUE))
})
# test_that("get_package_version works", {
# load("../data/pre_v2_objects/data_corpus_pre2.rda")
# expect_true(quanteda.core:::get_object_version(data_corpus_pre2) == "1.4.0")
# expect_true(quanteda.core:::is_pre2(data_corpus_pre2))
# expect_true(quanteda.core:::get_object_version(corpus("one")) > "1.4.9")
#
# load("../data/pre_v2_objects/data_tokens_pre2.rda")
# expect_true(quanteda.core:::get_object_version(data_tokens_pre2) == "1.4.0")
# expect_true(quanteda.core:::is_pre2(data_tokens_pre2))
# expect_true(quanteda.core:::get_object_version(tokens("one")) > "1.4.9")
#
# load("../data/pre_v2_objects/data_dfm_pre2.rda")
# expect_true(quanteda.core:::get_object_version(data_dfm_pre2) == "1.4.0")
# expect_true(quanteda.core:::is_pre2(data_dfm_pre2))
# expect_true(quanteda.core:::get_object_version(dfm("one")) > "1.4.9")
# })
test_that("sample_bygroup works with sizes", {
grvec <- c(rep("a", 3), rep("b", 4), rep("c", 3))
# sampling same size each group
tmp <- quanteda.core:::sample_bygroup(1:10, group = grvec, size = 2, replace = TRUE)
expect_true(all(tmp[1:2] >= 0 & tmp[1:2] <= 3))
expect_true(all(tmp[3:4] >= 4 & tmp[3:4] <= 7))
expect_true(all(tmp[5:6] >= 8 & tmp[5:6] <= 10))
# sampling from a vector of sizes
tmp <- quanteda.core:::sample_bygroup(1:10, group = grvec, size = c(1, 1, 3), replace = TRUE)
expect_true(all(tmp[1] >= 0 & tmp[1] <= 3))
expect_true(all(tmp[2] >= 4 & tmp[2] <= 7))
expect_true(all(tmp[3:5] >= 8 & tmp[3:5] <= 10))
# default group size sampling
tmp <- quanteda.core:::sample_bygroup(1:10, group = grvec, replace = TRUE)
expect_true(all(tmp[1:3] >= 0 & tmp[1:3] <= 3))
expect_true(all(tmp[4:7] >= 4 & tmp[4:7] <= 7))
expect_true(all(tmp[8:10] >= 8 & tmp[8:10] <= 10))
# oversampling within group
tmp <- quanteda.core:::sample_bygroup(c(1:2, c(101:102)),
group = rep(letters[1:2], each = 2),
size = 5, replace = TRUE)
expect_true(all(tmp[1:5] >= 0 & tmp[1:5] <= 2))
expect_true(all(tmp[6:10] >= 100 & tmp[6:10] <= 102))
expect_error(
quanteda.core:::sample_bygroup(1:10, group = grvec[1:6], size = c(2, 3), replace = TRUE),
"group not equal in length of x"
)
expect_error(
quanteda.core:::sample_bygroup(1:10, group = grvec, size = c(2, 3), replace = TRUE),
"size not equal in length to the number of groups"
)
})
test_that("sample_bygroup works with groups of size 1", {
grvec <- c(rep("a", 3), rep("b", 1))
for (i in seq_len(10)) {
tmp <- quanteda.core:::sample_bygroup(1:4, group = grvec, replace = TRUE)
expect_true(all(tmp[1:3] >= 1 & tmp[1:3] <= 3))
expect_equal(tmp[4], 4)
}
})
test_that("max_load_factor can be configured", {
quanteda.core:::qatd_cpp_set_load_factor("pattern", 0.2)
quanteda.core:::qatd_cpp_set_load_factor("pattern", -1.0)
quanteda.core:::qatd_cpp_set_load_factor("pattern", 2.0)
quanteda.core:::qatd_cpp_set_load_factor("ngrams", 0.7)
quanteda.core:::qatd_cpp_set_load_factor("ngrams", -1.0)
quanteda.core:::qatd_cpp_set_load_factor("ngrams", 2.0)
expect_equal(quanteda.core:::qatd_cpp_get_load_factor(),
list(pattern = 0.2, ngrams = 0.7), tolerance = 0.001)
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.