# Nothing
# dfm_compress() is expected to sum counts across duplicated document names
# and/or duplicated feature names, depending on `margin`.
test_that("dfm_compress: simple test", {
  # Stack two dfms so that "text1" occurs twice as a document name. Case is
  # preserved at construction time; lowercasing the column names afterwards
  # leaves the dfm with repeated feature names.
  upper_part <- dfm(tokens(c("b A A", "C C a b B")),
                    tolower = FALSE, verbose = FALSE)
  lower_part <- dfm(tokens("A C C C C C"), tolower = FALSE, verbose = FALSE)
  dupdfm <- rbind(upper_part, lower_part)
  colnames(dupdfm) <- char_tolower(featnames(dupdfm))

  # margin = "documents": rows are merged, the columns stay as they are
  by_docs <- as.matrix(dfm_compress(dupdfm, margin = "documents"))
  expect_equal(
    by_docs,
    matrix(
      c(1, 1,
        3, 0,
        5, 2,
        0, 1,
        0, 1),
      nrow = 2,
      dimnames = list(docs = c("text1", "text2"),
                      features = featnames(dupdfm))
    )
  )

  # margin = "features": columns are merged, the three rows are kept
  by_feats <- as.matrix(dfm_compress(dupdfm, margin = "features"))
  expect_equal(
    by_feats,
    matrix(
      c(1, 2, 0,
        2, 1, 1,
        0, 2, 5),
      nrow = 3,
      dimnames = list(docs = c("text1", "text2", "text1"),
                      features = c("b", "a", "c"))
    )
  )

  # margin = "both": rows and columns are merged
  by_both <- as.matrix(dfm_compress(dupdfm, margin = "both"))
  expect_equal(
    by_both,
    matrix(
      c(1, 2,
        3, 1,
        5, 2),
      nrow = 2,
      dimnames = list(docs = c("text1", "text2"),
                      features = c("b", "a", "c"))
    )
  )
})
# Compressing a dfm built from the first five inaugural texts should leave
# its dimensions unchanged.
test_that("dfm_compress: no effect if no compression needed", {
  inaug_toks <- tokens(data_corpus_inaugural[1:5])
  compactdfm <- dfm(inaug_toks, tolower = TRUE, verbose = FALSE)

  dims_before <- dim(compactdfm)
  dims_after <- dim(dfm_compress(compactdfm))
  expect_equal(dims_before, dims_after)
})
# A feature whose column is all zeros must still be present after compression.
test_that("dfm_compress: empty features are preserved", {
  # Column "a" is entirely zero; "b" occurs twice as a feature name.
  counts <- matrix(
    c(0, 0, 0,
      2, 1, 5,
      0, 1, 0,
      1, 1, 0),
    nrow = 3,
    dimnames = list(docs = paste0("d", 1:3),
                    features = c("a", "b", "c", "b"))
  )
  testdfm <- new("dfm", Matrix::Matrix(counts, sparse = TRUE))

  feature_totals <- colSums(dfm_compress(testdfm))
  expect_equal(feature_totals[1], c(a = 0))
})
# A document whose row is all zeros must still be present after compression.
test_that("dfm_compress: empty documents are preserved", {
  # Row "d3" is entirely zero; "b" occurs twice as a feature name.
  counts <- matrix(
    c(0, 0, 0,
      2, 1, 0,
      0, 1, 0,
      1, 1, 0),
    nrow = 3,
    dimnames = list(docs = paste0("d", 1:3),
                    features = c("a", "b", "c", "b"))
  )
  testdfm <- new("dfm", Matrix::Matrix(counts, sparse = TRUE))

  document_totals <- rowSums(dfm_compress(testdfm))
  expect_equal(document_totals[3], c(d3 = 0))
})
# The docvars slot must come through dfm_compress() (and a repeated dfm() call)
# unchanged.
test_that("dfm_compress preserves docvars (#1506)", {
  doc_texts <- c(d1 = "A A A b c D D",
                 d2 = "b b b b D D D")
  corp <- corpus(doc_texts, docvars = data.frame(bool = c(TRUE, FALSE)))
  thedfm <- dfm(tokens(corp))

  # Re-assign the document names; the three expectations below then confirm
  # that the docname_, docid_ and segid_ columns exist in the docvars slot.
  docnames(thedfm) <- docnames(thedfm)
  docvar_names <- names(thedfm@docvars)
  expect_true("docname_" %in% docvar_names)
  expect_true("docid_" %in% docvar_names)
  expect_true("segid_" %in% docvar_names)

  # compressing over features leaves the docvars identical
  compressed <- dfm_compress(thedfm, margin = "features")
  expect_identical(thedfm@docvars, compressed@docvars)

  # passing the dfm through dfm() again leaves the docvars identical
  rebuilt <- dfm(thedfm)
  expect_identical(thedfm@docvars, rebuilt@docvars)
})
# Exercises the internal group_matrix() when grouping by a feature factor whose
# levels include one ("d") that no longer appears among the feature names.
test_that("add test for group_matrix with features and fill = TRUE", {
  x <- dfm(tokens(c("a a b c d", "b c d e")))
  # rename the fourth feature so that "e" is duplicated and "d" is absent
  colnames(x)[4] <- "e"
  feature_groups <- factor(featnames(x), levels = letters[1:5])
  doc_labels <- paste0("text", 1:2)

  # fill = TRUE: the unused level "d" shows up as an all-zero column
  filled <- quanteda:::group_matrix(x, fill = TRUE, features = feature_groups)
  expect_identical(
    as.matrix(filled),
    matrix(
      c(2, 0,
        1, 1,
        1, 1,
        0, 0,
        1, 2),
      nrow = 2,
      dimnames = list(doc_labels, c("a", "b", "c", "d", "e"))
    )
  )

  # fill = FALSE: only the levels that actually occur are returned
  unfilled <- quanteda:::group_matrix(x, fill = FALSE, features = feature_groups)
  expect_identical(
    as.matrix(unfilled),
    matrix(
      c(2, 0,
        1, 1,
        1, 1,
        1, 2),
      nrow = 2,
      dimnames = list(doc_labels, c("a", "b", "c", "e"))
    )
  )
})
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.