Nothing
require(quanteda)
test_that("tfm is working", {
dict <- dictionary(list(A = c("a", "aa*", "abc"),
B = c("b*", "bb*", "bcd"),
AB = c("aa", "bb")))
txt <- c("a aa aa aaa abc", "b b bb bcd bcd")
dfmt <- dfm(tokens(txt))
tfm1 <- seededlda:::tfm(dfmt, dict)
expect_s4_class(tfm1, "dgCMatrix")
expect_equal(tfm1["A",], c("a" = 1, "aa" = 2, "aaa" = 1, "abc" = 1,
"b" = 0, "bb" = 0, "bcd" = 0))
expect_equal(tfm1["B",], c("a" = 0, "aa" = 0, "aaa" = 0, "abc" = 0,
"b" = 2, "bb" = 1, "bcd" = 2))
expect_equal(tfm1["AB",], c("a" = 0, "aa" = 2, "aaa" = 0, "abc" = 0,
"b" = 0, "bb" = 1, "bcd" = 0))
expect_equal(tfm1["other",], c("a" = 0, "aa" = 0, "aaa" = 0, "abc" = 0,
"b" = 0, "bb" = 0, "bcd" = 0))
tfm2 <- seededlda:::tfm(dfmt, dict, residual = FALSE)
expect_s4_class(tfm2, "dgCMatrix")
expect_equal(rownames(tfm2), c("A", "B", "AB"))
tfm3 <- seededlda:::tfm(dfmt, dict, min_termfreq = 2)
expect_s4_class(tfm3, "dgCMatrix")
expect_equal(tfm3["A",], c("a" = 0, "aa" = 2, "aaa" = 0, "abc" = 0,
"b" = 0, "bb" = 0, "bcd" = 0))
expect_equal(tfm3["B",], c("a" = 0, "aa" = 0, "aaa" = 0, "abc" = 0,
"b" = 2, "bb" = 0, "bcd" = 2))
expect_equal(tfm3["AB",], c("a" = 0, "aa" = 2, "aaa" = 0, "abc" = 0,
"b" = 0, "bb" = 0, "bcd" = 0))
expect_equal(tfm3["other",], c("a" = 0, "aa" = 0, "aaa" = 0, "abc" = 0,
"b" = 0, "bb" = 0, "bcd" = 0))
tfm4 <- seededlda:::tfm(dfmt, dict, weight = 0.02)
expect_s4_class(tfm4, "dgCMatrix")
expect_equal(tfm4["A",], c("a" = 2, "aa" = 4, "aaa" = 2, "abc" = 2,
"b" = 0, "bb" = 0, "bcd" = 0))
expect_equal(tfm4["B",], c("a" = 0, "aa" = 0, "aaa" = 0, "abc" = 0,
"b" = 4, "bb" = 2, "bcd" = 4))
expect_equal(tfm4["AB",], c("a" = 0, "aa" = 4, "aaa" = 0, "abc" = 0,
"b" = 0, "bb" = 2, "bcd" = 0))
expect_equal(tfm4["other",], c("a" = 0, "aa" = 0, "aaa" = 0, "abc" = 0,
"b" = 0, "bb" = 0, "bcd" = 0))
tfm5 <- seededlda:::tfm(dfmt, dict, weight = c(0.01, 0.01, 0.02))
expect_s4_class(tfm4, "dgCMatrix")
expect_equal(tfm5["A",], c("a" = 1, "aa" = 2, "aaa" = 1, "abc" = 1,
"b" = 0, "bb" = 0, "bcd" = 0))
expect_equal(tfm5["B",], c("a" = 0, "aa" = 0, "aaa" = 0, "abc" = 0,
"b" = 2, "bb" = 1, "bcd" = 2))
expect_equal(tfm5["AB",], c("a" = 0, "aa" = 4, "aaa" = 0, "abc" = 0,
"b" = 0, "bb" = 2, "bcd" = 0))
expect_equal(tfm5["other",], c("a" = 0, "aa" = 0, "aaa" = 0, "abc" = 0,
"b" = 0, "bb" = 0, "bcd" = 0))
tfm11 <- seededlda:::tfm(dfmt, dict, min_termfreq = 2, residual = 4)
expect_s4_class(tfm11, "dgCMatrix")
expect_equal(rownames(tfm11),
c("A", "B", "AB", "other1", "other2", "other3", "other4"))
options(seededlda_residual_name = "topic")
tfm12 <- seededlda:::tfm(dfmt, dict, min_termfreq = 2, residual = 4)
expect_s4_class(tfm12, "dgCMatrix")
expect_equal(rownames(tfm12),
c("A", "B", "AB", "topic1", "topic2", "topic3", "topic4"))
options(seededlda_residual_name = "other") # reset
})
test_that("tfm is working without matches", {
dict <- dictionary(list(A = c("a", "aa*", "abc"),
B = c("b*", "bb*", "bcd"),
AB = c("aa", "bb")))
txt <- c("a aa aa aaa abc", "b b bb bcd bcd")
dfmt <- dfm(tokens(txt))
tfm1 <- seededlda:::tfm(dfmt, dict, residual = 0)
expect_equal(rowSums(tfm1), c("A" = 5, "B" = 5, "AB" = 3))
txt2 <- c("a aaa abc", "b b bcd bcd")
dfmt2 <- dfm(tokens(txt2))
tfm2 <- seededlda:::tfm(dfmt2, dict, residual = 1)
expect_equal(rowSums(tfm2), c("A" = 3, "B" = 4, "AB" = 0, "other" = 0))
# no match
tfm11 <- seededlda:::tfm(dfmt, dictionary(list(X = "x", Y = "y")),
residual = 1)
expect_equal(rowSums(tfm11), c("X" = 0, "Y" = 0, "other" = 0))
tfm12 <- seededlda:::tfm(dfmt, dictionary(list(X = character(), Y = character())),
residual = 1)
expect_equal(rowSums(tfm12), c("X" = 0, "Y" = 0, "other" = 0))
})
test_that("tfm works with ngrams", {
txt <- c("UN is the United Nations.",
"ICC is the International Criminal Court.")
toks <- tokens(txt, concatenator = " ")
dict <- dictionary(list("un" = c("united nations", "un"),
"icc" = c("international criminal court", "icc")))
toks1 <- tokens_compound(toks, dict)
dfmt1 <- dfm(toks1)
expect_equal(rowSums(seededlda:::tfm(dfmt1, dict)),
c("un" = 2, "icc" = 2, "other" = 0))
skip_if_not_installed("quanteda", "4.2.0")
toks2 <- tokens_compound(toks, dict, concatenator = "+")
dfmt2 <- dfm(toks2)
expect_equal(rowSums(seededlda:::tfm(dfmt2, dict)),
c("un" = 1, "icc" = 1, "other" = 0))
})
test_that("tfm works with dfm with x in docvars (#87)", {
dict <- dictionary(list("A" = "a", "B" = "b"))
dat <- data.frame(text = c("a b c", "A B C"),
x = c(1, 2))
corp <- corpus(dat)
toks <- tokens(corp)
dfmt <- dfm(toks)
expect_equal(
as.matrix(seededlda:::tfm(dfmt, dict, residual = 1)),
matrix(c(2, 0, 0, 0, 2, 0, 0, 0 ,0), nrow = 3,
dimnames = list(c("A", "B", "other"), c("a", "b", "c")))
)
})
test_that("levels is working", {
dict <- dictionary(list(A = list(
A2 = c("aa", "aab"),
c("a", "a*", "abc")
),
B = c("b*", "bb*", "bcd"),
AB = c("aa", "bb")))
txt <- c("a aa aa aaa abc", "b b bb bcd bcd")
dfmt <- dfm(tokens(txt))
tfm1 <- seededlda:::tfm(dfmt, dict)
expect_s4_class(tfm1, "dgCMatrix")
expect_equal(tfm1["A",], c("a" = 1, "aa" = 2, "aaa" = 1, "abc" = 1,
"b" = 0, "bb" = 0, "bcd" = 0))
expect_equal(tfm1["B",], c("a" = 0, "aa" = 0, "aaa" = 0, "abc" = 0,
"b" = 2, "bb" = 1, "bcd" = 2))
expect_equal(tfm1["AB",], c("a" = 0, "aa" = 2, "aaa" = 0, "abc" = 0,
"b" = 0, "bb" = 1, "bcd" = 0))
expect_equal(tfm1["other",], c("a" = 0, "aa" = 0, "aaa" = 0, "abc" = 0,
"b" = 0, "bb" = 0, "bcd" = 0))
tfm2 <- seededlda:::tfm(dfmt, dict, levels = 1:2)
expect_s4_class(tfm2, "dgCMatrix")
expect_equal(tfm2["A.A2",], c("a" = 0, "aa" = 2, "aaa" = 0, "abc" = 0,
"b" = 0, "bb" = 0, "bcd" = 0))
expect_equal(tfm2["A",], c("a" = 1, "aa" = 2, "aaa" = 1, "abc" = 1,
"b" = 0, "bb" = 0, "bcd" = 0))
expect_equal(tfm2["B",], c("a" = 0, "aa" = 0, "aaa" = 0, "abc" = 0,
"b" = 2, "bb" = 1, "bcd" = 2))
expect_equal(tfm2["AB",], c("a" = 0, "aa" = 2, "aaa" = 0, "abc" = 0,
"b" = 0, "bb" = 1, "bcd" = 0))
expect_equal(tfm2["other",], c("a" = 0, "aa" = 0, "aaa" = 0, "abc" = 0,
"b" = 0, "bb" = 0, "bcd" = 0))
tfm3 <- seededlda:::tfm(dfmt, dict, levels = 2)
expect_s4_class(tfm3, "dgCMatrix")
expect_equal(tfm3["A2",], c("a" = 0, "aa" = 2, "aaa" = 0, "abc" = 0,
"b" = 0, "bb" = 0, "bcd" = 0))
expect_equal(tfm3["other",], c("a" = 0, "aa" = 0, "aaa" = 0, "abc" = 0,
"b" = 0, "bb" = 0, "bcd" = 0))
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.