Nothing
require(quanteda)
mat_test <- readRDS("../data/matrix_embedding.RDS")
toks_test <- readRDS("../data/tokens_test.RDS")
feat_test <- head(char_context(toks_test, "america*", min_count = 1, p = 0.05), 100)
dfmt_test <- dfm(toks_test)
seed <- as.seedwords(data_dictionary_sentiment)
lss_test <- textmodel_lss(dfmt_test, seed, terms = feat_test, k = 50,
include_data = FALSE)
test_that("as.textmodel_lss works with matrix", {
term <- c("decision", "instance", "universal", "foundations", "the")
# with terms
lss1 <- as.textmodel_lss(mat_test, seed, term)
expect_equal(names(lss1), names(LSX:::build_lss()))
expect_identical(lss1$embedding, mat_test)
expect_false(any(duplicated(names(coef(lss1)))))
pred1 <- predict(lss1, dfmt_test)
expect_equal(names(pred1), rownames(dfmt_test))
expect_equal(rowSums(dfmt_test[,names(lss1$beta)]) == 0,
is.na(pred1))
# without terms
lss2 <- as.textmodel_lss(mat_test, seed)
expect_equal(names(lss2), names(LSX:::build_lss()))
expect_identical(lss2$embedding, mat_test)
expect_false(any(duplicated(names(coef(lss2)))))
pred2 <- predict(lss2, dfmt_test)
expect_equal(names(pred2), rownames(dfmt_test))
expect_equal(rowSums(dfmt_test[,names(lss2$beta)]) == 0,
is.na(pred2))
# with special features
mat_special <- mat_test
colnames(mat_special)[1:2] <- c("", "*")
lss3 <- as.textmodel_lss(mat_special, seed)
expect_equal(sum("" == names(coef(lss3))), 0)
expect_equal(sum("*" == names(coef(lss3))), 1)
# with slice
lss4 <- as.textmodel_lss(mat_test, seed, slice = 50)
expect_error(
as.textmodel_lss(mat_test, seed, slice = 150),
"The value of slice must be between 1 and 100"
)
expect_error(
as.textmodel_lss(mat_test, seed, slice = 1:150),
"The length of slice must be between 1 and 100"
)
expect_identical(coef(lss4),
coef(as.textmodel_lss(mat_test, seed, slice = 1:50)))
expect_identical(lss4$embedding, mat_test)
})
test_that("as.textmodel_lss errors with invalid columns", {
seed <- as.seedwords(data_dictionary_sentiment)
mat_nocol <- mat_nacol <- mat_na <- mat_test
colnames(mat_nocol) <- NULL
expect_error(as.textmodel_lss(mat_nocol, seed),
"x must have column names for features")
colnames(mat_nacol)[1] <- NA
expect_error(as.textmodel_lss(mat_nacol, seed),
"x must not have NA in the column names")
mat_na[1,1] <- NA
expect_error(as.textmodel_lss(mat_na, seed),
"x must not have NA")
})
test_that("as.textmodel_lss works with textmodel_lss", {
# with fitted model
lss <- as.textmodel_lss(lss_test, seed, terms = feat_test, slice = 10)
expect_equal(lss$embedding, lss_test$embedding)
expect_identical(lss$data, lss_test$data)
expect_identical(lss$frequency, lss_test$frequency)
expect_identical(names(lss$frequency), names(lss$frequency))
expect_error(
as.textmodel_lss(lss_test, seed, slice = 100),
"The value of slice must be between 1 and 50"
)
expect_error(
as.textmodel_lss(lss_test, seed, slice = 1:100),
"The length of slice must be between 1 and 50"
)
# with dummy LSS
weight <- c("decision" = 0.1, "instance" = -0.1,
"foundations" = 0.3, "the" = 0)
lss_dummy <- as.textmodel_lss(weight)
expect_error(
as.textmodel_lss(lss_dummy, seed),
"x must be a valid textmodel_lss object"
)
})
test_that("as.textmodel_lss works with vector", {
weight <- c("decision" = 0.1, "instance" = -0.1,
"foundations" = 0.3, "the" = 0)
lss <- as.textmodel_lss(weight)
expect_equal(names(lss), names(LSX:::build_lss()))
pred <- predict(lss, dfmt_test)
expect_equal(names(pred), rownames(dfmt_test))
expect_equal(rowSums(dfmt_test[,names(lss$beta)]) == 0,
is.na(pred))
})
test_that("as.textmodel_lss errors with vector", {
weight <- c("decision" = 0.1, "instance" = -0.1,
"foundations" = 0.3, "the" = 0)
weight_noname <- weight_naname <- weight_na <- weight
names(weight_noname) <- NULL
expect_error(as.textmodel_lss(weight_noname),
"x must have names for features")
names(weight_naname)[1] <- NA
expect_error(as.textmodel_lss(weight_naname),
"x must not have NA in the names")
weight_na[1] <- NA
expect_error(as.textmodel_lss(weight_na),
"x must not have NA")
})
test_that("auto_weight is working", {
skip_on_cran()
lss1 <- as.textmodel_lss(mat_test, seed)
lss2 <- as.textmodel_lss(mat_test, seed, auto_weight = TRUE)
expect_true(
all(lss1$seeds_weighted != lss2$seeds_weighted)
)
expect_true(
all(sign(lss1$seeds_weighted) == sign(lss2$seeds_weighted))
)
expect_true(
all(abs(lss2$beta[names(lss2$seeds_weighted)] - lss1$seeds_weighted) < 0.05)
)
expect_output(
as.textmodel_lss(mat_test, seed, auto_weight = TRUE, verbose = TRUE),
"Optimizing seed weights..."
)
})
test_that("terms is working", {
skip_on_cran()
lss <- textmodel_lss(dfmt_test, seed, k = 50)
# glob pattern
lss1 <- as.textmodel_lss(lss, seed, terms = "poli*")
expect_equal(sum(stringi::stri_startswith_fixed(names(lss1$beta), "poli")), 11)
expect_identical(names(lss1$beta), names(lss1$frequency))
# numeric vector
weight <- sample(1:10, length(lss1$beta), replace = TRUE) / 10
names(weight) <- names(lss1$beta)
lss2 <- as.textmodel_lss(lss, seed, terms = weight)
expect_true(all(lss2$beta == lss1$beta * weight))
expect_error(as.textmodel_lss(lss, seed, terms = c("polity" = 0.2, "politic" = -0.1)),
"terms must be positive values without NA")
expect_error(as.textmodel_lss(lss, seed, terms = c("polity" = 0.2, "politic" = NA)),
"terms must be positive values without NA")
expect_error(as.textmodel_lss(lss, seed, terms = c(01, 0.2)),
"terms must be named")
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.