LSS: Semi-Supervised Algorithm for Document Scaling

# Shared fixtures for the textplot tests in this file.
# library() stops immediately when quanteda is not installed, whereas
# require() would only return FALSE and let a later call fail instead.
library(quanteda)

# Tokens read from the test data directory, with stopwords removed
toks_test <- readRDS("../data/tokens.RDS")
toks_test <- tokens_remove(toks_test, stopwords())

# At most 100 entries of the char_context() result for "america*";
# passed as `terms` to textmodel_lss() in the tests below
feat_test <- head(char_context(toks_test, "america*", min_count = 1, p = 0.05), 100)

# Dictionary passed as `highlighted` to textplot_terms() in the tests below
dict <- dictionary(list("keywords" = c("positive", "bad", "xxxx")))

test_that("textplot_* works with SVD", {
    # Model fitted from a document-feature matrix (the SVD case of the title)
    dfmt <- dfm(toks_test)
    seed <- c("nice*" = 1, "positive*" = 1, "bad*" = -1, "negative*" = -1)
    lss <- textmodel_lss(dfmt, seed, k = 10)

    # Plots are checked by inheritance rather than by comparing the whole
    # class vector, so the tests do not depend on the exact set of classes
    # that ggplot2 attaches to plot objects.
    suppressWarnings({
      expect_s3_class(textplot_simil(lss), "ggplot")
    })

    # `highlighted` given as a character vector, a dictionary or empty
    expect_s3_class(textplot_terms(lss, highlighted = dict$keywords), "ggplot")
    expect_s3_class(textplot_terms(lss, highlighted = dict$keywords, max_words = 2),
                    "ggplot")
    expect_s3_class(textplot_terms(lss, highlighted = dict$keywords, max_highlighted = 10),
                    "ggplot")
    expect_s3_class(textplot_terms(lss, highlighted = dict$keywords, max_highlighted = 0),
                    "ggplot")
    expect_s3_class(textplot_terms(lss, highlighted = dict), "ggplot")
    expect_s3_class(textplot_terms(lss, highlighted = character()), "ggplot")

    # No `highlighted` argument, with each accepted `sampling` value
    expect_s3_class(textplot_terms(lss), "ggplot")
    expect_s3_class(textplot_terms(lss, max_highlighted = 10), "ggplot")
    expect_s3_class(textplot_terms(lss, sampling = "relative"), "ggplot")
    expect_s3_class(textplot_terms(lss, sampling = "absolute"), "ggplot")
    expect_error(textplot_terms(lss, sampling = "xxx"))

    # Same checks for a model restricted to the terms in feat_test
    lss2 <- textmodel_lss(dfmt, seed, terms = feat_test, k = 10)
    expect_s3_class(textplot_terms(lss2), "ggplot")
    expect_s3_class(textplot_terms(lss2, sampling = "relative"), "ggplot")
    expect_s3_class(textplot_terms(lss2, sampling = "absolute"), "ggplot")
    expect_error(textplot_terms(lss2, sampling = "xxx"))
})

test_that("textplot_* works even when frequency and beta do not match (#71)", {
    dfmt <- dfm(toks_test)
    seed <- c("nice*" = 1, "positive*" = 1, "bad*" = -1, "negative*" = -1)
    lss <- textmodel_lss(dfmt, seed, k = 10)

    # Replicate #71: append two extra named entries to the frequency vector
    lss$frequency <- c(lss$frequency, "xxx" = 1, "yyy" = 1)

    # Checked by inheritance so the test does not depend on the exact class
    # vector of ggplot objects
    expect_s3_class(textplot_terms(lss), "ggplot")
})

test_that("textplot_* works with Glove", {
    # Model fitted from a feature co-occurrence matrix (the Glove case of the
    # title)
    fcmt <- fcm(toks_test)
    seed <- c("nice*" = 1, "positive*" = 1, "bad*" = -1, "negative*" = -1)
    lss <- textmodel_lss(fcmt, seed, w = 10)

    # Plots are checked by inheritance rather than by comparing the whole
    # class vector, so the tests do not depend on the exact set of classes
    # that ggplot2 attaches to plot objects.
    suppressWarnings({
      expect_s3_class(textplot_simil(lss), "ggplot")
    })
    expect_s3_class(textplot_terms(lss, highlighted = dict$keywords), "ggplot")
    expect_s3_class(textplot_terms(lss, highlighted = dict$keywords, max_words = 2),
                    "ggplot")
    expect_s3_class(textplot_terms(lss, highlighted = dict), "ggplot")
    expect_s3_class(textplot_terms(lss), "ggplot")

    # max_words must be a single value
    expect_error(textplot_terms(lss, highlighted = dict, max_words = 100:200),
                 "The length of max_words must be 1")

    # Model restricted to the terms in feat_test
    lss2 <- textmodel_lss(fcmt, seed, terms = feat_test, w = 10)
    expect_s3_class(textplot_terms(lss2), "ggplot")
})

test_that("textplot_components() works", {

    seed <- c("nice*" = 1, "positive*" = 1, "bad*" = -1, "negative*" = -1)

    # One model fitted from a dfm and one fitted from an fcm; only the former
    # is expected to be accepted by textplot_components()
    dfmt <- dfm(toks_test)
    lss_svd <- textmodel_lss(dfmt, seed, k = 10)
    fcmt <- fcm(toks_test)
    lss_glove <- textmodel_lss(fcmt, seed, w = 10)

    # The number of group levels in the plot data equals n
    gg1 <- textplot_components(lss_svd, n = 5)
    expect_length(levels(gg1$data$group), 5)
    gg2 <- textplot_components(lss_svd, n = 3)
    expect_length(levels(gg2$data$group), 3)

    # Checked by inheritance so the tests do not depend on the exact class
    # vector of ggplot objects
    expect_s3_class(textplot_components(lss_svd, 3), "ggplot")
    expect_s3_class(textplot_components(lss_svd, 3, scale = "relative"), "ggplot")

    # Argument validation
    expect_error(textplot_components(lss_svd, n = c(5, 6)), "The length of n must be 1")
    expect_error(textplot_components(lss_svd, n = 20), "The value of n must be between 2 and 10")
    expect_error(textplot_components(lss_glove), "SVD must be used to generate word vectors")
})

test_that("textplot_* raise error when attributes are missing", {
    dfmt <- dfm(toks_test)

    # topfeatures() returns counts named by feature, so the feature labels are
    # its names(); assigning the result directly would use the counts
    # (coerced to strings) as names.
    feats <- names(topfeatures(dfmt, 100))

    # One random coefficient per feature actually returned, so the names
    # always line up even if the dfm has fewer than 100 features
    coef <- rnorm(length(feats))
    names(coef) <- feats

    # Model built from a bare named numeric vector
    lss <- as.textmodel_lss(coef)
    suppressWarnings({
      expect_error(textplot_simil(lss),
                   "textplot_simil() does not work with dummy models", fixed = TRUE)
    })
})

test_that("textplot_terms works even when frequency has zeros (#85)", {
    # Keep only documents with Year > 2000; the first expectation below
    # confirms that this leaves features with zero frequency in the model
    dfmt <- dfm_subset(dfm(toks_test), Year > 2000)
    seed <- c("nice*" = 1, "positive*" = 1, "bad*" = -1, "negative*" = -1)
    lss <- suppressWarnings(textmodel_lss(dfmt, seed, k = 10))

    expect_true(any(lss$frequency == 0))

    # Checked by inheritance so the test does not depend on the exact class
    # vector of ggplot objects
    expect_s3_class(textplot_terms(lss), "ggplot")

    # Rendering the plot must not emit output, messages or warnings
    expect_silent(print(textplot_terms(lss, max_highlighted = 10)))
})

test_that("textplot_terms works with dictionary", {

    # Two-token patterns, used both to compound the tokens and to highlight
    # terms in the plot
    phrases <- dictionary(list("american" = c("american *"),
                               "president" = c("president *")))

    # Documents with Year > 2000, with dictionary matches joined into single
    # tokens
    toks_recent <- tokens_subset(toks_test, Year > 2000)
    toks_comp <- tokens_compound(toks_recent, phrases)
    dfmt <- dfm(toks_comp)

    seed <- c("nice*" = 1, "positive*" = 1, "bad*" = -1, "negative*" = -1)
    lss <- suppressWarnings(textmodel_lss(dfmt, seed, k = 10))

    # Rendering with the dictionary as the second argument must be silent
    expect_silent(print(textplot_terms(lss, phrases, max_highlighted = 10)))

    # Rendering with the single-pattern dictionary "xxxxx" must be silent too
    placeholder <- dictionary(list(none = "xxxxx"))
    expect_silent(print(textplot_terms(lss, placeholder)))
})

koheiw/LSS documentation built on June 14, 2025, 11:04 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

koheiw/LSS
Semi-Supervised Algorithm for Document Scaling

tests/testthat/test-textplot.R
In koheiw/LSS: Semi-Supervised Algorithm for Document Scaling

R Package Documentation

Browse R Packages

We want your feedback!

koheiw/LSS Semi-Supervised Algorithm for Document Scaling

tests/testthat/test-textplot.R In koheiw/LSS: Semi-Supervised Algorithm for Document Scaling

R Package Documentation

Browse R Packages

We want your feedback!

koheiw/LSS
Semi-Supervised Algorithm for Document Scaling

tests/testthat/test-textplot.R
In koheiw/LSS: Semi-Supervised Algorithm for Document Scaling