tests/testthat/test-lsa.R

library(quanteda)
library(wordvector)

corp <- data_corpus_inaugural %>% 
    corpus_reshape()

toks <- tokens(corp, remove_punct = TRUE, remove_symbols = TRUE) %>% 
    tokens_remove(stopwords(), padding = FALSE) %>% 
    tokens_tolower()

set.seed(1234)
wov <- textmodel_lsa(toks, dim = 50, min_count = 2, sample = 0)
dov <- textmodel_doc2vec(toks, wov)
dov_gp <- textmodel_doc2vec(toks, wov, group_data = TRUE)

test_that("word2vec words", {
    
    # wordvector
    expect_equal(
        class(wov), "textmodel_wordvector"
    )
    expect_identical(
        dim(wov$values), c(5360L, 50L)
    )
    expect_equal(
        wov$weight, "count"
    )
    expect_identical(
        wov$min_count, 2L
    )
    expect_identical(
        featfreq(dfm_trim(dfm(toks), 2)),
        wov$frequency
    )
    expect_output(
        print(wov),
        paste(
            "",
            "Call:",
            "textmodel_lsa(x = toks, dim = 50, min_count = 2, sample = 0)",
            "",
            "50 dimensions; 5,360 words.", sep = "\n"), fixed = TRUE
    )
    expect_equal(
        class(print(wov)), "textmodel_wordvector"
    )
    expect_equal(
        names(dov),
        c("values", "dim", "concatenator", "docvars", "call", "version")
    )
    
    # docvector with model
    expect_equal(
        dim(dov$values), c(5234L, 50L)
    )
    expect_equal(
        class(dov), "textmodel_docvector"
    )
    expect_output(
        print(dov),
        paste(
            "",
            "Call:",
            "textmodel_doc2vec(x = toks, model = wov)",
            "",
            "50 dimensions; 5,234 documents.", sep = "\n"), fixed = TRUE
    )
    expect_equal(
        class(print(dov)), "textmodel_docvector"
    )
    expect_equal(
        names(dov),
        c("values", "dim", "concatenator", "docvars", "call", "version")
    )
    
    # docvector with grouped data
    expect_identical(
        dim(dov_gp$values), c(59L, 50L)
    )
    expect_equal(
        class(dov_gp), "textmodel_docvector"
    )
    expect_equal(
        names(dov_gp),
        c("values", "dim", "concatenator", "docvars", "call", "version")
    )
})

Try the wordvector package in your browser

Any scripts or data that you put into this service are public.

wordvector documentation built on April 12, 2025, 2:23 a.m.