tests/testthat/test-textmodel_svmlin.R

library("quanteda")

test_that("the svmlin model works", {
    ## Example from 13.1 of _An Introduction to Information Retrieval_
    corp <- corpus(c(d1 = "Chinese Beijing Chinese",
                     d2 = "Chinese Chinese Shanghai",
                     d3 = "Chinese Macao",
                     d4 = "Tokyo Japan Chinese",
                     d5 = "Chinese Chinese Chinese Tokyo Japan"),
                   docvars = data.frame(train = factor(c("Y", "Y", "Y", "N", NA))))
    dfmat <- dfm(tokens(corp), tolower = FALSE) %>%
        dfm_tfidf()
    tmod <- textmodel_svmlin(dfmat, y = docvars(dfmat, "train"))

    expect_output(
        print(tmod),
        "Call:"
    )

    expect_equal(
        head(coef(tmod), 3),
        c(intercept = 0.0, Chinese = 0.330, Beijing = 0.330),
        tol = .001
    )

    tmod2 <- textmodel_svmlin(dfmat, y = docvars(dfmat, "train"), intercept = FALSE)
    expect_identical(
        predict(tmod2),
        c(d1 = "Y", d2 = "Y", d3 = "Y", d4 = "N", d5 = "N")
    )
    expect_equal(names(summary(tmod)), c("call", "estimated.feature.scores"))

    expect_identical(
        predict(tmod),
        c(d1 = "Y", d2 = "Y", d3 = "N", d4 = "N", d5 = "N")
    )

    expect_error(
        textmodel_svmlin(dfmat, y = c("Y", "N", "Maybe", NA, NA)),
        "y must contain two values only"
    )
})

test_that("textmodel_svm/svmlin() work with weighted dfm", {
    dfmat <- dfm_tfidf(data_dfm_lbgexample)
    expect_silent(
        tmod <- textmodel_svm(dfmat, y = c("N", "N", NA, "Y", "Y", NA))
    )
    expect_silent(
        predict(tmod)
    )
    expect_silent(
        tmod <- textmodel_svmlin(dfmat, y = c("N", "N", NA, "Y", "Y", NA))
    )
    expect_silent(
        predict(tmod)
    )
})

Try the quanteda.textmodels package in your browser

Any scripts or data that you put into this service are public.

quanteda.textmodels documentation built on Sept. 11, 2024, 8:19 p.m.