# tests/testthat/test-textmodel_svm.R

library("quanteda")

# Fits textmodel_svm() on a five-document toy corpus and checks printing,
# the first coefficients, the summary structure, and class/probability
# predictions.
test_that("the svm model works", {
    ## Example from 13.1 of _An Introduction to Information Retrieval_
    ## d1-d4 carry a training label; d5 has an NA label.
    corp <- corpus(c(d1 = "Chinese Beijing Chinese",
                     d2 = "Chinese Chinese Shanghai",
                     d3 = "Chinese Macao",
                     d4 = "Tokyo Japan Chinese",
                     d5 = "Chinese Chinese Chinese Tokyo Japan"),
                   docvars = data.frame(train = factor(c("Y", "Y", "Y", "N", NA))))
    dfmat <- dfm(tokens(corp), tolower = FALSE)
    # fix the seed before fitting so the numeric expectations are reproducible
    set.seed(10)
    tmod <- textmodel_svm(dfmat, y = docvars(dfmat, "train"), scale = TRUE)

    # the print method should at least echo the call
    expect_output(
        print(tmod),
        "Call:"
    )

    # `tolerance` is written out in full: it follows `...` in expect_equal()'s
    # signature, so the abbreviation `tol` is not matched by expect_equal()
    # itself and would only work if a downstream comparator partial-matched it.
    expect_equal(
        coef(tmod)[1, 1:3, drop = FALSE],
        matrix(c(0.3556985, 0.1370573, 0.1399235), nrow = 1,
               dimnames = list(NULL, c("Chinese", "Beijing", "Shanghai"))),
        tolerance = .01
    )

    expect_equal(names(summary(tmod)), c("call", "estimated.feature.scores"))
    # predict() without newdata returns one class per document, d5 included
    expect_identical(
        predict(tmod, type = "class"),
        factor(c(d1 = "Y", d2 = "Y", d3 = "Y", d4 = "N", d5 = "N"))
    )
    # the model fitted above (no `type` given) must refuse probability output
    expect_error(
        predict(tmod, type = "probability"),
        "probability predictions not implemented for this model type"
    )

    # refit with type = 0, for which probability predictions are expected
    set.seed(10)
    tmod <- textmodel_svm(dfmat, y = docvars(dfmat, "train"), scale = TRUE, type = 0)
    # loose tolerance: only the rough shape of the probabilities is pinned
    expect_equal(
        predict(tmod, type = "probability"),
        matrix(c(.8, .8, .7, .5, .7, .2, .2, .3, .5, .3), ncol = 2,
               dimnames = list(paste0("d", 1:5), c("Y", "N"))),
        tolerance = .1
    )
})

# Checks class predictions on the toy corpus under each non-default
# `weight` setting exercised here ("docfreq" and "termfreq").
test_that("the svm model works with different weights", {
    ## Example from 13.1 of _An Introduction to Information Retrieval_
    txt <- c(d1 = "Chinese Beijing Chinese",
             d2 = "Chinese Chinese Shanghai",
             d3 = "Chinese Macao",
             d4 = "Tokyo Japan Chinese",
             d5 = "Chinese Chinese Chinese Tokyo Japan")
    train_labels <- data.frame(train = factor(c("Y", "Y", "Y", "N", NA)))
    toy_dfm <- dfm(tokens(corpus(txt, docvars = train_labels)), tolower = FALSE)

    # both weighting schemes are expected to yield the same predicted classes
    predicted <- c(d1 = "Y", d2 = "Y", d3 = "Y", d4 = "N", d5 = "Y")

    for (wt in c("docfreq", "termfreq")) {
        # reset the seed before every fit so each model starts from the same
        # RNG state
        set.seed(10)
        fit <- textmodel_svm(toy_dfm, y = docvars(toy_dfm, "train"), weight = wt)
        expect_identical(
            predict(fit, type = "class"),
            factor(predicted, levels = sort(fit$classnames))
        )
    }
})

# Fits the model with bias = 0 on a subset of the movie-review corpus and
# checks the predicted class for a single document supplied as `newdata`.
test_that("the svm model works with bias = 0", {
    set.seed(100)
    # documents 1-100 and 1001-1101 of the corpus form the training set
    train_idx <- c(1:100, 1001:1101)
    train_dfm <- dfm(tokens(data_corpus_moviereviews[train_idx]))
    fit <- textmodel_svm(train_dfm, y = train_dfm$sentiment, bias = 0)

    # the newdata dfm is built inside suppressWarnings() so that any warning
    # raised while constructing or predicting on it is silenced alike
    pred <- suppressWarnings(
        predict(fit, newdata = dfm(tokens(data_corpus_moviereviews[1101])), type = "class")
    )
    expect_identical(
        pred,
        factor(c("cv100_11528.txt" = "pos"), levels = c("neg", "pos"))
    )
})

# Fits a model with more than two classes on the tf-idf weighted Irish budget
# corpus and checks the predicted class of the first three documents.
test_that("multiclass prediction works", {
    # one label per document; NA marks documents without a training label
    party <- c(rep(NA, 3), "SF", "FF", "FG", NA, "LAB", NA,
               NA, "Green", rep(NA, 3))
    budget_dfm <- dfm_tfidf(dfm(tokens(data_corpus_irishbudget2010)))

    # NOTE(review): unlike the other tests, no seed is set before this fit;
    # confirm the expected classes are stable across RNG states
    fit <- textmodel_svm(budget_dfm, y = party, weight = "uniform")

    expected <- factor(
        c("Lenihan, Brian (FF)" = "Green",
          "Bruton, Richard (FG)" = "FG",
          "Burton, Joan (LAB)" = "FG"),
        levels = sort(fit$classnames)
    )
    expect_equal(head(predict(fit, type = "class"), 3), expected)
})

# Try the quanteda.textmodels package in your browser
#
# Any scripts or data that you put into this service are public.
#
# quanteda.textmodels documentation built on Sept. 11, 2024, 8:19 p.m.