test-textmodel_svm.R
In quanteda.textmodels: Scaling Models and Classifiers for Textual Data

library("quanteda")

test_that("the svm model works", {
    ## Example from 13.1 of _An Introduction to Information Retrieval_
    corp <- corpus(c(d1 = "Chinese Beijing Chinese",
                     d2 = "Chinese Chinese Shanghai",
                     d3 = "Chinese Macao",
                     d4 = "Tokyo Japan Chinese",
                     d5 = "Chinese Chinese Chinese Tokyo Japan"),
                   docvars = data.frame(train = factor(c("Y", "Y", "Y", "N", NA))))
    dfmat <- dfm(tokens(corp), tolower = FALSE)
    set.seed(10)
    tmod <- textmodel_svm(dfmat, y = docvars(dfmat, "train"), scale = TRUE)

    expect_output(
        print(tmod),
        "Call:"
    )

    expect_equal(
        coef(tmod)[1, 1:3, drop = FALSE],
        matrix(c(0.3556985, 0.1370573, 0.1399235), nrow = 1,
               dimnames = list(NULL, c("Chinese", "Beijing", "Shanghai"))),
        tol = .01
    )

    expect_equal(names(summary(tmod)), c("call", "estimated.feature.scores"))
    expect_identical(
        predict(tmod, type = "class"),
        factor(c(d1 = "Y", d2 = "Y", d3 = "Y", d4 = "N", d5 = "N"))
    )
    expect_error(
        predict(tmod, type = "probability"),
        "probability predictions not implemented for this model type"
    )

    # for model = 0 type
    set.seed(10)
    tmod <- textmodel_svm(dfmat, y = docvars(dfmat, "train"), scale = TRUE, type = 0)
    expect_equal(
        predict(tmod, type = "probability"),
        matrix(c(.8, .8, .7, .5, .7, .2, .2, .3, .5, .3), ncol = 2,
               dimnames = list(paste0("d", 1:5), c("Y", "N"))),
        tol = .1
    )
})

test_that("the svm model works with different weights", {
    ## Example from 13.1 of _An Introduction to Information Retrieval_
    corp <- corpus(c(d1 = "Chinese Beijing Chinese",
                     d2 = "Chinese Chinese Shanghai",
                     d3 = "Chinese Macao",
                     d4 = "Tokyo Japan Chinese",
                     d5 = "Chinese Chinese Chinese Tokyo Japan"),
                   docvars = data.frame(train = factor(c("Y", "Y", "Y", "N", NA))))
    dfmat <- dfm(tokens(corp), tolower = FALSE)

    set.seed(10)
    tmod <- textmodel_svm(dfmat, y = docvars(dfmat, "train"), weight = "docfreq")
    expect_identical(
        predict(tmod, type = "class"),
        factor(c(d1 = "Y", d2 = "Y", d3 = "Y", d4 = "N", d5 = "Y"), levels = sort(tmod$classnames))
    )
    set.seed(10)
    tmod <- textmodel_svm(dfmat, y = docvars(dfmat, "train"), weight = "termfreq")
    expect_identical(
        predict(tmod, type = "class"),
        factor(c(d1 = "Y", d2 = "Y", d3 = "Y", d4 = "N", d5 = "Y"), levels = sort(tmod$classnames))
    )
})

test_that("the svm model works with bias = 0", {
    set.seed(100)
    dfmat <- tokens(data_corpus_moviereviews[c(1:100, 1001:1101)]) %>%
        dfm()
    tmod <- textmodel_svm(dfmat, y = dfmat$sentiment, bias = 0)
    expect_identical(
        suppressWarnings(predict(tmod, newdata = dfm(tokens(data_corpus_moviereviews[1101])), type = "class")),
        factor(c("cv100_11528.txt" = "pos"), levels = c("neg", "pos"))
    )
})

test_that("multiclass prediction works", {
    dfmat <- dfm(tokens(data_corpus_irishbudget2010)) %>%
        dfm_tfidf()
    tmod2 <- textmodel_svm(dfmat,
                           y = c(rep(NA, 3), "SF", "FF", "FG", NA, "LAB", NA,
                                 NA, "Green", rep(NA, 3)),
                           weight = "uniform")
    expect_equal(
        head(predict(tmod2, type = "class"), 3),
        factor(c("Lenihan, Brian (FF)" = "Green", "Bruton, Richard (FG)" = "FG",
                 "Burton, Joan (LAB)" = "FG"),
               levels = sort(tmod2$classnames))
    )
})

Any scripts or data that you put into this service are public.

quanteda.textmodels documentation built on Sept. 11, 2024, 8:19 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

quanteda.textmodels
Scaling Models and Classifiers for Textual Data

tests/testthat/test-textmodel_svm.R
In quanteda.textmodels: Scaling Models and Classifiers for Textual Data

Try the quanteda.textmodels package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

quanteda.textmodels Scaling Models and Classifiers for Textual Data

tests/testthat/test-textmodel_svm.R In quanteda.textmodels: Scaling Models and Classifiers for Textual Data

Try the quanteda.textmodels package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

quanteda.textmodels
Scaling Models and Classifiers for Textual Data

tests/testthat/test-textmodel_svm.R
In quanteda.textmodels: Scaling Models and Classifiers for Textual Data