test-textmodel_lr.R
In quanteda.textmodels: Scaling Models and Classifiers for Textual Data

library("quanteda")

# Two-class case: fit textmodel_lr() on a resampled toy corpus and pin the
# printed output, the coefficients, and the class/probability predictions.
test_that("the lr model works with binomial classification", {
    ## Example based on 13.1 of _An Introduction to Information Retrieval_
    # d6 has no training label (NA in `train`).
    corp <- corpus(
        c(d1 = "Chinese Beijing Chinese",
          d2 = "Chinese Chinese Shanghai",
          d3 = "Chinese Macao",
          d4 = "Tokyo Japan Chinese",
          d5 = "London England Chinese",
          d6 = "Chinese Chinese Chinese Tokyo Japan"),
        docvars = data.frame(train = factor(c("Y", "Y", "Y", "N", "N", NA)))
    )
    dfmat <- dfm(tokens(corp), tolower = FALSE)
    # Keep the six-document dfm for prediction; `dfmat` is resampled below.
    dfmat_test <- dfmat

    # Seed fixed ahead of the resampling and the fit; the hard-coded
    # coefficients below presumably depend on it.
    set.seed(1)
    dfmat <- dfm_sample(dfmat, 100, replace = TRUE)
    tmod <- textmodel_lr(dfmat, y = docvars(dfmat, "train"), nfolds = 3)

    expect_output(print(tmod), "Call:")

    # `tolerance` is spelled out in full: it is the documented argument of
    # expect_equal(), and an abbreviated name is not reliably matched to it.
    expect_equal(
        as.matrix(coef(tmod)),
        matrix(
            c(6.60662, 0.577683, 0, 0, 0, -12.042569, -2.236915,
              -14.280884, 0),
            ncol = 1,
            dimnames = list(
                c("(Intercept)", "Chinese",
                  "Beijing", "Shanghai", "Macao",
                  "Tokyo", "Japan", "London", "England"),
                "Y"
            )
        ),
        tolerance = .00001
    )

    expect_identical(
        predict(tmod, newdata = dfmat_test, type = "class"),
        factor(c(d1 = "Y", d2 = "Y", d3 = "Y", d4 = "N", d5 = "N", d6 = "N"))
    )

    # NOTE(review): a seed is set before predict(); whether prediction uses the
    # RNG is not visible from this file -- kept as in the original.
    set.seed(10)
    # Loose tolerance: probabilities are only checked to be close to 0 or 1.
    expect_equal(
        predict(tmod, newdata = dfmat_test, type = "probability"),
        matrix(c(1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1), ncol = 2,
               dimnames = list(paste0("d", 1:6), c("Y", "N"))),
        tolerance = .1
    )
})

# Three-class case: fit textmodel_lr() on a resampled toy corpus labelled
# C / J / G and pin the printed output, coefficients, and predictions.
test_that("the lr model works with multinomial classification", {
    corp <- corpus(
        c(d1 = "Chinese Beijing Chinese",
          d2 = "Chinese Chinese Shanghai",
          d3 = "Chinese Macao",
          d4 = "Tokyo Japan Chinese",
          d5 = "Japan Japan Sushi",
          d6 = "Bratwurst German Berlin"),
        docvars = data.frame(train = factor(c("C", "C", "C", "J", "J", "G")))
    )
    dfmat <- dfm(tokens(corp), tolower = FALSE)
    # Keep the six-document dfm for prediction; `dfmat` is resampled below.
    dfmat_test <- dfmat

    # Seed fixed ahead of the resampling and the fit; the hard-coded
    # coefficients below presumably depend on it.
    set.seed(1)
    dfmat <- dfm_sample(dfmat, 100, replace = TRUE)
    tmod <- textmodel_lr(dfmat, y = docvars(dfmat, "train"), nfolds = 3)

    expect_output(print(tmod), "Call:")

    # Expected coefficients: one column per class (C, G, J), filled column-wise.
    # `tolerance` is spelled out in full: it is the documented argument of
    # expect_equal(), and an abbreviated name is not reliably matched to it.
    expect_equal(
        as.matrix(coef(tmod)),
        matrix(
            c(0.535191, 3.589453, 0, 0, 2.768396, -0.283362, 0, 0, 0, 0, 0,
              -0.356168, 0, 0, 0, 0, 0, 0, 0, 8.08106, 0, 0, -0.179023, 0,
              0, 0, 0, 6.491737, 4.13242, 0, 0, 0, 0),
            ncol = 3,
            dimnames = list(
                c("(Intercept)", "Chinese", "Beijing",
                  "Shanghai", "Macao", "Tokyo", "Japan",
                  "Sushi", "Bratwurst", "German", "Berlin"),
                c("C", "G", "J")
            )
        ),
        tolerance = .000001
    )

    expect_identical(
        predict(tmod, newdata = dfmat_test, type = "class"),
        factor(c(d1 = "C", d2 = "C", d3 = "C", d4 = "J", d5 = "J", d6 = "G"))
    )

    # NOTE(review): a seed is set before predict(); whether prediction uses the
    # RNG is not visible from this file -- kept as in the original.
    set.seed(10)
    # Loose tolerance: probabilities are only checked to be close to 0 or 1.
    expect_equal(
        predict(tmod, newdata = dfmat_test, type = "probability"),
        matrix(c(1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0),
               ncol = 3, dimnames = list(paste0("d", 1:6), c("C", "G", "J"))),
        tolerance = .1
    )
})

Any scripts or data that you put into this service are public.

quanteda.textmodels documentation built on April 12, 2025, 1:43 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

quanteda.textmodels
Scaling Models and Classifiers for Textual Data

tests/testthat/test-textmodel_lr.R
In quanteda.textmodels: Scaling Models and Classifiers for Textual Data

Try the quanteda.textmodels package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

quanteda.textmodels Scaling Models and Classifiers for Textual Data

tests/testthat/test-textmodel_lr.R In quanteda.textmodels: Scaling Models and Classifiers for Textual Data

Try the quanteda.textmodels package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

quanteda.textmodels
Scaling Models and Classifiers for Textual Data

tests/testthat/test-textmodel_lr.R
In quanteda.textmodels: Scaling Models and Classifiers for Textual Data