# Nothing
library("quanteda")
# Exercises textmodel_svm() on a five-document toy corpus: print output,
# the first three coefficients, the names of summary(), class predictions,
# and probability predictions (an error for the default fit, a matrix for
# type = 0).
test_that("the svm model works", {
  ## Example from 13.1 of _An Introduction to Information Retrieval_
  corp <- corpus(
    c(d1 = "Chinese Beijing Chinese",
      d2 = "Chinese Chinese Shanghai",
      d3 = "Chinese Macao",
      d4 = "Tokyo Japan Chinese",
      d5 = "Chinese Chinese Chinese Tokyo Japan"),
    # d5 carries an NA label; d1-d4 are labelled "Y"/"N"
    docvars = data.frame(train = factor(c("Y", "Y", "Y", "N", NA)))
  )
  dfmat <- dfm(tokens(corp), tolower = FALSE)

  # Seed fixed before fitting so the reference values below are reproducible
  set.seed(10)
  tmod <- textmodel_svm(dfmat, y = docvars(dfmat, "train"), scale = TRUE)

  expect_output(
    print(tmod),
    "Call:"
  )

  # `tolerance` is spelled out in full: it follows `...` in expect_equal(),
  # so an abbreviated `tol` cannot be partial-matched to it.
  expect_equal(
    coef(tmod)[1, 1:3, drop = FALSE],
    matrix(c(0.3556985, 0.1370573, 0.1399235), nrow = 1,
           dimnames = list(NULL, c("Chinese", "Beijing", "Shanghai"))),
    tolerance = .01
  )

  expect_equal(names(summary(tmod)), c("call", "estimated.feature.scores"))

  expect_identical(
    predict(tmod, type = "class"),
    factor(c(d1 = "Y", d2 = "Y", d3 = "Y", d4 = "N", d5 = "N"))
  )

  # The fit above (no explicit `type`) must refuse probability predictions
  expect_error(
    predict(tmod, type = "probability"),
    "probability predictions not implemented for this model type"
  )

  # for model = 0 type
  set.seed(10)
  tmod <- textmodel_svm(dfmat, y = docvars(dfmat, "train"), scale = TRUE,
                        type = 0)
  # Expected probabilities are rounded to one decimal, hence the loose
  # tolerance; columns are ordered "Y", "N".
  expect_equal(
    predict(tmod, type = "probability"),
    matrix(c(.8, .8, .7, .5, .7, .2, .2, .3, .5, .3), ncol = 2,
           dimnames = list(paste0("d", 1:5), c("Y", "N"))),
    tolerance = .1
  )
})
# Fits textmodel_svm() with each of the "docfreq" and "termfreq" weight
# schemes on the toy corpus and checks the predicted class of every document.
test_that("the svm model works with different weights", {
  # Toy corpus: example from 13.1 of _An Introduction to Information Retrieval_
  texts <- c(d1 = "Chinese Beijing Chinese",
             d2 = "Chinese Chinese Shanghai",
             d3 = "Chinese Macao",
             d4 = "Tokyo Japan Chinese",
             d5 = "Chinese Chinese Chinese Tokyo Japan")
  labels <- data.frame(train = factor(c("Y", "Y", "Y", "N", NA)))
  toy_dfm <- dfm(tokens(corpus(texts, docvars = labels)), tolower = FALSE)

  # Both weight schemes are expected to yield the same class per document
  predicted <- c(d1 = "Y", d2 = "Y", d3 = "Y", d4 = "N", d5 = "Y")

  for (scheme in c("docfreq", "termfreq")) {
    # Re-seed before every fit so each model starts from the same RNG state
    set.seed(10)
    fit <- textmodel_svm(toy_dfm, y = docvars(toy_dfm, "train"),
                         weight = scheme)
    expect_identical(
      predict(fit, type = "class"),
      factor(predicted, levels = sort(fit$classnames))
    )
  }
})
# Fits textmodel_svm() with bias = 0 on a 201-document subset of the movie
# review corpus and checks the class predicted for document 1101.
test_that("the svm model works with bias = 0", {
  set.seed(100)
  train_dfm <- dfm(tokens(data_corpus_moviereviews[c(1:100, 1001:1101)]))
  fit <- textmodel_svm(train_dfm, y = train_dfm$sentiment, bias = 0)

  # The new-data dfm is built inside suppressWarnings() so that warnings from
  # tokenising, dfm construction and prediction are all silenced together.
  predicted <- suppressWarnings({
    new_dfm <- dfm(tokens(data_corpus_moviereviews[1101]))
    predict(fit, newdata = new_dfm, type = "class")
  })

  expect_identical(
    predicted,
    factor(c("cv100_11528.txt" = "pos"), levels = c("neg", "pos"))
  )
})
# Fits textmodel_svm() with five distinct class labels (the remaining
# documents are labelled NA) and checks the predictions for the first three
# documents.
test_that("multiclass prediction works", {
  budget_dfm <- dfm_tfidf(dfm(tokens(data_corpus_irishbudget2010)))

  # One label per document, in document order; NA marks unlabelled documents
  party <- c(rep(NA, 3), "SF", "FF", "FG", NA, "LAB", NA,
             NA, "Green", rep(NA, 3))
  fit <- textmodel_svm(budget_dfm, y = party, weight = "uniform")

  expected <- factor(
    c("Lenihan, Brian (FF)" = "Green",
      "Bruton, Richard (FG)" = "FG",
      "Burton, Joan (LAB)" = "FG"),
    levels = sort(fit$classnames)
  )
  expect_equal(head(predict(fit, type = "class"), 3), expected)
})
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.