Nothing
test_that("RBI works", {
set.seed(10)
corpus = readRDS(testthat::test_path("data", "enron.rds"))
unknown = quanteda::corpus_subset(corpus, texttype == "unknown")
known = quanteda::corpus_subset(corpus, texttype == "known")
# corpus version
q.data <- unknown[c(1, 2)]
k.data <- known[c(1:2, 5:6)]
cand.imps <- known[7:50]
results.corpus <- impostors(q.data, k.data, cand.imps, algorithm = "RBI", k = 100) |>
suppressWarnings()
# dfm version
d = vectorize(c(q.data, k.data, cand.imps), tokens = "character", remove_punct = F, remove_symbols = T,
remove_numbers = T, lowercase = F, n = 5, weighting = "rel", trim = T,
threshold = 1500)
results.dfm <- impostors(q.data = quanteda::dfm_subset(d, quanteda::docnames(d)
%in% quanteda::docnames(q.data)),
k.data = quanteda::dfm_subset(d, quanteda::docnames(d)
%in% quanteda::docnames(k.data)),
cand.imps = quanteda::dfm_subset(d, quanteda::docnames(d)
%in% quanteda::docnames(cand.imps)),
algorithm = "RBI", k = 100) |>
suppressWarnings()
testthat::expect(results.corpus[1, 4], 0.540)
testthat::expect(results.dfm[1, 4], 0.539)
testthat::expect(results.corpus[2, 4], 0.207)
testthat::expect(results.dfm[2, 4], 0.240)
# impostors as rest of K version
q.data <- unknown[c(1, 2)]
k.data <- known[c(1:50)]
cand.imps <- k.data
results.corpus <- impostors(q.data, k.data, cand.imps, algorithm = "RBI", k = 100) |>
suppressWarnings()
testthat::expect(results.corpus[4, 4], 0.975)
testthat::expect(results.corpus[2, 4], 0.172)
})
test_that("KGI works", {
set.seed(10)
corpus = readRDS(testthat::test_path("data", "enron.rds"))
unknown = quanteda::corpus_subset(corpus, texttype == "unknown")
known = quanteda::corpus_subset(corpus, texttype == "known")
# corpus version
q.data <- unknown[c(1, 2)]
k.data <- known[c(1:2, 5:6)]
cand.imps <- known[7:50]
results.corpus <- impostors(q.data, k.data, cand.imps, algorithm = "KGI") |>
suppressWarnings()
# dfm version
d = vectorize(c(q.data, k.data, cand.imps), tokens = "character", remove_punct = F, remove_symbols = T,
remove_numbers = T, lowercase = F, n = 4, weighting = "tf-idf", trim = F)
results.dfm <- impostors(q.data = quanteda::dfm_subset(d, quanteda::docnames(d)
%in% quanteda::docnames(q.data)),
k.data = quanteda::dfm_subset(d, quanteda::docnames(d)
%in% quanteda::docnames(k.data)),
cand.imps = quanteda::dfm_subset(d, quanteda::docnames(d)
%in% quanteda::docnames(cand.imps)),
algorithm = "KGI") |>
suppressWarnings()
testthat::expect(results.corpus[1, 4], 0.75)
testthat::expect(results.dfm[1, 4], 0.73)
testthat::expect_lt(results.corpus[2, 4], 0.01)
testthat::expect(results.dfm[2, 4], 0.01)
})
test_that("IM works", {
set.seed(10)
corpus = readRDS(testthat::test_path("data", "enron.rds"))
unknown = quanteda::corpus_subset(corpus, texttype == "unknown")
known = quanteda::corpus_subset(corpus, texttype == "known")
# corpus version
q.data <- unknown[c(1, 2)]
k.data <- known[c(1, 5)]
cand.imps <- known[7:50]
results.corpus <- impostors(q.data, k.data, cand.imps, algorithm = "IM") |>
suppressWarnings()
# dfm version
d = vectorize(c(q.data, k.data, cand.imps), tokens = "character", remove_punct = F,
remove_symbols = T, remove_numbers = T, lowercase = F, n = 4, weighting = "tf-idf",
trim = F)
results.dfm <- impostors(q.data = quanteda::dfm_subset(d, quanteda::docnames(d)
%in% quanteda::docnames(q.data)),
k.data = quanteda::dfm_subset(d, quanteda::docnames(d)
%in% quanteda::docnames(k.data)),
cand.imps = quanteda::dfm_subset(d, quanteda::docnames(d)
%in% quanteda::docnames(cand.imps)),
algorithm = "IM") |>
suppressWarnings()
testthat::expect(results.corpus[4, 4], 0.57)
testthat::expect(results.dfm[4, 4], 0.585)
testthat::expect_lt(results.corpus[2, 4], 0.01)
testthat::expect_lt(results.dfm[2, 4], 0.001)
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.