# tests/testthat/test-contentmask.R

test_that("content masking works", {

  # Skip (rather than fail) when spacyr cannot be initialised, e.g. because
  # no Python / spaCy environment is available on this machine.
  spacy_attempt <- try(
    suppressMessages(spacyr::spacy_initialize()),
    silent = TRUE
  )
  testthat::skip_if(
    inherits(spacy_attempt, "try-error"),
    message = "spacyr environment not present"
  )

  # NOTE(review): expect_snapshot() stores the code of its argument next to
  # the output, so the calls below (object names, argument spelling, and the
  # `F` shorthand) are intentionally left exactly as recorded. Changing them
  # would require re-accepting the snapshots.

  # --- Fixture corpus: first three documents of the bundled Enron data ----
  enron_path <- testthat::test_path("data", "enron_corpus.rds")
  enron.small <- readRDS(enron_path)[seq_len(3)]

  expect_snapshot(contentmask(enron.small, algorithm = "POSnoise"))
  expect_snapshot(contentmask(enron.small, algorithm = "frames"))
  expect_snapshot(contentmask(enron.small, algorithm = "textdistortion"))

  # --- Toy corpus: awkward tokens in one document, emoji only in the other --
  # The first text mixes a contraction, embedded newlines, an e-mail address,
  # a URL, an abbreviation and emoji; the second consists solely of emoji.
  mixed_text <- "The cat was on the chair. He didn't move\ncat@pets.com;\nhttp://quanteda.io/ i.e. a test 😻 👍"
  emoji_only_text <- "😻 👍"
  toy.corpus <- quanteda::corpus(c(mixed_text, emoji_only_text))

  expect_snapshot(contentmask(toy.corpus, algorithm = "POSnoise"))
  expect_snapshot(contentmask(toy.corpus, algorithm = "POSnoise", replace_non_ascii = F))
  expect_snapshot(contentmask(toy.corpus, algorithm = "textdistortion"))

})

# Try the idiolect package in your browser
#
# Any scripts or data that you put into this service are public.
#
# idiolect documentation built on Sept. 11, 2024, 5:34 p.m.