tests/testthat/test-sparse-tidiers.R

test_that("Can tidy DocumentTermMatrices and TermDocumentMatrices", {
  if (require("tm", quietly = TRUE)) {
    txt <- system.file("texts", "txt", package = "tm")
    ovid <- VCorpus(DirSource(txt, encoding = "UTF-8"),
                    readerControl = list(language = "lat")
    )

    ovid_dtm <- DocumentTermMatrix(ovid)
    ovid_dtm_td <- tidy(ovid_dtm)

    expect_s3_class(ovid_dtm_td, "tbl_df")
    expect_equal(sort(unique(ovid_dtm_td$document)), sort(rownames(ovid_dtm)))
    expect_equal(sort(unique(ovid_dtm_td$term)), sort(colnames(ovid_dtm)))

    ovid_tdm <- TermDocumentMatrix(ovid)
    ovid_tdm_td <- tidy(ovid_tdm)

    expect_s3_class(ovid_tdm_td, "tbl_df")
    expect_equal(sort(unique(ovid_tdm_td$document)), sort(colnames(ovid_tdm)))
    expect_equal(sort(unique(ovid_tdm_td$term)), sort(rownames(ovid_tdm)))
  }
})


test_that("Can tidy dfm from quanteda", {
  if (requireNamespace("quanteda", quietly = TRUE)) {
    dfm_obj <- quanteda::dfm(quanteda::tokens(quanteda::data_corpus_inaugural))
    dfm_obj_td <- tidy(dfm_obj)

    expect_s3_class(dfm_obj_td, "tbl_df")
    expect_equal(sort(unique(dfm_obj_td$document)), sort(rownames(dfm_obj)))
    expect_equal(sort(unique(dfm_obj_td$term)), sort(colnames(dfm_obj)))
  }
})

Try the tidytext package in your browser

Any scripts or data that you put into this service are public.

tidytext documentation built on May 29, 2024, 5:42 a.m.