lexRankr: Extractive Summarization of Text with the LexRank Algorithm

context("bind_lexrank_")

# test output str --------------------------------------------------------
test_that("correct ouput class and str", {
  df <- data.frame(doc_id = 1:3, 
                   text = c("Testing the system. Second sentence for you.", 
                            "System testing the tidy documents df.", 
                            "Documents will be parsed and lexranked."),
                   stringsAsFactors = FALSE)
  
  test_result <- unnest_sentences(df, sents, text)
  test_result <- bind_lexrank_(test_result, "sents", "doc_id", level = 'sentences')
  
  expect_equal(dim(test_result), c(4,4))
  expect_true(is.data.frame(test_result))
  expect_equal(names(test_result), c("doc_id","sent_id","sents","lexrank"))
  
  test_result <- unnest_sentences(df, sents, text, drop=FALSE) 
  test_result <- bind_lexrank_(test_result, "sents", "doc_id", level = 'sentences')
  
  expect_equal(dim(test_result), c(4,5))
  expect_equal(names(test_result), c("doc_id","text","sent_id","sents","lexrank"))
  
  df <- data.frame(doc_id = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L), 
                   sent_id = c(1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), 
                   sents = c("Testing the system.", "Testing the system.", "Testing the system.", 
                             "Second sentence for you.", "Second sentence for you.", "Second sentence for you.", "Second sentence for you.", 
                             "System testing the tidy documents df.", "System testing the tidy documents df.", "System testing the tidy documents df.", 
                             "System testing the tidy documents df.", "System testing the tidy documents df.", "System testing the tidy documents df.", 
                             "Documents will be parsed and lexranked.", "Documents will be parsed and lexranked.", "Documents will be parsed and lexranked.", 
                             "Documents will be parsed and lexranked.", "Documents will be parsed and lexranked.", "Documents will be parsed and lexranked."),
                   tokens = c("testing", "the", "system", "second", "sentence", "for", "you", "system", "testing", "the", 
                              "tidy", "documents", "df", "documents", "will", "be", "parsed", "and", "lexranked"),
                   stringsAsFactors = FALSE)
  
  test_result <- bind_lexrank_(df, "tokens", "doc_id", "sent_id", "tokens")
  
  expect_equal(dim(test_result), c(19,5))
  expect_equal(names(test_result), c("doc_id","sent_id","sents","tokens","lexrank"))
})

# test bad input -------------------------------------------------------
test_that("test input checking", {
  df <- data.frame(doc_id = 1:3, 
                   text = c("Testing the system. Second sentence for you.", 
                            "System testing the tidy documents df.", 
                            "Documents will be parsed and lexranked."),
                   stringsAsFactors = FALSE)
  df <- unnest_sentences(df, sents, text)
  
  expect_error(bind_lexrank_(df, "sents", "fake"))
  expect_error(bind_lexrank_(NULL, "sents", "doc_id"))
  expect_error(bind_lexrank_(df, "sents", "doc_id", level="fake"))
  # expect_warning(bind_lexrank_(df, "sents", "doc_id", level=c("sentences","tokens")))
  
  df <- data.frame(doc_id = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L), 
                   sent_id = c(1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), 
                   sents = c("Testing the system.", "Testing the system.", "Testing the system.", 
                             "Second sentence for you.", "Second sentence for you.", "Second sentence for you.", "Second sentence for you.", 
                             "System testing the tidy documents df.", "System testing the tidy documents df.", "System testing the tidy documents df.", 
                             "System testing the tidy documents df.", "System testing the tidy documents df.", "System testing the tidy documents df.", 
                             "Documents will be parsed and lexranked.", "Documents will be parsed and lexranked.", "Documents will be parsed and lexranked.", 
                             "Documents will be parsed and lexranked.", "Documents will be parsed and lexranked.", "Documents will be parsed and lexranked."),
                   tokens = c("testing", "the", "system", "second", "sentence", "for", "you", "system", "testing", "the", 
                              "tidy", "documents", "df", "documents", "will", "be", "parsed", "and", "lexranked"),
                   stringsAsFactors = FALSE)
  
  expect_error(bind_lexrank_(df, "tokens", "doc_id", "fake", level="tokens"))
  expect_error(bind_lexrank_(df, "tokens", "doc_id", level="tokens"))
  # expect_warning(bind_lexrank_(df, "tokens", "doc_id", "sent_id", level=c("tokens","sentences")))
})

# test output val ------------------------------------------------------
test_that("output value", {
  df <- data.frame(doc_id = 1:3, 
                   text = c("Testing the system. Second sentence for you.", 
                            "System testing the tidy documents df.", 
                            "Documents will be parsed and lexranked."),
                   stringsAsFactors = FALSE)
  df <- unnest_sentences(df, sents, text)
  
  test_result     <- bind_lexrank_(df, "sents", "doc_id", level="sentences")
  expected_result <- data.frame(doc_id = c(1L, 1L, 2L, 3L), 
                                sent_id = c(1L, 2L, 1L, 1L), 
                                sents = c("Testing the system.", "Second sentence for you.", 
                                          "System testing the tidy documents df.", "Documents will be parsed and lexranked."), 
                                lexrank = c(0.5, NA, 0.5, NA),
                                stringsAsFactors = FALSE)
  
  expect_equal(test_result, expected_result)
  
  df <- data.frame(doc_id = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L), 
                   sent_id = c(1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), 
                   sents = c("Testing the system.", "Testing the system.", "Testing the system.", 
                             "Second sentence for you.", "Second sentence for you.", "Second sentence for you.", "Second sentence for you.", 
                             "System testing the tidy documents df.", "System testing the tidy documents df.", "System testing the tidy documents df.", 
                             "System testing the tidy documents df.", "System testing the tidy documents df.", "System testing the tidy documents df.", 
                             "Documents will be parsed and lexranked.", "Documents will be parsed and lexranked.", "Documents will be parsed and lexranked.", 
                             "Documents will be parsed and lexranked.", "Documents will be parsed and lexranked.", "Documents will be parsed and lexranked."),
                   tokens = c("testing", "the", "system", "second", "sentence", "for", "you", "system", "testing", "the", 
                              "tidy", "documents", "df", "documents", "will", "be", "parsed", "and", "lexranked"),
                   stringsAsFactors = FALSE)
  
  test_result     <- bind_lexrank_(df, "tokens", "doc_id", "sent_id", level="sentences")
  test_result$lexrank <- round(test_result$lexrank, 5)
  expected_result <- data.frame(doc_id = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L), 
                                sent_id = c(1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), 
                                sents = c("Testing the system.", "Testing the system.", "Testing the system.", 
                                          "Second sentence for you.", "Second sentence for you.", "Second sentence for you.", "Second sentence for you.", 
                                          "System testing the tidy documents df.", "System testing the tidy documents df.", "System testing the tidy documents df.", 
                                          "System testing the tidy documents df.", "System testing the tidy documents df.", "System testing the tidy documents df.", 
                                          "Documents will be parsed and lexranked.", "Documents will be parsed and lexranked.", "Documents will be parsed and lexranked.", 
                                          "Documents will be parsed and lexranked.", "Documents will be parsed and lexranked.", "Documents will be parsed and lexranked."),
                                tokens = c("testing", "the", "system", "second", "sentence", "for", "you", "system", "testing", "the", 
                                           "tidy", "documents", "df", "documents", "will", "be", "parsed", "and", "lexranked"),
                                lexrank = c(0.16667, NA, 0.16667, NA, NA, NA, NA, 0.16667, 0.16667, NA, NA, 0.16667, NA, 0.16667, NA, NA, NA, NA, NA),
                                stringsAsFactors = FALSE)
  
  expect_equal(test_result, expected_result)
})

AdamSpannbauer/lexRankr documentation built on Dec. 9, 2022, 3:44 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

AdamSpannbauer/lexRankr
Extractive Summarization of Text with the LexRank Algorithm

tests/testthat/test-bind_lexrank_.R
In AdamSpannbauer/lexRankr: Extractive Summarization of Text with the LexRank Algorithm

R Package Documentation

Browse R Packages

We want your feedback!

AdamSpannbauer/lexRankr Extractive Summarization of Text with the LexRank Algorithm

tests/testthat/test-bind_lexrank_.R In AdamSpannbauer/lexRankr: Extractive Summarization of Text with the LexRank Algorithm

R Package Documentation

Browse R Packages

We want your feedback!

AdamSpannbauer/lexRankr
Extractive Summarization of Text with the LexRank Algorithm

tests/testthat/test-bind_lexrank_.R
In AdamSpannbauer/lexRankr: Extractive Summarization of Text with the LexRank Algorithm