# Tests for dplyr single-table verbs applied to corpus, tokens and dfm objects.
# Label for the group of tests in this file.
# NOTE(review): context() is deprecated in testthat 3rd edition, where the
# file name supplies the label - confirm which edition this package targets.
context("test single table dplyr verbs")
test_that("corpus arrange() works", {
  # The first three documents: 1789, 1793 and 1797.
  first_three <- data_corpus_inaugural[1:3]

  # Ordering by an explicit key vector.
  by_key <- arrange(first_three, c(3, 1, 2))
  expect_identical(
    docnames(by_key),
    c("1793-Washington", "1797-Adams", "1789-Washington")
  )

  # Ordering by a single docvar.
  by_president <- arrange(first_three, President)
  expect_identical(
    docnames(by_president),
    c("1797-Adams", "1789-Washington", "1793-Washington")
  )

  # Ordering by one docvar, then by a second one in descending order.
  by_president_year <- arrange(first_three, President, desc(Year))
  expect_identical(
    docnames(by_president_year),
    c("1797-Adams", "1793-Washington", "1789-Washington")
  )
})
test_that("corpus filter() works", {
  # A condition on a docvar keeps only the matching documents.
  roosevelts <- filter(data_corpus_inaugural, President == "Roosevelt")
  expect_identical(
    docnames(roosevelts),
    c("1905-Roosevelt", "1933-Roosevelt", "1937-Roosevelt",
      "1941-Roosevelt", "1945-Roosevelt")
  )

  # The docvars of the filtered corpus must equal this data.frame exactly,
  # including factor levels and row names.
  party_levels <- c("Democratic", "Democratic-Republican", "Federalist",
                    "none", "Republican", "Whig")
  expected_docvars <- structure(
    list(
      Year = c(1905L, 1933L, 1937L, 1941L, 1945L),
      President = rep("Roosevelt", 5),
      FirstName = c("Theodore", rep("Franklin D.", 4)),
      Party = structure(c(5L, 1L, 1L, 1L, 1L), .Label = party_levels,
                        class = "factor")
    ),
    row.names = c(NA, -5L),
    class = "data.frame"
  )
  expect_identical(docvars(roosevelts), expected_docvars)

  # A literal logical vector selects documents by position.
  first_three <- data_corpus_inaugural[1:3]
  kept_names <- c("1789-Washington", "1797-Adams")
  expect_identical(
    docnames(filter(first_three, c(TRUE, FALSE, TRUE))),
    kept_names
  )

  # The same logical vector supplied through a local variable named `x`.
  x <- c(TRUE, FALSE, TRUE)
  expect_identical(docnames(filter(first_three, x)), kept_names)
})
test_that("corpus mutate() works", {
  # A docvar derived from two existing docvars is appended after them.
  with_pyear <- mutate(
    data_corpus_inaugural[1],
    pyear = paste(President, Year, sep = "_")
  )
  expected_docvars <- structure(
    list(
      Year = 1789L,
      President = "Washington",
      FirstName = "George",
      Party = structure(
        4L,
        .Label = c("Democratic", "Democratic-Republican", "Federalist",
                   "none", "Republican", "Whig"),
        class = "factor"
      ),
      pyear = "Washington_1789"
    ),
    row.names = c(NA, -1L),
    class = "data.frame"
  )
  expect_identical(docvars(with_pyear), expected_docvars)

  # Arithmetic on a docvar; the result is extracted by name.
  with_century <- mutate(
    data_corpus_inaugural[1:3],
    cent = floor(Year / 100) * 100
  )
  expect_identical(docvars(with_century, "cent"), c(1700, 1700, 1700))
})
test_that("corpus transmute() works", {
  # Only the newly computed docvar is expected in the result.
  only_pyear <- transmute(
    data_corpus_inaugural[1],
    pyear = paste(President, Year, sep = "_")
  )
  expected_docvars <- structure(
    list(pyear = "Washington_1789"),
    row.names = c(NA, -1L),
    class = "data.frame"
  )
  expect_identical(docvars(only_pyear), expected_docvars)
})
test_that("corpus select() works", {
  first_two <- data_corpus_inaugural[1:2]

  # Selected docvars come back in the order they were requested.
  selected <- select(first_two, Party, Year)
  expect_identical(names(docvars(selected)), c("Party", "Year"))

  # Renaming one docvar leaves the others and their positions untouched.
  renamed <- rename(first_two, LastName = President)
  expect_identical(
    names(docvars(renamed)),
    c("Year", "LastName", "FirstName", "Party")
  )
})
test_that("pull works", {
  corp <- head(data_corpus_inaugural, 3)
  presidents <- c("Washington", "Washington", "Adams")

  # Extraction by docvar name.
  expect_identical(corp %>% pull(President), presidents)

  # Extraction by positive position.
  expect_identical(corp %>% pull(1), c(1789L, 1793L, 1797L))

  # Extraction by negative position.
  expected_party <- structure(
    c(4L, 4L, 3L),
    .Label = c("Democratic", "Democratic-Republican", "Federalist",
               "none", "Republican", "Whig"),
    class = "factor"
  )
  expect_identical(corp %>% pull(-1), expected_party)

  # The same extraction from a tokens object built from the corpus.
  toks <- tokens(corp)
  expect_identical(toks %>% pull(President), presidents)

  # And from a dfm built from those tokens.
  dfmat <- dfm(toks)
  expect_identical(dfmat %>% pull(President), presidents)
})
test_that("pull gives informative error with empty docvars", {
  # The same message is expected from all three object types.
  no_docvars_msg <- "Cannot extract document variables: corpus has no docvars"

  # A corpus built from bare character data.
  corp <- corpus(letters[1:3])
  expect_error(pull(corp), no_docvars_msg)

  toks <- tokens(corp)
  expect_error(pull(toks), no_docvars_msg)

  dfmat <- dfm(toks)
  expect_error(pull(dfmat), no_docvars_msg)
})
test_that("corpus distinct() works", {
  # Documents 1, 2 and 4: Washington, Washington, Jefferson.
  corp <- data_corpus_inaugural[c(1, 2, 4)]

  # Default (.keep_all = FALSE): the first document per distinct value is
  # kept and only the variable used for de-duplication remains.
  result <- distinct(corp, President)
  expect_identical(
    docnames(result),
    c("1789-Washington", "1801-Jefferson")
  )
  expect_identical(names(docvars(result)), "President")

  # .keep_all = TRUE: same documents, but every docvar is retained.
  result_keep_all <- distinct(corp, President, .keep_all = TRUE)
  expect_identical(
    docnames(result_keep_all),
    c("1789-Washington", "1801-Jefferson")
  )
  expect_identical(
    names(docvars(result_keep_all)),
    c("Year", "President", "FirstName", "Party")
  )

  # A single variable on a larger subset: repeated values must collapse.
  corp2 <- data_corpus_inaugural[1:10]
  result_party <- distinct(corp2, Party, .keep_all = TRUE)
  expect_true(
    nrow(docvars(result_party)) < nrow(docvars(corp2))
  )

  # Multiple variables: one document per distinct (President, Party)
  # combination. The expected count is derived from the input docvars rather
  # than hard-coded.
  key_vars <- c("President", "Party")
  result_multi <- distinct(corp2, President, Party, .keep_all = TRUE)
  multi_keys <- docvars(result_multi)[key_vars]
  expect_false(any(duplicated(multi_keys)))
  expect_equal(
    ndoc(result_multi),
    nrow(unique(docvars(corp2)[key_vars]))
  )
  expect_true(ndoc(result_multi) < ndoc(corp2))
})
test_that("corpus relocate() works", {
  corp <- data_corpus_inaugural[1:3]

  # With no position argument the docvar is moved to the front.
  party_first <- relocate(corp, Party)
  expect_identical(
    names(docvars(party_first)),
    c("Party", "Year", "President", "FirstName")
  )

  # Moving a docvar behind the last column.
  year_last <- relocate(corp, Year, .after = last_col())
  expect_identical(
    names(docvars(year_last)),
    c("President", "FirstName", "Party", "Year")
  )

  # Moving a docvar directly in front of another one.
  party_before_president <- relocate(corp, Party, .before = President)
  expect_identical(
    names(docvars(party_before_president)),
    c("Year", "Party", "President", "FirstName")
  )

  # Reordering docvars must not reorder the documents.
  expect_identical(docnames(party_first), docnames(corp))
})
test_that("corpus rename() works", {
  corp <- data_corpus_inaugural[1:3]

  # One docvar renamed: the name changes, position and values stay.
  renamed_one <- rename(corp, LastName = President)
  renamed_one_vars <- docvars(renamed_one)
  expect_identical(
    names(renamed_one_vars),
    c("Year", "LastName", "FirstName", "Party")
  )
  expect_identical(
    renamed_one_vars$LastName,
    c("Washington", "Washington", "Adams")
  )

  # Two docvars renamed in a single call.
  renamed_two <- rename(corp, LastName = President, Given = FirstName)
  expect_identical(
    names(docvars(renamed_two)),
    c("Year", "LastName", "Given", "Party")
  )

  # Neither the document names nor the texts may change.
  expect_identical(docnames(renamed_one), docnames(corp))
  expect_identical(as.character(renamed_one), as.character(corp))
})
test_that("corpus rename_with() works", {
  corp <- data_corpus_inaugural[1:3]

  # Without a selection the function is applied to every docvar name.
  upper_all <- rename_with(corp, toupper)
  expect_identical(
    names(docvars(upper_all)),
    c("YEAR", "PRESIDENT", "FIRSTNAME", "PARTY")
  )

  # With a selection helper only the matching names are transformed.
  upper_p <- rename_with(corp, toupper, starts_with("P"))
  expect_identical(
    names(docvars(upper_p)),
    c("Year", "PRESIDENT", "FirstName", "PARTY")
  )

  # A user-defined function applied to an explicit set of docvars.
  add_prefix <- function(x) {
    paste0("doc_", x)
  }
  prefixed <- rename_with(corp, add_prefix, all_of(c("Year", "President")))
  expect_identical(
    names(docvars(prefixed)),
    c("doc_Year", "doc_President", "FirstName", "Party")
  )

  # Renaming must leave document order and docvar values intact.
  expect_identical(docnames(upper_all), docnames(corp))
  expect_identical(docvars(upper_all)$YEAR, docvars(corp)$Year)
})
test_that("corpus slice() works", {
  corp <- data_corpus_inaugural[1:10]

  # Positive indices keep the documents at those positions.
  result <- slice(corp, 1:3)
  expect_identical(
    docnames(result),
    docnames(corp)[1:3]
  )
  expect_equal(ndoc(result), 3)

  # Negative indices drop the documents at those positions.
  result_neg <- slice(corp, -(1:5))
  expect_equal(ndoc(result_neg), 5)
  expect_identical(
    docnames(result_neg),
    docnames(corp)[6:10]
  )

  # Non-sequential indices: every selected document is checked, not just the
  # first one, so a wrong pick at any of the five positions is caught.
  result_skip <- slice(corp, c(1, 3, 5, 7, 9))
  expect_equal(ndoc(result_skip), 5)
  expect_identical(
    docnames(result_skip),
    docnames(corp)[c(1, 3, 5, 7, 9)]
  )
})
test_that("corpus slice_head() works", {
  corp <- data_corpus_inaugural[1:10]
  leading_three <- docnames(corp)[1:3]

  # A fixed number of leading documents.
  head_n <- slice_head(corp, n = 3)
  expect_equal(ndoc(head_n), 3)
  expect_identical(docnames(head_n), leading_three)

  # A proportion of the ten documents.
  head_prop <- slice_head(corp, prop = 0.3)
  expect_equal(ndoc(head_prop), 3)
  expect_identical(docnames(head_prop), leading_three)
})
test_that("corpus slice_tail() works", {
  corp <- data_corpus_inaugural[1:10]
  all_names <- docnames(corp)

  # A fixed number of trailing documents.
  tail_n <- slice_tail(corp, n = 3)
  expect_equal(ndoc(tail_n), 3)
  expect_identical(docnames(tail_n), all_names[8:10])

  # A proportion of the ten documents.
  tail_prop <- slice_tail(corp, prop = 0.2)
  expect_equal(ndoc(tail_prop), 2)
  expect_identical(docnames(tail_prop), all_names[9:10])
})
test_that("corpus slice_sample() works", {
  corp <- data_corpus_inaugural[1:10]
  all_names <- docnames(corp)

  # Sampling a fixed number of documents.
  set.seed(123)
  sample_n <- slice_sample(corp, n = 5)
  expect_equal(ndoc(sample_n), 5)
  expect_true(all(docnames(sample_n) %in% all_names))

  # Sampling a proportion of the documents.
  set.seed(456)
  sample_prop <- slice_sample(corp, prop = 0.5)
  expect_equal(ndoc(sample_prop), 5)
  expect_true(all(docnames(sample_prop) %in% all_names))

  # A draw under another seed is expected to differ from the first draw.
  set.seed(789)
  sample_other <- slice_sample(corp, n = 5)
  expect_false(identical(docnames(sample_n), docnames(sample_other)))
})
test_that("corpus slice_min() works", {
  corp <- data_corpus_inaugural[1:10]
  years_ascending <- sort(docvars(corp)$Year)

  # A fixed number of documents with the smallest Year values.
  result_n <- slice_min(corp, Year, n = 3)
  expect_equal(ndoc(result_n), 3)
  expect_true(all(docvars(result_n)$Year <= 1797))
  expect_identical(sort(docvars(result_n)$Year), years_ascending[1:3])

  # A proportion of the ten documents.
  result_prop <- slice_min(corp, Year, prop = 0.2)
  expect_equal(ndoc(result_prop), 2)
  expect_identical(sort(docvars(result_prop)$Year), years_ascending[1:2])

  # The selection must contain the overall minimum and consist of exactly the
  # five smallest Year values. The values are compared after sorting because
  # the row order of the result is not what is being asserted here.
  result_ordered <- slice_min(corp, Year, n = 5)
  expect_true(min(docvars(result_ordered)$Year) == min(docvars(corp)$Year))
  expect_identical(sort(docvars(result_ordered)$Year), years_ascending[1:5])
})
test_that("corpus slice_max() works", {
  corp <- data_corpus_inaugural[1:10]

  # A fixed number of documents (top 3 years are 1825, 1821, 1817).
  result_n <- slice_max(corp, Year, n = 3)
  expect_equal(ndoc(result_n), 3)
  expect_true(all(docvars(result_n)$Year >= 1817))

  # A proportion of the ten documents.
  result_prop <- slice_max(corp, Year, prop = 0.3)
  expect_equal(ndoc(result_prop), 3)

  # Ties. The Year values of documents 1-3 are all different, so Year cannot
  # exercise tie handling. President can: the maximum value "Washington"
  # occurs in documents 1 and 2.
  corp_ties <- data_corpus_inaugural[c(1, 2, 3)]
  result_ties <- slice_max(corp_ties, President, n = 1, with_ties = TRUE)
  expect_equal(ndoc(result_ties), 2)
  expect_identical(
    sort(docnames(result_ties)),
    c("1789-Washington", "1793-Washington")
  )

  # With ties disabled exactly n documents are returned.
  result_no_ties <- slice_max(corp_ties, President, n = 1, with_ties = FALSE)
  expect_equal(ndoc(result_no_ties), 1)
})
test_that("corpus add_count() works", {
  corp <- data_corpus_inaugural[1:10]

  # Counting by one variable adds an `n` docvar and keeps every document.
  result <- add_count(corp, President)
  result_vars <- docvars(result)
  expect_true("n" %in% names(result_vars))
  expect_equal(ndoc(result), ndoc(corp))

  # "Washington" appears twice among the first ten documents.
  washington_count <- unique(
    result_vars[result_vars$President == "Washington", "n"]
  )
  expect_equal(washington_count, 2)

  # A custom name replaces the default `n` column.
  result_custom <- add_count(corp, President, name = "pres_count")
  expect_true("pres_count" %in% names(docvars(result_custom)))
  expect_false("n" %in% names(docvars(result_custom)))

  # Counting by a combination of variables.
  result_multi <- add_count(corp, President, Party)
  expect_true("n" %in% names(docvars(result_multi)))

  # sort = TRUE: the first document must carry the highest count (ties
  # allowed) and counts must never increase further down. The comparison is
  # exact; allowing `max - 1` would accept a first document that is not
  # among the highest counts.
  result_sort <- add_count(corp, President, sort = TRUE)
  expect_true("n" %in% names(docvars(result_sort)))
  sorted_n <- docvars(result_sort)$n
  expect_equal(sorted_n[1], max(sorted_n))
  expect_identical(sorted_n, sort(sorted_n, decreasing = TRUE))

  # The original docvars are still present next to the new column.
  expect_true(
    all(c("Year", "President", "FirstName", "Party") %in% names(result_vars))
  )
})
test_that("corpus add_tally() works", {
  corp <- data_corpus_inaugural[1:10]

  # Every document receives the total number of documents in `n`.
  tallied <- add_tally(corp)
  tallied_vars <- docvars(tallied)
  expect_true("n" %in% names(tallied_vars))
  expect_equal(ndoc(tallied), ndoc(corp))
  expect_true(all(tallied_vars$n == 10))

  # A custom column name is used in place of `n`.
  tallied_custom <- add_tally(corp, name = "total")
  custom_vars <- docvars(tallied_custom)
  expect_true("total" %in% names(custom_vars))
  expect_false("n" %in% names(custom_vars))
  expect_true(all(custom_vars$total == 10))

  # Repeated documents each count towards the total.
  corp_repeated <- data_corpus_inaugural[c(1, 1, 2, 2, 2, 3)]
  tallied_repeated <- add_tally(corp_repeated)
  expect_equal(ndoc(tallied_repeated), 6)
  expect_true(all(docvars(tallied_repeated)$n == 6))

  # The original docvars are still present next to the new column.
  original_vars <- c("Year", "President", "FirstName", "Party")
  expect_true(all(original_vars %in% names(tallied_vars)))
})
test_that("add_count() and add_tally() with wt argument", {
  corp <- data_corpus_inaugural[1:10]

  # Weight variable: years elapsed since the earliest document, plus one.
  corp_wt <- mutate(corp, weight = Year - min(Year) + 1)

  # Weighted count per President.
  counted <- add_count(corp_wt, President, wt = weight)
  counted_vars <- docvars(counted)
  expect_true("n" %in% names(counted_vars))

  # Washington covers documents 1 and 2 (1789 and 1793), whose weights are
  # 1 and 5, so the weighted count is 6.
  is_washington <- counted_vars$President == "Washington"
  washington_weighted <- unique(counted_vars[is_washington, "n"])
  expect_equal(washington_weighted, 6)

  # Weighted tally: every document carries the sum of all weights.
  tallied <- add_tally(corp_wt, wt = weight)
  tallied_vars <- docvars(tallied)
  expect_true("n" %in% names(tallied_vars))
  expected_total <- sum(docvars(corp_wt)$weight)
  expect_true(all(tallied_vars$n == expected_total))
})
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.