# Nothing
# Labels the tests in this file for the test reporter.
# NOTE(review): context() is deprecated in testthat 3rd edition (the file name
# is used as the context instead) -- consider removing if the package has
# opted into edition 3; confirm against DESCRIPTION.
context("test left_join")
# Joining on an ordinary docvar column: the extra column from the data frame
# is appended and the document names are left as they were.
test_that("left_join works with regular docvars", {
  texts <- c(doc1 = "text1", doc2 = "text2", doc3 = "text3")
  base_vars <- data.frame(id = 1:3, name = c("a", "b", "c"))
  corp_in <- corpus(texts, docvars = base_vars)

  # The lookup table has one more row (id 4) than the corpus has documents.
  lookup <- data.frame(id = 1:4, info = c("x", "y", "z", "w"))

  joined <- left_join(corp_in, lookup, by = "id")

  expected_vars <- data.frame(
    id = 1:3,
    name = c("a", "b", "c"),
    info = c("x", "y", "z")
  )
  expect_identical(docnames(joined), c("doc1", "doc2", "doc3"))
  expect_identical(docvars(joined), expected_vars)
})
# Documents whose key has no partner in the lookup table get NA in the
# joined column.
test_that("left_join handles unmatched rows with NA", {
  texts <- c(doc1 = "text1", doc2 = "text2", doc3 = "text3")
  corp_in <- corpus(texts, docvars = data.frame(id = 1:3))

  # Keys 1 and 3 are present in the corpus; key 2 is missing from the lookup
  # table and key 5 is missing from the corpus.
  lookup <- data.frame(id = c(1, 3, 5), info = c("a", "c", "e"))

  joined <- left_join(corp_in, lookup, by = "id")

  expected_vars <- data.frame(
    id = 1:3,
    info = c("a", NA, "c")
  )
  # NOTE(review): expect_equal() (not expect_identical()) is used here,
  # presumably because the lookup key is double while the corpus key is
  # integer -- confirm before tightening.
  expect_equal(docvars(joined), expected_vars)
})
# Joining by "docname" when only the data frame has such a column: the test
# expects the join to line up with the corpus document names and no
# "docname" docvar to appear in the result.
test_that("left_join works with docname in y", {
  texts <- c(doc1 = "text1", doc2 = "text2", doc3 = "text3")
  corp_in <- corpus(texts, docvars = data.frame(id = 1:3))

  # "doc4" does not exist in the corpus; "doc3" has no row here.
  lookup <- data.frame(
    docname = c("doc1", "doc2", "doc4"),
    info = c("a", "b", "d")
  )

  joined <- left_join(corp_in, lookup, by = "docname")

  expected_vars <- data.frame(
    id = 1:3,
    info = c("a", "b", NA)
  )
  expect_identical(docnames(joined), c("doc1", "doc2", "doc3"))
  expect_identical(docvars(joined), expected_vars)

  # docname should not be added as a docvar
  expect_false("docname" %in% names(docvars(joined)))
})
# Same scenario as joining by "docname", but the key column in the data frame
# has a different name and the mapping is expressed with dplyr::join_by().
test_that("left_join works with join_by(docname == other_col)", {
  # The test is skipped unless dplyr >= 1.1.0 is installed.
  skip_if_not_installed("dplyr", minimum_version = "1.1.0")

  texts <- c(doc1 = "text1", doc2 = "text2", doc3 = "text3")
  corp_in <- corpus(texts, docvars = data.frame(id = 1:3))

  lookup <- data.frame(
    doc_id = c("doc1", "doc2", "doc4"),
    info = c("a", "b", "d")
  )

  joined <- left_join(
    corp_in,
    lookup,
    by = dplyr::join_by(docname == doc_id)
  )

  expected_vars <- data.frame(
    id = 1:3,
    info = c("a", "b", NA)
  )
  expect_identical(docnames(joined), c("doc1", "doc2", "doc3"))
  expect_identical(docvars(joined), expected_vars)

  # docname should not be added as a docvar
  expect_false("docname" %in% names(docvars(joined)))
})
# The key mapping is given as a named character vector, pairing "docname"
# with the `doc_id` column of the data frame.
test_that("left_join works with named vector c(docname = other_col)", {
  texts <- c(doc1 = "text1", doc2 = "text2", doc3 = "text3")
  corp_in <- corpus(texts, docvars = data.frame(id = 1:3))

  lookup <- data.frame(
    doc_id = c("doc1", "doc2", "doc4"),
    info = c("a", "b", "d")
  )

  joined <- left_join(corp_in, lookup, by = c("docname" = "doc_id"))

  expected_vars <- data.frame(
    id = 1:3,
    info = c("a", "b", NA)
  )
  expect_identical(docnames(joined), c("doc1", "doc2", "doc3"))
  expect_identical(docvars(joined), expected_vars)

  # docname should not be added as a docvar
  expect_false("docname" %in% names(docvars(joined)))
})
# If the corpus already has a docvar called "docname", the join is expected
# to match on that column rather than on docnames(x).
test_that("left_join uses existing docname docvar when present", {
  # Document names (a, b, c) deliberately differ from the docname docvar.
  texts <- c(a = "text1", b = "text2", c = "text3")
  base_vars <- data.frame(
    docname = c("doc1", "doc2", "doc3"),
    id = 1:3
  )
  corp_in <- corpus(texts, docvars = base_vars)

  lookup <- data.frame(
    docname = c("doc1", "doc2", "doc4"),
    info = c("a", "b", "d")
  )

  joined <- left_join(corp_in, lookup, by = "docname")

  # The document names themselves must stay a, b, c.
  expect_identical(docnames(joined), c("a", "b", "c"))

  # Matching happened on the docname docvar, which is kept in the result.
  expected_vars <- data.frame(
    docname = c("doc1", "doc2", "doc3"),
    id = 1:3,
    info = c("a", "b", NA)
  )
  expect_identical(docvars(joined), expected_vars)
})
# Composite key: rows are matched on the (year, country) pair.
test_that("left_join works with multiple join columns", {
  texts <- c(doc1 = "text1", doc2 = "text2", doc3 = "text3")
  base_vars <- data.frame(
    year = c(2020, 2021, 2020),
    country = c("US", "US", "UK")
  )
  corp_in <- corpus(texts, docvars = base_vars)

  # The (2021, "UK") row has no matching document.
  lookup <- data.frame(
    year = c(2020, 2021, 2020, 2021),
    country = c("US", "US", "UK", "UK"),
    value = c(10, 20, 30, 40)
  )

  joined <- left_join(corp_in, lookup, by = c("year", "country"))

  expected_vars <- data.frame(
    year = c(2020, 2021, 2020),
    country = c("US", "US", "UK"),
    value = c(10, 20, 30)
  )
  expect_identical(docvars(joined), expected_vars)
})
# Corpus-level metadata set before the join must still be readable afterwards.
test_that("left_join preserves corpus metadata", {
  texts <- c(doc1 = "text1", doc2 = "text2")
  corp_in <- corpus(texts, docvars = data.frame(id = 1:2))
  meta(corp_in, "source") <- "test"
  meta(corp_in, "notes") <- "example"

  lookup <- data.frame(id = 1:2, info = c("a", "b"))
  joined <- left_join(corp_in, lookup, by = "id")

  expect_identical(meta(joined, "source"), "test")
  expect_identical(meta(joined, "notes"), "example")
})
# Both inputs have a non-key column named `value`; the supplied suffixes are
# expected to disambiguate the two in the resulting docvar names.
test_that("left_join handles suffix parameter correctly", {
  texts <- c(doc1 = "text1", doc2 = "text2")
  base_vars <- data.frame(id = 1:2, value = c("x", "y"))
  corp_in <- corpus(texts, docvars = base_vars)

  lookup <- data.frame(id = 1:2, value = c("a", "b"))

  joined <- left_join(
    corp_in,
    lookup,
    by = "id",
    suffix = c("_corp", "_df")
  )

  expected_names <- c("id", "value_corp", "value_df")
  expect_identical(names(docvars(joined)), expected_names)
})
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.