tests/testthat/test-join-rows.R

test_that("`relationship` default behavior is correct", {
  # "warn-many-to-many" for equality joins
  expect_snapshot(out <- join_rows(c(1, 1), c(1, 1), condition = "=="))
  expect_equal(out$x, c(1L, 1L, 2L, 2L))
  expect_equal(out$y, c(1L, 2L, 1L, 2L))

  # "none" for rolling joins
  expect_warning(out <- join_rows(c(1, 2), c(1, 1), condition = ">=", filter = "max"), NA)
  expect_equal(out$x, c(1L, 1L, 2L, 2L))
  expect_equal(out$y, c(1L, 2L, 1L, 2L))
  # If rolling joins warned on many-to-many relationships, it would be a little
  # hard to explain that the above example warns, but this wouldn't just because
  # we've removed `2` as a key from `x`:
  # `join_rows(1, c(1, 1), condition = ">=", filter = "max")`

  # "none" for inequality joins (and overlap joins)
  expect_warning(out <- join_rows(c(1, 2), c(0, 1), condition = ">="), NA)
  expect_equal(out$x, c(1L, 1L, 2L, 2L))
  expect_equal(out$y, c(1L, 2L, 1L, 2L))

  # "none" for deprecated cross joins
  expect_warning(out <- join_rows(c(1, 1), c(1, 1), cross = TRUE), NA)
  expect_equal(out$x, c(1L, 1L, 2L, 2L))
  expect_equal(out$y, c(1L, 2L, 1L, 2L))
})

test_that("`multiple` first/last/any works correctly", {
  out <- join_rows(c(1, 1), c(1, 1), multiple = "first")
  expect_equal(out$x, c(1L, 2L))
  expect_equal(out$y, c(1L, 1L))

  out <- join_rows(c(1, 1), c(1, 1), multiple = "last")
  expect_equal(out$x, c(1L, 2L))
  expect_equal(out$y, c(2L, 2L))

  out <- join_rows(c(1, 1), c(1, 1), multiple = "any")
  expect_equal(out$x, c(1L, 2L))
  expect_equal(out$y %in% c(1L, 2L), c(TRUE, TRUE))
})

test_that("inner join only outputs matching keys", {
  out <- join_rows(c(2, 1), c(3, 4, 1), type = "inner")
  expect_equal(out$x, 2L)
  expect_equal(out$y, 3L)

  out <- join_rows(c(2, 1), c(3, 4, 1), type = "inner", condition = ">")
  expect_equal(out$x, 1L)
  expect_equal(out$y, 3L)
})

test_that("left join contains all keys from x", {
  out <- join_rows(c(2, 1), c(3, 4, 1), type = "left")
  expect_equal(out$x, c(1L, 2L))
  expect_equal(out$y, c(NA, 3L))

  out <- join_rows(c(2, 1), c(3, 4, 1), type = "left", condition = ">")
  expect_equal(out$x, c(1L, 2L))
  expect_equal(out$y, c(3L, NA))
})

test_that("right join contains all keys from y", {
  out <- join_rows(c(2, 1), c(3, 4, 1), type = "right")
  expect_equal(out$x, c(2L, NA, NA))
  expect_equal(out$y, c(3L, 1L, 2L))

  out <- join_rows(c(2, 1), c(3, 4, 1), type = "right", condition = ">=")
  expect_equal(out$x, c(1L, 2L, NA, NA))
  expect_equal(out$y, c(3L, 3L, 1L, 2L))
})

test_that("full join contains all keys from both", {
  out <- join_rows(c(2, 1), c(3, 1), type = "full")
  expect_equal(out$x, c(1L, 2L, NA))
  expect_equal(out$y, c(NA, 2L, 1L))

  out <- join_rows(c(2, 1), c(3, 1), type = "full", condition = ">")
  expect_equal(out$x, c(1L, 2L, NA))
  expect_equal(out$y, c(2L, NA, 1L))
})

test_that("nest join returns 0L for unmatched x keys", {
  out <- join_rows(c(2, 1), c(3, 4, 1), type = "nest")
  expect_equal(out$x, c(1L, 2L))
  expect_equal(out$y, c(0L, 3L))
})

test_that("nest join returns 0L for missing x keys with `na_matches = 'never'`", {
  out <- join_rows(c(NA, 1), 1, type = "nest", na_matches = "never")
  expect_equal(out$x, c(1L, 2L))
  expect_equal(out$y, c(0L, 1L))
})

test_that("matching rows can be filtered", {
  out <- join_rows(c(3, 5), c(2, 4, 1), condition = ">=", filter = "max")
  expect_equal(out$x, 1:2)
  expect_equal(out$y, 1:2)

  out <- join_rows(c(3, 5), c(2, 4, 1), condition = ">=", filter = "min")
  expect_equal(out$x, 1:2)
  expect_equal(out$y, c(3, 3))
})

test_that("missing values only match with `==`, `>=`, and `<=` conditions", {
  out <- join_rows(NA, NA, condition = "==")
  expect_identical(out$x, 1L)
  expect_identical(out$y, 1L)

  out <- join_rows(NA, NA, condition = ">=")
  expect_identical(out$x, 1L)
  expect_identical(out$y, 1L)

  out <- join_rows(NA, NA, condition = "<=")
  expect_identical(out$x, 1L)
  expect_identical(out$y, 1L)

  out <- join_rows(NA, NA, condition = ">")
  expect_identical(out$x, integer())
  expect_identical(out$y, integer())

  out <- join_rows(NA, NA, condition = "<")
  expect_identical(out$x, integer())
  expect_identical(out$y, integer())


  x <- tibble(x = c(1, 1), y = c(2, NA))
  y <- tibble(x = c(1, 1), y = c(3, NA))

  out <- join_rows(x, y, condition = c("==", "<="))
  expect_identical(out$x, c(1L, 2L))
  expect_identical(out$y, c(1L, 2L))

  out <- join_rows(x, y, condition = c("==", "<"))
  expect_identical(out$x, 1L)
  expect_identical(out$y, 1L)
})

test_that("join_rows() doesn't error on unmatched rows if they won't be dropped", {
  # 2 is unmatched, but a left join means we always retain that key
  out <- join_rows(c(1, 2), 1, type = "left", unmatched = "error")
  expect_identical(out$x, c(1L, 2L))
  expect_identical(out$y, c(1L, NA))

  out <- join_rows(c(1, 2), c(1, 3), type = "full", unmatched = "error")
  expect_identical(out$x, c(1L, 2L, NA))
  expect_identical(out$y, c(1L, NA, 2L))
})

test_that("join_rows() allows `unmatched` to be specified independently for inner joins", {
  out <- join_rows(c(1, 2), 1, type = "inner", unmatched = c("drop", "error"))
  expect_identical(out$x, 1L)
  expect_identical(out$y, 1L)

  out <- join_rows(1, c(2, 1), type = "inner", unmatched = c("error", "drop"))
  expect_identical(out$x, 1L)
  expect_identical(out$y, 2L)

  # Both have dropped rows, only `y` is mentioned in the error
  expect_snapshot(error = TRUE, {
    join_rows(c(1, 3), c(1, 2), type = "inner", unmatched = c("drop", "error"))
  })
})

test_that("join_rows() expects incompatible type errors to have been handled by join_cast_common()", {
  expect_snapshot({
    (expect_error(
      join_rows(data.frame(x = 1), data.frame(x = factor("a")))
    ))
  })
})

test_that("join_rows() gives meaningful one-to-one errors", {
  expect_snapshot(error = TRUE, {
    join_rows(1, c(1, 1), relationship = "one-to-one")
  })
  expect_snapshot(error = TRUE, {
    join_rows(c(1, 1), 1, relationship = "one-to-one")
  })
})

test_that("join_rows() gives meaningful one-to-many errors", {
  expect_snapshot(error = TRUE, {
    join_rows(c(1, 1), 1, relationship = "one-to-many")
  })
})

test_that("join_rows() gives meaningful many-to-one errors", {
  expect_snapshot(error = TRUE, {
    join_rows(1, c(1, 1), relationship = "many-to-one")
  })
})

test_that("join_rows() gives meaningful many-to-many warnings", {
  skip("TODO duckdb")
  expect_snapshot({
    join_rows(c(1, 1), c(1, 1))
  })

  # With proof that the defaults flow through user facing functions
  df <- data.frame(x = c(1, 1))
  expect_snapshot({
    duckplyr_left_join(df, df, by = join_by(x))
  })
})

test_that("join_rows() gives meaningful error message on unmatched rows", {
  # Unmatched in the RHS
  expect_snapshot(error = TRUE,
    join_rows(
      data.frame(x = c(1, 2)),
      data.frame(x = c(3, 1)),
      type = "left",
      unmatched = "error"
    )
  )
  expect_snapshot(error = TRUE,
    join_rows(
      data.frame(x = c(1, 2)),
      data.frame(x = c(3, 1)),
      type = "nest",
      unmatched = "error"
    )
  )

  # Unmatched in the LHS
  expect_snapshot(error = TRUE,
    join_rows(
      data.frame(x = c(1, 2)),
      data.frame(x = c(3, 1)),
      type = "right",
      unmatched = "error"
    )
  )

  # Unmatched in either side
  expect_snapshot(error = TRUE,
    join_rows(
      data.frame(x = c(1, 2)),
      data.frame(x = 1),
      type = "inner",
      unmatched = "error"
    )
  )
  expect_snapshot(error = TRUE,
    join_rows(
      data.frame(x = c(1, 2)),
      data.frame(x = 1),
      type = "inner",
      unmatched = c("error", "drop")
    )
  )
  expect_snapshot(error = TRUE,
    join_rows(
      data.frame(x = 1),
      data.frame(x = c(1, 2)),
      type = "inner",
      unmatched = "error"
    )
  )
  expect_snapshot(error = TRUE,
    join_rows(
      data.frame(x = 1),
      data.frame(x = c(1, 2)),
      type = "inner",
      unmatched = c("drop", "error")
    )
  )
})

test_that("join_rows() always errors on unmatched missing values", {
  # Unmatched in the RHS
  expect_snapshot(error = TRUE,
    join_rows(
      data.frame(x = 1),
      data.frame(x = NA),
      type = "left",
      unmatched = "error",
      na_matches = "na"
    )
  )
  expect_snapshot(
    error = TRUE,
    join_rows(
      data.frame(x = NA),
      data.frame(x = NA),
      type = "left",
      unmatched = "error",
      na_matches = "never"
    )
  )
  expect_snapshot(
    error = TRUE,
    join_rows(
      data.frame(x = 1),
      data.frame(x = NA),
      type = "nest",
      unmatched = "error",
      na_matches = "na"
    )
  )
  expect_snapshot(
    error = TRUE,
    join_rows(
      data.frame(x = NA),
      data.frame(x = NA),
      type = "nest",
      unmatched = "error",
      na_matches = "never"
    )
  )

  # Unmatched in the LHS
  expect_snapshot(error = TRUE,
    join_rows(
      data.frame(x = NA),
      data.frame(x = 1),
      type = "right",
      unmatched = "error",
      na_matches = "na"
    )
  )
  expect_snapshot(error = TRUE,
    join_rows(
      data.frame(x = NA),
      data.frame(x = NA),
      type = "right",
      unmatched = "error",
      na_matches = "never"
    )
  )

  # Unmatched in either side
  expect_snapshot(error = TRUE,
    join_rows(
      data.frame(x = 1),
      data.frame(x = c(1, NA)),
      type = "inner",
      unmatched = "error",
      na_matches = "na"
    )
  )
  expect_snapshot(error = TRUE,
    join_rows(
      data.frame(x = 1),
      data.frame(x = c(1, NA)),
      type = "inner",
      unmatched = c("drop", "error"),
      na_matches = "na"
    )
  )
  expect_snapshot(error = TRUE,
    join_rows(
      data.frame(x = c(1, NA)),
      data.frame(x = 1),
      type = "inner",
      unmatched = "error",
      na_matches = "na"
    )
  )
  expect_snapshot(error = TRUE,
    join_rows(
      data.frame(x = c(1, NA)),
      data.frame(x = 1),
      type = "inner",
      unmatched = c("error", "drop"),
      na_matches = "na"
    )
  )
  expect_snapshot(error = TRUE,
    join_rows(
      data.frame(x = NA),
      data.frame(x = NA),
      type = "inner",
      unmatched = "error",
      na_matches = "never"
    )
  )
})

test_that("join_rows() validates `unmatched`", {
  df <- tibble(x = 1)

  expect_snapshot(error = TRUE, {
    join_rows(df, df, unmatched = 1)
    join_rows(df, df, unmatched = "foo")

    # One `unmatched` input is allowed for most joins
    join_rows(df, df, type = "left", unmatched = character())
    join_rows(df, df, type = "left", unmatched = c("drop", "error"))

    # Two `unmatched` inputs are allowed for inner joins
    join_rows(df, df, type = "inner", unmatched = character())
    join_rows(df, df, type = "inner", unmatched = c("drop", "error", "error"))

    join_rows(df, df, type = "inner", unmatched = c("drop", "dr"))
  })
})

test_that("join_rows() validates `relationship`", {
  df <- tibble(x = 1)

  expect_snapshot(error = TRUE, {
    join_rows(df, df, relationship = 1)
  })

  # Notably can't use the vctrs options
  expect_snapshot(error = TRUE, {
    join_rows(df, df, relationship = "none")
  })
  expect_snapshot(error = TRUE, {
    join_rows(df, df, relationship = "warn-many-to-many")
  })
})

test_that("join_rows() rethrows overflow error nicely (#6912)", {
  skip_on_cran()
  # Windows 32-bit doesn't support long vectors of this size, and the
  # intermediate `r_ssize` will be too large
  skip_if(.Machine$sizeof.pointer < 8L, message = "No long vector support")

  df <- tibble(x = 1:1e7)

  expect_snapshot(error = TRUE, {
    join_rows(df, df, condition = ">=")
  })
})

# Deprecated behavior ----------------------------------------------------------

test_that("`multiple = NULL` is deprecated and results in `'all'` (#6731)", {
  df1 <- tibble(x = c(1, 2))
  df2 <- tibble(x = c(2, 1, 2))

  expect_snapshot({
    out <- join_rows(df1, df2, multiple = NULL)
  })
  expect_identical(out$x, c(1L, 2L, 2L))
  expect_identical(out$y, c(2L, 1L, 3L))

  expect_snapshot({
    duckplyr_left_join(df1, df2, by = join_by(x), multiple = NULL)
  })
})

test_that("`multiple = 'error'` is deprecated (#6731)", {
  df1 <- tibble(x = c(1, 2))
  df2 <- tibble(x = c(2, 1, 2))

  expect_snapshot(error = TRUE, {
    join_rows(df1, df2, multiple = "error")
  })
  expect_snapshot(error = TRUE, {
    duckplyr_left_join(df1, df2, by = join_by(x), multiple = "error")
  })
})

test_that("`multiple = 'warning'` is deprecated (#6731)", {
  df1 <- tibble(x = c(1, 2))
  df2 <- tibble(x = c(2, 1, 2))

  expect_snapshot({
    out <- join_rows(df1, df2, multiple = "warning")
  })
  expect_identical(out$x, c(1L, 2L, 2L))
  expect_identical(out$y, c(2L, 1L, 3L))

  expect_snapshot({
    duckplyr_left_join(df1, df2, by = join_by(x), multiple = "warning")
  })
})
duckdblabs/duckplyr documentation built on Nov. 6, 2024, 10 p.m.