tests/testthat/test-merge-data.table.R

withr::local_options(joyn.verbose = FALSE)
library(data.table) |>
  suppressWarnings()

# Load DATA --------------------------------------------------------------------

# options(joyn.verbose = FALSE)
x1 = data.table(id = c(1L, 1L, 2L, 3L, NA_integer_),
                t  = c(1L, 2L, 1L, 2L, NA_integer_),
                x  = 11:15)

y1 = data.table(id = c(1,2, 4),
                y  = c(11L, 15L, 16))


x2 = data.table(id = c(1, 4, 2, 3, NA),
                t  = c(1L, 2L, 1L, 2L, NA_integer_),
                x  = c(16, 12, NA, NA, 15))


y2 = data.table(id = c(1, 2, 5, 6, 3),
                yd = c(1, 2, 5, 6, 3),
                y  = c(11L, 15L, 20L, 13L, 10L),
                x  = c(16:20))


y3 <- data.table(id = c("c","b", "c", "a"),
                 y  = c(11L, 15L, 18L, 20L))

x3 <- data.table(id  = c("c","b", "d"),
                 v   = 8:10,
                 foo = c(4,2, 7))

x4 = data.table(id1 = c(1, 1, 2, 3, 3),
                id2 = c(1, 1, 2, 3, 4),
                t   = c(1L, 2L, 1L, 2L, NA_integer_),
                x   = c(16, 12, NA, NA, 15))


y4 = data.table(id  = c(1, 2, 5, 6, 3),
                id2 = c(1, 1, 2, 3, 4),
                y   = c(11L, 15L, 20L, 13L, 10L),
                x   = c(16:20))
x5 = data.table(id = c(1L, 1L, 2L, 3L, NA_integer_, NA_integer_),
                t  = c(1L, 2L, 1L, 2L, NA_integer_, 4L),
                x  = 11:16)

y5 = data.table(id = c(1,2, 4, NA_integer_, NA_integer_),
                y  = c(11L, 15L, 16, 17L, 18L))

x6 <- data.frame(
  id      = c(1, 4, 2, 3, NA),
  t       = c(1L, 2L, 1L, 2L, NA),
  country = c(16, 12, 3, NA, 15)
)

y6 <- data.frame(
  id      = c(1, 2, 5, 6, 3),
  gdp     = c(11L, 15L, 20L, 13L, 10L),
  country = 16:20
)


reportvar = getOption("joyn.reportvar")

# Warnings and errors ---------------

test_that("warnings are triggered correctly", {
  skip("warning of cartesian is not working well yet -
       found in `joyn()` due to deprecated arg")
  merge(
    x = x1,
    y = y1,
    match_type = "m:1",
    allow.cartesian = TRUE,
    by = "id"
  ) |> expect_warning()

  })

test_that("errors are triggered correctly", {

  merge(
    # x = x1,
    y = y1,
    match_type = "m:1",
    by = "id"
  ) |> expect_error(label = "")


  merge(
    x = x1,
    y = y1,
    match_type = "1:1",
    all.x = TRUE,
    by = "id",
    reportvar = FALSE
  ) |>
    expect_error(label = "merge did not detect inconsistency in match type 1:1")


  merge(
    x = x2,
    y = y2,
    by = "id",
    match_type = "m:m",
    all.y = TRUE,
    reportvar = FALSE
  )

})


# TEST LEFT JOINS --------------------------------------------------------------

test_that("LEFT JOIN - Conducts left join", {
  # m:1  -----------
  # One way
  jn_joyn <- merge(
    x = x1,
    y = y1,
    match_type = "m:1",
    all.x = TRUE,
    by = "id"
  )


  jn_dt <- merge.data.table(x = x1,
                             y = y1,
                             all.x = TRUE,
                             by = "id")

  setorderv(jn_dt, "id", na.last = TRUE)

  expect_equal(
    jn_joyn |> fselect(-get(reportvar)),
    jn_dt,
    ignore_attr = 'sorted'
  )

  expect_equal(
    merge(
      x = x1,
      y = y1,
      match_type = "m:1",
      all.x = TRUE,
      by = "id",
      reportvar = FALSE
    ),
    jn_dt,
    ignore_attr = 'sorted'
  )


  # 1:1 ----------------------
  jn_joyn <- merge(
    x = x2,
    y = y2,
    match_type = "1:1",
    all.x = TRUE,
    by = "id"
  )

  jn_dt <- merge.data.table(
    x = x2,
    y = y2,
    all.x = TRUE,
    by = "id"
  )

  setorderv(jn_dt, "id", na.last = TRUE)

  expect_equal(
    jn_joyn |>
      fselect(-get(reportvar)), # `reportvar` should be `.joyn` in principle
    jn_dt,
    ignore_attr = 'sorted'
  )


  # m:m  ----------------------
  jn <- merge(
    x4,
    y4,
    match_type = "m:m",
    all.x = TRUE,
    by.x = "id1",
    by.y = "id2"
  )

  jn_dt <-  merge.data.table(
    x4,
    y4,
    # match_type = "m:m",
    all.x = TRUE,
    by.x = "id1",
    by.y = "id2"
  )

  setorderv(jn_dt, c("id1", "id2"), na.last = TRUE)

  expect_equal(
    jn |> fselect(-get(reportvar)),
    jn_dt,
    ignore_attr = 'sorted'
  )

})



# TEST RIGHT JOINS ------------------------------------------------------

test_that("RIGHT JOIN - Conducts right join", {

  # One way
  jn_joyn <- merge(
    x = x1,
    y = y1,
    match_type = "m:1",
    all.y = TRUE,
    by = "id"
  )


  jn_dt <- merge.data.table(
    x = x1,
    y = y1,
    all.y = TRUE,
    by = "id"
  )

  expect_equal(
    jn_joyn |> fselect(-get(reportvar)),
    jn_dt,
    ignore_attr = 'sorted'
  )


  # Second set of tables ----------------------
  jn_joyn <- merge(
    x = x2,
    y = y2,
    match_type = "1:1",
    by = "id",
    all.y = TRUE
  )

  jn_dt <- merge.data.table(
    x2,
    y2,
    by = "id",
    all.y = TRUE
  )

  expect_equal(
    jn_joyn |> fselect(-get(reportvar)),
    jn_dt,
    ignore_attr = 'sorted'
  )

  expect_equal(
    merge(
      x = x2,
      y = y2,
      # match_type = "1:1",
      by = "id",
      all.y = TRUE,
      reportvar = FALSE
    ),
    jn_dt,
    ignore_attr = 'sorted'
  )


  jn <- merge(
    x4,
    y4,
    by.x = "id1",
    by.y = "id2",
    match_type = "m:m",
    all.y = TRUE
  )

  jn_dt <- merge.data.table(
    x4,
    y4,
    by.x = "id1",
    by.y = "id2",
    all.y = TRUE
  )

  expect_equal(
    jn |> fselect(-get(reportvar)),
    jn_dt,
    ignore_attr = 'sorted'
  )

})


# TEST FULL JOINS -------------------------------------------------------------

test_that("FULL JOIN - Conducts full join", {

  # One way
  jn_joyn <- merge(
    x = x1,
    y = y1,
    match_type = "m:1",
    by = "id",
    all = TRUE
  )


  jn_dt <- merge.data.table(
    x1,
    y1,
    by = "id",
    all = TRUE
  )

  setorderv(jn_dt, c("id"), na.last = TRUE)


  expect_equal(
    jn_joyn |> fselect(-get(reportvar)),
    jn_dt,
    ignore_attr = TRUE
  )

  expect_true(
    all(c("x", "y", "x & y") %in% jn_joyn$.joyn)
  )
  expect_true(
    all(
      c(jn_joyn$id) %in% c(y1$id, x2$id)
    )
  )

  # Second set of tables ----------------------
  jn_joyn <- merge(
    x = x2,
    y = y2,
    match_type = "1:1",
    by = "id",
    all = TRUE
  )

  jn_dt <- merge.data.table(
    x2,
    y2,
    by = "id",
    all = TRUE
  )

  setorderv(jn_dt, c("id"), na.last = TRUE)

  expect_equal(
    jn_joyn |> fselect(-get(reportvar)),
    jn_dt,
    ignore_attr = 'sorted'
  )


  jn <- merge(
    x4,
    y4,
    by.x  = "id1",
    by.y  = "id2",
    match_type = "m:m",
    all = TRUE
  )

  #merge.data.table(x4, y4, by = dplyr::join_by(id1 == id2), match_type = "m:m")
  jn_dt <- merge.data.table(
    x4,
    y4,
    by.x  = "id1",
    by.y  = "id2",
    all = TRUE
  )
  attr(jn_dt, 'sorted') <- NULL

  expect_equal(
    jn |> fselect(-get(reportvar)),
    jn_dt
  )

})


test_that("FULL JOIN - no id given", {

  jn1 <- merge(
    x2,
    y2,
    all = TRUE
  )
  jn2 <- merge(
    x2,
    y2,
    by = c("id", "x"),
    all = TRUE
  )
  expect_equal(jn1, jn2)

})


test_that("FULL JOIN - incorrectly specified arguments give errors", {

  expect_error(
    merge(
      x = x1,
      y = y1,
      match_type = "m:1",
      suffix = NULL
    )
  )

  expect_error(
    merge(
      x = x1,
      y = y1,
      match_type = "m:1",
      suffix = c("a", "b", "c")
    )
  )

  expect_error(
    merge(
      x = y1,
      y = x1,
      match_type = "1:m",
      multiple = "any"
    )
  )

  expect_error(
    merge(
      x = x1,
      y = y1,
      match_type = "m:1",
      unmatched = "error"
    )
  )


})


test_that("FULL JOIN - argument `keep_common_vars` preserves keys in output", {

  jn <- merge(
    x = x6,
    y = y6,
    match_type = "m:1",
    keep_common_vars  = TRUE,
    by = "id",
    all = TRUE
  )

  expect_true(
    "country.y" %in% names(jn)
  )
  expect_equal(
    jn |>
      fselect(id) |>
      na.omit() |>
      unique() |>
      reg_elem() |>
      sort(),

    union(x6$id, y6$id) |>
      na.omit() |>
      unique()  |>
      sort()
  )

})



test_that("FULL JOIN - update values works", {

  x2a <- x2
  x2a$x <- 1:5

  jn <- merge(
    x = x2a,
    y = y2,
    match_type = "1:1",
    update_values = TRUE,
    by = "id"
  )

  vupdated <- jn |>
    fsubset(get(reportvar) == "value updated") |>
    fselect(x.x) |>
    reg_elem()

  expect_true(
    all(vupdated %in% y2$x)
  )

  expect_equal(
    jn |>
      fsubset(get(reportvar) == "value updated") |>
      fnrow(),
    x2 |>
      fsubset(id %in% y2$id) |>
      fnrow()
  )

})


test_that("FULL JOIN - reportvar works", {

  jn <- merge(
    x1,
    y1,
    match_type = "m:1",
    by = "id",
    reportvar = "report"
  )
  expect_true(
    "report" %in% names(jn)
  )

})

test_that("FULL JOIN - NA matches", {

  jn <- merge(
    x5,
    y5,
    match_type = "m:m"
  )

  expect_equal(
    jn |>
      fsubset(is.na(id)) |>
      fnrow(),
    4
  )

})


# TEST INNER JOINS -------------------------------------------------------------

test_that("INNER JOIN - Conducts inner join", {

  # One way
  jn_joyn <- merge(
    x = x1,
    y = y1,
    match_type = "m:1",
    by = "id"
  )

  jn_dt <- merge.data.table(
    x1, y1,
    by = "id"
  )

  expect_equal(
    jn_joyn |> fselect(-get(reportvar)),
    jn_dt,
    ignore_attr = 'sorted'
  )

  expect_true(
    all(c("x & y") %in% jn_joyn$.joyn)
  )

  c(jn_joyn$id) %in% intersect(y1$id, x1$id) |>
    all() |>
    expect_true()

  jn_joyn <- merge(
    x = x2,
    y = y2,
    match_type = "1:1",
    by = "id"
  )

  jn_dt <- merge.data.table(
    x2,
    y2,
    by = "id"
  )


  expect_equal(
    jn_joyn |> fselect(-get(reportvar)),
    jn_dt,
    ignore_attr = 'sorted'
  )


  jn <- merge(
    x4,
    y4,
    by.x = "id1",
    by.y = "id2",
    match_type = "m:m"
  )

  #merge.data.table(x4, y4, by = dplyr::join_by(id1 == id2), match_type = "m:m")
  jn_dt <- merge.data.table(
    x4,
    y4,
    by.x = "id1",
    by.y = "id2"
  )

  expect_equal(
    jn |> fselect(-get(reportvar)),
    jn_dt,
    ignore_attr = 'sorted'
  )

})


test_that("INNER JOIN - no id given", {

  jn1 <- merge(
    x2,
    y2
  )
  jn2 <- merge(
    x2,
    y2,
    by = c("id", "x")
  )

  expect_equal(jn1, jn2)

})


test_that("INNER JOIN - incorrectly specified arguments give errors", {
  merge(
    x4,
    y4,
    by.x = "id1",
    by.y = "id2",
    match_type = "m:m" ,
    suffixes = NULL
  ) |>
    expect_error( )

    merge(
      x4,
      y4,
      by.x = "id1",
      by.y = "id2",
      match_type = "m:m",
      suffix = c("a", "b", "c")) |>
      expect_error()

    merge(
      x = y1,
      y = x1,
      match_type = "1:m",
      multiple = "any"
    ) |>
      expect_error()


})


test_that("INNER JOIN - argument `keep_common_vars` preserves keys in output", {


  jn <- merge(
    x = x6,
    y = y6,
    match_type = "m:1",
    keep_common_vars  = TRUE,
    by = "id"
  )

  expect_true(
    "country.y" %in% names(jn)
  )
  expect_equal(
    jn |>
      fselect(id) |>
      na.omit() |>
      unique() |>
      reg_elem(),
    y6 |>
      fsubset(id %in% x6$id) |>
      fselect(id) |>
      unique() |>
      reg_elem()
  )

})



test_that("INNER JOIN - update values works", {

  x2a <- x2
  x2a$x <- 1:5

  jn <- merge(
    x = x2a,
    y = y2,
    match_type = "1:1",
    update_values = TRUE,
    by = "id"
  )

  vupdated <- jn |>
    fsubset(get(reportvar) == "value updated") |>
    fselect(x.x) |>
    reg_elem()

  expect_true(
    all(vupdated %in% y2$x)
  )

  expect_equal(
    jn |>
      fsubset(get(reportvar) == "value updated") |>
      fnrow(),
    x2 |>
      fsubset(id %in% y2$id) |>
      fnrow()
  )



})


test_that("INNER JOIN - reportvar works", {

  jn <- merge(
    x1,
    y1,
    match_type = "m:1",
    by = "id",
    reportvar = "report"
  )
  expect_true(
    "report" %in% names(jn)
  )

})

test_that("INNER JOIN - NA matches", {


  jn <- merge(
    x5,
    y5,
    match_type = "m:m"
  )

  expect_equal(
    jn |>
      fsubset(is.na(id)) |>
      fnrow(),
    4
  )

})

# Test check logical - (RT) check if needed

# Testing check_dt_by function ####
# Checking errors
test_that("check_dt_by aborts as expected", {
  y7 = data.table(id  = c(1, 2, 5, 6, 3, 9),
                  id2 = c(1, 1, 2, 3, 4, 8),
                  y   = c(11L, 15L, 20L, 13L, 10L, 12L),
                  x   = c(16:21))

  y8 = data.table(id  = c(1, 2, 5, 6, 3, 9),
                  id2 = NULL,
                  y   = c(11L, 15L, 20L, 13L, 10L, 12L),
                  x   = c(16:21))

  check_dt_by(x4, y8, by.x = "id1", by.y = "id2") |>
    expect_error()

  check_dt_by(x4, y4, by.x = "id", by.y = "id2") |>
    expect_error()

  check_dt_by(x4, y4, by.x = "id1", by.y = "id1") |>
    expect_error()

  # Checking msg is stored when both by and by.x/by.y are supplied
  clear_joynenv()
  check_dt_by(x4, y4, by.x = "id1", by.y = "id", by = "id2")

  expect_true(rlang::env_has(.joynenv,
                             "joyn_msgs"))


})

# Checking outputs
test_that("check_dt_by output", {

  check_dt_by(x4,
              y4,
              by.x = "id1",
              by.y = "id2") |>
    expect_equal("id1 = id2")


  check_dt_by(x4,
              y4,
              by.x = 2,
              by.y = 3) |>
    expect_error()

  check_dt_by(x4,
              y4,
              by = "t") |>
    expect_error()
})
randrescastaneda/joyn documentation built on Dec. 20, 2024, 6:51 a.m.