tests/testthat/test-joyn.R

withr::local_options(joyn.verbose = FALSE)
library(data.table)
# options(joyn.verbose = FALSE)
x1 = data.table(id = c(1L, 1L, 2L, 3L, NA_integer_),
                t  = c(1L, 2L, 1L, 2L, NA_integer_),
                x  = 11:15)

y1 = data.table(id = c(1,2, 4),
                y  = c(11L, 15L, 16))


x2 = data.table(id = c(1, 4, 2, 3, NA),
                t  = c(1L, 2L, 1L, 2L, NA_integer_),
                x  = c(16, 12, NA, NA, 15))


y2 = data.table(id = c(1, 2, 5, 6, 3),
                yd = c(1, 2, 5, 6, 3),
                y  = c(11L, 15L, 20L, 13L, 10L),
                x  = c(16:20))


y3 <- data.table(id = c("c","b", "c", "a"),
                 y  = c(11L, 15L, 18L, 20L))

x3 <- data.table(id  = c("c","b", "d"),
                 v   = 8:10,
                 foo = c(4,2, 7))

x4 = data.table(id1 = c(1, 1, 2, 3, 3),
                id2 = c(1, 1, 2, 3, 4),
                t   = c(1L, 2L, 1L, 2L, NA_integer_),
                x   = c(16, 12, NA, NA, 15))


y4 = data.table(id  = c(1, 2, 5, 6, 3),
                id2 = c(1, 1, 2, 3, 4),
                y   = c(11L, 15L, 20L, 13L, 10L),
                x   = c(16:20))

x5 = data.table(id = c(1, 2, 5, 6, 3),
                yd = c(1, 2, 5, 6, 3),
                y  = c(11L, 15L, 20L, 13L, 10L),
                x  = c(16:18, NA, NA))

#-------------------------------------------------------------------------------
# TESTS ------------------------------------------------------------------------
#-------------------------------------------------------------------------------


test_that(
  "select `by` vars when non specified", {
    expect_equal(
      joyn(
        x = x1,
        y = y1,
        match_type = "m:1"
      ),
      joyn(
        x  = x1,
        y  = y1,
        by = "id",
        match_type = "m:1"
      )
    )
  }

)

test_that("all types of by argument raise no error", {

  joyn(x          = x4,
       y          = y4,
       by         = "id1=id2",
       match_type = "m:m") |>
    expect_no_error()


  # THIS ONE
  joyn(x          = x4,
       y          = y4,
       by         = c("id1 = id", "id2"),
       match_type = "m:1") |>
    expect_no_error()

  joyn(x          = x4,
       y          = y4,
       by         = c("id1 = id2", "id2 = id"),
       match_type = "m:1") |>
    expect_no_error()

  joyn(x          = x4,
       y          = y4,
       by         = c("id2", "x"),
       match_type = "1:1") |>
    expect_no_error()

})

test_that("Errors if no common variables", {
  xf <- copy(x1)
  xf[, id := NULL]
  expect_error(
    joyn(
      xf,
      y1
    )
  )
})

test_that("m:m and 1:1 gives the same if data is correct", {
  expect_equal(
    joyn(
      x2,
      y2,
      by = "id",
      update_values = TRUE,
      match_type = "1:1"
    ),
    joyn(
      x2,
      y2,
      by = "id",
      update_values = TRUE,
      verbose = FALSE,
      match_type = "m:m"
    )
  )

  expect_equal(
    joyn(
      x2,
      y2,
      by = "id",
      update_NAs = TRUE,
      match_type = "1:1"
    ),
    joyn(
      x2,
      y2,
      by = "id",
      update_NAs = TRUE
    )
  )

  expect_equal(joyn(x2, y2, by = "id", match_type = "1:1"),
               joyn(x2, y2, by = "id"))

})


test_that("left joyn is correct", {
  x <- joyn(
    x1,
    y1,
    by = "id",
    keep = "left",
    match_type = "m:1"
  )
  expect_equal(nrow(x), nrow(x1))


  w <- joyn(x2,
             y2,
             by = "id",
             keep = "left",
             match_type = "1:1")
  expect_equal(nrow(w), nrow(x2))


})

test_that("inverse joyn works", {
  ll <-
    joyn(
      y3,
      x3,
      keep = "left",
      by = "id",
      match_type = "m:1",
      reportvar = FALSE,
      sort      = TRUE
    )
  rr <-
    joyn(
      x3,
      y3,
      keep = "right",
      by = "id",
      match_type = "1:m",
      reportvar = FALSE,
      sort      = TRUE
    )

  lnames <- names(ll)
  setcolorder(rr, lnames)

  expect_equal(ll, rr)

  lt <-
    joyn(
      y3,
      x3,
      by = "id",
      match_type = "m:1",
      reportvar = FALSE,
      keep = "left",
      sort = TRUE
    )
  rt <-
    joyn(
      x3,
      y3,
      by = "id",
      match_type = "1:m",
      reportvar = FALSE,
      keep = "right",
      sort = TRUE
    )

  lnamest <- names(lt)
  setcolorder(rt, lnamest)

  expect_equal(lt, rt)


})


test_that("FULL- Compare with base::merge", {
  jn <- joyn(
    x1,
    y1,
    by         = "id",
    reportvar  = FALSE,
    match_type = "m:1",
    sort = TRUE
  )

  br <- base::merge(x1, y1, by = "id", all = TRUE)

  setorderv(br, "id", na.last = TRUE)
  setorderv(jn, "id", na.last = TRUE)
  setattr(jn, 'sorted', "id")

  expect_equal(jn, br, ignore_attr = 'row.names')

  jn <- joyn(x2,
             y2,
             by= "id",
              reportvar = FALSE,
              keep_common_vars = TRUE,
             sort = TRUE)

  br <- base::merge(x2, y2, by = "id", all = TRUE)

  setorderv(br, "id", na.last = TRUE)
  setorderv(jn, "id", na.last = TRUE)
  setattr(jn, 'sorted', "id")


  setcolorder(jn, names(br))

  expect_equal(jn, br, ignore_attr = 'row.names')

})


test_that("LEFT- Compare with base::merge", {
  jn <-
    joyn(
      x1,
      y1,
      by = "id",
      reportvar = FALSE,
      keep = "left",
      match_type = "m:1",
      sort = TRUE
    )
  br <- base::merge(x1, y1, by = "id", all.x = TRUE)
  setorderv(br, "id", na.last = TRUE)
  setorderv(jn, "id", na.last = TRUE)
  setattr(jn, 'sorted', "id")
  expect_equal(jn, br, ignore_attr = "row.names")


  jn <- joyn(
      x2,
      y2,
      by = "id",
      reportvar = FALSE,
      keep = "left",
      match_type = "1:1",
      keep_common_vars = TRUE,
      sort = TRUE
    )

  br <- base::merge(x2, y2, by = "id", all.x = TRUE)
  setorderv(br, "id", na.last = TRUE)
  setorderv(jn, "id", na.last = TRUE)
  setattr(jn, 'sorted', "id")


  setcolorder(jn, names(br))

  expect_equal(jn, br, ignore_attr = "row.names")

})



test_that("RIGHT - Compare with base::merge", {
  jn <-
    joyn(
      x1,
      y1,
      by = "id",
      reportvar = FALSE,
      keep = "right",
      match_type = "m:1",
      sort = TRUE
    )
  br <- base::merge(x1, y1, by = "id", all.y = TRUE)
  setorderv(br, "id", na.last = TRUE)
  setattr(jn, 'sorted', "id")


  expect_equal(jn, br)


  jn <-
    joyn(
      x2,
      y2,
      by               = "id",
      reportvar        = FALSE,
      keep             = "right",
      match_type       = "1:1",
      keep_common_vars = TRUE,
      sort = TRUE
    )

  br <- base::merge(x2, y2, by = "id", all.y = TRUE)

  setorderv(br, "id", na.last = TRUE)
  setattr(jn, 'sorted', "id")


  setcolorder(jn, names(br))

  expect_equal(jn, br)

})


test_that("INNER - Compare with base::merge", {
  jn <-
    joyn(
      x1,
      y1,
      by         = "id",
      reportvar  = FALSE,
      keep       = "inner",
      match_type = "m:1",
      sort = TRUE
    )
  br <- base::merge(x1, y1, by = "id")
  setorderv(br, "id", na.last = TRUE)
  setattr(jn, 'sorted', "id")


  expect_equal(jn, br)


  jn <-
    joyn(
      x2,
      y2,
      by               = "id",
      reportvar        = FALSE,
      keep             = "inner",
      match_type       = "1:1",
      keep_common_vars = TRUE,
      sort = TRUE
    )
  br <- base::merge(x2, y2, by = "id")

  setorderv(br, "id", na.last = TRUE)
  setattr(jn, 'sorted', "id")


  setcolorder(jn, names(br))

  expect_equal(jn, br)

})


test_that("match types work", {
  expect_error(joyn(
    x3,
    y3,
    by = "id",
    match_type = "1:1"
  ))
  expect_error(joyn(
    y3,
    x3,
    by = "id",
    match_type = "1:1"
  ))
  expect_error(joyn(
    x3,
    y3,
    by = "id",
    match_type = "m:1"
  ))

  x <-
    structure(
      list(
        id = c(1, 1, 2, 3, 4, 7),
        t = c(1L, 2L, 1L, 2L, NA, 2L),
        x = c(16, 12, NA, NA, 15, 12)
      ),
      row.names = c(NA, -6L),
      class = c("data.table", "data.frame")
    )

  y <-
    structure(
      list(
        id  =  c(1, 1, 2, 3, 4, 6),
        y = c(11L, 15L, 20L, 13L, 10L, 7L),
        x = 16:21
      ),
      row.names = c(NA, -6L),
      class = c("data.table", "data.frame")
    )

  by <- "id"
  jn <- joyn(x, y, by = by, match_type = "m:m")

  njn <- nrow(jn)

  ux <- x[, .N, by = by]

  uy <- y[, .N, by = by]

  dd <- merge.data.table(ux, uy, by = "id", all = TRUE)
  setnafill(dd, fill = 1)
  cN <- dd[,
           N := N.x * N.y][, sum(N)]

  expect_equal(njn, cN)

})

###########################################################################################
test_that("Update NAs", {
  # update NAs in x variable form x
  jn <- joyn(x2,
             y2,
             by               = "id",
             update_NAs       = TRUE,
             keep_common_vars = TRUE,
             sort = TRUE)

  idx <- x2[is.na(x), "id"]

  expect_equal(jn[idx, on = "id"][, x.x], y2[idx, on = "id"][, x])

  jn_1 <- joyn(x2,
               y2,
               by               = "id = yd",
               update_NAs       = TRUE,
               keep_common_vars = TRUE,
               sort = TRUE)


  expect_equal(jn_1[idx, on = "id"][, x.x], y2[idx, on = "id"][, x])

  jn_2 <- joyn(x5,
               y4,
               by               = c("id", "y"),
               update_NAs       = TRUE,
               keep_common_vars = TRUE,
               sort = TRUE)

  to_replace <- x5[is.na(x), "id"]

  expect_equal(jn_2[to_replace, on = "id"][, x.x], y4[to_replace, on = "id"][, x])

  out <- joyn(x5,
               y4,
               by               = c("id = id2", "yd = id"),
               update_NAs       = FALSE,
               keep_common_vars = TRUE,
              sort = TRUE)

  to_replace <- out[(is.na(x.x) & !is.na(x.y)) | (is.na(y.x) & !is.na(y.y) ), c("id", "yd")]

  jn_3 <- joyn(x5,
               y4,
               by               = c("id = id2", "yd = id"),
               update_NAs       = TRUE,
               keep_common_vars = TRUE,
               sort = TRUE)

  any(jn_3[to_replace, on = c("id", "yd")][, .joyn] != "NA updated", with = FALSE) |>
    expect_equal(FALSE)

})


test_that("Update actual values", {

  jn <-joyn(x                = x2,
            y                = y2,
            by               = "id",
            update_values    = TRUE,
            update_NAs       = TRUE,
            keep_common_vars = TRUE,
            sort = TRUE)

  br <- base::merge(x2, y2, by = "id", all = TRUE)

  br <- br |>
    ftransform( x.x = fifelse(!is.na(x.x) & is.na(x.y),
                              x.x, x.y))


  setorderv(br, "id", na.last = TRUE)
  setorderv(jn, "id", na.last = TRUE)
  setattr(jn, 'sorted', "id")

  expect_equal(jn |>
                 fselect(x.x),
               br |>
                 fselect(x.x),
               ignore_attr = 'row.names')

  joyn(x2,
       y2,
       by               = "id = yd",
       update_values    = TRUE,
       update_NAs       = FALSE,
       keep_common_vars = TRUE) |>
    expect_no_error()

  joyn(x5,
       y4,
       by               = c("id", "y"),
       update_values    = TRUE,
       keep_common_vars = TRUE) |>
    expect_no_error()


  joyn(x5,
       y4,
       by               = c("id = id2", "yd = id"),
       update_values    = TRUE,
       keep_common_vars = TRUE) |>
    expect_no_error()

})


test_that("y vars are extracted correctly", {
  yvars <- "y"
  jn <- joyn(x2,
              y2,
              by = "id",
              y_vars_to_keep = yvars)

  expect_equal(names(jn), c(names(x2), yvars, ".joyn"))

  jn <-
    joyn(
      x2,
      y2,
      by = "id",
      y_vars_to_keep = yvars,
      reportvar = FALSE
    )

  expect_equal(names(jn), c(names(x2), yvars))

  jn <- joyn(x2,
              y2,
              by = "id",
              y_vars_to_keep = F)

  expect_equal(names(jn), c(names(x2), ".joyn"))

  jn <- joyn(x2,
             y2,
             by = "id",
             y_vars_to_keep = F,
             reportvar = "report_test")

  expect_equal(names(jn), c(names(x2), "report_test"))



  yvars <- "reuiou"
  expect_error(joyn(x2,y2,by = "id", y_vars_to_keep = yvars))

})

test_that("selection of reportvar", {
  reportvar <- "wijf"
  jn <-
    joyn(x2,
          y2,
          by = "id",
          reportvar = reportvar)

  expect_true(reportvar %in% names(jn))

  jn <- joyn(x2,
              y2,
              by = "id",
              reportvar = FALSE,
              y_vars_to_keep = c("yd", "y"))


  expect_false(".joyn" %in% names(jn))

  expect_equal(unique(c(names(x2), names(y2))), names(jn))


  jn <- joyn(x         = x2,
             y         = y2,
             by        = "id",
             reportvar = "t")

  allnames <- unique(c(names(x2), names(y2)))

  newname <- setdiff(names(jn), allnames)

  expect_true(length(newname) > 0)

})

test_that("reporttype works", {
  jn <- joyn(x          = x2,
             y          = y2,
             by         = "id",
             reporttype = "numeric")

  class(jn$.joyn) |>
    expect_equal("numeric")

})


test_that("Keep Y vars works", {
  jn <- joyn(x2,
              y2,
              by = "id",
              keep_common_vars = TRUE)

  inames <- intersect(names(x2), names(y2))
  inames <- inames[!(inames %in% "id")]

  inames <- paste0(inames, ".y")

  expect_true(all(inames  %in% names(jn)))

})


test_that("error when there is not natural join", {
  xx1 <- copy(x1)
  setnames(xx1, "id", "foo")
  expect_error(joyn(xx1, y1))
})


test_that("different names in key vars are working fine", {

  df <- joyn(x4, y4, by = c("id1 = id", "id2"),
             match_type = "m:1",
             y_vars_to_keep = c("y"),
             sort = TRUE,
             reporttype =  "character")

  dd <- data.table(id1 = c(1, 1, 2, 2, 3, 3, 5, 6),
                   id2 = c(1, 1, 2, 1, 3, 4, 2, 3),
                   t = c(1L, 2L, 1L, NA, 2L, NA, NA, NA),
                   x = c(16, 12, NA, NA, NA, 15, NA, NA),
                   y = c(11L, 11L, NA, 15L, NA, 10L, 20L, 13L),
                   ".joyn" = c("x & y", "x & y", "x", "y", "x", "x & y", "y", "y")
                   )

  setorderv(dd, c("id1", "id2"), na.last = TRUE)
  setattr(df, 'sorted', c("id1", "id2"))
  setattr(dd, 'sorted', c("id1", "id2"))


  expect_equal(df, dd)

})


test_that("invalid names are changed", {

  dd <- joyn(x1, y1, reportvar = "_report", match_type = "m:1")
  expect_true("X_report"  %in% names(dd))

})


test_that("convert to data.table when dataframe", {

  yy1 <- as.data.frame(y1)

  expect_equal(joyn(x1, yy1, by = "id", match_type = "m:1"), joyn(x1, y1, match_type = "m:1"))

})


test_that("do not convert to data.table", {
  xx1 <- as.data.frame(x1)
  expect_equal(joyn(xx1, y1, match_type = "m:1") |> class(), xx1 |> class())
})

# Check return table is of the same class as x
test_that("output table class", {
  out <- joyn(x2, y2)

  class(out) |>
    expect_equal(class(x2))
})

# Test anti-join
#_________________________________
test_that("joyn's how = anti works as expected", {

  r <- joyn(x          = x1,
            y          = y1,
            match_type = "m:1",
            by         = "id",
            keep       = "anti",
            sort       = TRUE)

  expect_true(funique(r$`.joyn`) == "x")
  expect_equal(names(r),
               c(names(x1), ".joyn"))
  expect_equal(r$id,
               c(3, NA_real_))
  rn <- names(joyn(x              = x1,
                   y              = y1,
                   match_type     = "m:1",
                   by             = "id",
                   keep           = "anti",
                   y_vars_to_keep = TRUE,
                   sort           = TRUE))
  expect_false(all(names(r) == rn[1:length(names(r))]))

  # m:m anti joins
  r <- joyn(x              = x4,
            y              = y4[!id2 == 3,],
            match_type     = "m:m",
            by             = c("id1 = id2"),
            keep           = "anti",
            y_vars_to_keep = TRUE,
            sort = TRUE)

  expect_true(allNA(r$y))
  expect_true(all(r$id1 == 3))
  expect_true(all(r$.joyn == "x"))


})

test_that("anti join warning for update values", {

  expect_message(
    r <- joyn(x          = x1,
              y          = y1,
              match_type = "m:1",
              by         = "id",
              keep       = "anti",
              update_values = TRUE,
              verbose = TRUE))

  r2 <- joyn(x          = x1,
             y          = y1,
             match_type = "m:1",
             by         = "id",
             keep       = "anti",
             update_values = FALSE)

  expect_equal(r,
               r2)


})


# Test all input data is unchanged


test_that("joyn() - input data unchanged", {

  expect_equal(x1,
               data.table(id = c(1L, 1L, 2L, 3L, NA_integer_),
                          t  = c(1L, 2L, 1L, 2L, NA_integer_),
                          x  = 11:15))

  expect_equal(y1,
               data.table(id = c(1,2, 4),
                          y  = c(11L, 15L, 16)))

  expect_equal(x2,
               data.table(id = c(1, 4, 2, 3, NA),
                          t  = c(1L, 2L, 1L, 2L, NA_integer_),
                          x  = c(16, 12, NA, NA, 15)))

  expect_equal(y2,
               data.table(id = c(1, 2, 5, 6, 3),
                          yd = c(1, 2, 5, 6, 3),
                          y  = c(11L, 15L, 20L, 13L, 10L),
                          x  = c(16:20)))

  expect_equal(x3,
               data.table(id  = c("c","b", "d"),
                          v   = 8:10,
                          foo = c(4,2, 7)))

  expect_equal(y3,
               data.table(id = c("c","b", "c", "a"),
                          y  = c(11L, 15L, 18L, 20L)))

  expect_equal(x4,
               data.table(id1 = c(1, 1, 2, 3, 3),
                          id2 = c(1, 1, 2, 3, 4),
                          t   = c(1L, 2L, 1L, 2L, NA_integer_),
                          x   = c(16, 12, NA, NA, 15)))

  expect_equal(y4,
               data.table(id  = c(1, 2, 5, 6, 3),
                          id2 = c(1, 1, 2, 3, 4),
                          y   = c(11L, 15L, 20L, 13L, 10L),
                          x   = c(16:20)))

  expect_equal(x5,
               data.table(id = c(1, 2, 5, 6, 3),
                          yd = c(1, 2, 5, 6, 3),
                          y  = c(11L, 15L, 20L, 13L, 10L),
                          x  = c(16:18, NA, NA)))

})


test_that("update_values still filters rows for left joins", {

  x <- data.frame(id = 1:5,
                  a  = 1:5,
                  b  = 2:6)
  y <- data.frame(id = c(1, 11, 2, 12, 5),
                  a  = 11:15,
                  b  = 12:16)

  # for data.table
  j1 <- joyn(x             = qDT(x),
             y             = qDT(y),
             by            = "id",
             keep          = "left",
             update_values = TRUE)

  j2 <- joyn(x             = x,
             y             = y,
             by            = "id",
             keep          = "left",
             update_values = TRUE)

  expect_true(all(j1$id %in% x$id))
  expect_true(all(j2$id %in% x$id))

})
randrescastaneda/joyn documentation built on Dec. 20, 2024, 6:51 a.m.