tests/testthat/test-deconstruct.R

# TODO: can probably be deleted once this feature branch is ready for a merge

# helpers ----------------------------------

test_that("`new_fks_in()` generates expected tibble", {
  expect_snapshot({
    new_fks_in(
      child_uuid = "flights-uuid",
      child_fk_cols = new_keys(list(list("origin", "dest"))),
      parent_key_cols = new_keys(list(list("faa")))
    )
  })
})

test_that("`new_fks_out()` generates expected tibble", {
  expect_snapshot({
    new_fks_out(
      child_fk_cols = new_keys(list(list("origin", "dest"))),
      parent_uuid = "airports-uuid",
      parent_key_cols = new_keys(list(list("faa")))
    )
  })
})

test_that("`new_keyed_tbl()` generates expected output", {
  expect_snapshot({
    dm <- dm_nycflights13(cycle = TRUE)

    # should look similar to `dm_get_all_fks_impl(dm, "airports")`
    keyed_tbl <- new_keyed_tbl(
      x = dm$airports,
      pk = "faa",
      fks_in = new_fks_in(
        child_uuid = "flights-uuid",
        child_fk_cols = new_keys(list("origin", "dest")),
        parent_key_cols = new_keys(list("faa"))
      ),
      fks_out = new_fks_out(
        child_fk_cols = new_keys(list("origin", "dest")),
        parent_uuid = "airports-uuid",
        parent_key_cols = new_keys(list("faa"))
      ),
      uuid = "0a0c060f-0d01-0b03-0402-05090800070e"
    )

    keyed_get_info(keyed_tbl)
  })

  expect_equal(dm$airports, keyed_tbl, ignore_attr = TRUE)
})

test_that("dm_get_keyed_tables_impl()", {
  withr::local_seed(20220715)

  expect_snapshot({
    dm_nycflights13(cycle = TRUE) %>%
      dm_get_keyed_tables_impl() %>%
      map(keyed_get_info)
  })
})



test_that("`new_keyed_tbl()` formatting", {
  local_options(
    pillar.min_title_chars = NULL,
    pillar.max_title_chars = NULL,
    pillar.max_footer_lines = NULL,
    pillar.bold = NULL,
  )

  expect_snapshot({
    keyed_tbl_impl(dm_nycflights13(cycle = TRUE), "flights")
    keyed_tbl_impl(dm_nycflights13(cycle = TRUE), "airports")
    keyed_tbl_impl(dm_nycflights13(cycle = TRUE), "airlines")
  })
})

# subsetting ----------------------------------

test_that("both subsetting operators for `dm` produce the same object", {
  dm <- dm_nycflights13(cycle = TRUE)

  expect_equal(dm$airlines, dm[["airlines"]])
  expect_equal(dm[[1]], dm[["airlines"]])
})

test_that("subsetting `dm` produces `dm_keyed_tbl` objects", {
  dm <- dm_nycflights13(cycle = TRUE)

  skip("keyed = TRUE")

  expect_s3_class(dm$airlines, "dm_keyed_tbl")
  expect_s3_class(dm[[1]], "dm_keyed_tbl")
  expect_s3_class(dm[["airlines"]], "dm_keyed_tbl")
})

# constructors ----------------------------------

test_that("`dm()` and `new_dm()` can handle a list of `dm_keyed_tbl` objects", {
  dm <- dm_nycflights13(cycle = TRUE)

  y1 <- keyed_tbl_impl(dm, "weather") %>%
    mutate() %>%
    select(everything())
  y2 <- keyed_tbl_impl(dm, "airports") %>%
    mutate() %>%
    select(everything())

  expect_s3_class(y1, "dm_keyed_tbl")
  expect_s3_class(y2, "dm_keyed_tbl")

  dm_output <- dm(d1 = y1, d2 = y2)
  expect_s3_class(dm_output, "dm")

  new_dm_output <- new_dm(list(d1 = y1, d2 = y2))
  expect_s3_class(new_dm_output, "dm")

  # there shouldn't be any keys
  expect_snapshot(tbl_sum(keyed_tbl_impl(dm_output, "d1")))
  expect_snapshot(tbl_sum(keyed_tbl_impl(dm_output, "d2")))
  expect_snapshot(tbl_sum(keyed_tbl_impl(new_dm_output, "d1")))
  expect_snapshot(tbl_sum(keyed_tbl_impl(new_dm_output, "d2")))
})

test_that("`dm()` and `new_dm()` can handle a mix of tables and `dm_keyed_tbl` objects", {
  dm <- dm_nycflights13(cycle = TRUE)

  y1 <- keyed_tbl_impl(dm, "weather") %>%
    mutate() %>%
    select(everything())
  y2 <- dm$airports

  expect_s3_class(y1, "dm_keyed_tbl")
  expect_s3_class(y2, "tbl_df")

  dm_output <- dm(d1 = y1, d2 = y2)
  expect_s3_class(dm_output, "dm")

  new_dm_output <- new_dm(list(d1 = y1, d2 = y2))
  expect_s3_class(new_dm_output, "dm")

  # there shouldn't be any keys
  expect_snapshot(tbl_sum(keyed_tbl_impl(dm_output, "d1")))
  expect_snapshot(tbl_sum(keyed_tbl_impl(dm_output, "d2")))
  expect_snapshot(tbl_sum(keyed_tbl_impl(new_dm_output, "d1")))
  expect_snapshot(tbl_sum(keyed_tbl_impl(new_dm_output, "d2")))
})

test_that("`dm()` handles missing key column names gracefully", {
  dm <-
    dm(x = tibble(a = 1, b = 1), y = tibble(a = 1, b = 1)) %>%
    dm_add_pk(y, c(a, b)) %>%
    dm_add_fk(x, c(a, b), y)

  keyed <-
    dm %>%
    dm_get_tables(keyed = TRUE)

  expect_snapshot({
    dm(x = keyed$x["b"], y = keyed$y) %>%
      dm_paste()
    dm(x = keyed$x, y = keyed$y["b"]) %>%
      dm_paste()
  })
})

# joins ----------------------------------

test_that("keyed_by()", {
  withr::local_seed(20220715)

  dm <-
    dm(x = tibble(a = 1), y = tibble(b = 1)) %>%
    dm_add_pk(y, b) %>%
    dm_add_fk(x, a, y)

  x <- keyed_tbl_impl(dm, "x")
  y <- keyed_tbl_impl(dm, "y")

  expect_snapshot({
    keyed_by(x, y)
    keyed_by(y, x)
  })
})

test_that("joins without child PK", {
  withr::local_seed(20220715)

  dm <-
    dm(x = tibble(a = 1), y = tibble(b = 1)) %>%
    dm_add_pk(y, b) %>%
    dm_add_fk(x, a, y)

  x <- keyed_tbl_impl(dm, "x")
  y <- keyed_tbl_impl(dm, "y")

  expect_snapshot({
    keyed_build_join_spec(x, y) %>%
      to_snapshot_json()
    dm(x, y, r = left_join(x, y)) %>%
      dm_paste(options = c("select", "keys"))
    keyed_build_join_spec(y, x) %>%
      to_snapshot_json()
    dm(x, y, r = left_join(y, x)) %>%
      dm_paste(options = c("select", "keys"))
  })
})

test_that("joins with other child PK", {
  withr::local_seed(20220715)

  dm <-
    dm(x = tibble(a = 1, c = 1), y = tibble(b = 1)) %>%
    dm_add_pk(x, c) %>%
    dm_add_pk(y, b) %>%
    dm_add_fk(x, a, y)

  x <- keyed_tbl_impl(dm, "x")
  y <- keyed_tbl_impl(dm, "y")

  expect_snapshot({
    keyed_build_join_spec(x, y) %>%
      to_snapshot_json()
    dm(x, y, r = left_join(x, y)) %>%
      dm_paste(options = c("select", "keys"))
    keyed_build_join_spec(y, x) %>%
      to_snapshot_json()
    dm(x, y, r = left_join(y, x)) %>%
      dm_paste(options = c("select", "keys"))
  })
})

test_that("joins with other child PK and name conflict", {
  withr::local_seed(20220715)

  dm <-
    dm(x = tibble(a = 1, b = 1), y = tibble(b = 1)) %>%
    dm_add_pk(x, b) %>%
    dm_add_pk(y, b) %>%
    dm_add_fk(x, a, y)

  x <- keyed_tbl_impl(dm, "x")
  y <- keyed_tbl_impl(dm, "y")

  expect_snapshot({
    keyed_build_join_spec(x, y) %>%
      to_snapshot_json()
    dm(x, y, r = left_join(x, y)) %>%
      dm_paste(options = c("select", "keys"))
    keyed_build_join_spec(y, x) %>%
      to_snapshot_json()
    dm(x, y, r = left_join(y, x)) %>%
      dm_paste(options = c("select", "keys"))
  })
})

test_that("joins with same child PK", {
  withr::local_seed(20220715)

  dm <-
    dm(x = tibble(a = 1), y = tibble(b = 1)) %>%
    dm_add_pk(x, a) %>%
    dm_add_pk(y, b) %>%
    dm_add_fk(x, a, y)

  x <- keyed_tbl_impl(dm, "x")
  y <- keyed_tbl_impl(dm, "y")

  expect_snapshot({
    keyed_build_join_spec(x, y) %>%
      to_snapshot_json()
    dm(x, y, r = left_join(x, y)) %>%
      dm_paste(options = c("select", "keys"))
    keyed_build_join_spec(y, x) %>%
      to_snapshot_json()
    dm(x, y, r = left_join(y, x)) %>%
      dm_paste(options = c("select", "keys"))
  })
})

test_that("joins with same child PK and same name", {
  withr::local_seed(20220715)

  dm <-
    dm(x = tibble(b = 1), y = tibble(b = 1)) %>%
    dm_add_pk(x, b) %>%
    dm_add_pk(y, b) %>%
    dm_add_fk(x, b, y)

  x <- keyed_tbl_impl(dm, "x")
  y <- keyed_tbl_impl(dm, "y")

  expect_snapshot({
    keyed_build_join_spec(x, y) %>%
      to_snapshot_json()
    dm(x, y, r = left_join(x, y)) %>%
      dm_paste(options = c("select", "keys"))
    keyed_build_join_spec(y, x) %>%
      to_snapshot_json()
    dm(x, y, r = left_join(y, x)) %>%
      dm_paste(options = c("select", "keys"))
  })
})

test_that("joins with other FK from parent", {
  withr::local_seed(20220715)

  dm <-
    dm(x = tibble(a = 1), y = tibble(b = 1, c = 1), z = tibble(c = 1)) %>%
    dm_add_pk(x, a) %>%
    dm_add_pk(y, b) %>%
    dm_add_fk(x, a, y) %>%
    dm_add_fk(y, c, z, c)

  x <- keyed_tbl_impl(dm, "x")
  y <- keyed_tbl_impl(dm, "y")
  z <- keyed_tbl_impl(dm, "z")

  expect_snapshot({
    keyed_build_join_spec(x, y) %>%
      to_snapshot_json()
    dm(x, y, z, r = left_join(x, y)) %>%
      dm_paste(options = c("select", "keys"))
    keyed_build_join_spec(y, x) %>%
      to_snapshot_json()
    dm(x, y, z, r = left_join(y, x)) %>%
      dm_paste(options = c("select", "keys"))
  })
})

test_that("joins with other FK from parent and name conflict", {
  withr::local_seed(20220715)

  dm <-
    dm(x = tibble(a = 1), y = tibble(b = 1, a = 1), z = tibble(a = 1)) %>%
    dm_add_pk(x, a) %>%
    dm_add_pk(y, b) %>%
    dm_add_fk(x, a, y) %>%
    dm_add_fk(y, a, z, a)

  x <- keyed_tbl_impl(dm, "x")
  y <- keyed_tbl_impl(dm, "y")
  z <- keyed_tbl_impl(dm, "z")

  expect_snapshot({
    keyed_build_join_spec(x, y) %>%
      to_snapshot_json()
    dm(x, y, z, r = left_join(x, y)) %>%
      dm_paste(options = c("select", "keys"))
    keyed_build_join_spec(y, x) %>%
      to_snapshot_json()
    dm(x, y, z, r = left_join(y, x)) %>%
      dm_paste(options = c("select", "keys"))
  })
})

test_that("joins with other FK from child", {
  withr::local_seed(20220715)

  dm <-
    dm(x = tibble(a = 1, c = 1), y = tibble(b = 1), z = tibble(c = 1)) %>%
    dm_add_pk(x, a) %>%
    dm_add_pk(y, b) %>%
    dm_add_fk(x, a, y) %>%
    dm_add_fk(x, c, z, c)

  x <- keyed_tbl_impl(dm, "x")
  y <- keyed_tbl_impl(dm, "y")
  z <- keyed_tbl_impl(dm, "z")

  expect_snapshot({
    keyed_build_join_spec(x, y) %>%
      to_snapshot_json()
    dm(x, y, z, r = left_join(x, y)) %>%
      dm_paste(options = c("select", "keys"))
    keyed_build_join_spec(y, x) %>%
      to_snapshot_json()
    dm(x, y, z, r = left_join(y, x)) %>%
      dm_paste(options = c("select", "keys"))
  })
})

test_that("joins with other FK from child and name conflict", {
  withr::local_seed(20220715)

  dm <-
    dm(x = tibble(a = 1, b = 1), y = tibble(b = 1), z = tibble(b = 1)) %>%
    dm_add_pk(x, a) %>%
    dm_add_pk(y, b) %>%
    dm_add_fk(x, a, y) %>%
    dm_add_fk(x, b, z, b)

  x <- keyed_tbl_impl(dm, "x")
  y <- keyed_tbl_impl(dm, "y")
  z <- keyed_tbl_impl(dm, "z")

  expect_snapshot({
    keyed_build_join_spec(x, y) %>%
      to_snapshot_json()
    dm(x, y, z, r = left_join(x, y)) %>%
      dm_paste(options = c("select", "keys"))
    keyed_build_join_spec(y, x) %>%
      to_snapshot_json()
    dm(x, y, z, r = left_join(y, x)) %>%
      dm_paste(options = c("select", "keys"))
  })
})

test_that("left join works as expected with keyed tables", {
  withr::local_seed(20220717)

  local_options(
    pillar.min_title_chars = NULL,
    pillar.max_title_chars = NULL,
    pillar.max_footer_lines = NULL,
    pillar.bold = NULL,
  )

  expect_snapshot({
    dm <- dm_nycflights13()
    keyed_tbl_impl(dm, "weather") %>% left_join(keyed_tbl_impl(dm, "flights"), multiple = "all")
  })

  # results should be similar to zooming
  zd1 <-
    dm %>%
    dm_rename(weather, year.weather = year) %>%
    dm_rename(weather, month.weather = month) %>%
    dm_rename(weather, day.weather = day) %>%
    dm_rename(weather, hour.weather = hour) %>%
    dm_rename(flights, year.flights = year) %>%
    dm_rename(flights, month.flights = month) %>%
    dm_rename(flights, day.flights = day) %>%
    dm_rename(flights, hour.flights = hour) %>%
    dm_zoom_to(weather) %>%
    left_join(flights, multiple = "all")

  zd2 <-
    dm %>%
    dm_rename(flights, year.flights = year) %>%
    dm_rename(flights, month.flights = month) %>%
    dm_rename(flights, day.flights = day) %>%
    dm_rename(flights, hour.flights = hour) %>%
    dm_rename(weather, year.weather = year) %>%
    dm_rename(weather, month.weather = month) %>%
    dm_rename(weather, day.weather = day) %>%
    dm_rename(weather, hour.weather = hour) %>%
    dm_zoom_to(flights) %>%
    left_join(weather)

  jd1 <- keyed_tbl_impl(dm, "weather") %>% left_join(keyed_tbl_impl(dm, "flights"), multiple = "all")
  jd2 <- keyed_tbl_impl(dm, "flights") %>% left_join(keyed_tbl_impl(dm, "weather"))

  expect_equal(ncol(jd1), ncol(jd2))
  expect_equal(dim(zd2), dim(jd2))
})

# semi_join ----------------------------------

test_that("semi_join()", {
  withr::local_seed(20220720)

  local_options(
    pillar.min_title_chars = NULL,
    pillar.max_title_chars = NULL,
    pillar.max_footer_lines = NULL,
    pillar.bold = NULL,
  )

  dm <-
    dm(x = tibble(a = 1), y = tibble(b = 1)) %>%
    dm_add_pk(y, b) %>%
    dm_add_fk(x, a, y)

  x <- keyed_tbl_impl(dm, "x")
  y <- keyed_tbl_impl(dm, "y")

  expect_snapshot({
    dm(x, y, r = semi_join(x, y)) %>%
      dm_paste(options = c("select", "keys"))
    dm(x, y, r = semi_join(y, x)) %>%
      dm_paste(options = c("select", "keys"))
  })
})

# arrange ----------------------------------

test_that("arrange for keyed tables produces expected output", {
  dm <- dm_nycflights13(cycle = TRUE)

  expect_snapshot({
    keyed_tbl_impl(dm, "airlines") %>% arrange(desc(name))
  })
})

# group_by ----------------------------------

test_that("group_by for keyed tables produces expected output", {
  local_options(
    pillar.min_title_chars = NULL,
    pillar.max_title_chars = NULL,
    pillar.max_footer_lines = NULL,
    pillar.bold = NULL,
  )

  expect_snapshot({
    dm <- dm_nycflights13(cycle = TRUE)

    keyed_tbl_impl(dm, "flights") %>% group_by(month)

    keyed_tbl_impl(dm, "airports") %>% group_by(tzone)

    # grouping by the primary key works as well
    keyed_tbl_impl(dm, "airports") %>% group_by(faa)
  })
})

# summarize ----------------------------------

test_that("summarize for keyed tables produces expected output", {
  # FIXME: Brittle tests?
  local_options(dplyr.summarise.inform = FALSE)

  expect_snapshot({
    dm <- dm_nycflights13(cycle = TRUE)

    keyed_tbl_impl(dm, "airports") %>%
      summarise(mean_alt = mean(alt))

    keyed_tbl_impl(dm, "airports") %>%
      group_by(tzone, dst) %>%
      summarise(mean_alt = mean(alt))
  })
})


test_that("summarize for keyed tables produces same output as zooming", {
  local_options(
    pillar.min_title_chars = NULL,
    pillar.max_title_chars = NULL,
    pillar.max_footer_lines = NULL,
    pillar.bold = NULL,
  )

  dm <- dm_nycflights13(cycle = TRUE)

  z_summary <- dm %>%
    dm_zoom_to(flights) %>%
    group_by(month) %>%
    arrange(desc(day)) %>%
    summarize(avg_air_time = mean(air_time, na.rm = TRUE))

  k_summary <- keyed_tbl_impl(dm, "flights") %>%
    group_by(month) %>%
    arrange(desc(day)) %>%
    summarize(avg_air_time = mean(air_time, na.rm = TRUE))

  # zoomed and keyed approaches should provide same summaries
  expect_equal(dim(z_summary), dim(k_summary))
  expect_equal(z_summary$month, k_summary$month)
  expect_equal(z_summary$avg_air_time, k_summary$avg_air_time)
})

# reconstruction ----------------------------------

test_that("pks_df_from_keys_info()", {
  withr::local_seed(20220715)

  dm <- dm_nycflights13(cycle = TRUE)

  expect_snapshot({
    dm %>%
      dm_get_keyed_tables_impl() %>%
      pks_df_from_keys_info() %>%
      to_snapshot_json()
  })
})

test_that("uks_df_from_keys_info()", {
  withr::local_seed(20220715)

  dm <- dm_for_filter() %>%
    dm_add_uk(tf_5, l)

  expect_snapshot({
    dm %>%
      dm_get_keyed_tables_impl() %>%
      uks_df_from_keys_info() %>%
      to_snapshot_json()
  })
})

test_that("fks_df_from_keys_info()", {
  withr::local_seed(20220715)

  dm <- dm_nycflights13(cycle = TRUE)

  expect_snapshot({
    dm %>%
      dm_get_keyed_tables_impl() %>%
      fks_df_from_keys_info() %>%
      to_snapshot_json()
  })
})

test_that("primary and foreign keys survive the round trip", {
  dm <- dm_nycflights13(cycle = TRUE)
  tbl <- keyed_tbl_impl(dm, "weather")
  tbl_mutate <- tbl %>% select(everything())

  dm2 <- dm(
    weather = tbl_mutate,
    airlines = keyed_tbl_impl(dm, "airlines"),
    airports = keyed_tbl_impl(dm, "airports"),
    planes = keyed_tbl_impl(dm, "planes"),
    flights = keyed_tbl_impl(dm, "flights"),
  )

  original_def <- dm_get_def(dm) %>% arrange(table)
  new_def <- dm_get_def(dm2) %>% arrange(table)

  expect_equal(original_def$pks, new_def$pks)
  expect_equal(original_def$fks, new_def$fks)
})
krlmlr/dm documentation built on April 19, 2024, 5:23 p.m.