describe("rejoin", {
it("is left-inverse for lossless full-dep database creation, outside of row permutations, for tables with unique rows", {
autodb_inverted_by_rejoin <- expect_bi(
with_args(df_equiv, digits = NA),
with_args(autodb, ensure_lossless = TRUE) %>>%
rejoin,
identity
)
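# the two degenerate zero-column tables: dum has no rows, dee has a single row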
table_dum <- data.frame()
table_dee <- data.frame(a = 1)[, -1, drop = FALSE]
autodb_inverted_by_rejoin(table_dum)
autodb_inverted_by_rejoin(table_dee)
# 6 columns allows for interesting cases, such as a table containing two
# independent ones, or a reference involving several attributes
forall(
gen_df(6, 7, remove_dup_rows = TRUE),
autodb_inverted_by_rejoin
)
forall(
gen_df(6, 7, remove_dup_rows = FALSE),
expect_bi(
with_args(df_equiv, digits = NA),
with_args(autodb, ensure_lossless = TRUE) %>>%
rejoin,
df_unique
)
)
})
it("is possible for any database constructed from a data frame", {
forall(
gen.choice(0, 10) |>
gen.list(of = 2) |>
gen.and_then(uncurry(\(rows, cols) {
gen_df(rows, cols, remove_dup_rows = TRUE)
})) |>
gen.with(with_args(autodb, ensure_lossless = TRUE)),
rejoin %>>%
with_args(try, silent = TRUE) %>>%
with_args(expect_s3_class, class = "data.frame")
)
})
it("handles databases with zero-column relations properly", {
db <- database(
relation(
list(
constants = list(
df = data.frame(a = 1L)[, FALSE, drop = FALSE],
keys = list(character())
),
a = list(df = data.frame(a = 1L), keys = list("a"))
),
"a"
),
list()
)
expect_identical(rejoin(db), data.frame(a = 1L))
})
it("returns an error if any attributes aren't present in relations", {
expect_error(
rejoin(database(
relation(setNames(list(), character()), "a"),
list()
)),
"^database is not lossless: attributes in attrs_order not present in relations\\na$"
)
expect_error(
rejoin(database(
relation(setNames(list(), character()), c("a", "b")),
list()
)),
"^database is not lossless: attributes in attrs_order not present in relations\\na, b$"
)
})
it("returns an error if rejoining fails, with best joined attribute sets", {
expect_error(
rejoin(database(
relation(
list(
a = list(df = data.frame(a = c(FALSE, TRUE)), keys = list("a")),
b = list(df = data.frame(b = c(FALSE, TRUE)), keys = list("b"))
),
c("a", "b")
),
list()
)),
"^database can not be fully rejoined\\nbest joined sets:\\na\\nb$"
)
# the case below doesn't return {c} as a set, since {b, c} already covers it.
expect_error(
rejoin(database(
relation(
list(
a = list(
df = data.frame(a = c(FALSE, TRUE), b = c(FALSE, TRUE)),
keys = list("a")
),
c = list(
df = data.frame(c = 1:4),
keys = list("c")
),
b_c = list(
df = data.frame(b = c(FALSE, TRUE), c = 1:4),
keys = list(c("b", "c"))
),
b_c.1 = list(
df = data.frame(b = c(FALSE, TRUE), c = 1:4),
keys = list(c("b", "c"))
)
),
c("a", "b", "c")
),
list()
)),
"^database can not be fully rejoined\\nbest joined sets:\\na, b\\nb, c$"
)
# sorts set contents, removes repeated sets
expect_error(
rejoin(database(
relation(
list(
a = list(
df = data.frame(a = logical()),
keys = list("a")
),
d = list(
df = data.frame(d = logical(), c = logical()),
keys = list("d")
),
b_d = list(
df = data.frame(b = logical(), d = logical()),
keys = list(c("d"))
)
),
c("a", "b", "c", "d")
),
list()
)),
"^database can not be fully rejoined\\nbest joined sets:\\na\\nb, c, d$"
)
})
it("properly merges tables with close floating-point values (no duplicates)", {
# The motivating cause of spurious duplicates on re-merges is that
# merge.data.frame works differently if there is more than one "by" column:
# in this case, it creates a grouping vector by pasting all the values in a
# row together, with "\r" as a separator. This implicitly turns floats into
# characters, i.e. represents them using only 15 significant figures. Since
# going above 15 significant figures results in behaviour that varies across
# machines (see print.default), we have to test this behaviour without
# rounding, so digits = NA.
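# For illustration (not exercised by the test): with multiple "by" columns,
# merge.data.frame builds its row keys with something like
#   do.call(paste, c(key_columns, sep = "\r")),
# and paste() renders doubles with at most 15 significant figures, so two
# distinct doubles that differ only beyond the 15th significant figure (such as
# the first two values of b below) can end up with identical key strings and be
# treated as matching.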
x <- data.frame(
a = c(FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, TRUE),
b = c(
2.69353840461766669279,
2.69353840461766669279,
2.69353840461766713688,
2.69353840461766713688,
3.74416921885076714460,
5.50801230419353693435,
3.72990161259524111159,
5.50801230419353693435
),
c = c(
-3.19131542233864262670,
-3.19131542233864262670,
-2.87397721325655908231,
-1.98786466514381765514,
-3.33190719804050772268,
-3.33190719804050772268,
-3.33190719804050772268,
-3.33190719804050772268
),
d = c(
2.56310969849146363941,
2.56310969849146363941,
2.56310969849146363941,
2.56310969849146363941,
2.94505568569789533129,
4.88640238864414033770,
2.94505568569789533129,
4.88640238864414211406
),
e = 1:8
)
# rows are unique, even when compared at only 1 significant figure, so no rows
# can be considered duplicates on any machine.
# abce[e].{a,b} -> abd[ab,bd].{a,b},
# abcd[acd].{a,b} -> abd[ab,bd].{a,b}
# abcd[acd].{b,d} -> abd[ab,bd].{b,d}
# the merge duplicates occur when joining e and ab along the former
# reference.
db <- autodb(x, digits = NA)
expect_silent(y <- rejoin(db))
expect_true(df_equiv(y, x, digits = NA))
})
})