describe("decompose", {
it("returns valid databases", {
forall(
gen_df(6, 7),
dup %>>%
(onRight(
with_args(discover, accuracy = 1) %>>%
normalise
)) %>>%
uncurry(decompose) %>>%
is_valid_database
)
})
it("removes extraneous dependencies", {
df <- data.frame(a = integer(), b = integer(), c = integer())
schema <- relation_schema(
list(a = list(c("a", "b", "c"), list("a"))),
attrs_order = c("a", "b", "c")
) |>
database_schema(references = list())
db <- decompose(df, schema)
expect_identical(
db,
database(
relation(
list(a = list(
df = df,
keys = list("a")
)),
attrs_order = c("a", "b", "c")
),
references = list()
)
)
})
it("resolves a simple bijection with no splits", {
df <- data.frame(a = integer(), b = integer())
schema <- relation_schema(
list(a = list(c("a", "b"), list("a", "b"))),
attrs_order = c("a", "b")
) |>
database_schema(references = list())
db <- decompose(df, schema)
expect_identical(
db,
database(
relation(list(a = list(df = df, keys = list("a", "b"))), c("a", "b")),
list()
)
)
})
it("correctly splits example data.frame for original make_indexes() test", {
df <- data.frame(
id = c(
0, 1, 2, 3, 4,
5, 6, 7, 8, 9
),
month = c(
'dec', 'dec', 'jul', 'jul', 'dec',
'jul', 'jul', 'jul', 'dec', 'jul'
),
hemisphere = c(
'N', 'N', 'N', 'N', 'S',
'S', 'S', 'S', 'S', 'N'
),
is_winter = c(
TRUE, TRUE, FALSE, FALSE, FALSE,
TRUE, TRUE, TRUE, FALSE, FALSE
)
)
schema <- relation_schema(
list(
id = list(c("id", "month", "hemisphere"), list("id")),
month_hemisphere = list(
c("month", "hemisphere", "is_winter"),
list(
c("month", "hemisphere"),
c("month", "is_winter"),
c("hemisphere", "is_winter")
)
)
),
attrs_order = c("id", "month", "hemisphere", "is_winter")
) |>
database_schema(
references = list(list(
"id",
c("month", "hemisphere"),
"month_hemisphere",
c("month", "hemisphere")
))
)
new_db <- decompose(df, schema)
expected_db <- database(
relation(
list(
id = list(
df = df[, c("id", "month", "hemisphere")],
keys = list("id")
),
month_hemisphere = list(
df = data.frame(
month = c("dec", "jul", "dec", "jul"),
hemisphere = c("N", "N", "S", "S"),
is_winter = c(TRUE, FALSE, FALSE, TRUE),
row.names = c(1L, 3L, 5:6)
),
keys = list(
c("month", "hemisphere"),
c("month", "is_winter"),
c("hemisphere", "is_winter")
)
)
),
attrs_order = c("id", "month", "hemisphere", "is_winter")
),
references = list(list(
"id",
c("month", "hemisphere"),
"month_hemisphere",
c("month", "hemisphere")
))
)
expect_identical(new_db, expected_db)
})
it("removes transitive references", {
df <- data.frame(
a = 1L,
b = 1L,
c = 1L,
d = 1L,
e = 1L
)
schema <- relation_schema(
list(
a = list(c("a", "b", "c"), list("a")),
b_c = list(c("b", "c", "d"), list(c("b", "c"))),
b = list(c("b", "e"), list("b"))
),
attrs_order = c("a", "b", "c", "d", "e")
) |>
database_schema(
references = list(
list("a", c("b", "c"), "b_c", c("b", "c")),
list("b_c", "b", "b", "b")
)
)
new_db <- decompose(df, schema)
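# assumed intent (the original test stops at the decompose call): the
# decomposed database keeps only the schema's references, so the
# transitive a -> b reference is not introduced
expect_identical(references(new_db), references(schema))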
})
it("returns a error if data.frame doesn't satisfy FDs in the schema", {
add_id_attribute <- function(df) {
df2 <- cbind(df, a = seq.int(nrow(df)))
names(df2) <- make.names(names(df2), unique = TRUE)
df2
}
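# pick one discovered FD with a nonempty determinant set, plus an index
# into that determinant set; the chosen attribute is dropped later to
# produce an FD the data frame violates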
gen_fd_reduction_for_df <- function(df) {
true_fds <- discover(df, 1)
nonempty_detsets <- which(lengths(detset(true_fds)) > 0L)
if (length(nonempty_detsets) == 0)
return(gen.pure(list(df, NULL, NULL)))
gen.element(nonempty_detsets) |>
gen.with(\(index) true_fds[[index]]) |>
gen.and_then(\(fd) list(gen.pure(fd), gen.int(length(detset(fd))))) |>
gen.with(\(lst) c(list(df), lst))
}
gen_df_and_fd_reduction <- function(nrow, ncol) {
gen_df(nrow, ncol, minrow = 2L, mincol = 2L, remove_dup_rows = TRUE) |>
gen.with(add_id_attribute) |>
gen.and_then(gen_fd_reduction_for_df)
}
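# drop the chosen determinant attribute from the matching discovered FD,
# normalise the weakened FDs into a schema, and expect decompose() to
# report that df doesn't satisfy the schema's functional dependencies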
expect_decompose_error <- function(df, reduced_fd, removed_det) {
if (nrow(df) <= 1)
discard()
flat_deps <- discover(df, 1)
reduced_index <- match(reduced_fd, flat_deps)
if (is.na(reduced_index))
stop("reduced_fd doesn't exist")
reduced_deps <- unclass(flat_deps)
reduced_deps[[reduced_index]][[1]] <-
reduced_deps[[reduced_index]][[1]][-removed_det]
reduced_deps <- functional_dependency(reduced_deps, attrs_order(flat_deps))
schema <- normalise(reduced_deps)
expect_error(
decompose(df, schema),
paste0(
"\\A",
"df doesn't satisfy functional dependencies in schema:",
"(\\n\\{.*\\} -> .*)+",
"\\Z"
),
perl = TRUE
)
}
forall(
gen_df_and_fd_reduction(6, 7),
expect_decompose_error,
discard.limit = 10L,
curry = TRUE
)
})
it("returns a error if data.frame doesn't satisfy FKs in the schema", {
fac2char <- function(df) {
facs <- vapply(df, is.factor, logical(1))
df[, facs] <- lapply(df[, facs, drop = FALSE], as.character)
df
}
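# rewire each reference in the normalised schema to another relation's
# key of the same length, keeping only rewired references that the
# decomposed data actually violates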
gen_fk_reduction_for_df <- function(df) {
true_dbs <- normalise(discover(df, 1))
true_fks <- references(true_dbs)
true_fk_key_switch <- lapply(
true_fks,
\(fk) {
len <- length(fk[[2]])
new_tbs <- vapply(
keys(true_dbs),
\(ks) match(len, lengths(ks)),
integer(1)
)
valid_new_tbs <- new_tbs[
!is.na(new_tbs) &
names(new_tbs) != fk[[1]] &
names(new_tbs) != fk[[3]]
]
new_fks <- Map(
\(new_key_index, new_parent) {
new_fk <- c(
fk[1:2],
list(new_parent, keys(true_dbs)[[new_parent]][[new_key_index]])
)
stopifnot(length(new_fk) == 4)
if (!is.element(list(new_fk[[4]]), keys(true_dbs)[[new_fk[[3]]]]))
stop("argh")
new_fk
},
valid_new_tbs,
names(valid_new_tbs)
) |>
Filter(f = \(fk) !is.element(list(fk), true_fks)) |>
Filter(f = \(fk) {
length(remove_violated_references(
list(fk),
decompose(df, true_dbs)
)) == 0
})
new_fks
}
)
if (all(lengths(true_fk_key_switch) == 0))
return(gen.pure(list(df, NULL)))
gen.element(which(lengths(true_fk_key_switch) > 0)) |>
gen.and_then(\(index) list(
gen.pure(df),
gen.element(true_fk_key_switch[[index]]) |>
gen.with(\(new_fk) {
dbs <- true_dbs
references(dbs)[[index]] <- new_fk
dbs
})
))
}
gen_df_and_fk_reduction <- function(nrow, ncol) {
gen_df(nrow, ncol, minrow = 4L, mincol = 4L, remove_dup_rows = TRUE) |>
# convert factors to characters, since factors cause merge problems
# when combined with non-factor, non-character vectors whose values
# aren't in their level set
gen.with(fac2char) |>
gen.and_then(gen_fk_reduction_for_df)
}
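# decomposing with a rewired schema should fail, reporting the
# references the relations don't satisfy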
expect_fk_error <- function(df, dbs) {
if (nrow(df) <= 1 || is.null(dbs))
discard()
name_regexp <- "[\\w\\. ]+"
fk_half_regexp <- paste0(
name_regexp,
"\\.\\{", name_regexp, "(, ", name_regexp, ")*\\}"
)
expect_error(
decompose(df, dbs),
paste0(
"\\A",
"relations must satisfy references in schema:",
"(\\n", fk_half_regexp, " -> ", fk_half_regexp, ")+",
"\\Z"
),
perl = TRUE
)
}
forall(
gen_df_and_fk_reduction(6, 7),
expect_fk_error,
discard.limit = 200,
curry = TRUE
)
})
it("is equivalent to create >> insert for valid data", {
forall(
gen_df(6, 7, remove_dup_rows = TRUE) |>
gen.with(\(df) {
list(
df = df,
schema = normalise(discover(df, 1), remove_avoidable = TRUE)
)
}),
\(df, schema) {
expect_identical(
decompose(df, schema),
create(schema) |> insert(df)
)
},
curry = TRUE
)
})
describe("Dependencies", {
it("DepDF", {
df <- data.frame(
player_name = integer(),
jersey_num = integer(),
team = integer(),
city = integer(),
state = integer()
)
schema <- relation_schema(
list(
player_name_jersey_num = list(
c("player_name", "jersey_num", "team"),
list(
c("player_name", "jersey_num"),
c("player_name", "team"),
c("team", "jersey_num")
)
),
city = list(
c("city", "state"),
list("city", "state")
),
team = list(
c("team", "city"),
list("team")
)
),
attrs_order = c("player_name", "jersey_num", "team", "city", "state")
) |>
database_schema(
references = list(
list("player_name_jersey_num", "team", "team", "team"),
list("team", "city", "city", "city")
)
)
db <- decompose(df, schema)
expect_identical(length(db), 3L)
expected_db <- database(
relation(
list(
player_name_jersey_num = list(
df = data.frame(
player_name = integer(),
jersey_num = integer(),
team = integer()
),
keys = list(
c("player_name", "jersey_num"),
c("player_name", "team"),
c("jersey_num", "team")
)
),
city = list(
df = data.frame(
city = integer(),
state = integer()
),
keys = list("city", "state")
),
team = list(
df = data.frame(
team = integer(),
city = integer()
),
keys = list("team")
)
),
attrs_order = c("player_name", "jersey_num", "team", "city", "state")
),
references = list(
list("player_name_jersey_num", "team", "team", "team"),
list("team", "city", "city", "city")
)
)
expect_identical(db, expected_db)
})
})
it("correctly handles attributes with non-df-standard names", {
df <- data.frame(1:3, c(1, 1, 2), c(1, 2, 2)) |>
stats::setNames(c("A 1", "B 2", "C 3"))
schema <- relation_schema(
list(
`A 1` = list(
c("A 1", "B 2", "C 3"),
list("A 1", c("B 2", "C 3"))
)
),
attrs_order = c("A 1", "B 2", "C 3")
) |>
database_schema(references = list())
db <- decompose(df, schema)
expect_setequal(attrs(db)[[1]], c("A 1", "B 2", "C 3"))
})
it("links added key relations", {
df <- data.frame(
a = c(1L, 2L, 1L, 2L),
b = c(1L, 2L, 1L, 2L),
c = c(1L, 1L, 2L, 2L)
)
schema <- relation_schema(
list(
a = list(c("a", "b"), list("a", "b")),
a_c = list(c("a", "c"), list(c("a", "c")))
),
attrs_order = c("a", "b", "c")
) |>
database_schema(references = list(list("a_c", "a", "a", "a")))
db <- decompose(df, schema)
expect_identical(
records(db)$a_c,
data.frame(a = 1:2, c = rep(1:2, each = 2), row.names = 1:4)
)
expect_identical(
keys(db)$a_c,
list(c("a", "c"))
)
})
})
describe("create_insert", {
it("is equivalent to create >> insert", {
forall(
gen_df(6, 7, remove_dup_rows = TRUE) |>
gen.with(\(df) list(
df = df,
schema = synthesise(discover(df, 1))
)),
expect_biidentical(
uncurry(create_insert),
onRight(create) %>>% rev %>>% uncurry(insert)
)
)
})
})