Nothing
# re-implementation of expect_no_error that hedgehog doesn't complain about
# (it doesn't register usage of expect_no_ as an expectation)
expect_errorless <- function(object) {
expect(
class(try(object))[1] != "try-error",
failure_message = "object returned an error"
)
}
is_valid_functional_dependency <- function(x) {
expect_s3_class(x, "functional_dependency")
attrs <- attrs_order(x)
expect_true(all(lengths(unclass(x)) == 2L))
expect_silent(dependant(x))
expect_true(all(lengths(dependant(x)) == 1L))
expect_true(all(vapply(detset(x), is.character, logical(1))))
lhs <- detset(x)
expect_true(all(is.element(unlist(x), attrs)))
expect_true(all(
mapply(\(dets, dep) !is.element(dep, dets), detset(x), dependant(x))
))
expect_true(all(vapply(
lhs,
\(detset) !is.unsorted(match(detset, attrs)),
logical(1)
)))
}
is_valid_minimal_functional_dependency <- function(x) {
is_valid_functional_dependency(x)
grouped <- split(detset(x), dependant(x))
expect_true(!any(
vapply(
grouped,
\(detsets) anyDuplicated(detsets) ||
any(outer(
detsets,
detsets,
Vectorize(\(d1, d2) {
both <- intersect(d1, d2)
!setequal(d1, d2) &&
(setequal(both, d1) || setequal(both, d2))
})
)),
logical(1)
)
))
}
is_valid_relation_schema <- function(x, unique = FALSE, single_empty_key = FALSE) {
expect_s3_class(x, "relation_schema")
expect_true(is.character(names(x)))
expect_true(!anyDuplicated(names(x)))
expect_true(all(nchar(names(x)) > 0L))
expect_true(all(lengths(unclass(x)) == 2))
attrs <- attrs(x)
keys <- keys(x)
key_els <- lapply(keys, \(ks) unique(unlist(ks)))
expect_identical(
Map(\(as, n) as[seq_len(n)], attrs, lengths(key_els)),
key_els
)
nonprime_attrs <- Map(
\(as, n) as[setdiff(seq_along(as), seq_len(n))],
attrs,
lengths(key_els)
)
expect_true(all(vapply(
keys,
\(ks) all(vapply(ks, \(k) !is.unsorted(match(k, attrs_order(x))), logical(1))),
logical(1)
)))
expect_true(all(vapply(
nonprime_attrs,
\(as) all(vapply(as, \(a) !is.unsorted(match(a, attrs_order(x))), logical(1))),
logical(1)
)))
expect_true(all(vapply(keys, Negate(anyDuplicated), logical(1))))
if (single_empty_key)
expect_lte(sum(vapply(keys, identical, logical(1), list(character()))), 1L)
if (unique) {
expect_true(!anyDuplicated(x))
implied_fds <- functional_dependency(
unlist(
Map(
\(ks, as) {
unlist(
lapply(ks, \(k) lapply(setdiff(as, k), \(a) list(k, a))),
recursive = FALSE
)
},
keys,
attrs
),
recursive = FALSE
),
attrs_order(x)
)
expect_true(!anyDuplicated(implied_fds))
}
}
is_valid_references <- function(
x,
same_attr_name = FALSE,
single_key_pairs = FALSE
) {
act <- quasi_label(rlang::enquo(x), arg = "x")
references <- references(x)
attrs <- attrs(x)
if (length(references) == 0L)
return(invisible(act$val))
# former condition is temporary until references are properly grouped
if (single_key_pairs && anyDuplicated(references))
fail(sprintf("%s has duplicate references", act$lab))
for (fk in references) {
if (!is(fk, "list"))
fail(sprintf(
"%s has non-list references",
act$lab
))
if (length(fk) != 4L)
fail(sprintf(
"%s has non-length-four references",
act$lab
))
if (!is.character(fk[[1]]))
fail(sprintf(
"%s has non-character reference child names",
act$lab
))
if (!is.character(fk[[2]]))
fail(sprintf(
"%s has non-character reference child attributes",
act$lab
))
if (!is.character(fk[[3]]))
fail(sprintf(
"%s has non-character reference parent names",
act$lab
))
if (!is.character(fk[[4]]))
fail(sprintf(
"%s has non-character reference parent attributes",
act$lab
))
if (!all(is.element(unlist(fk[c(1L, 3L)]), names(attrs))))
fail(sprintf(
"%s has references over non-present relation names",
act$lab
))
if (fk[[1]] == fk[[3]]) # no self-references, relax this?
fail(sprintf(
"%s has self-references in references",
act$lab
))
if (same_attr_name && !identical(fk[[2]], fk[[4]]))
fail(sprintf(
"%s has non-matching attribute names in references",
act$lab
))
if (anyDuplicated(fk[[2]]))
fail(sprintf(
"%s has references with non-unique child attribute names",
act$lab
))
if (anyDuplicated(fk[[4]]))
fail(sprintf(
"%s has references with non-unique parent attribute names",
act$lab
))
if (length(fk[[2]]) == 0L || length(fk[[4]]) == 0L)
fail(sprintf(
"%s has references with zero-length attribute sets",
act$lab
))
if (length(fk[[2]]) != length(fk[[4]]))
fail(sprintf(
"%s has references with different attribute set lengths",
act$lab
))
if (!all(is.element(fk[[2]], attrs[[fk[[1]]]])))
fail(sprintf(
"%s has invalid child attribute names in references",
act$lab
))
if (!all(is.element(fk[[4]], attrs[[fk[[3]]]])))
fail(sprintf(
"%s has invalid parent attribute names in references",
act$lab
))
}
if (single_key_pairs) {
relnames_df <- as.data.frame(do.call(
rbind,
lapply(references, \(r) unlist(r[c(1L, 3L)]))
))
if (anyDuplicated(relnames_df))
fail(sprintf(
"%s has reference pairs with multiple keys",
act$lab
))
}
invisible(act$val)
}
is_valid_database_schema <- function(
x,
unique = FALSE,
single_empty_key = FALSE,
same_attr_name = FALSE,
single_key_pairs = FALSE
) {
is_valid_relation_schema(x, unique, single_empty_key)
expect_s3_class(x, "database_schema")
is_valid_references(x, same_attr_name, single_key_pairs)
}
is_valid_relation <- function(x, unique = FALSE, single_empty_key = FALSE) {
expect_s3_class(x, "relation")
expect_true(is.character(names(x)))
expect_true(!anyDuplicated(names(x)))
expect_true(all(nchar(names(x)) > 0L))
rel_keys <- keys(x)
rel_key_els <- lapply(rel_keys, \(ks) unique(unlist(ks)))
rel_attrs <- attrs(x)
key_attrs_first <- mapply(
\(ks, as) identical(as[seq_along(ks)], ks),
rel_key_els,
rel_attrs
)
expect_true(all(key_attrs_first))
nonprime_attrs <- Map(
\(ks, as) as[-seq_along(ks)],
rel_key_els,
rel_attrs
)
expect_true(all(vapply(
rel_keys,
\(ks) all(vapply(ks, \(k) !is.unsorted(match(k, attrs_order(x))), logical(1))),
logical(1)
)))
expect_true(all(vapply(
nonprime_attrs,
\(as) all(vapply(as, \(a) !is.unsorted(match(a, attrs_order(x))), logical(1))),
logical(1)
)))
expect_true(all(vapply(rel_keys, Negate(anyDuplicated), logical(1))))
if (single_empty_key)
expect_lte(sum(vapply(rel_keys, identical, logical(1), list(character()))), 1L)
expect_true(all(mapply(
\(recs, ks) all(vapply(
ks,
\(k) !df_anyDuplicated(recs[, k, drop = FALSE]),
logical(1)
)),
records(x),
rel_keys
)))
if (unique) {
expect_true(!anyDuplicated(x))
implied_fds <- functional_dependency(
unlist(
Map(
\(ks, as) {
unlist(
lapply(ks, \(k) lapply(setdiff(as, k), \(a) list(k, a))),
recursive = FALSE
)
},
rel_keys,
rel_attrs
),
recursive = FALSE
),
attrs_order(x)
)
expect_true(!anyDuplicated(implied_fds))
}
}
is_valid_database <- function(
x,
unique = FALSE,
single_empty_key = FALSE,
same_attr_name = FALSE,
single_key_pairs = FALSE
) {
is_valid_relation(x, unique, single_empty_key)
expect_s3_class(x, "database")
fks <- references(x)
is_valid_references(x, same_attr_name, single_key_pairs)
recs <- records(x)
for (fk in fks) {
expect_true(identical(
nrow(records(x)[[fk[[1]]]]),
nrow(df_join(
recs[[fk[[1]]]][, fk[[2]], drop = FALSE],
recs[[fk[[3]]]][, fk[[4]], drop = FALSE],
by.x = fk[[2]],
by.y = fk[[4]]
))))
}
fk_children <- vapply(fks, "[[", character(1), 1L)
fk_parents <- vapply(fks, "[[", character(1), 3L)
fk_parent_sets <- split(fk_parents, fk_children)
children <- names(fk_parent_sets)
nonchildren <- setdiff(names(x), children)
}
expect_identical_unordered_table <- function(new, original) {
expect_true(df_equiv(new, original, digits = NA))
}
gen_df <- function(
nrow,
ncol,
minrow = 0L,
mincol = 0L,
remove_dup_rows = FALSE,
variant = c("data.frame", "tibble")
) {
asable_classes <- c("logical", "integer", "numeric", "character", "factor")
list(
gen.element(seq.int(min(mincol, ncol), ncol)) |>
gen.and_then(\(n) list(
classes = gen.element(asable_classes) |> gen.c(of = n),
nms = gen_attr_names(n, 9)
)),
n_records = gen.element(seq.int(min(minrow, nrow), nrow)),
variant = gen.element(variant)
) |>
gen.with(\(lst) c(lst[[1]], lst[2], list(remove_dup_rows = remove_dup_rows), lst[3])) |>
gen.and_then(uncurry(gen.df_fixed_ranges))
}
gen.df_fixed_ranges <- function(
classes,
nms,
n_records,
remove_dup_rows,
variant = c("data.frame", "tibble")
) {
variant <- match.arg(variant)
variant <- switch(
variant,
data.frame = identity,
tibble = with_args(tibble::as_tibble, .name_repair = "minimal")
)
as_fns <- list(
logical = as.logical,
integer = as.integer,
numeric = as.numeric,
character = as.character,
factor = with_args(factor, levels = c(FALSE, TRUE))
)
if (length(classes) == 0L)
return(
if (remove_dup_rows)
gen.pure(data.frame(a = NA)[rep(1L, min(n_records, 1L)), FALSE, drop = FALSE])
else
gen.pure(data.frame(a = NA)[rep(1L, n_records), FALSE, drop = FALSE])
)
lapply(
classes,
\(cl) {
# gen.sample only shrinks by reordering,
# and gen.c incorrectly returns NULL when size = 0,
# so we need to unlist "manually"
as_fns[[cl]](c(FALSE, TRUE, NA)) |>
gen.sample_resampleable(of = n_records)
}
) |>
gen.with(
with_args(setNames, nm = nms) %>>%
with_args(as.data.frame, check.names = FALSE) %>>%
(if (remove_dup_rows) unique else identity)
) |>
gen.with(variant)
}
gen_attr_name <- function(len) {
gen.sample_resampleable(c(letters, LETTERS, "_", " ", "."), to = len) |>
gen.and_then(\(chars) {
if (all(chars == " ")) {
gen.element(c(letters, "_", "."))
}else{
gen.pure(chars)
}
}) |>
gen.with(\(attr_name) paste(attr_name, collapse = ""))
}
gen_attr_names <- function(n, len) {
gen_attr_name(len) |>
gen.c(of = n) |>
# as.character for length-0 NULL value
gen.with(as.character %>>% make.unique)
}
gen_unique_dets <- function(n_attrs, n, max_dets) {
# should also check no redundancy
gen.subsequence(setdiff(seq_len(n_attrs), n)) |>
gen.list(from = 0, to = min(max_dets, n_attrs - 1)) |>
gen.with(unique)
}
gen_detset_lists <- function(n_attrs, max_dets) {
md <- min(max_dets, n_attrs - 1)
gen.structure(lapply(
seq_len(n_attrs),
function(n) {
gen_unique_dets(n_attrs, n, md)
}
))
}
gen_named_flat_deps_fixed_size <- function(attrs, n, max_detset_size, unique = TRUE) {
list(
gen.sample(attrs, n, replace = TRUE),
gen.sample(attrs, gen.element(0:max_detset_size)) |>
gen.list(of = n)
) |>
gen.with(\(lst) functional_dependency(
Map(\(x, y) list(setdiff(x, y), y), lst[[2]], lst[[1]]),
attrs,
unique = unique
))
}
gen_named_flat_deps <- function(
attrs,
max_detset_size,
from = 0L,
to = NULL,
of = NULL
) {
max_detset_size <- min(max_detset_size, length(attrs) - 1L)
(
if (missing(of) || is.null(of))
gen.element(seq.int(from, to))
else
gen.pure(of)
) |>
gen.and_then(\(m) gen_named_flat_deps_fixed_size(attrs, m, max_detset_size))
}
gen_flat_deps_fixed_names <- function(
n_attrs,
max_detset_size,
from = 0L,
to = NULL,
of = NULL
) {
attrs <- LETTERS[seq.int(n_attrs)]
gen_named_flat_deps(attrs, max_detset_size, from, to, of)
}
gen_flat_deps <- function(
n_attrs,
max_detset_size,
max_attr_nchar = 9,
from = 0L,
to = NULL,
of = NULL
) {
gen_attr_names(n_attrs, max_attr_nchar) |>
gen.and_then(\(attrs) gen_named_flat_deps(attrs, max_detset_size, from, to, of))
}
gen.keys <- function(attrs) {
gen.subsequence(attrs) |>
gen.list(to = 3) |>
gen.with(\(keys) {
uniq <- unique(keys)
superset <- outer(
uniq,
uniq,
Vectorize(\(sup, sub) {
all(is.element(sub, sup)) && !all(is.element(sup, sub))
})
)
rem <- uniq[!apply(superset, 1, any)]
rem[keys_order(lapply(rem, match, attrs))]
})
}
gen.relation_schema <- function(x, from, to, single_empty_key = FALSE) {
gen.subsequence(x) |>
gen.and_then(\(attrs) {
list(gen.pure(attrs), gen.keys(attrs)) |>
gen.list(from = from, to = to)
}) |>
gen.with(\(schemas) {
if (single_empty_key) {
# only one schema can have an empty key
rels_with_empty_keys <- which(vapply(
schemas,
\(schema) any(lengths(schema[[2]]) == 0L),
logical(1)
))
if (length(rels_with_empty_keys) > 1L)
schemas <- schemas[-rels_with_empty_keys[-1]]
}
nms <- make.names(
vapply(schemas, \(rel) name_dataframe(rel[[2]][[1]]), character(1)),
unique = TRUE
)
list(setNames(schemas, nms), x)
}) |>
gen.with(\(lst) {
do.call(relation_schema_nocheck, lst)
})
}
gen.relation_schema_empty_keys <- function(x, from, to, min_empty) {
if (min_empty > from)
stop("can't guarantee more empty keys than minimum schema count")
gen.relation_schema(x, from, to) |>
gen.and_then(\(rs) {
gen.element(floor(max(0L, min_empty)):length(rs)) |>
gen.and_then(\(n_empty) {
if (n_empty == 0)
return(gen.pure(rs))
gen.sample(seq_along(rs), n_empty, replace = FALSE) |>
gen.with(\(empty) {
keys(rs)[empty] <- rep(list(list(character())), n_empty)
rs
})
})
})
}
# references are included to ensure attributes that reference each other have
# the same class
gen.attrs_class <- function(nm, references = list()) {
groups <- seq_along(nm)
for (rel in references) {
child_attrs <- match(rel[[2]], nm)
parent_attrs <- match(rel[[4]], nm)
stopifnot(!anyNA(c(child_attrs, parent_attrs)))
for (n in seq_along(child_attrs)) {
grp <- groups[c(child_attrs[[n]], parent_attrs[[n]])]
groups[is.element(groups, grp)] <- min(grp)
}
}
gen.element(list(
"logical",
"integer",
"numeric",
"character",
"factor"
)) |>
gen.list(of = length(unique(groups))) |>
gen.with(\(group_classes) group_classes[match(groups, sort(unique(groups)))]) |>
gen.with(with_args(setNames, nm = nm))
}
gen.relation <- function(
x,
from,
to,
rows_from = 0L,
rows_to = 10L,
single_empty_key = FALSE,
variant = c("data.frame", "tibble")
) {
list(
gen.relation_schema(x, from, to, single_empty_key = single_empty_key),
gen.element(variant)
) |>
gen.and_then(uncurry(
\(rs, var) gen.relation_from_schema(rs, rows_from, rows_to, var)
))
}
gen.relation_from_schema <- function(
rs,
rows_from = 0L,
rows_to = 10L,
variant = c("data.frame", "tibble")
) {
variant <- match.arg(variant)
gen.pure(create(rs)) |>
gen.and_then(\(empty_rel) {
r_attrs <- attrs(empty_rel)
r_ncols <- lengths(r_attrs)
r_keys <- keys(empty_rel)
lapply(
setNames(seq_along(empty_rel), names(empty_rel)),
\(n) {
ks <- r_keys[[n]]
gen.element(rows_from:rows_to) |>
gen.and_then(with_args(
gen.df_fixed_ranges,
classes = rep("logical", r_ncols[[n]]),
nms = r_attrs[[n]],
remove_dup_rows = TRUE,
variant = variant
)) |>
gen.with(\(df) list(
df = remove_key_violations(df, ks),
keys = ks
))
}
) |>
gen.with(with_args(relation_nocheck, attrs_order = attrs_order(empty_rel)))
})
}
remove_key_violations <- function(df, keys) {
Reduce(
\(df, key) df[!df_duplicated(df[, key, drop = FALSE]), , drop = FALSE],
keys,
init = df
)
}
remove_insertion_key_violations <- function(df, relation) {
Reduce(
\(df, n) {
recs <- records(relation)
Reduce(
\(df, key) {
r_df <- recs[[n]]
r_attrs <- names(r_df)
remove <- if (length(key) == 0L) {
negind <- if (nrow(r_df) == 0)
TRUE
else
-seq_len(nrow(r_df))
if (length(r_attrs) == 0L)
rep(FALSE, nrow(df))
else{
single_adds <- lapply(
seq_len(nrow(df)),
\(n) df_rbind(r_df, df[n, r_attrs, drop = FALSE])
)
record_new <- vapply(
single_adds,
\(sa) {
nondups <- !duplicated(sa)
if (length(nondups) != nrow(r_df) + 1L)
stop(paste(print(1), print(relation[[n]]), print(df)))
res <- nondups[negind]
if (length(res) != 1)
stop(paste(print(2), print(relation[[n]]), print(df)))
res
},
logical(1)
)
record_new
}
}else{
negind <- if (nrow(r_df) == 0)
TRUE
else
-seq_len(nrow(r_df))
comb <- df_rbind(r_df, df[, r_attrs, drop = FALSE])
key_dups <- df_duplicated(comb[, key, drop = FALSE])[negind]
single_adds <- lapply(
seq_len(nrow(df)),
\(n) df_rbind(r_df, df[n, r_attrs, drop = FALSE])
)
record_new <- vapply(
single_adds,
\(sa) {
nondups <- !duplicated(sa)
if (length(nondups) != nrow(r_df) + 1L)
stop(paste(print(1), print(relation[[n]]), print(df)))
res <- nondups[negind]
if (length(res) != 1)
stop(paste(print(2), print(relation[[n]]), print(df)))
res
},
logical(1)
)
key_dups & record_new
}
df[!remove, , drop = FALSE]
},
keys(relation)[[n]],
init = df
)
},
seq_along(relation),
init = df
)
}
remove_violated_references <- function(references, relation) {
recs <- records(relation)
references[vapply(
references,
\(rel) {
child <- recs[[rel[[1]]]][, rel[[2]], drop = FALSE]
parent <- recs[[rel[[3]]]][, rel[[4]], drop = FALSE]
identical(
nrow(child),
nrow(df_join(
child,
parent,
by.x = rel[[2]],
by.y = rel[[4]]
))
)
},
logical(1)
)]
}
gen.references_same_attrs <- function(rs, single_key_pairs) {
gen.references_for_index_and_key <- function(rs, n, k) {
contains_key <- setdiff(
which(vapply(
attrs(rs),
\(as) all(is.element(k, as)),
logical(1)
)),
n
)
gen.subsequence(contains_key) |>
gen.with(\(citers) {
lapply(
citers,
\(citer) list(
names(rs)[[citer]],
k,
names(rs)[[n]],
k
)
)
})
}
gen.references_for_index <- function(rs, n) {
ks <- keys(rs)[[n]]
lapply(
ks[lengths(ks) > 0L],
gen.references_for_index_and_key,
rs = rs,
n = n
) |>
gen.with(\(lst) {
if (length(lst) == 0L) list() else unlist(lst, recursive = FALSE)
}) |>
gen.with(\(rels) {
if (single_key_pairs)
rels[!duplicated(lapply(rels, \(r) c(r[[1]], r[[3]])))]
else
rels
})
}
lapply(seq_along(rs), gen.references_for_index, rs = rs) |>
gen.with(\(lst) if (length(lst) == 0L) list() else unlist(lst, recursive = FALSE))
}
gen.references_different_attrs <- function(rs, single_key_pairs) {
gen.references_for_index_and_key <- function(rs, n, k) {
contains_key_length <- setdiff(
which(vapply(
attrs(rs),
\(as) length(as) >= length(k),
logical(1)
)),
n
)
gen.subsequence(contains_key_length) |>
gen.and_then(\(citers) {
lapply(
citers,
\(citer) {
gen.sample(attrs(rs)[[citer]], length(k)) |>
gen.with(\(attrs) {
list(
names(rs)[[citer]],
attrs,
names(rs)[[n]],
k
)
})
}
)
})
}
gen.references_for_index <- function(rs, n) {
ks <- keys(rs)[[n]]
lapply(
ks[lengths(ks) > 0L],
gen.references_for_index_and_key,
rs = rs,
n = n
) |>
gen.with(\(lst) {
if (length(lst) == 0L) list() else unlist(lst, recursive = FALSE)
}) |>
gen.with(\(rels) {
if (single_key_pairs)
rels[!duplicated(lapply(rels, \(r) c(r[[1]], r[[3]])))]
else
rels
})
}
lapply(seq_along(rs), gen.references_for_index, rs = rs) |>
gen.with(\(lst) if (length(lst) == 0L) list() else unlist(lst, recursive = FALSE))
}
gen.references <- function(rs, single_key_pairs) {
gen.choice(
gen.references_same_attrs(rs, single_key_pairs),
gen.references_different_attrs(rs, single_key_pairs)
)
}
gen.database_schema <- function(
x,
from,
to,
single_empty_key = FALSE,
same_attr_name = FALSE,
single_key_pairs = FALSE
) {
gen.relation_schema(x, from, to, single_empty_key = single_empty_key) |>
gen.and_then(\(rs) {
list(
gen.pure(rs),
if (same_attr_name)
gen.references_same_attrs(rs, single_key_pairs)
else
gen.references(rs, single_key_pairs))
}) |>
gen.with(\(lst) do.call(database_schema_nocheck, lst))
}
gen.database_schema_empty_keys <- function(
x,
from,
to,
min_empty,
same_attr_name = FALSE,
single_key_pairs = FALSE
) {
gen.relation_schema_empty_keys(x, from, to, min_empty) |>
gen.and_then(\(rs) {
list(
gen.pure(rs),
if (same_attr_name)
gen.references_same_attrs(rs, single_key_pairs)
else
gen.references(rs, single_key_pairs))
}) |>
gen.with(\(lst) do.call(database_schema_nocheck, lst))
}
gen.database <- function(
x,
from,
to,
single_empty_key = FALSE,
same_attr_name = TRUE,
single_key_pairs = TRUE,
rows_from = 0L,
rows_to = 10L,
variant = c("data.frame", "tibble")
) {
list(
gen.database_schema(
x,
from,
to,
single_empty_key = single_empty_key,
same_attr_name = same_attr_name,
single_key_pairs = single_key_pairs
),
gen.element(variant)
) |>
gen.and_then(uncurry(\(ds, var) {
gen.relation_from_schema(ds, rows_from, rows_to, var) |>
gen.with(
with_args(
remove_reference_violations,
references = references(ds)
) %>>%
with_args(database_nocheck, references = references(ds))
)
}))
}
remove_reference_violations <- function(relation, references) {
if (length(references) == 0L)
return(relation)
change <- TRUE
recs <- records(relation)
while (change) {
change <- FALSE
for (ref in references) {
child_name <- ref[[1]]
child <- recs[[child_name]][, ref[[2]], drop = FALSE]
if (nrow(child) > 0L) {
child_records <- df_records(child)
parent_name <- ref[[3]]
parent <- recs[[parent_name]][, ref[[4]], drop = FALSE]
parent_keys <- keys(relation)[[parent_name]]
stopifnot(is.element(list(ref[[4]]), parent_keys))
parent_records <- df_records(parent)
valid <- is.element(child_records, parent_records)
recs[[child_name]] <- recs[[child_name]][valid, , drop = FALSE]
if (!all(valid))
change <- TRUE
}
}
}
records(relation) <- recs
relation
}
remove_insertion_reference_violations <- function(df, database) {
if (length(references(database)) == 0L)
return(df)
recs <- records(database)
change <- TRUE
while (change) {
change <- FALSE
for (ref in references(database)) {
child_name <- ref[[1]]
child <- df_rbind(
recs[[child_name]][, ref[[2]], drop = FALSE],
df[, ref[[2]], drop = FALSE]
)
if (nrow(child) > 0L) {
parent_name <- ref[[3]]
parent <- df_rbind(
recs[[parent_name]][, ref[[4]], drop = FALSE],
df[, ref[[4]], drop = FALSE]
)
valid <- vapply(
seq_len(nrow(child)),
\(n) nrow(df_join(
child[n, , drop = FALSE],
parent,
by.x = ref[[2]],
by.y = ref[[4]]
)) > 0L,
logical(1)
)
df <- df[valid[-seq_len(nrow(recs[[child_name]]))], , drop = FALSE]
if (!all(valid))
change <- TRUE
}
}
}
df
}
# naively inserting data into a database can give reference errors,
# which sets of relations are legal to insert into?
minimal_legal_insertion_sets <- function(db, df) {
refs <- references(db)
ref_mat <- matrix(FALSE, nrow = length(db), ncol = length(db))
dimnames(ref_mat) <- list(child = names(db), parent = names(db))
for (ref in refs) {
ref_mat[ref[[1]], ref[[3]]] <- TRUE
}
# 1. If df doesn't have all attributes for a relation, inserting does nothing:
# it's a legal insertion set, that relation can't be inserted into to nake
# insertion into a parent legal.
have_attrs <- vapply(
attrs(db),
\(x) all(x %in% names(df)),
logical(1)
)
# 2. Otherwise, if a relation already has the given data, it's a legal
# insertion set.
already_present <- rep(FALSE, length(db))
already_present[have_attrs] <- vapply(
records(db[have_attrs]),
\(r) {
# assumes df rows are already unique
nrow(df_join(r, df[, names(r), drop = FALSE])) == nrow(df)
},
logical(1)
)
# take out rels with data already, since sets by themselves and can't affect
# legality of children
legal_sets <- as.list(c(
rownames(ref_mat)[!have_attrs],
rownames(ref_mat)[already_present]
))
ref_mat <- ref_mat[!already_present, !already_present, drop = FALSE]
# 3. Otherwise, if inserting into that relation would cause a key violation,
# the relation can't be inserted into.
violates_key <- rep(NA, length(db))
violates_key[have_attrs][!already_present] <- vapply(
names(db)[have_attrs][!already_present],
\(nm) {
nr <- df_rbind(records(db)[[nm]], df[, attrs(db)[[nm]], drop = FALSE])
any(vapply(
keys(db)[[nm]],
\(key) as.logical(df_anyDuplicated(nr[, key, drop = FALSE])),
logical(1)
))
},
logical(1)
)
# 4. Otherwise, check whether all the relation's parents have legal insertion
# sets. If they do, the relation has a legal insertion set that includes
# itself, plus the insertion sets of the parents that don't already contain
# the data themselves.
# First, we determine whether a relation can be inserted into when ignoring
# foreign keys.
legal <- (have_attrs & !violates_key)[!already_present]
# Then, we find all the relations it refers to, directly and indirectly. This
# includes itself.
family_mat <- ref_mat
old_val <- NA & family_mat
while (!identical(old_val, family_mat)) {
old_val <- family_mat
family_mat <- family_mat | (family_mat %*% ref_mat)
}
diag(family_mat) <- TRUE
# Check whether it depends on anything illegal.
eventually_illegal <- apply(family_mat[, !legal, drop = FALSE], 1, any)
# Keep dependency set otherwise.
legal_sets <- c(
legal_sets,
apply(
family_mat[!eventually_illegal, , drop = FALSE],
1,
\(lgs) rownames(ref_mat)[lgs],
simplify = FALSE
)
)
unique(legal_sets)
}
# error arising from x[[indices]], assuming there is one
single_subset_failure_type <- function(x, indices) {
if (length(indices) > 1)
"attempt to select more than one element in vectorIndex"
else {
if (
length(indices) == 1 &&
all(indices < 0) &&
length(indices) + 1 < length(x)
) {
if (is.logical(indices))
"attempt to select more than one element in get1Index"
else
"attempt to select more than one element in integerOneIndex"
}else{
if (length(indices) == 1)
"attempt to select less than one element in integerOneIndex"
else
"attempt to select less than one element in get1index"
}
}
}
# generating key / determinant set lists
gen.nonempty_list <- function(generator, to)
gen.list(generator, from = 1, to = to)
gen.emptyable_list <- function(generator, to)
gen.list(generator, from = 0, to = to)
gen.list_with_dups <- function(generator, n_unique)
gen.nonempty_list(generator, n_unique) |>
gen.and_then(\(lst) gen.sample(lst, ceiling(1.5*length(lst)), replace = TRUE))
# gen.sample with replace=TRUE, but allowing changing the sample
# when shrinking, not just re-ordering
gen.sample_resampleable <- function(x, from = 1, to = NULL, of = NULL) {
if ((!missing(from) || !missing(to)) && !missing(of))
stop("Specify `to` and `from`, or `of`")
if (!missing(of)) {
if (of == 0) {
gen.pure(x[FALSE])
}else{
gen.element(x) |>
gen.list(of = of) |>
gen.and_then(function(xs) do.call(c, xs))
}
}else {
gen.element(from:to) |>
gen.and_then(\(of) {
if (of == 0) {
gen.pure(x[FALSE])
}else{
gen.element(x) |>
gen.list(of = of) |>
gen.and_then(function(xs) do.call(c, xs))
}
})
}
}
rel2df <- function(rel, relations) {
records(rel)[relations] <- lapply(records(rel)[relations], as.data.frame)
rel
}
# functional utility functions for tests
`%>>%` <- function(fn1, fn2) function(...) fn2(fn1(...))
biapply <- function(fn1, fn2) function(x) list(fn1(x), fn2(x))
expect_bi <- function(logical_fn, fn1, fn2) {
function(x) expect_true(logical_fn(fn1(x), fn2(x)))
}
expect_biequal <- function(fn1, fn2) function(x) expect_equal(fn1(x), fn2(x))
expect_biidentical <- function(fn1, fn2)
function(...) expect_identical(fn1(...), fn2(...))
split_by <- function(fn, ...) function(x) split(x, fn(x), ...)
subset_by <- function(fn) function(x) x[fn(x)]
sort_by <- function(fn) function(x) x[order(fn(x))]
if_discard_else <- function(cond, fn)
function(x) if (cond(x)) discard() else fn(x)
uncurry <- function(fn) function(x) do.call(fn, x)
with_args <- function(fn, ...) {
lst <- list(...)
function(...) do.call(fn, c(list(...), lst))
}
apply_both <- function(fn1, fn2) function(x) {fn1(x); fn2(x)}
dup <- function(x) list(x, x)
onLeft <- function(f) function(x) list(f(x[[1]]), x[[2]])
onRight <- function(f) function(x) list(x[[1]], f(x[[2]]))
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.