tests/testthat/test-synthesise.r

gen.nonempty_key <- gen.sample.int(2, gen.int(5), replace = TRUE)
gen.key <- gen.sample.int(2, gen.element(0:5), replace = TRUE)

describe("synthesise", {
  expect_relation_schema <- function(current, target) {
    expect_identical(
      current,
      relation_schema(
        setNames(Map(list, target$attrs, target$keys), target$relation_names),
        target$attrs_order
      )
    )
  }
  gets_unique_table_names <- function(fds) {
    expect_true(!anyDuplicated(names(synthesise(fds))))
  }

  it("gives valid schemas", {
    # rare failing case:
    # removing avoidables can muck up attrs order
    # if keys aren't reordered afterwards
    deps <- functional_dependency(
      list(
        list(c("bcr_j", "vfzc."), "mdaoyx"),
        list("vfzc.", "bcr_j"),
        list(c("mdaoyx", "bcr_j", "bi", "vfzc."), "fvoxk"),
        list(c("mdaoyx", "bcr_j", "eqro", "vfzc."), "fvoxk"),
        list(c("mdaoyx", "eqro", "bi"), "fvoxk"),
        list(c("mdaoyx", "vfzc."), "fvoxk"),
        list(c("mdaoyx", "bcr_j", "eqro", "fvoxk", "vfzc."), "bi"),
        list(c("mdaoyx", "eqro", "bi"), "vfzc."),
        list(c("bcr_j", "eqro", "fvoxk"), "vfzc."),
        list(c("mdaoyx", "bcr_j", "fvoxk", "bi"), "vfzc."),
        list(c("bcr_j", "fvoxk", "bi"), "vfzc.")
      ),
      c("mdaoyx", "bcr_j", "eqro", "fvoxk", "bi", "vfzc.")
    )
    deps |>
      (apply_both(
        synthesise %>>% is_valid_relation_schema,
        with_args(synthesise, remove_avoidable = TRUE) %>>%
          is_valid_relation_schema
      ))()

    forall(
      gen_flat_deps(7, 20, to = 20L),
      apply_both(
        synthesise %>>% is_valid_relation_schema,
        with_args(synthesise, remove_avoidable = TRUE) %>>%
          is_valid_relation_schema
      )
    )
  })
  it("is invariant to dependency reordering", {
    gen_permutation <- gen.int(10) |>
      gen.and_then(\(n) list(
        gen_named_flat_deps_fixed_size(letters[1:10], n, 5, unique = FALSE),
        gen.sample.int(n),
        n
      )) |>
      gen.with(\(lst) {
        if (length(lst[[1]]) != length(lst[[2]]))
          stop(print(lst))
        lst[1:2]
      }) |>
      gen.with(\(lst) {
        list(lst[[1]], lst[[1]][lst[[2]]])
      })
    normalisation_permutation_invariant <- if_discard_else(
      uncurry(identical),
      with_args(lapply, synthesise) %>>% (uncurry(expect_identical))
    )
    # currently-rarely-generated case:
    # given a choice of which dependency to remove as extraneous,
    # order matters
    deps <- functional_dependency(
      list(
        list(c("C", "D"), "B"),
        list(c("A", "B"), "C"),
        list(c("A", "D"), "B"),
        list(c("A", "D"), "C")
      ),
      attrs_order = c("A", "B", "C", "D")
    )
    normalisation_permutation_invariant(list(deps, deps[c(1, 2, 4, 3)]))

    forall(gen_permutation, normalisation_permutation_invariant)
  })
  it("removes longer/later-attributed dependency sets if given a choce", {
    schema <- synthesise(functional_dependency(
      list(
        list(c("C", "D"), "B"),
        list(c("A", "B"), "C"),
        list(c("A", "D"), "B"),
        list(c("A", "D"), "C")
      ),
      attrs_order = c("A", "B", "C", "D")
    ))
    expect_setequal(names(schema), c("A_B", "A_D", "C_D"))
    ord <- match("A_D", names(schema))
    expect_identical(setdiff(attrs(schema)[[ord]], c("A", "D")), "B")
  })
  it("removes extraneous attributes by default", {
    dependencies <- functional_dependency(
      list(
        list("a", "b"),
        list(c("a", "b"), "c")
      ),
      attrs_order = c("a", "b", "c")
    )
    norm.dependencies <- synthesise(dependencies)
    expect_relation_schema(
      norm.dependencies,
      list(
        attrs = list(c("a", "b", "c")),
        keys = list(list("a")),
        attrs_order = c("a", "b", "c"),
        relation_names = "a"
      )
    )
  })
  it("is equivalent to removing extraneous attributes separately", {
    forall(
      gen_flat_deps(7, 20, to = 20),
      expect_biidentical(
        synthesise,
        remove_extraneous_attributes %>>%
          with_args(synthesise, reduce_attributes = FALSE)
      )
    )
  })
  it("removes extraneous dependencies", {
    dependencies <- functional_dependency(
      list(
        list("a", "b"),
        list("a", "c"),
        list("b", "c")
      ),
      attrs_order = c("a", "b", "c")
    )
    norm.dependencies <- synthesise(dependencies)
    expect_relation_schema(
      norm.dependencies,
      list(
        attrs = list(c("a", "b"), c("b", "c")),
        keys = list(list("a"), list("b")),
        attrs_order = c("a", "b", "c"),
        relation_names = c("a", "b")
      )
    )
  })
  it("is order_invariant WRT ensuring lossless, added relation is last", {
    forall(
      list(
        gen_flat_deps(7, 20, to = 20L),
        gen.element(c(FALSE, TRUE))
      ),
      \(deps, ra) {
        s1 <- synthesise(deps, ensure_lossless = FALSE, remove_avoidable = ra)
        s2 <- synthesise(deps, ensure_lossless = TRUE, remove_avoidable = ra)
        expect_identical(s2[seq_along(s1)], s1)
      }
    )
  })
  it("is order_invariant WRT removing avoidable attributes", {
    forall(
      list(
        gen_flat_deps(7, 20, to = 20L),
        gen.element(c(FALSE, TRUE))
      ),
      \(deps, el) {
        s1 <- synthesise(deps, ensure_lossless = el, remove_avoidable = FALSE)
        s2 <- synthesise(deps, ensure_lossless = el, remove_avoidable = TRUE)
        shared_names <- intersect(names(s1), names(s2))
        expect_identical(
          match(shared_names, names(s1)),
          match(shared_names, names(s2))
        )
      }
    )
  })
  it("has no change in added table for losslessness if avoidable attributes removed", {
    still_lossless_with_less_or_same_attributes_dep <- function(flat_deps) {
      schema_avoid_lossy <- synthesise(
        flat_deps,
        ensure_lossless = FALSE,
        remove_avoidable = TRUE
      )
      schema_noavoid_lossy <- synthesise(
        flat_deps,
        ensure_lossless = FALSE,
        remove_avoidable = FALSE
      )
      schema_avoid_lossless <- synthesise(
        flat_deps,
        ensure_lossless = TRUE,
        remove_avoidable = TRUE
      )
      schema_noavoid_lossless <- synthesise(
        flat_deps,
        ensure_lossless = TRUE,
        remove_avoidable = FALSE
      )
      lengths_avoid_lossy <- lengths(attrs(schema_avoid_lossy))
      lengths_noavoid_lossy <- lengths(attrs(schema_noavoid_lossy))
      lengths_avoid_lossless <- lengths(attrs(schema_avoid_lossless))
      lengths_noavoid_lossless <- lengths(attrs(schema_noavoid_lossless))

      # losslessness should add 0 or 1 tables
      expect_gte(
        length(lengths_avoid_lossless),
        length(lengths_avoid_lossy)
      )
      expect_lte(
        length(lengths_avoid_lossless),
        length(lengths_avoid_lossy) + 1
      )
      expect_gte(
        length(lengths_noavoid_lossless),
        length(lengths_noavoid_lossy)
      )
      expect_lte(
        length(lengths_noavoid_lossless),
        length(lengths_noavoid_lossy) + 1
      )

      # removing avoidable attributes doesn't affect whether extra table added
      expect_identical(
        length(lengths_avoid_lossless),
        length(lengths_noavoid_lossless)
      )

      # removing avoidable attributes can't make tables wider
      lossless_length <- length(lengths_avoid_lossless)
      for (l in seq_len(lossless_length)) {
        expect_lte(lengths_avoid_lossless[l], lengths_noavoid_lossless[l])
      }

      # if extra table added, avoidance shouldn't affect its attributes
      if (length(lengths_avoid_lossless) > length(lengths_avoid_lossy))
        expect_identical(
          attrs(schema_avoid_lossless)[[lossless_length]],
          attrs(schema_noavoid_lossless)[[lossless_length]]
        )
    }

    forall(
      gen_flat_deps(7, 20, to = 20L),
      still_lossless_with_less_or_same_attributes_dep
    )
  })
  it("merges equivalent keys", {
    dependencies <- functional_dependency(
      list(
        list("d", "a"),
        list("d", "b"),
        list("a", "d"),
        list(c("b", "c"), "d")
      ),
      attrs_order = c("a", "b", "c", "d")
    )
    norm.dependencies <- synthesise(dependencies)
    expect_relation_schema(
      norm.dependencies,
      list(
        attrs = list(c("a", "d", "b"), c("b", "c", "a")),
        keys = list(list("a", "d"), list(c("b", "c"))),
        attrs_order = c("a", "b", "c", "d"),
        relation_names = c("a", "b_c")
      )
    )
  })
  it("resolves simple bijections with no splits", {
    # A -> B, B -> A => A <-> B
    dependencies <- functional_dependency(
      list(
        list("a", "b"),
        list("b", "a")
      ),
      attrs_order = c("a", "b")
    )
    norm.df <- synthesise(dependencies)
    expect_relation_schema(
      norm.df,
      list(
        attrs = list(c("a", "b")),
        keys = list(list("a", "b")),
        attrs_order = c("a", "b"),
        relation_names = "a"
      )
    )
  })
  it("can handle basic bijections with dependants", {
    # A -> B, B -> A, A -> C, B -> C, A -> D, B -> F, D -> E, F -> E
    # => A <-> B, A -> CDF, D -> E, F -> E
    dependencies <- functional_dependency(
      list(
        list("a", "b"),
        list("b", "a"),
        list("a", "c"),
        list("b", "c"),
        list("a", "d"),
        list("b", "f"),
        list("d", "e"),
        list("f", "e")
      ),
      attrs_order = c("a", "b", "c", "d", "e", "f")
    )
    norm.dependencies <- synthesise(dependencies)
    expect_relation_schema(
      norm.dependencies,
      list(
        attrs = list(c("a", "b", "c", "d", "f"), c("d", "e"), c("f", "e")),
        keys = list(list("a", "b"), list("d"), list("f")),
        attrs_order = c("a", "b", "c", "d", "e", "f"),
        relation_names = c("a", "d", "f")
      )
    )
  })
  it("removes transient dependencies after-merging keys (DFD fig. 3)", {
    dependencies <- functional_dependency(
      list(
        list(c("x1", "x2"), "a"),
        list(c("x1", "x2"), "d"),
        list(c("c", "d"), "x1"),
        list(c("c", "d"), "x2"),
        list(c("a", "x1"), "b"),
        list(c("b", "x2"), "c"),
        list("c", "a")
      ),
      attrs_order = c("x1", "x2", "a", "b", "c", "d")
    )
    norm.dep <- synthesise(dependencies)
    expected_attrs <- list(
      c("x1", "x2", "c", "d"), # contains a if trans_deps not removed
      c("x1", "a", "b"),
      c("x2", "b", "c"),
      c("c", "a")
    )
    expected_keys <- list(
      list(c("x1", "x2"), c("c", "d")),
      list(c("x1", "a")),
      list(c("x2", "b")),
      list("c")
    )
    expect_setequal(attrs(norm.dep), expected_attrs)
    expect_setequal(keys(norm.dep), expected_keys)
    expect_identical(
      match(attrs(norm.dep), expected_attrs),
      match(keys(norm.dep), expected_keys)
    )
  })
  it("replaces keys / non-key attributes with their bijection set's chosen index", {
    dependencies <- functional_dependency(
      list(
        list(c("A", "B"), "C"),
        list("C", "A"),
        list("C", "B"),
        list("C", "D"),
        list(c("A", "B", "E"), "F")
      ),
      attrs_order = c("A", "B", "C", "D", "E", "F")
    )
    norm.dep <- synthesise(dependencies)
    expect_relation_schema(
      norm.dep,
      list(
        attrs = list(c("C", "A", "B", "D"), c("C", "E", "F")),
        keys = list(list("C", c("A", "B")), list(c("C", "E"))),
        attrs_order = c("A", "B", "C", "D", "E", "F"),
        relation_names = c("C", "C_E")
      )
    )
  })
  it("gives unique names if constants appears in attribute names and via constant attributes", {
    fds <- functional_dependency(
      list(
        list("constants", "a"),
        list(character(), "b")
      ),
      attrs_order = c("constants", "a", "b")
    )
    gets_unique_table_names(fds)
  })
  it("includes all attributes in tables if ensuring lossless", {
    includes_all_attrs_if_lossless <- function(fds) {
      new_fds <- fds
      attrs_order(new_fds) <- make.unique(c(attrs_order(fds), "X"))
      rs <- synthesise(new_fds, ensure_lossless = TRUE)
      expect_true(setequal(attrs_order(rs), unlist(attrs(rs))))
    }
    forall(
      gen_flat_deps(7, 20, to = 20L),
      includes_all_attrs_if_lossless
    )
  })
  it("includes all attributes in non-extraneous FDs in tables if not ensuring lossless", {
    includes_all_fd_attrs_if_lossless <- function(fds) {
      nonextr_fds <- fds |>
        remove_extraneous_attributes() |>
        remove_extraneous_dependencies() |>
        convert_to_vectors() |>
        convert_to_integer_attributes() |>
        sort_key_contents() |>
        sort_dependencies()
      nonextr_fds$determinant_sets <- lapply(
        nonextr_fds$determinant_sets,
        \(inds) attrs_order(fds)[inds]
      )
      nonextr_fds$dependants <- lapply(
        nonextr_fds$dependants,
        \(inds) attrs_order(fds)[inds]
      )
      nonextr_fds <- functional_dependency(
        Map(list, nonextr_fds$determinant_sets, nonextr_fds$dependants),
        attrs_order(fds)
      )

      rs <- synthesise(fds, ensure_lossless = FALSE)
      expect_true(setequal(unlist(nonextr_fds), unlist(attrs(rs))))
    }
    forall(
      gen_flat_deps(7, 20, to = 20L),
      includes_all_fd_attrs_if_lossless
    )
  })
  it("can remove avoidable attributes", {
    # example 6.24 from Maier
    # A <-> B, AC -> D, AC -> E, BD -> C
    # expected without removing avoidable: A <-> B, AC <-> BD -> E
    # expected with removing avoidable: A <-> B, AC <-> AD -> E
    deps <- functional_dependency(
      list(
        list("A", "B"),
        list("B", "A"),
        list(c("A", "C"), "D"),
        list(c("A", "C"), "E"),
        list(c("B", "D"), "C")
      ),
      attrs_order = c("A", "B", "C", "D", "E")
    )
    norm.deps <- synthesise(deps, remove_avoidable = FALSE)
    expect_relation_schema(
      norm.deps,
      list(
        attrs = list(c("A", "B"), c("A", "C", "B", "D", "E")),
        keys = list(list("A", "B"), list(c("A", "C"), c("B", "D"))),
        attrs_order = c("A", "B", "C", "D", "E"),
        relation_names = c("A", "A_C")
      )
    )
    norm.deps2 <- synthesise(deps, remove_avoidable = TRUE)
    expect_relation_schema(
      norm.deps2,
      list(
        attrs = list(c("A", "B"), c("A", "C", "D", "E")),
        keys = list(list("A", "B"), list(c("A", "C"), c("A", "D"))),
        attrs_order = c("A", "B", "C", "D", "E"),
        relation_names = c("A", "A_C")
      )
    )

    still_lossless_with_less_or_same_attributes_dep <- function(flat_deps) {
      norm_deps_avoid <- synthesise(
        flat_deps,
        remove_avoidable = TRUE
      )
      norm_deps_noavoid <- synthesise(
        flat_deps,
        remove_avoidable = FALSE
      )
      lengths_avoid <- lengths(attrs(norm_deps_avoid))
      lengths_noavoid <- lengths(attrs(norm_deps_noavoid))
      expect_identical(length(lengths_avoid), length(lengths_noavoid))
      expect_true(all(lengths_avoid <= lengths_noavoid))
      expect_true(all(lengths(norm_deps_avoid) <= lengths(norm_deps_noavoid)))
      expect_true(all(mapply(
        \(av, noav) all(is.element(av, noav)),
        attrs(norm_deps_avoid),
        attrs(norm_deps_noavoid)
      )))
    }

    still_lossless_with_less_or_same_attributes <- function(df) {
      flat_deps <- discover(df, 1)
      schema_avoid_lossless <- autoref(synthesise(
        flat_deps,
        remove_avoidable = TRUE
      ))

      # schema_avoid_lossless should be lossless
      database_avoid_lossless <- decompose(df, schema_avoid_lossless)
      df2 <- rejoin(database_avoid_lossless)
      expect_identical_unordered_table(df2, df)

      still_lossless_with_less_or_same_attributes_dep(flat_deps)
    }

    forall(
      gen_df(10, 7, remove_dup_rows = TRUE),
      still_lossless_with_less_or_same_attributes
    )
    forall(
      gen_flat_deps(7, 20, to = 20L),
      still_lossless_with_less_or_same_attributes_dep
    )
  })
  it("gives database schemas that enforce the given functional dependencies", {
    expect_all_enforced <- function(deps, schema) {
      implied_fds <- synthesised_fds(
        attrs(schema),
        keys(schema)
      )
      implied_flat_fds <- implied_fds
      if (length(implied_flat_fds) > 0)
        implied_flat_fds <- unlist(implied_flat_fds, recursive = FALSE)
      implied_flat_fds <- functional_dependency(implied_flat_fds, attrs_order(deps))
      dep_closures <- lapply(
        detset(deps),
        find_closure,
        detset(implied_flat_fds),
        dependant(implied_flat_fds)
      )
      fds_reproduced <- mapply(
        \(closure, dep) dep %in% closure,
        dep_closures,
        dependant(deps)
      )

      act <- quasi_label(rlang::enquo(deps), arg = "object")
      act$nonrep <- act$val[!fds_reproduced]
      act$n <- length(act$nonrep)
      expect(
        act$n == 0L,
        sprintf(paste0(
          act$n,
          " dependencies not represented:\n",
          paste(
            vapply(
              act$nonrep,
              \(fd) paste0("{", toString(detset(fd)[[1]]), "} -> ", dependant(fd)),
              character(1)
            ),
            collapse = "\n"
          )
        ))
      )
      invisible(act$val)
    }
    enforces_fds <- function(deps, remove_avoidable = FALSE) {
      if (length(deps) == 0L)
        discard()
      schema <- synthesise(deps, remove_avoidable = remove_avoidable)
      expect_all_enforced(deps, schema)
    }

    # example of when no violations only if removables avoided
    deps <- functional_dependency(
      list(
        list(c("C", "G"), "A"),
        list("E", "B"),
        list("F", "C"),
        list(c("A", "G"), "C"),
        list("F", "D"),
        list("B", "E"),
        list(c("F", "G"), "E"),
        list(c("C", "D"), "F"),
        list(c("D", "G"), "F"),
        list(c("C", "E"), "F"),
        list("E", "G"),
        list(c("A", "C"), "G")
      ),
      c("A", "B", "C", "D", "E", "F", "G")
    )
    enforces_fds(deps, TRUE)

    forall(
      gen_flat_deps_fixed_names(7, 20, from = 1L, to = 20L),
      enforces_fds,
      discard.limit = 10L
    )
  })
  it("has no change in added table for losslessness if avoidable attributes removed", {
    still_lossless_with_less_or_same_attributes_dep <- function(flat_deps) {
      schema_avoid_lossy <- synthesise(
        flat_deps,
        ensure_lossless = FALSE,
        remove_avoidable = TRUE
      )
      schema_noavoid_lossy <- synthesise(
        flat_deps,
        ensure_lossless = FALSE,
        remove_avoidable = FALSE
      )
      schema_avoid_lossless <- synthesise(
        flat_deps,
        ensure_lossless = TRUE,
        remove_avoidable = TRUE
      )
      schema_noavoid_lossless <- synthesise(
        flat_deps,
        ensure_lossless = TRUE,
        remove_avoidable = FALSE
      )
      lengths_avoid_lossy <- lengths(attrs(schema_avoid_lossy))
      lengths_noavoid_lossy <- lengths(attrs(schema_noavoid_lossy))
      lengths_avoid_lossless <- lengths(attrs(schema_avoid_lossless))
      lengths_noavoid_lossless <- lengths(attrs(schema_noavoid_lossless))

      # losslessness should add 0 or 1 tables
      expect_gte(
        length(lengths_avoid_lossless),
        length(lengths_avoid_lossy)
      )
      expect_lte(
        length(lengths_avoid_lossless),
        length(lengths_avoid_lossy) + 1
      )
      expect_gte(
        length(lengths_noavoid_lossless),
        length(lengths_noavoid_lossy)
      )
      expect_lte(
        length(lengths_noavoid_lossless),
        length(lengths_noavoid_lossy) + 1
      )

      # removing avoidable attributes doesn't affect whether extra table added
      expect_identical(
        length(lengths_avoid_lossless),
        length(lengths_noavoid_lossless)
      )

      # removing avoidable attributes can't make tables wider
      lossless_length <- length(lengths_avoid_lossless)
      for (l in seq_len(lossless_length)) {
        expect_lte(lengths_avoid_lossless[l], lengths_noavoid_lossless[l])
      }

      # if extra table added, avoidance shouldn't affect its attributes
      if (length(lengths_avoid_lossless) > length(lengths_avoid_lossy))
        expect_identical(
          attrs(schema_avoid_lossless)[[lossless_length]],
          attrs(schema_noavoid_lossless)[[lossless_length]]
        )
    }

    forall(
      gen_flat_deps(7, 20, to = 20L),
      still_lossless_with_less_or_same_attributes_dep
    )
  })
})

test_that("drop_primary_dups", {
  df <- data.frame(
    city = c(
      'honolulu', 'boston', 'honolulu', 'dallas', 'seattle',
      'honolulu', 'boston', 'honolulu', 'seattle', 'boston'
    ),
    state = c(
      'HI', 'MA', 'HI', 'TX', 'WA',
      'AL', 'MA', 'HI', 'WA', 'NA'
    ),
    is_liberal = c(
      TRUE, TRUE, TRUE, FALSE, TRUE,
      TRUE, TRUE, TRUE, TRUE, FALSE
    )
  )
  new_df <- drop_primary_dups(df, "city")
  df_new_dic <- data.frame(
    city = c("boston", "dallas", "honolulu", "seattle"),
    state = c("MA", "TX", "HI", "WA"),
    is_liberal = c(TRUE, FALSE, TRUE, TRUE)
  )
  expect_identical(new_df, df_new_dic)

  df <- data.frame(
    requires_light = c(TRUE, TRUE, FALSE, TRUE, TRUE, FALSE, TRUE),
    is_dark = c(TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, FALSE),
    light_on = c(TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, TRUE)
  )
  new_df <- drop_primary_dups(df, c('requires_light', 'is_dark'))
  expect_identical(nrow(new_df), 3L)
  expect_true(all(new_df$light_on == (new_df$requires_light & new_df$is_dark)))
})

describe("keys_order", {
  it("works like order() for single-length elements", {
    forall(
      gen.sample.int(100, gen.element(0:10)),
      expect_biidentical(as.list %>>% keys_order, order)
    )
  })
  it("gives a sorted list if applied as subsetter", {
    forall(
      gen.sample.int(100, gen.element(0:10)),
      expect_biidentical(subset_by(keys_order) %>>% keys_order, seq_along)
    )
  })
  it("orders by length first", {
    forall(
      gen.emptyable_list(gen.key, 10),
      expect_biidentical(subset_by(keys_order) %>>% lengths, lengths %>>% sort)
    )
  })
  it("orders by values, in given order, within lengths", {
    same_length_sorted <- function(lst) {
      len <- length(lst[[1]])
      stopifnot(all(lengths(lst) == len))
      if (len == 0)
        return(TRUE)
      firsts <- vapply(lst, `[`, integer(1), 1)
      if (is.unsorted(firsts))
        return(FALSE)
      rest <- lapply(lst, `[`, -1)
      all(tapply(rest, firsts, \(x) length(x) <= 1 || same_length_sorted(x)))
    }
    orders_by_values_with_lengths <- function(lst) {
      ord <- keys_order(lst)
      sorted_keys <- lst[ord]
      sorted_lengths <- lengths(lst)[ord]
      expect_true(all(tapply(sorted_keys, sorted_lengths, same_length_sorted)))
    }
    forall(
      gen.emptyable_list(gen.sample.int(2, gen.int(5), replace = TRUE), 10),
      orders_by_values_with_lengths
    )
  })
  it("returns an order, i.e. sequential integers from 1", {
    is_order <- function(x) expect_setequal(x, seq_along(x))
    forall(
      gen.emptyable_list(gen.key, 10),
      keys_order %>>% is_order
    )
  })
})

describe("keys_rank", {
  it("is equal to order of keys_order when there are no ties", {
    gen_unique_lst <- gen.with(gen.emptyable_list(gen.key, 10), unique)
    forall(gen_unique_lst, expect_biequal(keys_rank, keys_order %>>% order))
  })
  it("returns ranks, i.e. reals in [1,length] that sum to same as 1:length", {
    is_rank <- function(r) {
      len <- length(r)
      expect_true(all(r >= 1 & r <= len) && sum(r) == len*(len + 1)/2)
    }
    forall(
      gen.emptyable_list(gen.subsequence(0:10), 10),
      keys_rank %>>% is_rank
    )
  })
  it("gives equal rank to equal keys", {
    no_multielement <- function(x) all(lengths(x) <= 1L)
    expect_monovalued_elements <- function(grouped) {
      expect_true(all(vapply(
        grouped,
        \(x) all(vapply(x, identical, logical(1), x[[1]])),
        logical(1)
      )))
    }
    forall(
      gen.list_with_dups(gen.nonempty_key, 10),
      split_by(keys_rank) %>>%
        if_discard_else(no_multielement, expect_monovalued_elements)
    )
  })
})

describe("synthesised_fds", {
  it("is a closure-equivalent inverse of synthesise", {
    expect_closure_equiv <- function(fds1, fds2) {
      expect_true(all(c(
        mapply(
          \(dets, dep) {
            dep %in% find_closure(dets, detset(fds2), dependant(fds2))
          },
          detset(fds1),
          dependant(fds1)
        ),
        mapply(
          \(dets, dep) {
            dep %in% find_closure(dets, detset(fds1), dependant(fds1))
          },
          detset(fds2),
          dependant(fds2)
        )
      )))
    }
    forall(
      gen_flat_deps(7, 20, to = 20),
      \(fds) {
        rels <- synthesise(fds)
        expect_closure_equiv(
          functional_dependency(
            unname(unlist(
              synthesised_fds(attrs(rels), keys(rels)),
              recursive = FALSE
            )),
            attrs_order(fds)
          ),
          fds
        )
      }
    )
  })
})

Try the autodb package in your browser

Any scripts or data that you put into this service are public.

autodb documentation built on April 4, 2025, 5:12 a.m.