# tests/testthat/test-autodb.r

describe("autodb", {
  it("returns valid databases", {
    # Two properties checked on every generated data frame: the result of
    # autodb() passes is_valid_database with default arguments, and also
    # with remove_avoidable = TRUE.
    # NOTE(review): %>>% is presumably left-to-right function composition
    # from the test helpers -- confirm there.
    default_is_valid <- autodb %>>% is_valid_database
    avoidable_is_valid <- with_args(autodb, remove_avoidable = TRUE) %>>%
      is_valid_database
    forall(
      gen_df(6, 7),
      apply_both(default_is_valid, avoidable_is_valid)
    )
  })
  it("is the same as discover >> normalise >> decompose", {
    # Fixed example: the one-step autodb() call must give exactly the same
    # result as running the three steps by hand.
    df <- data.frame(a = 1:4, b = 1:2)
    database <- autodb(df)
    deps <- discover(df, 1)
    schema <- normalise(deps)
    database2 <- decompose(schema, df = df)
    expect_identical(database, database2)
    expect_silent(gv(database))

    # Generated inputs: element 1 is the data frame, the named elements are
    # the flags handed on to both autodb() and normalise().
    # The manual pipeline keeps the data frame as-is, replaces it with its
    # discover() result for normalise(), then feeds both to decompose().
    manual_pipeline <- biapply(
      with_args("[[", 1),
      (function(x) c(list(discover(x[[1]], 1)), x[-1])) %>>%
        (uncurry(normalise))
    ) %>>%
      (uncurry(decompose))
    forall(
      list(
        gen_df(6, 7),
        ensure_lossless = gen.element(c(FALSE, TRUE)),
        remove_avoidable = gen.element(c(FALSE, TRUE))
      ),
      expect_biidentical(uncurry(autodb), manual_pipeline),
      curry = FALSE
    )
  })
  it("runs DFD and normalises the given data.frame", {
    # Small book catalogue: two titles, two rows each.
    titles <- c(
      "Beginning MySQL Database Design and Optimization",
      "The Relational Model for Database Management: Version 2"
    )
    df <- data.frame(
      Title = rep(titles, each = 2),
      Format = c("Hardcover", "E-book", "E-book", "Paperback"),
      Author = rep(c("Chad Russell", "E.F. Codd"), each = 2),
      Author_Nationality = rep(c("American", "British"), each = 2),
      Price = c(4999L, 2234L, 1388L, 3999L),
      Thickness = "Thick",
      Genre_ID = rep(1:2, each = 2),
      Genre_Name = rep(c("Tutorial", "Popular science"), each = 2),
      Publisher_ID = rep(1:2, each = 2)
    )
    database <- autodb(df)

    # Expected shape: three uniquely-named relations, with a single
    # reference from Price to Title on the Title attribute.
    expected_names <- c("Price", "Title", "constants")
    expected_references <- list(list("Price", "Title", "Title", "Title"))

    expect_true(!anyDuplicated(database))
    expect_identical(length(database), 3L)
    expect_setequal(names(database), expected_names)
    expect_setequal(references(database), expected_references)
  })
  it("doesn't choose keys containing attributes with types in exclude_class", {
    titles <- c(
      "Beginning MySQL Database Design and Optimization",
      "The Relational Model for Database Management: Version 2"
    )
    df <- data.frame(
      Title = rep(titles, each = 2),
      Format = c("Hardcover", "E-book", "E-book", "Paperback"),
      Author = rep(c("Chad Russell", "E.F. Codd"), each = 2),
      Author_Nationality = rep(c("American", "British"), each = 2),
      # Price is stored as double here, so its class is "numeric".
      Price = c(4999, 2234, 1388, 3999),
      Thickness = "Thick",
      Genre_ID = rep(1:2, each = 2),
      Genre_Name = rep(c("Tutorial", "Popular science"), each = 2),
      Publisher_ID = rep(1:2, each = 2)
    )
    title_format_key <- c("Title", "Format")

    # Baseline: without any exclusion, Price is a key and names the relation.
    database_nonfiltered <- autodb(df)
    nonfiltered_names <- names(database_nonfiltered)
    nonfiltered_keys <- keys(database_nonfiltered)$Price
    expect_setequal(nonfiltered_names, c("Price", "Title", "constants"))
    expect_identical(nonfiltered_keys, list("Price", title_format_key))

    # With the numeric class excluded, only {Title, Format} remains as a key.
    database_filtered <- autodb(df, exclude_class = "numeric")
    filtered_names <- names(database_filtered)
    filtered_keys <- keys(database_filtered)$Title_Format
    expect_setequal(filtered_names, c("Title_Format", "Title", "constants"))
    expect_identical(filtered_keys, list(title_format_key))
  })
  it("removes keys containing attributes named in exclude", {
    titles <- c(
      "Beginning MySQL Database Design and Optimization",
      "The Relational Model for Database Management: Version 2"
    )
    df <- data.frame(
      Title = rep(titles, each = 2),
      Format = c("Hardcover", "E-book", "E-book", "Paperback"),
      Author = rep(c("Chad Russell", "E.F. Codd"), each = 2),
      Author_Nationality = rep(c("American", "British"), each = 2),
      Price = c(4999L, 2234L, 1388L, 3999L),
      Thickness = "Thick",
      Genre_ID = rep(1:2, each = 2),
      Genre_Name = rep(c("Tutorial", "Popular science"), each = 2),
      Publisher_ID = rep(1:2, each = 2)
    )
    title_format_key <- c("Title", "Format")

    # Baseline: without any exclusion, Price is a key and names the relation.
    database_nonfiltered <- autodb(df)
    nonfiltered_names <- names(database_nonfiltered)
    nonfiltered_keys <- keys(database_nonfiltered)$Price
    expect_setequal(nonfiltered_names, c("Price", "Title", "constants"))
    expect_identical(nonfiltered_keys, list("Price", title_format_key))

    # Excluding Price by name leaves {Title, Format} as the only key.
    database_filtered <- autodb(df, exclude = "Price")
    filtered_names <- names(database_filtered)
    filtered_keys <- keys(database_filtered)$Title_Format
    expect_setequal(filtered_names, c("Title_Format", "Title", "constants"))
    expect_identical(filtered_keys, list(title_format_key))
  })
  # The description used to refer to `filter = TRUE`, an argument this test
  # never passes; it now names what the body exercises, i.e. excluding the
  # integer class through exclude_class.
  it("doesn't choose keys containing integer attributes excluded by exclude_class", {
    df <- data.frame(
      Title = rep(
        c(
          "Beginning MySQL Database Design and Optimization",
          "The Relational Model for Database Management: Version 2"
        ),
        each = 2
      ),
      Format = c("Hardcover", "E-book", "E-book", "Paperback"),
      Author = rep(c("Chad Russell", "E.F. Codd"), each = 2),
      Author_Nationality = rep(c("American", "British"), each = 2),
      # Price is an integer column here, so exclude_class = "integer" hits it.
      Price = c(4999L, 2234L, 1388L, 3999L),
      Thickness = "Thick",
      Genre_ID = rep(1:2, each = 2),
      Genre_Name = rep(c("Tutorial", "Popular science"), each = 2),
      Publisher_ID = rep(1:2, each = 2)
    )

    # Baseline: without any exclusion, Price is a key and names the relation.
    database_nonfiltered <- autodb(df)
    expect_setequal(
      names(database_nonfiltered),
      c("Price", "Title", "constants")
    )
    expect_identical(
      keys(database_nonfiltered)$Price,
      list("Price", c("Title", "Format"))
    )

    # With the integer class excluded, only {Title, Format} remains as a key,
    # and the relation is named after it.
    database_filtered <- autodb(df, exclude_class = "integer")
    expect_setequal(
      names(database_filtered),
      c("Title_Format", "Title", "constants")
    )
    expect_identical(
      keys(database_filtered)$Title_Format,
      list(c("Title", "Format"))
    )
  })
  it("correctly handles attributes with non-df-standard names", {
    # Column names containing spaces are assigned after construction, then
    # checked to come back unchanged from the first relation's attributes.
    odd_names <- c("A 1", "B 2", "C 3")
    df <- data.frame(1:3, c(1, 1, 2), c(1, 2, 2))
    names(df) <- odd_names
    database <- autodb(df)
    expect_identical(attrs(database)[[1]], odd_names)
  })
  it("adds a key table if none given in normalisation", {
    # a and b are copies of each other; c varies independently of them.
    df <- data.frame(
      a = rep(1:2, times = 2),
      b = rep(1:2, times = 2),
      c = rep(1:2, each = 2)
    )
    database <- autodb(df)

    # The a_c relation is expected to hold every (a, c) combination and to
    # have {a, c} as its only key.
    expected_records <- data.frame(
      a = 1:2,
      c = rep(1:2, each = 2),
      row.names = 1:4
    )
    expected_keys <- list(c("a", "c"))

    expect_identical(records(database)$a_c, expected_records)
    expect_identical(keys(database)$a_c, expected_keys)
  })
  it("decomposes zero-column data frames correctly into TABLE_DUM or TABLE_DEE", {
    # Zero columns and zero rows.
    table_dum <- data.frame()
    db_dum <- autodb(table_dum)

    # Zero columns with one row: drop the only column, keep the row count.
    table_dee <- data.frame(a = 1)[, -1, drop = FALSE]
    db_dee <- autodb(table_dee)

    # Zero columns with two rows; expected to give the same database as the
    # one-row case.
    table_deux <- data.frame(a = 1:2)[, -1, drop = FALSE]
    db_deux <- autodb(table_deux)

    # Each database holds a single relation.
    expect_length(db_dum, 1L)
    expect_length(db_dee, 1L)

    # That relation has no records for DUM and exactly one for DEE.
    expect_identical(nrow(records(db_dum)[[1]]), 0L)
    expect_identical(nrow(records(db_dee)[[1]]), 1L)

    expect_identical(db_deux, db_dee)
  })
})

# Try the autodb package in your browser
#
# Any scripts or data that you put into this service are public.
#
# autodb documentation built on April 4, 2025, 5:12 a.m.