test-autodb.r
In autodb: Automatic Database Normalisation for Data Frames

describe("autodb", {
  it("returns valid databases", {
    forall(
      gen_df(6, 7),
      apply_both(
        autodb %>>% is_valid_database,
        with_args(autodb, remove_avoidable = TRUE) %>>% is_valid_database
      )
    )
  })
  it("is the same as discover >> normalise >> decompose", {
    df <- data.frame(a = 1:4, b = 1:2)
    database <- autodb(df)
    database2 <- discover(df) |>
      normalise() |>
      decompose(df = df)
    expect_identical(database, database2)
    expect_silent(gv(database))

    forall(
      list(
        gen_df(6, 7),
        digits = gen.element(c(7:1, NA_integer_)),
        ensure_lossless = gen.element(c(FALSE, TRUE)),
        remove_avoidable = gen.element(c(FALSE, TRUE))
      ),
      \(df, digits, ensure_lossless, remove_avoidable) {
        expect_identical(
          autodb(
            df,
            digits = digits,
            ensure_lossless = ensure_lossless,
            remove_avoidable = remove_avoidable
          ),
          decompose(
            df,
            normalise(
              discover(df, digits = digits),
              ensure_lossless = ensure_lossless,
              remove_avoidable = remove_avoidable
            ),
            digits = digits
          )
        )
      },
      curry = TRUE
    )
  })
  it("runs DFD and normalises the given data.frame", {
    df <- data.frame(
      Title = rep(
        c(
          "Beginning MySQL Database Design and Optimization",
          "The Relational Model for Database Management: Version 2"
        ),
        each = 2
      ),
      Format = c("Hardcover", "E-book", "E-book", "Paperback"),
      Author = rep(c("Chad Russell", "E.F. Codd"), each = 2),
      Author_Nationality = rep(c("American", "British"), each = 2),
      Price = c(4999L, 2234L, 1388L, 3999L),
      Thickness = "Thick",
      Genre_ID = rep(1:2, each = 2),
      Genre_Name = rep(c("Tutorial", "Popular science"), each = 2),
      Publisher_ID = rep(1:2, each = 2)
    )
    database <- autodb(df)
    expect_true(!anyDuplicated(database))
    expect_identical(length(database), 3L)
    expect_setequal(names(database), c("Price", "Title", "constants"))
    expect_setequal(
      references(database),
      list(list("Price", "Title", "Title", "Title"))
    )
  })
  it("doesn't choose keys containing attributes with types in exclude_class", {
    df <- data.frame(
      Title = rep(
        c(
          "Beginning MySQL Database Design and Optimization",
          "The Relational Model for Database Management: Version 2"
        ),
        each = 2
      ),
      Format = c("Hardcover", "E-book", "E-book", "Paperback"),
      Author = rep(c("Chad Russell", "E.F. Codd"), each = 2),
      Author_Nationality = rep(c("American", "British"), each = 2),
      Price = c(4999, 2234, 1388, 3999),
      Thickness = "Thick",
      Genre_ID = rep(1:2, each = 2),
      Genre_Name = rep(c("Tutorial", "Popular science"), each = 2),
      Publisher_ID = rep(1:2, each = 2)
    )
    database_nonfiltered <- autodb(df)
    expect_setequal(
      names(database_nonfiltered),
      c("Price", "Title", "constants")
    )
    expect_identical(
      keys(database_nonfiltered)$Price,
      list("Price", c("Title", "Format"))
    )
    database_filtered <- autodb(df, exclude_class = "numeric")
    expect_setequal(
      names(database_filtered),
      c("Title_Format", "Title", "constants")
    )
    expect_identical(
      keys(database_filtered)$Title_Format,
      list(c("Title", "Format"))
    )
  })
  it("removes keys containing attributes named in exclude", {
    df <- data.frame(
      Title = rep(
        c(
          "Beginning MySQL Database Design and Optimization",
          "The Relational Model for Database Management: Version 2"
        ),
        each = 2
      ),
      Format = c("Hardcover", "E-book", "E-book", "Paperback"),
      Author = rep(c("Chad Russell", "E.F. Codd"), each = 2),
      Author_Nationality = rep(c("American", "British"), each = 2),
      Price = c(4999L, 2234L, 1388L, 3999L),
      Thickness = "Thick",
      Genre_ID = rep(1:2, each = 2),
      Genre_Name = rep(c("Tutorial", "Popular science"), each = 2),
      Publisher_ID = rep(1:2, each = 2)
    )
    database_nonfiltered <- autodb(df)
    expect_setequal(
      names(database_nonfiltered),
      c("Price", "Title", "constants")
    )
    expect_identical(
      keys(database_nonfiltered)$Price,
      list("Price", c("Title", "Format"))
    )
    database_filtered <- autodb(df, exclude = "Price")
    expect_setequal(
      names(database_filtered),
      c("Title_Format", "Title", "constants")
    )
    expect_identical(
      keys(database_filtered)$Title_Format,
      list(c("Title", "Format"))
    )
  })
  it("doesn't choose keys with incorrect types as the index if filter = TRUE", {
    df <- data.frame(
      Title = rep(
        c(
          "Beginning MySQL Database Design and Optimization",
          "The Relational Model for Database Management: Version 2"
        ),
        each = 2
      ),
      Format = c("Hardcover", "E-book", "E-book", "Paperback"),
      Author = rep(c("Chad Russell", "E.F. Codd"), each = 2),
      Author_Nationality = rep(c("American", "British"), each = 2),
      Price = c(4999L, 2234L, 1388L, 3999L),
      Thickness = "Thick",
      Genre_ID = rep(1:2, each = 2),
      Genre_Name = rep(c("Tutorial", "Popular science"), each = 2),
      Publisher_ID = rep(1:2, each = 2)
    )
    database_nonfiltered <- autodb(df)
    expect_setequal(
      names(database_nonfiltered),
      c("Price", "Title", "constants")
    )
    expect_identical(
      keys(database_nonfiltered)$Price,
      list("Price", c("Title", "Format"))
    )
    database_filtered <- autodb(df, exclude_class = "integer")
    expect_setequal(
      names(database_filtered),
      c("Title_Format", "Title", "constants")
    )
    expect_identical(
      keys(database_filtered)$Title_Format,
      list(c("Title", "Format"))
    )
  })
  it("correctly handles attributes with non-df-standard names", {
    df <- data.frame(1:3, c(1, 1, 2), c(1, 2, 2)) |>
      stats::setNames(c("A 1", "B 2", "C 3"))
    database <- autodb(df)
    expect_identical(attrs(database)[[1]], c("A 1", "B 2", "C 3"))
  })
  it("adds a key table if none given in normalisation", {
    df <- data.frame(
      a = c(1L, 2L, 1L, 2L),
      b = c(1L, 2L, 1L, 2L),
      c = c(1L, 1L, 2L, 2L)
    )
    database <- autodb(df)
    expect_identical(
      records(database)$a_c,
      data.frame(a = 1:2, c = rep(1:2, each = 2), row.names = 1:4)
    )
    expect_identical(
      keys(database)$a_c,
      list(c("a", "c"))
    )
  })
  it("decomposes zero-column data frames correctly into TABLE_DUM or TABLE_DEE", {
    table_dum <- data.frame()
    table_dee <- data.frame(a = 1)[, -1, drop = FALSE]
    table_deux <- data.frame(a = 1:2)[, -1, drop = FALSE]
    db_dum <- autodb(table_dum)
    db_dee <- autodb(table_dee)
    db_deux <- autodb(table_deux)
    expect_length(db_dum, 1L)
    expect_length(db_dee, 1L)
    expect_identical(nrow(records(db_dum)[[1]]), 0L)
    expect_identical(nrow(records(db_dee)[[1]]), 1L)
    expect_identical(db_deux, db_dee)
  })
  it("adds row names to database if added in keep_rownames", {
    x <- data.frame(
      a = c(1, 1, 1, 2, 2, 3, 3, 3, 4),
      b = c(1, 1, 1, 1, 1, 2, 2, 2, 3),
      row.names = letters[1:9]
    )
    db <- autodb(x, keep_rownames = TRUE)
    expect_identical(
      db,
      database(
        relation(
          list(
            row = list(
              df = cbind(data.frame(row = letters[1:9]), x["a"]),
              keys = list("row")
            ),
            a = list(df = unique(x), keys = list("a"))
          ),
          c("row", "a", "b")
        ),
        list(list("row", "a", "a", "a"))
      )
    )
  })
})

Any scripts or data that you put into this service are public.

autodb documentation built on Nov. 5, 2025, 6:49 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

autodb
Automatic Database Normalisation for Data Frames

tests/testthat/test-autodb.r
In autodb: Automatic Database Normalisation for Data Frames

Try the autodb package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

autodb Automatic Database Normalisation for Data Frames

tests/testthat/test-autodb.r In autodb: Automatic Database Normalisation for Data Frames

Try the autodb package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

autodb
Automatic Database Normalisation for Data Frames

tests/testthat/test-autodb.r
In autodb: Automatic Database Normalisation for Data Frames