tests/testthat/test-create_schema.R

test_that("create_schema() returns a valid Table Schema", {
  df <- data.frame(
    "col_1" = c(1, 2),
    "col_2" = factor(c("a", "b"), levels = c("a", "b", "c"))
  )
  expected_schema <- list(
    fields = list(
      list(
        name = "col_1",
        type = "number"
        # Do not add empty property "constraints"
      ),
      list(
        name = "col_2",
        type = "string",
        constraints = list(
          enum = c("a", "b", "c")
        )
      )
    )
  )
  expect_identical(create_schema(df), expected_schema)
  expect_no_error(check_schema(create_schema(df)))
})

test_that("create_schema() returns error on invalid or empty data frame", {
  expect_error(
    create_schema("not_a_df"),
    class = "frictionless_error_data_invalid"
  )
  expect_error(
    create_schema(data.frame()),
    class = "frictionless_error_data_invalid"
  )
})

test_that("create_schema() accepts data frames and tibbles", {
  df <- data.frame("col_1" = c(1, 2), "col_2" = c("a", "b"))
  tibble <- dplyr::tibble("col_1" = c(1, 2), "col_2" = c("a", "b"))
  expect_type(create_schema(df), "list")
  expect_type(create_schema(tibble), "list")
})

test_that("create_schema() uses colnames as field names", {
  df <- data.frame(NA, NA, NA, NA)
  colnames <- c("col_1", "Column 2", "col_3!") # Only 3 of 4 defined
  colnames(df) <- colnames
  expect_identical(
    purrr::map_chr(create_schema(df)$fields, ~ .x$name),
    c(colnames, "") # Last unnamed column (NA) should have empty name ""
  )
})

test_that("create_schema() translates coltypes into field types", {
  interval <- lubridate::interval(
    lubridate::ymd("2020-03-01"), lubridate::ymd("2020-03-02")
  )
  dttm <- "2020-03-01T08:00:00"

  # Create data.frame with base classes + some returned by read_delim()
  df <- data.frame(
    array = as.array(1),                # numeric
    character = as.character(1),        # character
    complex = as.complex(1),            # complex
    # data.frame = as.data.frame(1),    # results in X1 (numeric)
    Date = as.Date("2020-03-01"),       # Date
    difftime = as.difftime(interval, units = "weeks"), # difftime
    # dist = as.dist(1),                # cannot coerce to df
    double = as.double(1),              # numeric
    factor = as.factor(1),              # factor
    hms = hms::as_hms("08:00:00"),      # hms,difftime
    integer = as.integer(1),            # integer
    # list = as.list(1),                # results in X1 (numeric)
    logical = as.logical(1),            # logical
    matrix = as.matrix(1),              # numeric
    # null = as.null(),                 # args imply differing number of rows
    numeric = as.numeric(1),            # numeric
    # octmode = as.octmode(1),          # cannot coerce to df
    period = lubridate::as.period(interval), # Period
    # person = as.person(1),            # cannot coerce to df
    POSIXct = as.POSIXct(dttm),         # POSIXct,POSIXt
    POSIXct_tz = as.POSIXct(dttm, tz = "EET"), # POSIXct,POSIXt
    POSIXlt = as.POSIXlt(dttm),         # POSIXct,POSIXt
    POSIXlt_tz = as.POSIXlt(dttm, tz = "EET"), # POSIXct,POSIXt
    # raster = as.raster(1),            # cannot coerce to df
    raw = as.raw(1),                    # raw
    # roman = as.roman(1),              # cannot coerce to df
    single = as.single(1),              # numeric
    # symbol = as.symbol(1),            # cannot coerce to df
    # table = as.table(1),              # results in table.Var1 (fct),
                                        # table.Freq (num)
    ts = as.ts(1),                      # ts
    vector = as.vector(1)               # numeric
  )
  schema <- create_schema(df)
  types <- purrr::map(schema$fields, ~ .x$type)
  types <- setNames(types, purrr::map_chr(schema$fields, ~ .x$name))

  expect_identical(
    types,
    list(
      array = "number",
      character = "string",
      complex = "any",
      Date = "date",
      difftime = "number", # Expressed as number when written to csv
      double = "number",
      factor = "string",
      hms = "time",
      integer = "integer",
      logical = "boolean",
      matrix = "number",
      numeric = "number",
      period = "any", # 1d 0H 0M 0S
      POSIXct = "datetime",
      POSIXct_tz = "datetime",
      POSIXlt = "datetime",
      POSIXlt_tz = "datetime",
      raw = "any",
      single = "number",
      ts = "any",
      vector = "number"
    )
  )
})

test_that("create_schema() will set columns containing all NA to string", {
  df <- data.frame(
    na_all = NA,
    na_one = c(NA, 1),
    logical = c(TRUE, NA),
    na_integer = NA_integer_,
    na_real = NA_real_,
    na_complex = NA_complex_,
    na_character = NA_character_
  )
  schema <- create_schema(df)
  types <- purrr::map(schema$fields, ~ .x$type)
  types <- setNames(types, purrr::map_chr(schema$fields, ~ .x$name))

  expect_identical(
    types,
    list(
      na_all = "string",
      na_one = "number",
      logical = "boolean",
      na_integer = "integer",
      na_real = "number",
      na_complex = "any",
      na_character = "string"
    )
  )
})
frictionlessdata/frictionless-r documentation built on April 17, 2025, 11:45 a.m.