# tests/testthat/test-parquet-schema.R

# Exercises parquet_type() in four steps: snapshots of calls that are
# expected to succeed, snapshots of calls that pass `repetition_type`,
# snapshots of calls that are expected to fail, and finally a few
# explicit expect_error() checks on the error message.
#
# NOTE(review): the snapshot blocks are compared against stored snapshot
# output, so editing, adding or reordering the calls inside them will
# presumably require re-accepting the snapshots.
test_that("parquet_type", {
  # Calls expected to succeed. The first group (up to the blank line)
  # uses plain type names, with `type_length` for FIXED_LEN_BYTE_ARRAY;
  # the second group covers names that may need extra arguments
  # (`bit_width`/`is_signed` for INT, `precision`/`scale`/`primitive_type`
  # for DECIMAL, `is_adjusted_utc`/`unit` for TIME and TIMESTAMP).
  expect_snapshot({
    parquet_type("AUTO")
    parquet_type("BOOLEAN")
    parquet_type("INT32")
    parquet_type("INT64")
    parquet_type("INT96")
    parquet_type("FLOAT")
    parquet_type("DOUBLE")
    parquet_type("BYTE_ARRAY")
    parquet_type("FIXED_LEN_BYTE_ARRAY", type_length = 10)

    parquet_type("STRING")
    parquet_type("ENUM")
    parquet_type("UUID")
    parquet_type("INT", bit_width = 8, is_signed = TRUE)
    parquet_type("INT", bit_width = 16, is_signed = TRUE)
    parquet_type("INT", bit_width = 32, is_signed = FALSE)
    parquet_type("INT", bit_width = 64, is_signed = FALSE)
    parquet_type("DECIMAL", precision = 5, primitive_type = "INT32")
    parquet_type("DECIMAL", precision = 5, scale = 0, primitive_type = "INT32")
    parquet_type("DECIMAL", precision = 5, scale = 5, primitive_type = "INT64")
    parquet_type("DECIMAL", precision = 5, primitive_type = "BYTE_ARRAY")
    parquet_type(
      "DECIMAL",
      precision = 5,
      primitive_type = "FIXED_LEN_BYTE_ARRAY",
      type_length = 5
    )
    parquet_type("FLOAT16")
    parquet_type("DATE")
    parquet_type("TIME", is_adjusted_utc = TRUE, unit = "MILLIS")
    parquet_type("TIME", is_adjusted_utc = TRUE, unit = "MICROS")
    parquet_type("TIME", is_adjusted_utc = TRUE, unit = "NANOS")
    parquet_type("TIMESTAMP", is_adjusted_utc = TRUE, unit = "MILLIS")
    parquet_type("TIMESTAMP", is_adjusted_utc = TRUE, unit = "MICROS")
    parquet_type("TIMESTAMP", is_adjusted_utc = TRUE, unit = "NANOS")
    parquet_type("JSON")
    parquet_type("BSON")
  })

  # The `repetition_type` argument, with the values "OPTIONAL",
  # "REQUIRED" and "REPEATED", combined with different type names.
  expect_snapshot({
    parquet_type("INT32", repetition_type = "OPTIONAL")
    parquet_type("STRING", repetition_type = "REQUIRED")
    parquet_type(
      "TIME",
      repetition_type = "REPEATED",
      is_adjusted_utc = TRUE,
      unit = "MILLIS"
    )
  })

  # Calls expected to fail (`error = TRUE`): an unknown type name,
  # missing or invalid extra arguments for FIXED_LEN_BYTE_ARRAY, INT,
  # DECIMAL, TIME and TIMESTAMP, the type names "LIST", "MAP" and
  # "UNKNOWN", and invalid `repetition_type` values.
  expect_snapshot(error = TRUE, {
    parquet_type("FOO")
    parquet_type("FIXED_LEN_BYTE_ARRAY")
    parquet_type("INT", bit_width = 8)
    parquet_type("INT", is_signed = TRUE)
    parquet_type("INT", bit_width = 10, is_signed = TRUE)
    parquet_type("INT", bit_width = 16, is_signed = 1)
    parquet_type("DECIMAL", precision = 5)
    parquet_type("DECIMAL", primitive_type = "INT32")
    parquet_type("DECIMAL", precision = 5/2, primitive_type = "INT32")
    parquet_type("DECIMAL", precision = 0, primitive_type = "INT32")
    parquet_type("DECIMAL", precision = 5, scale = 6, primitive_type = "INT32")
    parquet_type("DECIMAL", precision = 10, primitive_type = "INT32")
    parquet_type("DECIMAL", precision = 19, primitive_type = "INT64")
    parquet_type(
      "DECIMAL",
      precision = 12,
      primitive_type = "FIXED_LEN_BYTE_ARRAY",
      type_length = 5
    )
    parquet_type("TIME", is_adjusted_utc = TRUE)
    parquet_type("TIME", unit = "MILLIS")
    parquet_type("TIME", is_adjusted_utc = 1, unit = "MILLIS")
    parquet_type("TIME", is_adjusted_utc = TRUE, unit = "FOO")
    parquet_type("TIMESTAMP", is_adjusted_utc = TRUE)
    parquet_type("TIMESTAMP", unit = "MILLIS")
    parquet_type("TIMESTAMP", is_adjusted_utc = 1, unit = "MILLIS")
    parquet_type("TIMESTAMP", is_adjusted_utc = TRUE, unit = "FOO")
    parquet_type("LIST")
    parquet_type("MAP")
    parquet_type("UNKNOWN")
    parquet_type("INT32", repetition_type = TRUE)
    parquet_type("INT32", repetition_type = "FOO")
  })

  # Need this as well for covr, which does not handle stop()
  # in snapshots, apparently
  # These repeat four of the failing calls from the snapshot above and
  # match the error message against "not supported by nanoparquet".
  expect_error(parquet_type("FOO"), "not supported by nanoparquet")
  expect_error(parquet_type("LIST"), "not supported by nanoparquet")
  expect_error(parquet_type("MAP"), "not supported by nanoparquet")
  expect_error(parquet_type("UNKNOWN"), "not supported by nanoparquet")
})

# Every shortcut type name must produce the same object as the spelled-out
# parquet_type() call it stands for.
test_that("parquet_type converted type shortcuts", {
  # Integer shortcuts: INT_<width> is the signed variant and UINT_<width>
  # the unsigned one, for each of the four supported bit widths.
  int_widths <- c(8, 16, 32, 64)
  for (signed in c(TRUE, FALSE)) {
    prefix <- if (signed) "INT_" else "UINT_"
    for (width in int_widths) {
      shortcut <- paste0(prefix, width)
      expect_equal(
        parquet_type(shortcut),
        parquet_type("INT", bit_width = width, is_signed = signed),
        # The shortcut name is computed, so surface it on failure.
        info = shortcut
      )
    }
  }

  # Time shortcuts: <KIND>_<UNIT> corresponds to the UTC-adjusted type
  # with that unit. Only MICROS and MILLIS shortcuts are checked.
  for (kind in c("TIME", "TIMESTAMP")) {
    for (time_unit in c("MICROS", "MILLIS")) {
      shortcut <- paste(kind, time_unit, sep = "_")
      expect_equal(
        parquet_type(shortcut),
        parquet_type(kind, is_adjusted_utc = TRUE, unit = time_unit),
        info = shortcut
      )
    }
  }
})

# Builds several schemas with parquet_schema() and snapshots each one
# after converting it with as.data.frame().
#
# NOTE(review): each snapshot records `as.data.frame(schN)`, so renaming
# the `sch*` variables will presumably require re-accepting the snapshots.
test_that("parquet_schema", {
  # All columns unnamed; a type that needs extra arguments is passed as
  # a list() whose first element is the type name.
  sch <- parquet_schema(
    "INT32",
    "BOOLEAN",
    "INT32",
    "INT64",
    "INT96",
    "FLOAT",
    "BYTE_ARRAY",
    list("FIXED_LEN_BYTE_ARRAY", type_length = 10)
  )
  expect_snapshot(as.data.frame(sch))

  # Same types, but every column except the last one is named.
  sch2 <- parquet_schema(
    a = "INT32",
    b = "BOOLEAN",
    c = "INT32",
    d = "INT64",
    e = "INT96",
    f = "FLOAT",
    g = "BYTE_ARRAY",
    list("FIXED_LEN_BYTE_ARRAY", type_length = 10)
  )
  expect_snapshot(as.data.frame(sch2))

  # Same types again, with all columns named, including the list() one.
  sch3 <- parquet_schema(
    a = "INT32",
    b = "BOOLEAN",
    c = "INT32",
    d = "INT64",
    e = "INT96",
    f = "FLOAT",
    g = "BYTE_ARRAY",
    h = list("FIXED_LEN_BYTE_ARRAY", type_length = 10)
  )
  expect_snapshot(as.data.frame(sch3))

  # Unnamed columns using further type names, some with extra arguments
  # (`bit_width`/`is_signed`, `precision`/`primitive_type`).
  # NOTE(review): this uses "INTEGER" where the parquet_type tests in this
  # file use "INT" -- presumably both spellings are accepted; confirm.
  sch4 <- parquet_schema(
    "STRING",
    "ENUM",
    "UUID",
    list("INTEGER", bit_width = 8, is_signed = TRUE),
    list("INTEGER", bit_width = 64, is_signed = FALSE),
    list("DECIMAL", precision = 5, primitive_type = "INT64"),
    "FLOAT16",
    "DATE",
    "JSON",
    "BSON"
  )
  expect_snapshot(as.data.frame(sch4))

  # An "AUTO" column next to a column with an explicit type.
  sch5 <- parquet_schema(
    foo = "AUTO",
    bar = "INT32"
  )
  expect_snapshot(as.data.frame(sch5))
})

# Try the nanoparquet package in your browser
#
# Any scripts or data that you put into this service are public.
#
# nanoparquet documentation built on April 3, 2025, 11:26 p.m.