test_that("errors", {
  options <- parquet_options()
  expect_snapshot(error = TRUE, {
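    # call the internal C writer entry point directly; the snapshot
    # records the error it raises for this argument list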
    .Call(
      rf_nanoparquet_write, mtcars, tempfile(), dim(mtcars), 0L,
      list(character(), character()), rep(FALSE, ncol(mtcars)),
      options, map_schema_to_df(NULL, mtcars), rep(10L, ncol(mtcars)), 1L,
      sys.call()
    )
  })
})
test_that("force PLAIN / RLE", {
tmp <- tempfile(fileext = ".parquet")
on.exit(unlink(tmp), add = TRUE)
d <- data.frame(d1 = 1:100, d2 = rep(1L, 100))
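  # force PLAIN, then RLE, via environment variables and snapshot the
  # page encodings the writer ends up using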
  withr::local_envvar(NANOPARQUET_FORCE_PLAIN = "true")
  write_parquet(d, tmp)
  expect_snapshot({
    read_parquet_pages(tmp)[["encoding"]]
  })
  withr::local_envvar(
    NANOPARQUET_FORCE_PLAIN = NA_character_,
    NANOPARQUET_FORCE_RLE = "true"
  )
  write_parquet(d, tmp)
  expect_snapshot({
    read_parquet_pages(tmp)[["encoding"]]
  })
})
test_that("cutoff for dict encoding decision", {
tmp <- tempfile(fileext = ".parquet")
on.exit(unlink(tmp), add = TRUE)
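  # columns mixing many distinct values with long constant runs; the
  # snapshots record which encodings the writer picks around its
  # dictionary-encoding cutoff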
  d <- data.frame(d1 = c(1:10000, rep(1L, 100000)))
  write_parquet(d, tmp)
  expect_snapshot({
    read_parquet_pages(tmp)[["encoding"]]
  })
  d <- data.frame(d1 = c(rep(c(TRUE, FALSE), 5000), rep(TRUE, 100000)))
  write_parquet(d, tmp)
  expect_snapshot({
    read_parquet_pages(tmp)[["encoding"]]
  })
})
test_that("write broken DECIMAL INT32", {
tmp <- tempfile(fileext = ".parquet")
on.exit(unlink(tmp), add = TRUE)
d <- data.frame(dec = 1:5)
d2 <- data.frame(dec = as.double(1:5))
schema <- parquet_schema(
list("DECIMAL", precision = 5, scale = 2, primitive_type = "INT32")
)
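  # drop the logical type so only the converted type describes the column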
  schema$logical_type[1] <- list(NULL)
  # works w/o logical type
  write_parquet(d, tmp, schema = schema)
  expect_snapshot({
    as.data.frame(read_parquet_schema(tmp))[, -1]
  })
  # no scale?
  schema2 <- schema
  schema2$scale <- NA_integer_
  expect_snapshot(error = TRUE, {
    write_parquet(d, tmp, schema = schema2)
  })
  # no precision?
  schema2 <- schema
  schema2$precision <- NA_integer_
  expect_snapshot(error = TRUE, {
    write_parquet(d, tmp, schema = schema2)
  })
  # bad precision?
  schema2 <- schema
  schema2$precision <- 10L
  expect_snapshot(error = TRUE, {
    write_parquet(d, tmp, schema = schema2)
  })
  expect_snapshot(error = TRUE, {
    write_parquet(d2, tmp, schema = schema2)
  })
})
test_that("write broken DECIMAL INT64", {
tmp <- tempfile(fileext = ".parquet")
on.exit(unlink(tmp), add = TRUE)
d <- data.frame(dec = 1:5)
d2 <- data.frame(dec = as.double(1:5))
schema <- parquet_schema(
list("DECIMAL", precision = 5, scale = 2, primitive_type = "INT64")
)
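  # as above, drop the logical type, then use a precision that is too
  # large for an INT64-backed DECIMAL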
  schema$logical_type[1] <- list(NULL)
  schema2 <- schema
  schema2$precision <- 19L
  expect_snapshot(error = TRUE, {
    write_parquet(d, tmp, schema = schema2)
  })
  expect_snapshot(error = TRUE, {
    write_parquet(d2, tmp, schema = schema2)
  })
})
test_that("write broken INT32", {
tmp <- tempfile(fileext = ".parquet")
on.exit(unlink(tmp), add = TRUE)
d <- data.frame(dec = 1:5)
d2 <- data.frame(dec = as.double(1:5))
schema <- parquet_schema("INT_32")
  schema$logical_type[[1]]$bit_width <- 64L
  expect_snapshot(error = TRUE, {
    write_parquet(d, tmp, schema = schema)
  })
  expect_snapshot(error = TRUE, {
    write_parquet(d2, tmp, schema = schema)
  })
})
test_that("write broken UINT32", {
tmp <- tempfile(fileext = ".parquet")
on.exit(unlink(tmp), add = TRUE)
d <- data.frame(dec = 1:5)
d2 <- data.frame(dec = as.double(1:5))
schema <- parquet_schema("UINT_32")
schema$logical_type[[1]]$bit_width <- 64L
expect_snapshot(error = TRUE, {
write_parquet(d, tmp, schema = schema)
})
expect_snapshot(error = TRUE, {
write_parquet(d2, tmp, schema = schema)
})
})
test_that("write broken INT64", {
tmp <- tempfile(fileext = ".parquet")
on.exit(unlink(tmp), add = TRUE)
d2 <- data.frame(dec = as.double(1:5))
schema <- parquet_schema("INT_64")
schema$logical_type[[1]]$bit_width <- 32
expect_snapshot(error = TRUE, {
write_parquet(d2, tmp, schema = schema)
})
})
test_that("INT96 errors", {
tmp <- tempfile(fileext = ".parquet")
on.exit(unlink(tmp), add = TRUE)
d <- data.frame(c = c(TRUE, FALSE))
expect_snapshot(error = TRUE, {
write_parquet(d, tmp, schema = parquet_schema("INT96"))
})
})
test_that("FLOAT errors", {
tmp <- tempfile(fileext = ".parquet")
on.exit(unlink(tmp), add = TRUE)
d <- data.frame(c = c(TRUE, FALSE))
expect_snapshot(error = TRUE, {
write_parquet(d, tmp, schema = parquet_schema("FLOAT"))
})
})
test_that("DOUBLE errors", {
tmp <- tempfile(fileext = ".parquet")
on.exit(unlink(tmp), add = TRUE)
d <- data.frame(c = c(TRUE, FALSE))
expect_snapshot(error = TRUE, {
write_parquet(d, tmp, schema = parquet_schema("DOUBLE"))
})
})
test_that("BYTE_ARRAY errors", {
tmp <- tempfile(fileext = ".parquet")
on.exit(unlink(tmp), add = TRUE)
d <- data.frame(c = I(list(c(TRUE, FALSE))))
expect_snapshot(error = TRUE, {
write_parquet(d, tmp, schema = parquet_schema("BYTE_ARRAY"))
})
d2 <- data.frame(c = c(TRUE, FALSE))
expect_snapshot(error = TRUE, {
write_parquet(d2, tmp, schema = parquet_schema("BYTE_ARRAY"))
})
})
test_that("FIXED_LEN_BYTE_ARRAY errors", {
tmp <- tempfile(fileext = ".parquet")
on.exit(unlink(tmp), add = TRUE)
d <- data.frame(c = I(list(c(TRUE, FALSE))))
expect_snapshot(error = TRUE, {
write_parquet(d, tmp, schema = parquet_schema("UUID"))
})
expect_snapshot(error = TRUE, {
write_parquet(d, tmp, schema = parquet_schema("FLOAT16"))
})
d2 <- data.frame(c = c("foo", "bar", "foobar"))
schema <- parquet_schema(list("FIXED_LEN_BYTE_ARRAY", type_length = 3))
expect_snapshot(error = TRUE, {
write_parquet(d2, tmp, schema = schema)
})
expect_snapshot(error = TRUE, {
write_parquet(d, tmp, schema = schema)
})
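  # raw vectors whose lengths (3 and 4 bytes) do not all match type_length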
  d3 <- data.frame(c = I(list(charToRaw("foo"), charToRaw("foo2"))))
  expect_snapshot(error = TRUE, {
    write_parquet(d3, tmp, schema = schema)
  })
  d4 <- data.frame(c = c(TRUE, FALSE))
  expect_snapshot(error = TRUE, {
    write_parquet(d4, tmp, schema = schema)
  })
})
test_that("BOOLEAN errors", {
tmp <- tempfile(fileext = ".parquet")
on.exit(unlink(tmp), add = TRUE)
d <- data.frame(c = 1:5)
expect_snapshot(error = TRUE, {
write_parquet(d, tmp, schema = parquet_schema("BOOLEAN"))
})
schema <- parquet_schema(list("BOOLEAN", repetition_type = "OPTIONAL"))
expect_snapshot(error = TRUE, {
write_parquet(d, tmp, schema = schema)
})
})
test_that("Errors when writing a dictionary", {
tmp <- tempfile(fileext = ".parquet")
on.exit(unlink(tmp), add = TRUE)
d <- data.frame(c = 1:5)
expect_snapshot(error = TRUE, {
write_parquet(d, tmp, schema = parquet_schema("DOUBLE"), encoding = "RLE_DICTIONARY")
})
d2 <- data.frame(c = as.double(1:5))
expect_snapshot(error = TRUE, {
write_parquet(d2, tmp, schema = parquet_schema("BYTE_ARRAY"), encoding = "RLE_DICTIONARY")
})
d3 <- data.frame(c = as.factor(letters))
expect_snapshot(error = TRUE, {
write_parquet(d3, tmp, schema = parquet_schema("DOUBLE"), encoding = "RLE_DICTIONARY")
})
# too small value for DECIMAL INT32
d4 <- data.frame(c = -101L)
schema4 <- parquet_schema(list("DECIMAL", precision = 2, primitive_type = "INT32"))
expect_snapshot(error = TRUE, {
write_parquet(d4, tmp, schema = schema4, encoding = "RLE_DICTIONARY")
})
# too larse values for DECIMAL INT32
d5 <- data.frame(c = 101L)
schema5 <- parquet_schema(list("DECIMAL", precision = 2, primitive_type = "INT32"))
expect_snapshot(error = TRUE, {
write_parquet(d5, tmp, schema = schema5, encoding = "RLE_DICTIONARY")
})
# too small value for DECIMAL INT64
d4 <- data.frame(c = -101L)
schema4 <- parquet_schema(list("DECIMAL", precision = 2, primitive_type = "INT64"))
expect_snapshot(error = TRUE, {
write_parquet(d4, tmp, schema = schema4, encoding = "RLE_DICTIONARY")
})
# too larse values for DECIMAL INT64
d5 <- data.frame(c = 101L)
schema5 <- parquet_schema(list("DECIMAL", precision = 2, primitive_type = "INT64"))
expect_snapshot(error = TRUE, {
write_parquet(d5, tmp, schema = schema5, encoding = "RLE_DICTIONARY")
})
# no INTSXP -> DOUBLE conversion
d5 <- data.frame(c = 1:5)
expect_snapshot(error = TRUE, {
write_parquet(d5, tmp, schema = parquet_schema("DOUBLE"), encoding = "RLE_DICTIONARY")
})
})
test_that("POSIXct dictionary", {
tmp <- tempfile(fileext = ".parquet")
on.exit(unlink(tmp), add = TRUE)
d <- data.frame(d = structure(1724157480.12919, class = c("POSIXct", "POSIXt")))
write_parquet(d, tmp, encoding = "RLE_DICTIONARY")
expect_snapshot({
as.data.frame(read_parquet(tmp))
read_parquet_metadata(tmp)[["column_chunks"]]$encodings
})
expect_snapshot(error = TRUE, {
write_parquet(d, tmp, schema = parquet_schema("INT32"), encoding = "RLE_DICTIONARY")
})
d2 <- data.frame(x = as.difftime(1, units = "secs"))
write_parquet(d2, tmp, encoding = "RLE_DICTIONARY")
expect_snapshot({
as.data.frame(read_parquet(tmp))
read_parquet_metadata(tmp)[["column_chunks"]]$encodings
})
expect_snapshot(error = TRUE, {
write_parquet(d2, tmp, schema = parquet_schema("INT32"), encoding = "RLE_DICTIONARY")
})
})
test_that("more dictionaries", {
tmp <- tempfile(fileext = ".parquet")
on.exit(unlink(tmp), add = TRUE)
# too small value for DECIMAL INT32
d4 <- data.frame(c = as.double(-101L))
schema4 <- parquet_schema(list("DECIMAL", precision = 2, primitive_type = "INT32"))
expect_snapshot(error = TRUE, {
write_parquet(d4, tmp, schema = schema4, encoding = "RLE_DICTIONARY")
})
# too larse values for DECIMAL INT32
d5 <- data.frame(c = as.double(101L))
schema5 <- parquet_schema(list("DECIMAL", precision = 2, primitive_type = "INT32"))
expect_snapshot(error = TRUE, {
write_parquet(d5, tmp, schema = schema5, encoding = "RLE_DICTIONARY")
})
# invalid bit width
schema <- parquet_schema("INT_32")
schema$logical_type[[1]]$bit_width <- 64L
expect_snapshot(error = TRUE, {
write_parquet(d5, tmp, schema = schema, encoding = "RLE_DICTIONARY")
})
schema <- parquet_schema("UINT_32")
schema$logical_type[[1]]$bit_width <- 64L
expect_snapshot(error = TRUE, {
write_parquet(d5, tmp, schema = schema, encoding = "RLE_DICTIONARY")
})
# too large value
d6 <- data.frame(c = 128.0)
expect_snapshot(error = TRUE, {
write_parquet(d6, tmp, schema = parquet_schema("INT_8"), encoding = "RLE_DICTIONARY")
})
# too large value
d7 <- data.frame(c = 256.0)
expect_snapshot(error = TRUE, {
write_parquet(d7, tmp, schema = parquet_schema("UINT_8"), encoding = "RLE_DICTIONARY")
})
# too small value
d8 <- data.frame(c = -1)
expect_snapshot(error = TRUE, {
write_parquet(d8, tmp, schema = parquet_schema("UINT_8"), encoding = "RLE_DICTIONARY")
})
# too small value for DECIMAL INT64
d9 <- data.frame(c = as.double(-101L))
schema4 <- parquet_schema(list("DECIMAL", precision = 2, primitive_type = "INT64"))
expect_snapshot(error = TRUE, {
write_parquet(d9, tmp, schema = schema4, encoding = "RLE_DICTIONARY")
})
# too larse values for DECIMAL INT32
d10 <- data.frame(c = as.double(101L))
schema5 <- parquet_schema(list("DECIMAL", precision = 2, primitive_type = "INT64"))
expect_snapshot(error = TRUE, {
write_parquet(d10, tmp, schema = schema5, encoding = "RLE_DICTIONARY")
})
# no DOUBLE -> BYTE_ARRAY conversion
expect_snapshot(error = TRUE, {
write_parquet(d10, tmp, schema = parquet_schema("BYTE_ARRAY"), encoding = "RLE_DICTIONARY")
})
})
test_that("Even more dictionaries", {
tmp <- tempfile(fileext = ".parquet")
on.exit(unlink(tmp), add = TRUE)
# bad UUID value
d <- data.frame(x = "not-a-uuid")
expect_snapshot(error = TRUE, {
write_parquet(d, tmp, schema = parquet_schema("UUID"), encoding = "RLE_DICTIONARY")
})
# invalid string length in FIXED_LEN_BYTE_ARRAY
d2 <- data.frame(x = c("foo", "thisisbad"))
sch2 <- parquet_schema(list("FIXED_LEN_BYTE_ARRAY", type_length = 3))
expect_snapshot(error = TRUE, {
write_parquet(d2, tmp, schema = sch2, encoding = "RLE_DICTIONARY")
})
# no CHARSXP -> DOUBLE conversion
expect_snapshot(error = TRUE, {
write_parquet(d2, tmp, schema = parquet_schema("DOUBLE"), encoding = "RLE_DICTIONARY")
})
})
test_that("R -> Parquet mapping error", {
d <- data.frame(x = raw(10))
expect_snapshot(error = TRUE, {
infer_parquet_schema(d)
})
})
test_that("argument errors", {
expect_snapshot(error = TRUE, {
write_parquet(mtcars, 1:10)
write_parquet(mtcars, letters)
})
})
test_that("edge case: empty dict", {
tmp <- tempfile(fileext = ".parquet")
on.exit(unlink(tmp), add = TRUE)
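  # an all-NA column yields an empty dictionary; the file should still
  # round-trip to the same data frame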
  df <- data.frame(address_id = c(NA_integer_, NA_integer_, NA_integer_))
  write_parquet(df, tmp)
  expect_equal(as.data.frame(read_parquet(tmp)), df)
})