Nothing
# Invalid row-group arguments: the calls below are evaluated inside
# expect_snapshot(error = TRUE), so their output is compared against the
# stored snapshot.
test_that("errors", {
# parquet_options() is given a string for num_rows_per_row_group.
expect_snapshot(error = TRUE, {
parquet_options(num_rows_per_row_group = "foobar")
})
df <- test_df()
tmp <- tempfile(fileext = ".parquet")
# Remove the temporary file on exit.
on.exit(unlink(tmp), add = TRUE)
# write_parquet() is given three row_groups values: a string, the
# decreasing pair c(100L, 1L), and c(1L, 100L).
# NOTE(review): presumably 100 exceeds the number of rows in test_df() --
# confirm against the helper.
expect_snapshot(error = TRUE, {
write_parquet(df, tmp, row_groups = "foobar")
write_parquet(df, tmp, row_groups = c(100L, 1L))
write_parquet(df, tmp, row_groups = c(1L, 100L))
})
})
test_that("row groups", {
  # ref_path holds the reference file; out_path is rewritten for each case.
  ref_path <- tempfile(fileext = ".parquet")
  out_path <- tempfile(fileext = ".parquet")
  on.exit(unlink(c(ref_path, out_path)), add = TRUE)

  # Number of rows in the "row_groups" table of a file's metadata.
  count_row_groups <- function(path) {
    nrow(read_parquet_metadata(path)[["row_groups"]])
  }

  df <- test_df()
  write_parquet(df, ref_path, row_groups = 1L)

  # Case 1: two entries in row_groups, two row groups expected in the file.
  write_parquet(df, out_path, row_groups = c(1L, 16L))
  expect_equal(read_parquet(ref_path), read_parquet(out_path))
  expect_equal(count_row_groups(out_path), 2L)
  unlink(out_path)

  # Case 2: one entry per row, as many row groups as rows expected.
  n_rows <- nrow(df)
  write_parquet(df, out_path, row_groups = seq_len(n_rows))
  expect_equal(read_parquet(ref_path), read_parquet(out_path))
  expect_equal(count_row_groups(out_path), n_rows)
  unlink(out_path)

  # Case 3: no row_groups argument; the option is set for the rest of the
  # test and four row groups are expected.
  withr::local_options(nanoparquet.num_rows_per_row_group = 10L)
  write_parquet(df, out_path)
  expect_equal(read_parquet(ref_path), read_parquet(out_path))
  expect_equal(count_row_groups(out_path), 4L)
})
test_that("factors & factor levels", {
  path <- tempfile(fileext = ".parquet")
  on.exit(unlink(path), add = TRUE)

  # A single factor column: 100 rows each of "a", "b" and "c".
  df <- data.frame(f = factor(rep(c("a", "b", "c"), each = 100)))

  withr::local_options(nanoparquet.num_rows_per_row_group = 50L)
  write_parquet(df, path)
  expect_equal(as.data.frame(read_parquet(path)), df)

  # Every dictionary page in the file must carry the same data as the
  # first one.
  pages <- read_parquet_pages(path)
  is_dict_page <- pages[["page_type"]] == "DICTIONARY_PAGE"
  dict_offsets <- pages[["page_header_offset"]][is_dict_page]

  first_dict <- read_parquet_page(path, dict_offsets[1])[["data"]]
  for (offset in dict_offsets) {
    expect_equal(first_dict, read_parquet_page(path, offset)[["data"]])
  }
})
test_that("non-factors write local dictionary", {
  tmp <- tempfile(fileext = ".parquet")
  on.exit(unlink(tmp), add = TRUE)

  # A single character (non-factor) column: 100 rows each of "a", "b", "c".
  df <- data.frame(
    f = rep(c("a", "b", "c"), each = 100),
    stringsAsFactors = FALSE
  )

  withr::local_options(nanoparquet.num_rows_per_row_group = 40L)
  write_parquet(df, tmp)
  expect_equal(as.data.frame(read_parquet(tmp)), df)

  # Offsets of all dictionary pages in the written file.
  pages <- read_parquet_pages(tmp)
  is_dict_page <- pages[["page_type"]] == "DICTIONARY_PAGE"
  dict_ofs <- pages[["page_header_offset"]][is_dict_page]

  # The data of each dictionary page is printed and compared with the stored
  # snapshot. The names used inside the snapshot expression are kept as they
  # are so that the recorded code still matches.
  expect_snapshot({
    for (do in dict_ofs) {
      print(read_parquet_page(tmp, do)[["data"]])
    }
  })
})
test_that("strings in a dictionary", {
  path <- tempfile(fileext = ".parquet")
  on.exit(unlink(path), add = TRUE)

  input <- test_df()

  # "RLE" is named for `large`; the unnamed entry is "RLE_DICTIONARY".
  enc <- c(large = "RLE", "RLE_DICTIONARY")
  opts <- parquet_options(num_rows_per_row_group = 10)
  write_parquet(input, path, encoding = enc, options = opts)

  # The data must survive a write/read round trip unchanged.
  round_trip <- as.data.frame(read_parquet(path))
  expect_equal(as.data.frame(input), round_trip)
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.