Nothing
test_that("parquet_pages", {
  # List the pages of the mtcars fixture and snapshot them as a plain
  # data frame.
  mtcars_file <- test_path("data/mtcars-arrow.parquet")
  pp <- read_parquet_pages(mtcars_file)
  expect_snapshot(as.data.frame(pp))
})
test_that("read_parquet_page", {
  mtcars_file <- test_path("data/mtcars-arrow.parquet")

  # First page of interest: read it and snapshot the printed result.
  page <- read_parquet_page(mtcars_file, 4)
  expect_snapshot(page)

  # A second page further into the same file.
  page2 <- read_parquet_page(mtcars_file, 444)
  expect_snapshot(page2)
})
# Snapshot test: reads one page from the RLE boolean encoding fixture and
# records the printed result of read_parquet_page().
# NOTE(review): the second argument (4L) is presumably the page header
# offset within the file -- confirm against read_parquet_page()'s docs.
test_that("read_parquet_page for trick v2 data page", {
pf <- test_path("data/rle_boolean_encoding.parquet")
expect_snapshot({
read_parquet_page(pf, 4L)
})
})
test_that("read_parquet_page error", {
  # Skipped under ASAN, see
  # https://github.com/llvm/llvm-project/issues/59432
  if (is_asan()) {
    skip("ASAN bug")
  }

  mtcars_file <- test_path("data/mtcars-arrow.parquet")
  # Reading at this position of the fixture is expected to fail.
  expect_error(read_parquet_page(mtcars_file, 5))
})
test_that("snappy", {
  # Ten round trips on freshly generated random bytes.
  for (round in seq_len(10)) {
    original <- as.raw(as.integer(runif(1000) * 100))
    compressed <- snappy_compress(original)
    restored <- snappy_uncompress(compressed)

    # The compressed bytes must not simply be a copy of the input.
    unchanged <- length(original) == length(compressed) &&
      all(original == compressed)
    expect_false(unchanged)

    # Uncompressing must give back exactly what went in.
    expect_equal(restored, original)
  }
})
test_that("snappy error", {
  # Bytes that were not produced by snappy_compress() must be rejected.
  not_snappy <- charToRaw("foobar")
  expect_error(snappy_uncompress(not_snappy))
})
# Snapshot test for the native `test_memstream` routine. It is invoked twice
# through .Call(): once so the returned value itself is printed into the
# snapshot, and once with the result converted by rawToChar() and written
# with cat(), so the same bytes are also recorded as text.
test_that("MemStream", {
expect_snapshot({
.Call(test_memstream)
cat(rawToChar(.Call(test_memstream)))
})
})
test_that("DELTA_BIT_PACKED decoding", {
  skip_on_cran()
  skip_without_pyarrow()
  if (is_asan()) {
    skip("ASAN false positive")
  }

  # Write `df` to `path` with write_parquet(), then run a small pyarrow
  # script that reads the file back and rewrites it in place with
  # dictionary encoding off and DELTA_BINARY_PACKED column encoding.
  # Returns the processx::run() result of the Python invocation.
  # (The script lines must stay at column 0: they are inside a raw string.)
  rewrite_with_pyarrow <- function(df, path) {
    write_parquet(df, path)
    script_text <- sprintf(r"[
import pyarrow
import pyarrow.parquet as pq
path = "%s"
df = pq.read_table(path)
writer = pq.ParquetWriter(path, df.schema, use_dictionary = False, column_encoding = 'DELTA_BINARY_PACKED')
writer.write_table(df)
writer.close()
]", normalizePath(path, winslash = "/"))
    script_file <- tempfile(fileext = ".py")
    on.exit(unlink(script_file), add = TRUE)
    writeLines(script_text, script_file)
    if (nzchar(Sys.which("python3"))) {
      interpreter <- "python3"
    } else {
      interpreter <- "python"
    }
    processx::run(
      interpreter,
      script_file,
      stderr = "2>&1",
      error_on_status = FALSE
    )
  }

  tmp <- tempfile(fileext = ".parquet")
  on.exit(unlink(tmp), add = TRUE)

  # Inputs, in the order they are exercised. The random ones are generated
  # here in the same sequence as they are used.
  cases <- list(
    data.frame(x = c(1:100, 500:600 * 2L)),
    data.frame(x = c(-(101:200) * 2L + 1:10)),
    data.frame(
      x = c(-(101:200) * 2L + 1:10, as.integer(rnorm(123) * 100L + 1000L))
    ),
    data.frame(x = 0L),
    # large integers
    data.frame(x = c(
      as.integer(2^31 - 1 - runif(100) * 10L),
      as.integer(-2^31 + 1 + runif(100) * 10L)
    ))
  )

  for (d in cases) {
    # Snapshot the outcome of the pyarrow rewrite, then decode the page
    # data and compare it with the values that were written.
    stat <- rewrite_with_pyarrow(d, tmp)
    expect_snapshot(stat)
    dbp <- read_parquet_page(tmp, 4L)$data
    expect_equal(dbp_decode_int(dbp), d$x)
  }
})
# Snapshot test that decodes several pages of the issue10279 delta encoding
# fixture with dbp_decode_int().
test_that("DELTA_BINARY_PACKED edge cases", {
# Not run on CRAN, nor under the UBSAN / ASAN sanitizers.
skip_on_cran()
if (is_ubsan()) skip("UBSAN false positive")
if (is_asan()) skip("ASAN false positive")
pf <- test_path("data/issue10279_delta_encoding.parquet")
# Page listing of the fixture; its page_header_offset column supplies the
# position passed to read_parquet_page() for each page below.
pgs <- read_parquet_pages(pf)
# Pages 1, 3, 4 and 5 are read and decoded. For page 1 the leading
# definition_levels_byte_length + repetition_levels_byte_length bytes of
# the page data are dropped before decoding; for the other pages the page
# data is decoded as is.
expect_snapshot({
# this is BOOLEAN in the file, which should not happen AFAICT
p1 <- read_parquet_page(pf, pgs$page_header_offset[1])
dbp1 <- p1$data[
(p1$definition_levels_byte_length +
p1$repetition_levels_byte_length + 1):length(p1$data)]
dbp_decode_int(dbp1)
p3 <- read_parquet_page(pf, pgs$page_header_offset[3])
dbp3 <- p3$data
dbp_decode_int(dbp3)
p4 <- read_parquet_page(pf, pgs$page_header_offset[4])
dbp4 <- p4$data
dbp_decode_int(dbp4)
# unfortunately the minimum int32 value is NA in R
# not sure what to do about this...
p5<- read_parquet_page(pf, pgs$page_header_offset[5])
dbp5 <- p5$data
dbp_decode_int(dbp5)
})
})
test_that("DELTA_BIANRY_PACKED INT64", {
  suppressPackageStartupMessages(library(bit64))
  pf <- test_path("data/dbp-int64.parquet")
  dt <- read_parquet_page(pf, 4L)$data

  # Open the connection first and only then register its cleanup, so the
  # exit handler never refers to a `con` that was not created.
  con <- rawConnection(dt)
  on.exit(close(con), add = TRUE)

  # The page data starts with a 4-byte integer length prefix. Parquet
  # stores it little-endian, so read it that way explicitly instead of
  # relying on the platform's native byte order.
  # NOTE(review): the prefixed section is presumably the levels data that
  # precedes the encoded values -- confirm against the fixture.
  len <- readBin(con, "integer", n = 1L, size = 4L, endian = "little")

  # Drop the prefix (4 bytes) and the `len` bytes it announces; what is
  # left is passed to the decoder.
  dt <- dt[(len + 4 + 1):length(dt)]
  expect_snapshot({
    dbp_decode_int(dt)
  })
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.