Nothing
# -- Phase 1: compression round-trips ------------------------------------------
test_that("compress='fast' round-trips doubles", {
df <- data.frame(x = rnorm(500), y = runif(500))
f <- tempfile(fileext = ".vtr")
on.exit(unlink(f))
write_vtr(df, f, compress = "fast")
result <- tbl(f) |> collect()
expect_equal(result$x, df$x)
expect_equal(result$y, df$y)
})
test_that("compress='small' round-trips doubles", {
df <- data.frame(x = rnorm(500), y = runif(500))
f <- tempfile(fileext = ".vtr")
on.exit(unlink(f))
write_vtr(df, f, compress = "small")
result <- tbl(f) |> collect()
expect_equal(result$x, df$x)
expect_equal(result$y, df$y)
})
test_that("compress='small' round-trips integers", {
df <- data.frame(a = 1:1000, b = sample(1e6, 1000))
f <- tempfile(fileext = ".vtr")
on.exit(unlink(f))
write_vtr(df, f, compress = "small")
result <- tbl(f) |> collect()
expect_equal(result$a, as.double(df$a))
expect_equal(result$b, as.double(df$b))
})
test_that("compress='small' round-trips strings", {
df <- data.frame(s = sample(letters, 500, replace = TRUE),
stringsAsFactors = FALSE)
f <- tempfile(fileext = ".vtr")
on.exit(unlink(f))
write_vtr(df, f, compress = "small")
result <- tbl(f) |> collect()
expect_equal(result$s, df$s)
})
test_that("compress='small' round-trips booleans with NA", {
df <- data.frame(flag = c(TRUE, FALSE, NA, TRUE, FALSE, rep(TRUE, 495)))
f <- tempfile(fileext = ".vtr")
on.exit(unlink(f))
write_vtr(df, f, compress = "small")
result <- tbl(f) |> collect()
expect_equal(result$flag, df$flag)
})
test_that("compress='small' round-trips all types together", {
df <- data.frame(
i = c(1L, 2L, NA, 4L, seq(5L, 500L)),
d = c(1.1, NA, 3.3, 4.4, rnorm(496)),
b = c(TRUE, NA, FALSE, TRUE, sample(c(TRUE, FALSE), 496, replace = TRUE)),
s = c("a", NA, "c", "d", sample(letters, 496, replace = TRUE)),
stringsAsFactors = FALSE
)
f <- tempfile(fileext = ".vtr")
on.exit(unlink(f))
write_vtr(df, f, compress = "small")
result <- tbl(f) |> collect()
expect_equal(result$d, df$d)
expect_equal(result$b, df$b)
expect_equal(result$s, df$s)
expect_equal(result$i, as.double(df$i))
})
test_that("small files are no larger than fast files on structured data", {
df <- data.frame(x = rep(1:100, 50), y = rep(rnorm(100), 50))
f_fast <- tempfile(fileext = ".vtr")
f_ratio <- tempfile(fileext = ".vtr")
on.exit(unlink(c(f_fast, f_ratio)))
write_vtr(df, f_fast, compress = "fast")
write_vtr(df, f_ratio, compress = "small")
expect_lte(file.size(f_ratio), file.size(f_fast))
})
test_that("compress='small' with multiple row groups", {
df <- data.frame(x = rnorm(1000), y = 1:1000)
f <- tempfile(fileext = ".vtr")
on.exit(unlink(f))
write_vtr(df, f, compress = "small", batch_size = 200)
result <- tbl(f) |> collect()
expect_equal(nrow(result), 1000L)
expect_equal(result$x, df$x)
expect_equal(result$y, as.double(df$y))
})
test_that("small with quantize round-trips", {
set.seed(42)
df <- data.frame(temp = rnorm(1000, 15, 5))
f <- tempfile(fileext = ".vtr")
on.exit(unlink(f))
write_vtr(df, f, compress = "small",
quantize = list(temp = list(precision = 0.01, type = "int16")))
result <- tbl(f) |> collect()
max_err <- max(abs(result$temp - df$temp))
expect_lt(max_err, 0.01 + 1e-10)
})
test_that("small with spatial round-trips", {
nx <- 50; ny <- 50
set.seed(1)
temp <- 10 + 0.05 * rep(1:nx, ny) + 0.03 * rep(1:ny, each = nx) +
rnorm(nx * ny, 0, 0.5)
df <- data.frame(temp = temp)
f <- tempfile(fileext = ".vtr")
on.exit(unlink(f))
write_vtr(df, f, compress = "small",
quantize = list(temp = list(precision = 0.001, type = "int16")),
spatial = list(nx = nx, ny = ny))
result <- tbl(f) |> collect()
max_err <- max(abs(result$temp - df$temp))
expect_lt(max_err, 0.001 + 1e-10)
})
test_that("small with narrow int round-trips", {
df <- data.frame(a = sample(-100L:100L, 500, replace = TRUE),
b = sample(-30000L:30000L, 500, replace = TRUE))
f <- tempfile(fileext = ".vtr")
on.exit(unlink(f))
write_vtr(df, f, compress = "small",
col_types = c(a = "int8", b = "int16"))
result <- tbl(f) |> collect()
expect_equal(result$a, as.double(df$a))
expect_equal(result$b, as.double(df$b))
})
test_that("compress='none' round-trips doubles", {
df <- data.frame(x = rnorm(500), y = runif(500))
f <- tempfile(fileext = ".vtr")
on.exit(unlink(f))
write_vtr(df, f, compress = "none")
result <- tbl(f) |> collect()
expect_equal(result$x, df$x)
expect_equal(result$y, df$y)
})
test_that("compress='fast' round-trips integers", {
df <- data.frame(a = 1:1000, b = sample(1e6, 1000))
f <- tempfile(fileext = ".vtr")
on.exit(unlink(f))
write_vtr(df, f, compress = "fast")
result <- tbl(f) |> collect()
expect_equal(result$a, as.double(df$a))
expect_equal(result$b, as.double(df$b))
})
test_that("compress='fast' round-trips strings", {
df <- data.frame(s = sample(letters, 500, replace = TRUE),
stringsAsFactors = FALSE)
f <- tempfile(fileext = ".vtr")
on.exit(unlink(f))
write_vtr(df, f, compress = "fast")
result <- tbl(f) |> collect()
expect_equal(result$s, df$s)
})
test_that("compress='fast' round-trips booleans with NA", {
df <- data.frame(flag = c(TRUE, FALSE, NA, TRUE, FALSE, rep(TRUE, 495)))
f <- tempfile(fileext = ".vtr")
on.exit(unlink(f))
write_vtr(df, f, compress = "fast")
result <- tbl(f) |> collect()
expect_equal(result$flag, df$flag)
})
test_that("compress='fast' round-trips all types together", {
df <- data.frame(
i = c(1L, 2L, NA, 4L, seq(5L, 500L)),
d = c(1.1, NA, 3.3, 4.4, rnorm(496)),
b = c(TRUE, NA, FALSE, TRUE, sample(c(TRUE, FALSE), 496, replace = TRUE)),
s = c("a", NA, "c", "d", sample(letters, 496, replace = TRUE)),
stringsAsFactors = FALSE
)
f <- tempfile(fileext = ".vtr")
on.exit(unlink(f))
write_vtr(df, f, compress = "fast")
result <- tbl(f) |> collect()
expect_equal(result$d, df$d)
expect_equal(result$b, df$b)
expect_equal(result$s, df$s)
expect_equal(result$i, as.double(df$i))
})
test_that("compressed files are smaller than uncompressed", {
df <- data.frame(x = rep(1:100, 50), y = rep(rnorm(100), 50))
f_none <- tempfile(fileext = ".vtr")
f_fast <- tempfile(fileext = ".vtr")
on.exit(unlink(c(f_none, f_fast)))
write_vtr(df, f_none, compress = "none")
write_vtr(df, f_fast, compress = "fast")
expect_lt(file.size(f_fast), file.size(f_none))
})
test_that("compress='fast' with multiple row groups", {
df <- data.frame(x = rnorm(1000), y = 1:1000)
f <- tempfile(fileext = ".vtr")
on.exit(unlink(f))
write_vtr(df, f, compress = "fast", batch_size = 200)
result <- tbl(f) |> collect()
expect_equal(nrow(result), 1000L)
expect_equal(result$x, df$x)
expect_equal(result$y, as.double(df$y))
})
test_that("uncompressed file reads after rewrite with compression", {
df <- data.frame(x = 1:100, y = rnorm(100))
f1 <- tempfile(fileext = ".vtr")
f2 <- tempfile(fileext = ".vtr")
on.exit(unlink(c(f1, f2)))
write_vtr(df, f1, compress = "none")
write_vtr(df, f2, compress = "fast")
r1 <- tbl(f1) |> collect()
r2 <- tbl(f2) |> collect()
expect_equal(r1$x, r2$x)
expect_equal(r1$y, r2$y)
})
# -- Phase 2a: narrow integer types -------------------------------------------
test_that("col_types int8 round-trips", {
df <- data.frame(val = c(-100L, 0L, 50L, 127L, NA))
f <- tempfile(fileext = ".vtr")
on.exit(unlink(f))
write_vtr(df, f, col_types = c(val = "int8"))
result <- tbl(f) |> collect()
expect_equal(result$val, as.double(df$val))
})
test_that("col_types int16 round-trips", {
df <- data.frame(val = c(-30000L, 0L, 15000L, 32767L, NA))
f <- tempfile(fileext = ".vtr")
on.exit(unlink(f))
write_vtr(df, f, col_types = c(val = "int16"))
result <- tbl(f) |> collect()
expect_equal(result$val, as.double(df$val))
})
test_that("col_types int32 round-trips", {
df <- data.frame(val = c(-2000000L, 0L, 1000000L, NA))
f <- tempfile(fileext = ".vtr")
on.exit(unlink(f))
write_vtr(df, f, col_types = c(val = "int32"))
result <- tbl(f) |> collect()
expect_equal(result$val, as.double(df$val))
})
test_that("narrow int with compression round-trips", {
df <- data.frame(a = sample(-100L:100L, 500, replace = TRUE),
b = sample(-30000L:30000L, 500, replace = TRUE))
f <- tempfile(fileext = ".vtr")
on.exit(unlink(f))
write_vtr(df, f, compress = "fast",
col_types = c(a = "int8", b = "int16"))
result <- tbl(f) |> collect()
expect_equal(result$a, as.double(df$a))
expect_equal(result$b, as.double(df$b))
})
test_that("narrow int files are smaller", {
df <- data.frame(val = sample(-100L:100L, 1000, replace = TRUE))
f64 <- tempfile(fileext = ".vtr")
f8 <- tempfile(fileext = ".vtr")
on.exit(unlink(c(f64, f8)))
write_vtr(df, f64, compress = "none")
write_vtr(df, f8, compress = "none", col_types = c(val = "int8"))
expect_lt(file.size(f8), file.size(f64))
})
test_that("filter works on narrow int columns", {
df <- data.frame(id = 1:200, val = sample(-100L:100L, 200, replace = TRUE))
f <- tempfile(fileext = ".vtr")
on.exit(unlink(f))
write_vtr(df, f, col_types = c(val = "int8"))
result <- tbl(f) |> filter(val > 0) |> collect()
expected <- df[df$val > 0, ]
expect_equal(nrow(result), nrow(expected))
expect_equal(result$val, as.double(expected$val))
})
test_that("mutate works on narrow int columns", {
df <- data.frame(val = c(10L, 20L, 30L))
f <- tempfile(fileext = ".vtr")
on.exit(unlink(f))
write_vtr(df, f, col_types = c(val = "int8"))
result <- tbl(f) |> mutate(doubled = val * 2) |> collect()
expect_equal(result$doubled, c(20, 40, 60))
})
# -- Phase 2b: lossy quantization ---------------------------------------------
test_that("quantize round-trips within precision", {
set.seed(42)
df <- data.frame(temp = runif(500, -20, 20))
f <- tempfile(fileext = ".vtr")
on.exit(unlink(f))
write_vtr(df, f, quantize = list(temp = list(precision = 0.001,
type = "int16")))
result <- tbl(f) |> collect()
expect_equal(length(result$temp), 500L)
max_err <- max(abs(result$temp - df$temp), na.rm = TRUE)
expect_lt(max_err, 0.001 + 1e-10)
})
test_that("quantize with scale parameter", {
df <- data.frame(val = c(0.0, 0.5, 1.0, -1.0))
f <- tempfile(fileext = ".vtr")
on.exit(unlink(f))
write_vtr(df, f, quantize = list(val = list(scale = 1000, type = "int16")))
result <- tbl(f) |> collect()
max_err <- max(abs(result$val - df$val))
expect_lt(max_err, 0.001 + 1e-10)
})
test_that("quantize preserves NA", {
df <- data.frame(temp = c(1.5, NA, -3.2, NA, 10.0))
f <- tempfile(fileext = ".vtr")
on.exit(unlink(f))
write_vtr(df, f, quantize = list(temp = list(precision = 0.01,
type = "int16")))
result <- tbl(f) |> collect()
expect_true(is.na(result$temp[2]))
expect_true(is.na(result$temp[4]))
non_na <- c(1, 3, 5)
max_err <- max(abs(result$temp[non_na] - df$temp[non_na]))
expect_lt(max_err, 0.01 + 1e-10)
})
test_that("quantize with compression round-trips", {
set.seed(123)
df <- data.frame(temp = rnorm(1000, 15, 5))
f <- tempfile(fileext = ".vtr")
on.exit(unlink(f))
write_vtr(df, f, compress = "fast",
quantize = list(temp = list(precision = 0.01, type = "int16")))
result <- tbl(f) |> collect()
max_err <- max(abs(result$temp - df$temp))
expect_lt(max_err, 0.01 + 1e-10)
})
test_that("quantize to int8", {
df <- data.frame(val = c(-5.0, 0.0, 5.0, 12.0))
f <- tempfile(fileext = ".vtr")
on.exit(unlink(f))
write_vtr(df, f, quantize = list(val = list(precision = 0.1,
type = "int8")))
result <- tbl(f) |> collect()
max_err <- max(abs(result$val - df$val))
expect_lt(max_err, 0.1 + 1e-10)
})
test_that("quantize produces smaller files", {
set.seed(99)
df <- data.frame(temp = rnorm(5000, 15, 5))
f_raw <- tempfile(fileext = ".vtr")
f_q <- tempfile(fileext = ".vtr")
on.exit(unlink(c(f_raw, f_q)))
write_vtr(df, f_raw, compress = "fast")
write_vtr(df, f_q, compress = "fast",
quantize = list(temp = list(precision = 0.01, type = "int16")))
expect_lt(file.size(f_q), file.size(f_raw))
})
# -- Phase 2c: spatial encoding -----------------------------------------------
test_that("spatial round-trips on smooth raster data", {
nx <- 50; ny <- 50
x_coord <- rep(1:nx, ny)
y_coord <- rep(1:ny, each = nx)
# smooth temperature field: gradient + small noise
set.seed(1)
temp <- 10 + 0.05 * x_coord + 0.03 * y_coord + rnorm(nx * ny, 0, 0.5)
df <- data.frame(temp = temp)
f <- tempfile(fileext = ".vtr")
on.exit(unlink(f))
write_vtr(df, f, compress = "fast",
quantize = list(temp = list(precision = 0.001, type = "int16")),
spatial = list(nx = nx, ny = ny))
result <- tbl(f) |> collect()
max_err <- max(abs(result$temp - df$temp))
expect_lt(max_err, 0.001 + 1e-10)
})
test_that("spatial without quantize round-trips integers", {
nx <- 20; ny <- 20
vals <- as.integer(rep(1:20, each = 20) + rep(1:20, 20))
df <- data.frame(val = vals)
f <- tempfile(fileext = ".vtr")
on.exit(unlink(f))
write_vtr(df, f, compress = "fast",
spatial = list(nx = nx, ny = ny))
result <- tbl(f) |> collect()
expect_equal(result$val, as.double(df$val))
})
test_that("spatial produces smaller files on smooth data", {
nx <- 100; ny <- 100
set.seed(2)
temp <- 10 + 0.05 * rep(1:nx, ny) + 0.03 * rep(1:ny, each = nx)
df <- data.frame(temp = temp)
f_plain <- tempfile(fileext = ".vtr")
f_spatial <- tempfile(fileext = ".vtr")
on.exit(unlink(c(f_plain, f_spatial)))
write_vtr(df, f_plain, compress = "fast",
quantize = list(temp = list(precision = 0.001, type = "int16")))
write_vtr(df, f_spatial, compress = "fast",
quantize = list(temp = list(precision = 0.001, type = "int16")),
spatial = list(nx = nx, ny = ny))
expect_lt(file.size(f_spatial), file.size(f_plain))
})
test_that("spatial + quantize + fast compression round-trips", {
nx <- 30; ny <- 30
set.seed(3)
temp <- 15 + 0.1 * rep(1:nx, ny) + rnorm(nx * ny, 0, 0.2)
df <- data.frame(temp = temp)
f <- tempfile(fileext = ".vtr")
on.exit(unlink(f))
write_vtr(df, f, compress = "fast",
quantize = list(temp = list(precision = 0.001, type = "int16")),
spatial = list(nx = nx, ny = ny))
result <- tbl(f) |> collect()
max_err <- max(abs(result$temp - df$temp))
expect_lt(max_err, 0.001 + 1e-10)
})
test_that("spatial requires nx*ny == n_rows", {
nx <- 40; ny <- 40
df <- data.frame(temp = rnorm(100))
f <- tempfile(fileext = ".vtr")
on.exit(unlink(f))
expect_error(
write_vtr(df, f, compress = "fast",
spatial = list(nx = nx, ny = ny)),
"nx\\*ny"
)
})
# -- DIFF encoding (auto-selected for slowly varying data) --------------------
test_that("slowly varying integer data round-trips (triggers DIFF encoding)", {
# Monotonically increasing with small increments - should trigger DIFF
df <- data.frame(ts = cumsum(sample(1:5, 1000, replace = TRUE)))
f <- tempfile(fileext = ".vtr")
on.exit(unlink(f))
write_vtr(df, f, compress = "fast")
result <- tbl(f) |> collect()
expect_equal(result$ts, as.double(df$ts))
})
test_that("slowly varying doubles round-trip (may trigger DIFF)", {
set.seed(5)
df <- data.frame(val = cumsum(rnorm(1000, 0, 0.01)))
f <- tempfile(fileext = ".vtr")
on.exit(unlink(f))
write_vtr(df, f, compress = "fast")
result <- tbl(f) |> collect()
expect_equal(result$val, df$val)
})
# -- Combined pipeline: narrow + compress + filter ----------------------------
test_that("full pipeline: write compressed narrow int, then filter + collect", {
df <- data.frame(
id = 1:1000,
category = sample(c(-1L, 0L, 1L), 1000, replace = TRUE),
value = rnorm(1000)
)
f <- tempfile(fileext = ".vtr")
on.exit(unlink(f))
write_vtr(df, f, compress = "fast",
col_types = c(category = "int8"))
result <- tbl(f) |> filter(category == 1) |> collect()
expected <- df[df$category == 1L, ]
expect_equal(nrow(result), nrow(expected))
expect_equal(result$value, expected$value)
})
test_that("full pipeline: quantize + compress + summarise", {
set.seed(10)
df <- data.frame(
grp = rep(c("A", "B"), each = 500),
val = c(rnorm(500, 10, 2), rnorm(500, 20, 2))
)
f <- tempfile(fileext = ".vtr")
on.exit(unlink(f))
write_vtr(df, f, compress = "fast",
quantize = list(val = list(precision = 0.01, type = "int16")))
result <- tbl(f) |>
group_by(grp) |>
summarise(avg = mean(val)) |>
collect()
# Means should be close to 10 and 20 (within quantization + sampling error)
result <- result[order(result$grp), ]
expect_equal(nrow(result), 2L)
expect_lt(abs(result$avg[1] - 10), 0.5)
expect_lt(abs(result$avg[2] - 20), 0.5)
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.