# Nothing
test_that("tbl_csv reads basic CSV and returns correct data.frame", {
  # Round-trip the first ten rows of mtcars through tbl_csv/collect.
  path <- tempfile(fileext = ".csv")
  on.exit(unlink(path))
  write.csv(mtcars[1:10, ], path, row.names = FALSE)
  out <- tbl_csv(path) |> collect()
  # Shape, column names, and a sample column must all survive the read.
  expect_equal(nrow(out), 10)
  expect_equal(ncol(out), ncol(mtcars))
  expect_equal(names(out), names(mtcars))
  expect_equal(out$mpg, mtcars$mpg[1:10])
})
test_that("tbl_csv infers integer columns correctly", {
  path <- tempfile(fileext = ".csv")
  on.exit(unlink(path))
  # One integer column and one fractional double column.
  dat <- data.frame(x = 1:5, y = c(1.5, 2.5, 3.5, 4.5, 5.5))
  write.csv(dat, path, row.names = FALSE)
  out <- tbl_csv(path) |> collect()
  # x should be int64 (returned as double by default)
  expect_type(out$x, "double")
  expect_equal(out$x, c(1, 2, 3, 4, 5))
  expect_equal(out$y, dat$y)
})
test_that("tbl_csv handles string columns", {
  path <- tempfile(fileext = ".csv")
  on.exit(unlink(path))
  # Mixed character/numeric fixture.
  dat <- data.frame(
    name = c("Alice", "Bob", "Charlie"),
    score = c(90, 85, 95),
    stringsAsFactors = FALSE
  )
  write.csv(dat, path, row.names = FALSE)
  out <- tbl_csv(path) |> collect()
  expect_equal(out$name, c("Alice", "Bob", "Charlie"))
  expect_equal(out$score, c(90, 85, 95))
})
test_that("tbl_csv handles NA values", {
  path <- tempfile(fileext = ".csv")
  on.exit(unlink(path))
  # Row 2 is all-NA across an integer, string, and logical column.
  writeLines("x,y,z\n1,hello,TRUE\nNA,NA,NA\n3,world,FALSE", path)
  out <- tbl_csv(path) |> collect()
  # Neighbouring rows keep their parsed values; row 2 is NA in every column.
  expect_equal(out$x[c(1, 3)], c(1, 3))
  expect_true(is.na(out$x[2]))
  expect_true(is.na(out$y[2]))
  expect_true(is.na(out$z[2]))
})
test_that("tbl_csv handles logical columns", {
  f <- tempfile(fileext = ".csv")
  on.exit(unlink(f))
  # Logical column `a` alongside a numeric column `b`.
  writeLines("a,b\nTRUE,1\nFALSE,2\nTRUE,3", f)
  result <- tbl_csv(f) |> collect()
  expect_equal(result$a, c(TRUE, FALSE, TRUE))
  # `b` was written into the fixture but previously never asserted; verify the
  # numeric companion column parses correctly alongside the logical one.
  expect_equal(result$b, c(1, 2, 3))
})
test_that("tbl_csv handles quoted fields with commas", {
  path <- tempfile(fileext = ".csv")
  on.exit(unlink(path))
  # Embedded commas must stay inside their quoted fields.
  writeLines('name,city\n"Smith, John","New York"\n"Doe, Jane",Boston', path)
  out <- tbl_csv(path) |> collect()
  expect_equal(out$name, c("Smith, John", "Doe, Jane"))
  expect_equal(out$city, c("New York", "Boston"))
})
test_that("tbl_csv works with filter pipeline", {
  path <- tempfile(fileext = ".csv")
  on.exit(unlink(path))
  write.csv(mtcars, path, row.names = FALSE)
  # filter + select pushed through the lazy CSV source.
  got <- tbl_csv(path) |>
    filter(cyl == 6) |>
    select(mpg, cyl, hp) |>
    collect()
  # Reference answer computed eagerly with base subsetting.
  want <- mtcars[mtcars$cyl == 6, c("mpg", "cyl", "hp")]
  expect_equal(nrow(got), nrow(want))
  expect_equal(got$mpg, want$mpg)
})
test_that("tbl_csv works with group_by + summarise", {
  f <- tempfile(fileext = ".csv")
  on.exit(unlink(f))
  write.csv(mtcars, f, row.names = FALSE)
  result <- tbl_csv(f) |>
    group_by(cyl) |>
    summarise(mean_mpg = mean(mpg)) |>
    arrange(cyl) |>
    collect()
  expect_equal(nrow(result), 3)
  expect_equal(result$cyl, c(4, 6, 8))
  # `mean_mpg` was computed but previously never asserted — check the aggregate
  # values, not just the group keys. tapply groups in sorted key order (4, 6, 8),
  # matching arrange(cyl) above.
  expect_equal(result$mean_mpg, as.numeric(tapply(mtcars$mpg, mtcars$cyl, mean)))
})
test_that("tbl_csv respects batch_size parameter", {
  path <- tempfile(fileext = ".csv")
  on.exit(unlink(path))
  write.csv(mtcars, path, row.names = FALSE)
  # A batch size far smaller than the row count must not change the result.
  out <- tbl_csv(path, batch_size = 5L) |> collect()
  expect_equal(nrow(out), nrow(mtcars))
  expect_equal(out$mpg, mtcars$mpg)
})
test_that("tbl_csv handles empty fields as NA", {
  path <- tempfile(fileext = ".csv")
  on.exit(unlink(path))
  # Row 2 has an empty string field in column y.
  writeLines("x,y\n1,hello\n2,\n3,world", path)
  out <- tbl_csv(path) |> collect()
  # The empty field becomes NA; neighbouring rows are untouched.
  expect_true(is.na(out$y[2]))
  expect_equal(out$y[c(1, 3)], c("hello", "world"))
})
test_that("tbl_csv explain shows CsvScanNode", {
  path <- tempfile(fileext = ".csv")
  on.exit(unlink(path))
  write.csv(mtcars[1:5, ], path, row.names = FALSE)
  # The printed query plan should mention the CSV scan node.
  plan_text <- capture.output(explain(tbl_csv(path)))
  expect_true(any(grepl("CsvScanNode", plan_text)))
})
test_that("tbl_csv pipe to write_csv round-trips", {
  src <- tempfile(fileext = ".csv")
  dst <- tempfile(fileext = ".csv")
  on.exit(unlink(c(src, dst)))
  dat <- data.frame(a = 1:3, b = c("x", "y", "z"), stringsAsFactors = FALSE)
  write.csv(dat, src, row.names = FALSE)
  # Read lazily, write back out, then read the copy and compare.
  tbl_csv(src) |> write_csv(dst)
  out <- tbl_csv(dst) |> collect()
  expect_equal(out$a, c(1, 2, 3))
  expect_equal(out$b, c("x", "y", "z"))
})
test_that("tbl_csv reads gzip-compressed CSV via the miniz path", {
  path <- tempfile(fileext = ".csv.gz")
  on.exit(unlink(path))
  dat <- data.frame(
    a = 1:5,
    b = c(1.5, 2.5, 3.5, 4.5, 5.5),
    c = c("foo", "bar", "baz", "qux", "quux"),
    stringsAsFactors = FALSE
  )
  # Compress on the way out so tbl_csv must decompress on the way in.
  con <- gzfile(path, "w")
  write.csv(dat, con, row.names = FALSE)
  close(con)
  out <- tbl_csv(path) |> collect()
  expect_equal(nrow(out), 5)
  expect_equal(names(out), c("a", "b", "c"))
  expect_equal(out$a, c(1, 2, 3, 4, 5))
  expect_equal(out$b, dat$b)
  expect_equal(out$c, dat$c)
})
test_that("tbl_csv on a gz CSV exercises the type-inference rewind path", {
  # csv_scan infers types from the first 1000 rows, then seeks back to the
  # data start. For the gz path that means seeking inside the in-memory
  # decompressed buffer. Use >1000 rows so the rewind actually fires.
  path <- tempfile(fileext = ".csv.gz")
  on.exit(unlink(path))
  n_rows <- 1500
  dat <- data.frame(
    id = seq_len(n_rows),
    val = as.double(seq_len(n_rows)) / 2,
    tag = sprintf("row%04d", seq_len(n_rows)),
    stringsAsFactors = FALSE
  )
  con <- gzfile(path, "w")
  write.csv(dat, con, row.names = FALSE)
  close(con)
  out <- tbl_csv(path) |> collect()
  expect_equal(nrow(out), n_rows)
  # Spot-check the first and last rows across all three column types.
  expect_equal(out$id[1], 1)
  expect_equal(out$id[n_rows], n_rows)
  expect_equal(out$val[n_rows], n_rows / 2)
  expect_equal(out$tag[1], "row0001")
  expect_equal(out$tag[n_rows], sprintf("row%04d", n_rows))
})
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.