# tests/testthat/test-write_dataset.R

# Note: it is not possible to open a dataset from one S3 source and write it
# to another.
# Exercises write_dataset() with three kinds of input: an in-memory
# data.frame, a lazy table opened from a file on disk, and a lazy dplyr query
# built on top of that table. Every write is followed by the same checks: the
# output path exists and can be re-opened as a "tbl".
test_that("write_dataset", {

  skip_on_cran()

  ## write an in-memory dataset
  path <- file.path(tempdir(), "mtcars.parquet")
  write_dataset(mtcars, path)
  expect_true(file.exists(path))
  df <- open_dataset(path)
  expect_s3_class(df, "tbl")

  ## write to the same path again, this time passing extra writer options
  write_dataset(
    mtcars,
    path,
    options = c("PER_THREAD_OUTPUT FALSE", "FILENAME_PATTERN 'cars_{i}'")
  )

  expect_true(file.exists(path))
  df <- open_dataset(path)
  expect_s3_class(df, "tbl")

  ## write from an on-disk dataset
  local_file <- system.file("extdata/spatial-test.csv", package = "duckdbfs")
  tbl <- open_dataset(local_file, format = "csv")
  path <- file.path(tempdir(), "spatial.parquet")
  write_dataset(tbl, path)

  expect_true(file.exists(path))
  df <- open_dataset(path)
  expect_s3_class(df, "tbl")

  ## write from a lazy query (the on-disk table plus a mutated column)
  path2 <- file.path(tempdir(), "spatial2.parquet")

  dataset <- tbl |>
    dplyr::mutate(new = "test")
  dataset |>
    write_dataset(path2)

  # This write used to go unchecked, so a failure to produce output would
  # pass silently; assert it the same way as the writes above.
  expect_true(file.exists(path2))
  df <- open_dataset(path2)
  expect_s3_class(df, "tbl")

})

# Checks partitioned writes, both when the partitioning comes from dplyr
# grouping variables and when it is given through the `partitioning` argument.
# Each output directory is removed once its checks have run.
test_that("write_dataset partitions", {

  skip_on_cran()

  ## partition an in-memory dataset by its grouping variables
  path <- file.path(tempdir(), "mtcars")

  # Namespace-qualify group_by() rather than calling library(dplyr) inside the
  # test, so the test does not change the search path for later code.
  mtcars |>
    dplyr::group_by(cyl, gear) |>
    write_dataset(path, options = "FILENAME_PATTERN 'cars_{uuid}'")

  expect_true(dir.exists(path))
  df <- open_dataset(path)
  expect_s3_class(df, "tbl")
  # the top level of the output should contain a `cyl=4` entry
  parts <- list.files(path)
  expect_true(any(grepl("cyl=4", parts)))

  # remove this output too, so nothing lingers in tempdir()
  unlink(path, recursive = TRUE)

  ## partition through the explicit `partitioning` argument
  path <- file.path(tempdir(), "mtcars2")
  mtcars |> write_dataset(path, partitioning = "cyl", overwrite = TRUE)
  expect_true(file.exists(path))
  df <- open_dataset(path)
  expect_s3_class(df, "tbl")

  unlink(path, recursive = TRUE)

})


# Opens a CSV over HTTPS and checks that it can be written to a local path
# and re-opened as a "tbl". Skipped on CRAN and when offline.
test_that("write_dataset, remote input", {
  skip_on_cran()
  skip_if_offline()

  csv_url <- paste0(
    "https://raw.githubusercontent.com/cboettig/duckdbfs/",
    "main/inst/extdata/spatial-test.csv"
  )
  remote_tbl <- open_dataset(csv_url, format = "csv")

  out_file <- file.path(tempdir(), "spatial.parquet")
  write_dataset(remote_tbl, out_file)
  expect_true(file.exists(out_file))

  reopened <- open_dataset(out_file)
  expect_s3_class(reopened, "tbl")
})

# Writes mtcars to a bucket on the "play" alias of the MinIO client. The test
# only verifies that the write completes without error; the bucket is removed
# and the connection closed afterwards.
test_that("write_dataset to s3:", {

  skip_on_os("windows")
  skip_if_offline()
  skip_on_cran()
  skip_if_not_installed("jsonlite")
  skip_if_not_installed("minioclient")

  minioclient::install_mc(force = TRUE)

  # settings of the "play" alias, as reported by mc in JSON form
  alias_json <- minioclient::mc_alias_ls("play --json")$stdout
  play <- jsonlite::fromJSON(alias_json)
  # the scheme is stripped from the URL before it is handed to duckdb_secrets()
  endpoint <- gsub("https://", "", play$URL)

  minioclient::mc_mb("play/duckdbfs")
  duckdb_secrets(play$accessKey, play$secretKey, endpoint)

  write_dataset(mtcars, "s3://duckdbfs/mtcars.parquet")

  # reaching this line means the write did not raise an error
  expect_true(TRUE)

  minioclient::mc("rb --force play/duckdbfs")
  close_connection()
})

#' Read the settings of a MinIO client alias from its config file
#'
#' Looks for `config.json` in the directory given by the `minioclient.dir`
#' option, falling back to `tools::R_user_dir("minioclient", "data")`.
#'
#' @param alias Name of the alias to look up under `aliases` in the config.
#' @return The alias entry as a list, with `alias` (the name requested) and
#'   `URL` (a copy of the entry's `url` field) appended.
mc_config_get <- function(alias = "play") {

  # Parsing `mc alias ls --json` instead can fail to parse on Windows, because
  # stdout is not pure json:
  # p <- minioclient::mc_alias_ls(paste(alias, "--json"))
  # config <- jsonlite::fromJSON(p$stdout)

  ## fails to find config on remote
  config_dir <- getOption(
    "minioclient.dir",
    tools::R_user_dir("minioclient", "data")
  )
  config_file <- file.path(config_dir, "config.json")

  entry <- jsonlite::read_json(config_file)$aliases[[alias]]
  entry$alias <- alias
  entry$URL <- entry$url
  entry
}






# Writes a spatial table with write_geo() and reads the result back with sf,
# checking that it is an "sf" object with more than one row.
test_that("write_geo", {

  skip_on_cran()
  # load_spatial() is called below; the sibling tests in this file skip
  # offline for the same reason.
  skip_if_offline() # extensions need internet
  skip_if_not_installed("sf")

  ## write from an on-disk dataset
  local_file <- system.file("extdata/world.fgb", package = "duckdbfs")
  load_spatial()
  tbl <- open_dataset(local_file, format = "sf")
  path <- file.path(tempdir(), "spatial.geojson")
  write_geo(tbl, path)

  expect_true(file.exists(path))
  df <- sf::st_read(path)
  expect_s3_class(df, "sf")
  expect_gt(nrow(df), 1)

})



# Writes the packaged world.fgb data with to_geojson(), using `iso_a3` as the
# id column, and checks that the output file is created.
test_that("to_geojson", {

  skip_on_cran()
  skip_if_offline() # extensions need internet
  load_extension("json")

  ## source data shipped with the package
  world_file <- system.file("extdata/world.fgb", package = "duckdbfs")
  load_spatial()
  world <- open_dataset(world_file, format = "sf")

  out_file <- file.path(tempdir(), "spatial1.geojson")
  to_geojson(world, out_file, id_col = "iso_a3")
  expect_true(file.exists(out_file))

  skip_if_not_installed("sf")

  ## reading the file back with sf is disabled:
  ## not sure why sf doesn't recognize this file!
  #df <- sf::st_read(path)
  #expect_s3_class(df, "sf")
  #expect_gt(nrow(df), 1)

})


# Writes the packaged world.fgb data as GeoJSON to a bucket on the "play"
# alias of the MinIO client. The test only verifies that the write completes
# without error.
test_that("to_geojson s3", {

  skip_on_cran()
  skip_if_offline() # extensions need internet
  # Evaluate every skip condition before install_mc(), so a skipped platform
  # does not install the mc binary only to skip straight afterwards.
  skip_on_os("windows")
  skip_if_not_installed("sf")
  skip_if_not_installed("jsonlite")
  skip_if_not_installed("minioclient")

  minioclient::install_mc(force = TRUE)

  p <- minioclient::mc_alias_ls("play --json")
  config <- jsonlite::fromJSON(p$stdout)
  minioclient::mc_mb("play/duckdbfs")

  duckdb_secrets(config$accessKey,
                 config$secretKey,
                 gsub("https://", "", config$URL))
  load_spatial()

  ## write from an on-disk dataset
  local_file <- system.file("extdata/world.fgb", package = "duckdbfs")
  tbl <- open_dataset(local_file, format = "sf")
  path <- "s3://duckdbfs/spatial-test.geojson"
  to_geojson(tbl, path, id_col = "iso_a3")

  # reaching this line means the write did not raise an error
  expect_true(TRUE)

  # Same cleanup as the "write_dataset to s3:" test: remove the bucket created
  # above and close the connection.
  minioclient::mc("rb --force play/duckdbfs")
  close_connection()

})

# Try the duckdbfs package in your browser
#
# Any scripts or data that you put into this service are public.
#
# duckdbfs documentation built on Aug. 8, 2025, 6:57 p.m.