tests/testthat/test-read.R

test_that("duckdb_read_csv() works as expected", {
  skip_if_not(TEST_RE2)

  con <- dbConnect(duckdb())

  tf <- tempfile()

  # default case
  write.csv(iris, tf, row.names = FALSE)
  duckdb_read_csv(con, "iris", tf)
  res <- dbReadTable(con, "iris")
  res$Species <- as.factor(res$Species)
  expect_true(identical(res, iris))

  # table exists
  duckdb_read_csv(con, "iris", tf)
  count <- dbGetQuery(con, "SELECT COUNT(*) FROM iris")[1][1]
  expect_true(identical(as.integer(count), as.integer(nrow(iris) * 2)))
  dbRemoveTable(con, "iris")


  # different separator
  write.table(iris, tf, row.names = FALSE, sep = " ")
  duckdb_read_csv(con, "iris", tf, delim = " ")
  res <- dbReadTable(con, "iris")
  res$Species <- as.factor(res$Species)
  expect_true(identical(res, iris))
  dbRemoveTable(con, "iris")

  write.table(iris, tf, row.names = FALSE, sep = " ")
  duckdb_read_csv(con, "iris", tf, sep = " ")
  res <- dbReadTable(con, "iris")
  res$Species <- as.factor(res$Species)
  expect_true(identical(res, iris))
  dbRemoveTable(con, "iris")

  # no header
  write.table(iris, tf, row.names = FALSE, sep = ",", col.names = FALSE)
  duckdb_read_csv(con, "iris", tf, header = FALSE)
  res <- dbReadTable(con, "iris")
  names(res) <- names(iris)
  res$Species <- as.factor(res$Species)
  expect_true(identical(res, iris))
  dbRemoveTable(con, "iris")

  # lowercase header
  write.csv(iris, tf, row.names = FALSE)
  duckdb_read_csv(con, "iris", tf, lower.case.names = T)
  res <- dbReadTable(con, "iris")
  res$species <- as.factor(res$species)
  iris_lc <- iris
  names(iris_lc) <- tolower(names(iris))
  expect_true(identical(res, iris_lc))
  dbRemoveTable(con, "iris")

  # nulls
  iris_na <- iris
  iris_na[[2]][42] <- NA

  write.csv(iris_na, tf, row.names = FALSE, na = "")
  duckdb_read_csv(con, "iris", tf)
  res <- dbReadTable(con, "iris")
  res$Species <- as.factor(res$Species)
  expect_true(identical(res, iris_na))
  dbRemoveTable(con, "iris")


  write.csv(iris_na, tf, row.names = FALSE, na = "NULL")
  duckdb_read_csv(con, "iris", tf, na.strings = "NULL")
  res <- dbReadTable(con, "iris")
  res$Species <- as.factor(res$Species)
  expect_true(identical(res, iris_na))
  dbRemoveTable(con, "iris")


  # strange table name
  write.csv(iris, tf, row.names = FALSE)
  duckdb_read_csv(con, "ir Is", tf)
  res <- dbReadTable(con, "ir Is")
  res$Species <- as.factor(res$Species)
  expect_true(identical(res, iris))
  dbRemoveTable(con, "ir Is")


  # specified column names
  colnames <- paste0("c", 1:5)
  write.csv(iris, tf, row.names = FALSE)
  duckdb_read_csv(con, "iris", tf, col.names = colnames)
  res <- dbReadTable(con, "iris")
  res$c5 <- as.factor(res$c5)
  iris_c <- iris
  names(iris_c) <- colnames
  expect_true(identical(res, iris_c))
  dbRemoveTable(con, "iris")


  # multiple files
  tf2 <- tempfile()
  write.csv(iris, tf2, row.names = FALSE)
  duckdb_read_csv(con, "iris", c(tf, tf2))
  count <- dbGetQuery(con, "SELECT COUNT(*) FROM iris")[1][1]
  expect_true(identical(as.integer(count), as.integer(nrow(iris) * 2)))
  dbRemoveTable(con, "iris")


  # deprecated API
  write.csv(iris, tf, row.names = FALSE)
  expect_warning(read_csv_duckdb(con, tf, "iris"), "duckdb_read_csv")
  res <- dbReadTable(con, "iris")
  res$Species <- as.factor(res$Species)
  expect_true(identical(res, iris))

  # test better na.strings handling
  # see https://github.com/duckdb/duckdb/issues/8590
  tf3 <- tempfile()
  csv <- c(
    '"num","char","logi","lisst.1","lisst.2","lisst.3","lisst.NA"',
    '0.5,"yes",TRUE,1,2,3,NA',
    '2,"no",FALSE,1,2,3,NA',
    'NA,NA,NA,1,2,3,NA'
  )
  writeLines(csv, tf3)
  duckdb_read_csv(con, "na_table", tf3, na.strings = "-")
  expect_identical(
    dbReadTable(con, "na_table"),
    read.csv(tf3, na.strings = "-")
  )

  dbDisconnect(con, shutdown = TRUE)
})

Try the duckdb package in your browser

Any scripts or data that you put into this service are public.

duckdb documentation built on June 22, 2024, 9:37 a.m.