tests/testthat/test-read.R

test_that("duckdb_read_csv() works as expected", {
  skip_if_not(TEST_RE2)

  con <- dbConnect(duckdb())

  tf <- tempfile()
  tf2 <- tempfile()

  # default case
  write.csv(iris, tf, row.names = FALSE)
  duckdb_read_csv(con, "iris", tf)
  res <- dbReadTable(con, "iris")
  res$Species <- as.factor(res$Species)
  expect_true(identical(res, iris))

  # table exists
  duckdb_read_csv(con, "iris", tf)
  count <- dbGetQuery(con, "SELECT COUNT(*) FROM iris")[1][1]
  expect_true(identical(as.integer(count), as.integer(nrow(iris) * 2)))
  dbRemoveTable(con, "iris")


  # different separator
  write.table(iris, tf, row.names = FALSE, sep = " ")
  duckdb_read_csv(con, "iris", tf, delim = " ")
  res <- dbReadTable(con, "iris")
  res$Species <- as.factor(res$Species)
  expect_true(identical(res, iris))
  dbRemoveTable(con, "iris")

  write.table(iris, tf, row.names = FALSE, sep = " ")
  duckdb_read_csv(con, "iris", tf, sep = " ")
  res <- dbReadTable(con, "iris")
  res$Species <- as.factor(res$Species)
  expect_true(identical(res, iris))
  dbRemoveTable(con, "iris")

  # no header
  write.table(iris, tf, row.names = FALSE, sep = ",", col.names = FALSE)
  duckdb_read_csv(con, "iris", tf, header = FALSE)
  res <- dbReadTable(con, "iris")
  names(res) <- names(iris)
  res$Species <- as.factor(res$Species)
  expect_true(identical(res, iris))
  dbRemoveTable(con, "iris")

  # lowercase header
  write.csv(iris, tf, row.names = FALSE)
  duckdb_read_csv(con, "iris", tf, lower.case.names = T)
  res <- dbReadTable(con, "iris")
  res$species <- as.factor(res$species)
  iris_lc <- iris
  names(iris_lc) <- tolower(names(iris))
  expect_true(identical(res, iris_lc))
  dbRemoveTable(con, "iris")

  # nulls
  iris_na <- iris
  iris_na[[2]][42] <- NA

  write.csv(iris_na, tf, row.names = FALSE, na = "")
  duckdb_read_csv(con, "iris", tf)
  res <- dbReadTable(con, "iris")
  res$Species <- as.factor(res$Species)
  expect_true(identical(res, iris_na))
  dbRemoveTable(con, "iris")


  write.csv(iris_na, tf, row.names = FALSE, na = "NULL")
  duckdb_read_csv(con, "iris", tf, na.strings = "NULL")
  res <- dbReadTable(con, "iris")
  res$Species <- as.factor(res$Species)
  expect_true(identical(res, iris_na))
  dbRemoveTable(con, "iris")


  # strange table name
  write.csv(iris, tf, row.names = FALSE)
  duckdb_read_csv(con, "ir Is", tf)
  res <- dbReadTable(con, "ir Is")
  res$Species <- as.factor(res$Species)
  expect_true(identical(res, iris))
  dbRemoveTable(con, "ir Is")


  # specified column names
  colnames <- paste0("c", 1:5)
  write.csv(iris, tf, row.names = FALSE)
  duckdb_read_csv(con, "iris", tf, col.names = colnames)
  res <- dbReadTable(con, "iris")
  res$c5 <- as.factor(res$c5)
  iris_c <- iris
  names(iris_c) <- colnames
  expect_true(identical(res, iris_c))
  dbRemoveTable(con, "iris")


  # multiple files
  tf2 <- tempfile()
  write.csv(iris, tf2, row.names = FALSE)
  duckdb_read_csv(con, "iris", c(tf, tf2))
  count <- dbGetQuery(con, "SELECT COUNT(*) FROM iris")[1][1]
  expect_true(identical(as.integer(count), as.integer(nrow(iris) * 2)))
  dbRemoveTable(con, "iris")


  # deprecated API
  write.csv(iris, tf, row.names = FALSE)
  expect_warning(read_csv_duckdb(con, tf, "iris"), "duckdb_read_csv")
  res <- dbReadTable(con, "iris")
  res$Species <- as.factor(res$Species)
  expect_true(identical(res, iris))

  # test better na.strings handling
  # see https://github.com/duckdb/duckdb/issues/8590
  tf3 <- tempfile()
  csv <- c(
    '"num","char","logi","lisst.1","lisst.2","lisst.3","lisst.NA"',
    '0.5,"yes",TRUE,1,2,3,NA',
    '2,"no",FALSE,1,2,3,NA',
    "NA,NA,NA,1,2,3,NA"
  )
  writeLines(csv, tf3)
  duckdb_read_csv(con, "na_table", tf3, na.strings = "-")
  expect_identical(
    dbReadTable(con, "na_table"),
    read.csv(tf3, na.strings = "-")
  )

  # temporary table
  # see https://github.com/duckdb/duckdb-r/issues/142
  db <- tempfile()
  con2 <- dbConnect(duckdb(), dbdir = db)
  duckdb_read_csv(con2, "iris", tf, temporary = TRUE)
  expect_true(length(dbListTables(con2)) == 1)
  dbDisconnect(con2)

  con2 <- dbConnect(duckdb(), dbdir = db)
  expect_true(length(dbListTables(con2)) == 0)
  dbDisconnect(con2)

  dbDisconnect(con, shutdown = TRUE)
})

describe("duckdb_read_csv", {

  skip_if_not(TEST_RE2)
  tf <- tempfile()
  con <- dbConnect(duckdb())

  it("col.types arg works with vector of types and inferred colnames", {

    # Case with col.types as character vector
    write.csv(iris, tf, row.names = FALSE)
    duckdb_read_csv(con, "iris", tf,
                    col.types = c(
                      "DOUBLE",
                      "DOUBLE",
                      "DOUBLE",
                      "DOUBLE",
                      "VARCHAR"
                    )
    )

    res <- dbReadTable(con, "iris")
    res$Species <- as.factor(res$Species)
    expect_true(identical(res, iris))
    dbRemoveTable(con, "iris")

  })

  it("col.types and col.names work together when unnamed", {

    write.csv(iris, tf, row.names = FALSE)
    duckdb_read_csv(
      con, "iris", tf,
      col.names = c("S.Length", "S.Width", "P.Length", "P.Width", "Species"),
      col.types = c("DOUBLE", "DOUBLE", "DOUBLE", "DOUBLE", "VARCHAR")
    )

    res <- dbReadTable(con, "iris")
    res$Species <- as.factor(res$Species)
    iris_renamed <- setNames(iris, c("S.Length", "S.Width", "P.Length", "P.Width", "Species"))
    expect_true(identical(res, iris_renamed))
    dbRemoveTable(con, "iris")

  })

  it("col.types overwrites col.names when col.types is named", {

    write.csv(iris, tf, row.names = FALSE)
    expect_warning(
      duckdb_read_csv(con, "iris", tf,
                      col.names = c("A", "B", "C", "D", "E"),
                      col.types = c(
                        Sepal.Length = "DOUBLE",
                        Sepal.Width = "DOUBLE",
                        Petal.Length = "DOUBLE",
                        Petal.Width = "DOUBLE",
                        Species = "VARCHAR"
                      )
      )
    )
    res <- dbReadTable(con, "iris")
    res$Species <- as.factor(res$Species)
    expect_true(identical(res, iris))
    dbRemoveTable(con, "iris")

  })

  it("lower.case.names works as expected with col.types named vector", {

    write.csv(iris, tf, row.names = FALSE)
    duckdb_read_csv(con, "iris", tf,
                    col.types = c(
                      S.lEngth = "DOUBLE",
                      S.wiDth = "DOUBLE",
                      p.leNgth = "DOUBLE",
                      p.Width = "DOUBLE",
                      spEc = "VARCHAR"
                    ), lower.case.names = TRUE)

    res <- dbReadTable(con, "iris")
    res$spec <- as.factor(res$spec)
    iris_renamed <- setNames(iris, tolower(c("s.length", "s.width", "p.length", "p.width", "spec")))
    expect_true(identical(res, iris_renamed))
    dbRemoveTable(con, "iris")

  })

  it("error when col.types length not equal to number of cols in file", {

    write.csv(iris, tf, row.names = FALSE)
    expect_error(duckdb_read_csv(con, "iris", tf, col.types = c(rep("VARCHAR", 4))))
    dbRemoveTable(con, "iris")

  })

  it("invalid col.types gives error", {

    write.csv(iris, tf, row.names = FALSE)
    expect_error(
      duckdb_read_csv(con, "iris", tf,
                      col.types = c(
                        Sepal.Length = "DOUBLE",
                        Sepal.Width = "DOUBLE",
                        Petal.Length = "DOUBLE",
                        Petal.Width = "DOUBLE",
                        Species = "DOUBLE"
                      )
      )
    )
  })

  it("test date types works as expected", {

    dates_df <- data.frame(dates = as.Date(seq(1:10), origin = '2020-01-01'))
    write.csv(dates_df, tf, row.names = FALSE)
    duckdb_read_csv(con, "dates_test", tf, col.types = c(dates = 'DATE'))

    res <- dbReadTable(con, "dates_test")
    expect_true(identical(res, dates_df))
    dbRemoveTable(con, "dates_test")  # Corrected table name

  })

  dbDisconnect(con, shutdown = TRUE)

})

Try the duckdb package in your browser

Any scripts or data that you put into this service are public.

duckdb documentation built on Oct. 30, 2024, 5:06 p.m.