tests/testthat/test-melt-csv.R

test_that("read_csv type imputation and NA detection works", {
  skip_on_os("windows")
  withr::local_options(lifecycle_verbosity = "quiet")
  melt_data <- melt_csv("non-tabular.csv", na = "NA")
  expect_equal(
    melt_data$data_type[7:11],
    c("missing", "empty", "character", "integer", "double")
  )
})

test_that("read_tsv works on a simple file", {
  withr::local_options(lifecycle_verbosity = "quiet")
  melt_data <- melt_tsv("a\tb\n1\t2")
  expect_equal(melt_data$data_type, rep(c("character", "integer"), each = 2))
})

test_that("melt_csv's 'NA' option genuinely changes the NA values", {
  withr::local_options(lifecycle_verbosity = "quiet")
  expect_equal(melt_csv("z\n", na = "z")$data_type, "missing")
})

test_that("melt_csv's 'NA' option works with multiple NA values", {
  withr::local_options(lifecycle_verbosity = "quiet")
  expect_equal(
    melt_csv("NA\nmiss\n13", na = c("13", "miss"))$data_type,
    c("character", "missing", "missing")
  )
})

test_that('passing character() to melt_csv\'s "NA" option reads "" correctly', {
  withr::local_options(lifecycle_verbosity = "quiet")
  expect_equal(melt_csv("foo\n", na = character())$value, "foo")
})

test_that("passing \"\" to melt_csv's 'NA' option reads \"\" correctly", {
  withr::local_options(lifecycle_verbosity = "quiet")
  expect_equal(
    melt_csv("foo,bar\nfoo,\n", na = "")$value,
    c("foo", "bar", "foo", NA)
  )
})

test_that("changing melt_csv's 'quote' argument works correctly", {
  withr::local_options(lifecycle_verbosity = "quiet")
  test_data <- melt_csv("basic-df.csv")
  test_data_singlequote <- melt_csv("basic-df-singlequote.csv", quote = "'")
  expect_identical(test_data, test_data_singlequote)
})

test_that("melt_csv's 'skip' option allows for skipping'", {
  withr::local_options(lifecycle_verbosity = "quiet")
  test_data <- melt_csv("basic-df.csv", skip = 1)
  expect_equal(nrow(test_data), 40)
})

test_that("melt_csv's 'n_max' allows for a maximum number of records and does not corrupt any", {
  withr::local_options(lifecycle_verbosity = "quiet")
  test_data <- melt_csv("basic-df.csv", n_max = 7)
  expect_equal(nrow(test_data), 28)
  expect_equal(sum(test_data$data_type == "missing"), 0)
})

test_that("can read more than 100 columns", {
  withr::local_options(lifecycle_verbosity = "quiet")
  set.seed(2015 - 3 - 13)
  x <- as.data.frame(matrix(rbinom(300, 2, .5), nrow = 2))
  y <- format_csv(x)
  expect_equal(max(melt_csv(y)$col), 150)
})

test_that("encoding affects text", {
  withr::local_options(lifecycle_verbosity = "quiet")
  x <- melt_csv("enc-iso-8859-1.txt", locale = locale(encoding = "ISO-8859-1"))
  expect_identical(x$value[2], "\u00e9l\u00e8ve")
})

test_that("nuls are dropped with a warning", {
  withr::local_options(lifecycle_verbosity = "quiet")
  expect_warning(x <- melt_csv("raw.csv"))
  expect_equal(readr:::n_problems(x), 1)
  expect_equal(x$value[3], "ab")
})

test_that("can read from the clipboard", {
  withr::local_options(lifecycle_verbosity = "quiet")
  skip_on_cran()
  skip_if_no_clipboard()
  clipr::write_clip("a,b,c\n1,2,3")
  expect_identical(melt_csv(clipboard()), melt_csv("a,b,c\n1,2,3"))
})

test_that("can read from a multi-line character vector", {
  withr::local_options(lifecycle_verbosity = "quiet")
  expect_identical(max(melt_csv(c("a,b,c", "1,2,3"))$row), 2)
})

# Column warnings ---------------------------------------------------------

test_that("missing lines are not skipped", {
  withr::local_options(lifecycle_verbosity = "quiet")
  # first
  expect_equal(max(melt_csv("a,b\n\n\n1,2")$row), 4)

  # middle
  expect_equal(max(melt_csv("a,b\n1,2\n\n\n2,3\n")$row), 5)

  # last (trailing \n is ignored)
  expect_equal(max(melt_csv("a,b\n1,2\n\n\n")$row), 4)
})

# read_csv2 ---------------------------------------------------------------

test_that("decimal mark automatically set to ,", {
  withr::local_options(lifecycle_verbosity = "quiet")
  expect_message(
    x <- melt_csv2("x\n1,23"),
    if (default_locale()$decimal_mark == ".") "decimal .*grouping .*mark" else NA
  )
  expect_equal(x$data_type[2], "double")
})

# Zero rows ---------------------------------------------------------------

test_that("n_max 0 gives zero row data frame", {
  withr::local_options(lifecycle_verbosity = "quiet")
  x <- melt_csv("a,b\n1,2", n_max = 0)
  expect_equal(dim(x), c(0, 4))
})

# Comments ----------------------------------------------------------------

test_that("comments are ignored regardless of where they appear", {
  withr::local_options(lifecycle_verbosity = "quiet")
  out1 <- melt_csv("x\n1#comment", comment = "#")
  out2 <- melt_csv("x\n1#comment\n#comment", comment = "#")
  out3 <- melt_csv('x\n"1"#comment', comment = "#")

  chk1 <- tibble::tibble(
    row = c(1, 2),
    col = c(1, 1),
    data_type = c("character", "integer"),
    value = c("x", "1")
  )

  expect_true(all.equal(chk1, out1))
  expect_true(all.equal(chk1, out2))
  expect_true(all.equal(chk1, out3))

  out5 <- melt_csv("x1,x2,x3\nA2,B2,C2\nA3#,B2,C2\nA4,A5,A6", comment = "#")
  out6 <- melt_csv("x1,x2,x3\nA2,B2,C2\nA3,#B2,C2\nA4,A5,A6", comment = "#")
  out7 <- melt_csv("x1,x2,x3\nA2,B2,C2\nA3,#B2,C2\n#comment\nA4,A5,A6", comment = "#")

  chk2 <- tibble::tibble(
    row = c(1, 1, 1, 2, 2, 2, 3, 4, 4, 4),
    col = c(1, 2, 3, 1, 2, 3, 1, 1, 2, 3),
    data_type = "character",
    value = c("x1", "x2", "x3", "A2", "B2", "C2", "A3", "A4", "A5", "A6")
  )

  expect_true(all.equal(chk2, out5))
  expect_true(all.equal(chk2, out6))
  expect_true(all.equal(chk2, out7))
})

test_that("escaped/quoted comments are ignored", {
  withr::local_options(lifecycle_verbosity = "quiet")
  out1 <- melt_delim("x\n\\#",
    comment = "#", delim = ",",
    escape_backslash = TRUE, escape_double = FALSE
  )
  out2 <- melt_csv('x\n"#"', comment = "#")

  expect_equal(out1$value[2], "#")
  expect_equal(out2$value[2], "#")
})

test_that("leading comments are ignored", {
  withr::local_options(lifecycle_verbosity = "quiet")
  out <- melt_csv("#a\n#b\nx\n1", comment = "#")

  expect_equal(nrow(out), 2)
  expect_equal(out$value[2], "1")
})

test_that("skip respects comments", {
  withr::local_options(lifecycle_verbosity = "quiet")
  melt_x <- function(...) {
    melt_csv("#a\nb\nc", ...)$value
  }

  expect_equal(melt_x(), c("#a", "b", "c"))
  expect_equal(melt_x(skip = 1), c("b", "c"))
  expect_equal(melt_x(comment = "#"), c("b", "c"))
  expect_equal(melt_x(comment = "#", skip = 2), c("c"))
})

test_that("melt_csv returns a four-col zero-row data.frame on an empty file", {
  withr::local_options(lifecycle_verbosity = "quiet")
  expect_equal(dim(melt_csv("empty-file")), c(0, 4))
})

test_that("melt_delim errors on length 0 delimiter", {
  withr::local_options(lifecycle_verbosity = "quiet")
  expect_error(
    melt_delim("a b\n1 2\n", delim = ""),
    "`delim` must be at least one character, use `melt_table\\(\\)` for whitespace delimited input\\."
  )
})

test_that("melt_csv handles whitespace between delimiters and quoted fields", {
  withr::local_options(lifecycle_verbosity = "quiet")
  x <- melt_csv('1, \"hi,there\"\n3,4')
  expect_equal(x$value[2:3], c("hi,there", "3"))
})
hadley/readr documentation built on Feb. 3, 2024, 9:45 a.m.