tests/testthat/test-vroom_fwf.R

test_that("trailing spaces omitted", {
  spec <- fwf_empty(test_path("fwf-trailing.txt"))
  expect_equal(spec$begin, c(0, 4))
  expect_equal(spec$end, c(3, NA))

  df <- vroom_fwf(test_path("fwf-trailing.txt"), spec, col_types = list())
  expect_equal(df$X1, df$X2)
})

test_that("dos newlines handles", {
  spec <- fwf_empty(test_path("fwf-trailing.txt"))
  x <- vroom_fwf(test_path("fwf-trailing.txt"), spec, col_types = list())
  y <- vroom_fwf(test_path("fwf-trailing-crlf.txt"), spec, col_types = list())
  expect_equal(x, y)

  z <- vroom_fwf(file(test_path("fwf-trailing-crlf.txt")), spec, col_types = list())
  expect_equal(z, x)
})

test_that("connections and normal files produce identical output", {
  spec <- fwf_empty(test_path("fwf-trailing.txt"))

  y <- vroom_fwf(test_path("fwf-trailing-crlf.txt"), spec, col_types = list())
  x <- vroom_fwf(file(test_path("fwf-trailing-crlf.txt")), spec, col_types = list())

  expect_equal(x, y)
})

test_that("respects the trim_ws argument", {
  x <- I("a11 b22 c33\nd   e   f  ")
  out1 <- vroom_fwf(x, fwf_empty(x), trim_ws = FALSE, col_types = list())
  expect_equal(out1$X1, c("a11", "d  "))
  expect_equal(out1$X2, c("b22", "e  "))
  expect_equal(out1$X3, c("c33", "f  "))

  out2 <- vroom_fwf(x, fwf_empty(x), trim_ws = TRUE, col_types = list())
  expect_equal(out2$X1, c("a11", "d"))
  expect_equal(out2$X2, c("b22", "e"))
  expect_equal(out2$X3, c("c33", "f"))
})

test_that("respects the trim_ws argument with empty fields", {
  x <- I("a11 b22 c33\nd       f  ")
  out1 <- vroom_fwf(x, fwf_empty(x), trim_ws = FALSE, col_types = list())
  expect_equal(out1$X1, c("a11", "d  "))
  expect_equal(out1$X2, c("b22", "   "))
  expect_equal(out1$X3, c("c33", "f  "))

  out1 <- vroom_fwf(x, fwf_empty(x), trim_ws = TRUE, na = "NA", col_types = list())
})

test_that("skipping column doesn't pad col_names", {
  x <- I("1 2 3\n4 5 6")

  out1 <- vroom_fwf(x, fwf_empty(x), col_types = 'd-d')
  expect_named(out1, c("X1", "X3"))

  names <- c("a", "b", "c")
  out2 <- vroom_fwf(x, fwf_empty(x, col_names = names), col_types = 'd-d')
  expect_named(out2, c("a", "c"))
})

test_that("fwf_empty can skip comments", {
  x <- I("COMMENT\n1 2 3\n4 5 6")

  out1 <- vroom_fwf(x, fwf_empty(x, comment = "COMMENT"), comment = "COMMENT", col_types = list())
  expect_equal(dim(out1), c(2, 3))
})

test_that("fwf_empty can skip lines", {
  x <- I("foo\nbar\baz\n1 2 3\n4 5 6\n")

  obj <- fwf_empty(x, skip = 3)

  exp <- list(begin = c(0L, 2L, 4L), end = c(1L, 3L, NA_integer_), col_names = c("X1", "X2", "X3"))

  expect_equal(obj, exp)
})

test_that("passing \"\" to vroom_fwf's 'na' option", {
  expect_equal(vroom_fwf(I("foobar\nfoo   "), fwf_widths(c(3, 3)), na = "", col_types = list())[[2]],
               c("bar", NA))
})

test_that("ragged last column expanded with NA", {
  x <- vroom_fwf(I("1a\n2ab\n3abc"), fwf_widths(c(1, NA)), col_types = list())
  expect_equal(x$X2, c("a", "ab", "abc"))
})

#test_that("ragged last column shrunk with warning", {
  #expect_warning(x <- vroom_fwf(I("1a\n2ab\n3abc"), fwf_widths(c(1, 3))))
  #expect_equal(x$X2, c("a", "ab", "abc"))
#})

test_that("read all columns with positions, non ragged", {
  col_pos <- fwf_positions(c(1,3,6),c(2,5,6))
  x <- vroom_fwf(I("12345A\n67890BBBBBBBBB\n54321C"), col_positions = col_pos, col_types = list())
  expect_equal(x$X3, c("A", "B", "C"))
})

test_that("read subset columns with positions", {
  col_pos <- fwf_positions(c(1,3),c(2,5))
  x <- vroom_fwf(I("12345A\n67890BBBBBBBBB\n54321C"), col_positions = col_pos, col_types = list())
  expect_equal(x$X1, c(12, 67, 54))
  expect_equal(x$X2, c(345, 890, 321))
})

test_that("read columns with positions, ragged", {
  col_pos <- fwf_positions(c(1,3,6),c(2,5,NA))
  x <- vroom_fwf(I("12345A\n67890BBBBBBBBB\n54321C"), col_positions = col_pos, col_types = list())
  expect_equal(x$X1, c(12, 67, 54))
  expect_equal(x$X2, c(345, 890, 321))
  expect_equal(x$X3, c('A', 'BBBBBBBBB', 'C'))
})

test_that("read columns with width, ragged", {
  col_pos <- fwf_widths(c(2,3,NA))
  x <- vroom_fwf(I("12345A\n67890BBBBBBBBB\n54321C"), col_positions = col_pos, col_types = list())
  expect_equal(x$X1, c(12, 67, 54))
  expect_equal(x$X2, c(345, 890, 321))
  expect_equal(x$X3, c('A', 'BBBBBBBBB', 'C'))
})

#test_that("vroom_fwf returns an empty data.frame on an empty file", {
   ##expect_true(all.equal(vroom_fwf(test_path("empty-file")), tibble::data_frame()))
#})

#test_that("check for line breaks in between widths", {
  #txt1 <- paste(
    #"1 1",
    #"2",
    #"1 1 ",
    #sep = "\n"
  #)
  #expect_warning(out1 <- vroom_fwf(txt1, fwf_empty(txt1)))
  #expect_equal(n_problems(out1), 2)

  #txt2 <- paste(
    #" 1 1",
    #" 2",
    #" 1 1 ",
    #sep = "\n"
  #)
  #expect_warning(out2 <- vroom_fwf(txt2, fwf_empty(txt2)))
  #expect_equal(n_problems(out2), 2)

  #exp <- tibble::tibble(X1 = c(1L, 2L, 1L), X2 = c(1L, NA, 1L))
  #expect_true(all.equal(out1, exp))
  #expect_true(all.equal(out2, exp))

#})

#test_that("error on empty spec (#511, #519)", {
  #txt = "foo\n"
  #pos = fwf_positions(start = numeric(0), end = numeric(0))
  #expect_error(vroom_fwf(txt, pos), "Zero-length.*specifications not supported")
#})

#test_that("error on negatives in fwf spec", {
  #txt = "foo\n"
  #pos = fwf_positions(start = c(1, -1), end = c(2, 3))
  #expect_error(vroom_fwf(txt, pos), ".*offset.*greater than 0")
#})

test_that("fwf spec can overlap", {
    x <- vroom_fwf(I("2015a\n2016b"), fwf_positions(c(1, 3, 5), c(4, 4, 5)), col_types = list())
    expect_equal(x$X1, c(2015, 2016))
    expect_equal(x$X2, c(15, 16))
    expect_equal(x$X3, c("a", "b"))
})

# fwf_cols
test_that("fwf_cols produces correct fwf_positions object with elements of length 2", {
  expected <- fwf_positions(c(1L, 9L, 4L), c(2L, 12L, 6L), c("a", "b", "d"))
  expect_equal(fwf_cols(a = c(1, 2), b = c(9, 12), d = c(4, 6)), expected, ignore_attr = TRUE)
})

test_that("fwf_cols produces correct fwf_positions object with elements of length 1", {
  expected <- fwf_widths(c(2L, 4L, 3L), c("a", "b", "c"))
  expect_equal(fwf_cols(a = 2, b = 4, c = 3), expected, ignore_attr = TRUE)
})


test_that("fwf_cols throws error when arguments are not length 1 or 2", {
  expect_error(fwf_cols(a = 1:3, b = 4:5))
  expect_error(fwf_cols(a = c(), b = 4:5))
})

test_that("fwf_cols works with unnamed columns", {
  expect_equal(ignore_attr = TRUE,
    fwf_cols(c(1, 2), c(9, 12), c(4, 6)),
    fwf_positions(c(1L, 9L, 4L), c(2L, 12L, 6L), c("X1", "X2", "X3"))
  )
  expect_equal(ignore_attr = TRUE,
    fwf_cols(a = c(1, 2), c(9, 12), c(4, 6)),
    fwf_positions(c(1L, 9L, 4L), c(2L, 12L, 6L), c("a", "X2", "X3"))
  )
})

# fwf_positions ---------------------------------------------------------------

test_that("fwf_positions always returns col_names as character (#797)", {
  begin <- c(1, 2, 4, 8)
  end <- c(1, 3, 7, 15)

  # Input a factor, should return a character
  nms <- factor(letters[1:4])

  info <- fwf_positions(begin, end, nms)

  expect_type(info$begin, "double")
  expect_type(info$end, "double")
  expect_type(info$col_names, "character")
})

# Robustness

test_that("vroom_fwf() is robust to improper inputs", {
  expect_error_free(
    vroom_fwf(I("foo bar baz\n1   2\n"), col_types = list())
  )

  expect_error_free(
    vroom_fwf(I("foo bar baz\n1   2 \n"), col_types = list())
  )

  expect_error_free(
    vroom_fwf(I("foo bar baz\n1   2  \n"), col_types = list())
  )

  expect_error_free(
    vroom_fwf(I("foo bar baz\n1   2   \n"), col_types = list())
  )

  expect_error_free(
    vroom_fwf(I("foo bar baz\n1   2   \n4\n"), col_types = list())
  )

  expect_error_free(
    vroom_fwf(I("foo bar baz\n1   2   \n4 \n"), col_types = list())
  )

  expect_error_free(
    vroom_fwf(I("foo bar baz\n1   2   \n4  \n"), col_types = list())
  )

  expect_error_free(
    vroom_fwf(I("foo bar baz\n1   2   \n4   \n"), col_types = list())
  )

  expect_error_free(
    vroom_fwf(I("foo bar baz\n1   2   \n4   5\n"), col_types = list())
  )

  expect_error_free(
    vroom_fwf(I("foo bar baz\n1   2   \n4   5 \n"), col_types = list())
  )

  expect_error_free(
    vroom_fwf(I("foo bar baz\n1   2   \n4   5  \n"), col_types = list())
  )

  expect_error_free(
    vroom_fwf(I("foo bar baz\n1   2   \n4   5   \n"), col_types = list())
  )

  expect_error_free(
    vroom_fwf(I("foo bar baz\n1   2   \n4   5   6\n"), col_types = list())
  )

  expect_error_free(
    vroom_fwf(I("A\n  a\n"), col_types = list())
  )
})

test_that("Errors if begin is greater than end", {
  positions <- fwf_positions(
    start = c(1, 3, 5),
    end = c(3, 1, NA),
    col_names = c("foo", "bar", "baz")
  )

  expect_error(
    vroom_fwf(I("1  2  3\n"), positions, col_types = list()),
    "`col_positions` must have begin less than end"
  )
})

test_that("vroom_fwf respects n_max (#334)", {
  out <- vroom_fwf(I("foo 1\nbar 2\nbaz 3\nqux 4"), n_max = 0, col_types = list())
  expect_named(out, c("X1", "X2"))
  expect_equal(out[[1]], character())
  expect_equal(out[[2]], character())

  out <- vroom_fwf(I("foo 1\nbar 2\nbaz 3\nqux 4"), n_max = 1, col_types = list())
  expect_named(out, c("X1", "X2"))
  expect_equal(out[[1]], c("foo"))
  expect_equal(out[[2]], c(1))

  out <- vroom_fwf(I("foo 1\nbar 2\nbaz 3\nqux 4"), n_max = 2, col_types = list())
  expect_named(out, c("X1", "X2"))
  expect_equal(out[[1]], c("foo", "bar"))
  expect_equal(out[[2]], c(1, 2))
})

test_that("vroom_fwf respects n_max when reading from a connection", {
  f <- tempfile()
  on.exit(unlink(f))
  writeLines(rep("00010002", 1000), f)

  out1 <- vroom_fwf(file(f), col_positions = fwf_widths(c(4, 4)), col_types = "ii")

  expect_equal(dim(out1), c(1000, 2))

  out2 <- vroom_fwf(file(f), n_max = 900, col_positions = fwf_widths(c(4, 4)), col_types = "ii")

  expect_equal(dim(out2), c(900, 2))

  out3 <- vroom_fwf(file(f), n_max = 100, col_positions = fwf_widths(c(4, 4)), col_types = "ii")

  expect_equal(dim(out3), c(100, 2))

  out4 <- vroom_fwf(file(f), n_max = 10, col_positions = fwf_widths(c(4, 4)), col_types = "ii")

  expect_equal(dim(out4), c(10, 2))

  out5 <- vroom_fwf(file(f), n_max = 1, col_positions = fwf_widths(c(4, 4)), col_types = "ii")

  expect_equal(dim(out5), c(1, 2))
})

test_that("vroom_fwf works when skip_empty_rows is false (https://github.com/tidyverse/readr/issues/1211)", {
  f <- tempfile()
  on.exit(unlink(f))

  writeLines(rep(" ", 10), f)

  out <- vroom_fwf(f, fwf_cols(A = c(1, NA)), col_types = "c", na = " ", trim_ws = FALSE, skip_empty_rows = FALSE)

  expect_equal(out[[1]], rep(NA_character_, 10))
})

test_that("vroom_fwf correctly reads files with no trailing newline (https://github.com/tidyverse/readr/issues/1293)", {
  f <- tempfile()
  on.exit(unlink(f))

  writeBin(charToRaw("111222\n333444"), f)

  out <- vroom_fwf(f, fwf_widths(c(3, 3)), col_types = "ii")

  expect_equal(out, tibble::tibble(X1 = c(111, 333), X2 = c(222, 444)))

  out2 <- vroom_fwf(file(f), fwf_widths(c(3, 3)), col_types = "ii")

  expect_equal(out, out2)
})

test_that("vroom_fwf correctly reads DOS files with no trailing newline (https://github.com/tidyverse/readr/issues/1293)", {
  f <- tempfile()
  on.exit(unlink(f))

  writeBin(charToRaw("111222\r\n333444"), f)

  out <- vroom_fwf(f, fwf_widths(c(3, 3)), col_types = "ii")

  expect_equal(out, tibble::tibble(X1 = c(111, 333), X2 = c(222, 444)))

  out2 <- vroom_fwf(file(f), fwf_widths(c(3, 3)), col_types = "ii")

  expect_equal(out, out2)
})

Try the vroom package in your browser

Any scripts or data that you put into this service are public.

vroom documentation built on Oct. 2, 2023, 5:07 p.m.