tests/testthat/test-factor.R

test_that("strings mapped to levels", {
  test_vroom(
    "a\nb\n",
    col_names = FALSE,
    col_types = list(X1 = col_factor(levels = c("a", "b"))),
    equals = tibble::tibble(X1 = factor(c("a", "b")))
  )
})

test_that("can generate ordered factor", {
  res <- test_vroom(
    "a\nb\n",
    col_names = FALSE,
    col_types = list(X1 = col_factor(levels = c("a", "b"), ordered = TRUE)),
    equals = tibble::tibble(X1 = ordered(c("a", "b")))
  )

  expect_true(is.ordered(res$X1))
})

test_that("NA if value not in levels", {
  test_vroom(
    "a\nb\nc\n",
    col_names = FALSE,
    col_types = list(X1 = col_factor(levels = c("a", "b"))),
    equals = tibble::tibble(X1 = factor(c("a", "b", NA)))
  )
})

test_that("NAs silently passed along", {
  test_vroom(
    "a\nb\nNA\n",
    col_names = FALSE,
    col_types = list(X1 = col_factor(levels = c("a", "b"), include_na = FALSE)),
    equals = tibble::tibble(X1 = factor(c("a", "b", NA)))
  )
})

test_that("levels = NULL", {
  test_vroom(
    "a\nb\nc\nb\n",
    col_names = FALSE,
    col_types = list(X1 = col_factor(levels = NULL)),
    equals = tibble::tibble(X1 = factor(c("a", "b", "c", "b")))
  )
})

test_that("levels = NULL orders by data", {
  test_vroom(
    "b\na\nc\nb\n",
    col_names = FALSE,
    col_types = list(X1 = col_factor(levels = NULL)),
    equals = tibble::tibble(
      X1 = factor(c("b", "a", "c", "b"), levels = c("b", "a", "c"))
    )
  )
})

test_that("levels = NULL is the default", {
  test_vroom(
    "a\nb\nc\nd\n",
    col_names = FALSE,
    col_types = list(X1 = "f"),
    equals = tibble::tibble(X1 = factor(c("a", "b", "c", "d")))
  )
})

test_that("NAs included in levels if desired", {
  test_vroom(
    "NA\nb\na\n",
    col_names = FALSE,
    col_types = list(X1 = col_factor(levels = c("a", "b", NA))),
    equals = tibble::tibble(
      X1 = factor(c(NA, "b", "a"), levels = c("a", "b", NA), exclude = NULL)
    )
  )

  test_vroom(
    "NA\nb\na\n",
    col_names = FALSE,
    col_types = list(
      X1 = col_factor(levels = c("a", "b", NA), include_na = TRUE)
    ),
    equals = tibble::tibble(
      X1 = factor(c(NA, "b", "a"), levels = c("a", "b", NA), exclude = NULL)
    )
  )

  test_vroom(
    "NA\nb\na\n",
    col_names = FALSE,
    col_types = list(X1 = col_factor(levels = NULL, include_na = FALSE)),
    equals = tibble::tibble(X1 = factor(c(NA, "b", "a"), levels = c("b", "a")))
  )

  test_vroom(
    "NA\nb\na\n",
    col_names = FALSE,
    col_types = list(X1 = col_factor(levels = NULL, include_na = TRUE)),
    equals = tibble::tibble(
      X1 = factor(c(NA, "b", "a"), levels = c(NA, "b", "a"), exclude = NULL)
    )
  )
})

test_that("Factors handle encodings properly (#615)", {
  encoded <- function(x, encoding) {
    Encoding(x) <- encoding
    x
  }
  f <- tempfile()
  on.exit(unlink(f))
  writeBin(charToRaw(encoded("test\nA\n\xC4\n", "latin1")), f)
  x <- test_vroom(
    f,
    delim = ",",
    col_types = cols(col_factor(c("A", "\uC4"))),
    locale = locale(encoding = "latin1"),
    equals = tibble::tibble(
      test = factor(c("A", "\uC4"), levels = c("A", "\uC4"))
    )
  )
})

test_that("factors parse like factor if trim_ws = FALSE", {
  test_vroom(
    "a\na \n",
    col_names = FALSE,
    trim_ws = FALSE,
    col_types = list(X1 = col_factor(levels = "a")),
    equals = tibble::tibble(X1 = factor(c("a", "a "), levels = c("a")))
  )
  test_vroom(
    "a\na \n",
    col_names = FALSE,
    trim_ws = FALSE,
    col_types = list(X1 = col_factor(levels = "a ")),
    equals = tibble::tibble(X1 = factor(c("a", "a "), levels = c("a ")))
  )
  test_vroom(
    "a\na \n",
    col_names = FALSE,
    trim_ws = FALSE,
    col_types = list(X1 = col_factor(levels = c("a ", "a"))),
    equals = tibble::tibble(X1 = factor(c("a", "a "), levels = c("a ", "a")))
  )
})

test_that("Can parse a factor with levels of NA and empty string", {
  x <- c(
    "NC",
    "NC",
    "NC",
    "",
    "",
    "NB",
    "NA",
    "",
    "",
    "NB",
    "NA",
    "NA",
    "NC",
    "NB",
    "NB",
    "NC",
    "NB",
    "NA",
    "NA"
  )

  x_in <- paste0(paste(x, collapse = "\n"), "\n")

  test_vroom(
    x_in,
    col_names = FALSE,
    col_types = list(X1 = col_factor(levels = c("NA", "NB", "NC", ""))),
    na = character(),
    skip_empty_rows = FALSE,
    equals = tibble::tibble(X1 = factor(x, levels = c("NA", "NB", "NC", "")))
  )
})


test_that("encodings are respected", {
  loc <- locale(encoding = "ISO-8859-1")
  expected <- c("fran\u00e7ais", "\u00e9l\u00e8ve")

  x <- vroom(
    test_path("enc-iso-8859-1.txt"),
    delim = "\n",
    locale = loc,
    col_types = c(X1 = "f"),
    col_names = FALSE
  )
  expect_equal(x[[1]], factor(expected, levels = expected))

  y <- vroom(
    test_path("enc-iso-8859-1.txt"),
    delim = "\n",
    locale = loc,
    col_types = list(X1 = col_factor(levels = expected)),
    col_names = FALSE
  )

  expect_equal(y[[1]], factor(expected, levels = expected))
})

test_that("Results are correct with backslash escapes", {
  obj <- vroom(
    I("A,T\nB,F\n"),
    col_names = FALSE,
    col_types = list("f", "f"),
    escape_backslash = TRUE
  )
  exp <- tibble::tibble(
    X1 = factor(c("A", "B")),
    X2 = factor(c("T", "F"), levels = c("T", "F"))
  )
  expect_equal(obj, exp)

  obj2 <- vroom(
    I("A,T\nB,F\n"),
    col_names = FALSE,
    col_types = list("f", "f"),
    escape_backslash = FALSE
  )
  expect_equal(obj2, exp)
})


test_that("subsetting works with both double and integer indexes", {
  x <- vroom(I("X1\nfoo"), delim = ",", col_types = "f")
  expect_equal(x$X1[1L], factor("foo"))
  expect_equal(x$X1[1], factor("foo"))
  expect_equal(x$X1[NA_integer_], factor(NA_character_, levels = "foo"))
  expect_equal(x$X1[NA_real_], factor(NA_character_, levels = "foo"))
})

test_that("results are correct even with quoted values", {
  expect_equal(
    vroom(
      I('day\n"Sun"\n"Sat"\n"Sat"'),
      altrep = FALSE,
      col_types = "f",
      delim = ","
    )$day,
    factor(c("Sun", "Sat", "Sat"), levels = c("Sun", "Sat"))
  )
})

test_that("NAs are correctly encoded and not included in levels unless desired", {
  test_vroom(
    "X\nunknown\nb\na\n",
    col_types = list(X = col_factor(levels = NULL, include_na = FALSE)),
    na = "unknown",
    equals = tibble::tibble(
      X = factor(c(NA, "b", "a"), levels = c("b", "a"), exclude = NULL)
    )
  )
  test_vroom(
    "X\nunknown\nb\na\nmissing\n",
    col_types = list(X = col_factor(levels = NULL, include_na = FALSE)),
    na = c("unknown", "missing"),
    equals = tibble::tibble(
      X = factor(c(NA, "b", "a", NA), levels = c("b", "a"), exclude = NULL)
    )
  )
  test_vroom(
    "X\nunknown\nb\na\n",
    col_types = list(X = col_factor(levels = NULL, include_na = TRUE)),
    na = "unknown",
    equals = tibble::tibble(
      X = factor(c(NA, "b", "a"), levels = c(NA, "b", "a"), exclude = NULL)
    )
  )
  test_vroom(
    "X\nunknown\nb\na\nmissing\n",
    col_types = list(X = col_factor(levels = NULL, include_na = TRUE)),
    na = c("unknown", "missing"),
    equals = tibble::tibble(
      X = factor(c(NA, "b", "a", NA), levels = c(NA, "b", "a"), exclude = NULL)
    )
  )
})

Try the vroom package in your browser

Any scripts or data that you put into this service are public.

vroom documentation built on Jan. 27, 2026, 5:09 p.m.