tests/testthat/test-factor.R

test_that("strings mapped to levels", {
  test_vroom("a\nb\n", col_names = FALSE,
    col_types = list(X1 = col_factor(levels = c("a", "b"))),
    equals = tibble::tibble(X1 = factor(c("a", "b")))
  )
})

test_that("can generate ordered factor", {
  res <- test_vroom("a\nb\n", col_names = FALSE,
    col_types = list(X1 = col_factor(levels = c("a", "b"), ordered = TRUE)),
    equals = tibble::tibble(X1 = ordered(c("a", "b")))
  )

  expect_true(is.ordered(res$X1))
})

test_that("NA if value not in levels", {
  test_vroom("a\nb\nc\n", col_names = FALSE,
    col_types = list(X1 = col_factor(levels = c("a", "b"))),
    equals = tibble::tibble(X1 = factor(c("a", "b", NA)))
  )
})

test_that("NAs silently passed along", {
  test_vroom("a\nb\nNA\n", col_names = FALSE,
    col_types = list(X1 = col_factor(levels = c("a", "b"), include_na = FALSE)),
    equals = tibble::tibble(X1 = factor(c("a", "b", NA)))
  )
})

test_that("levels = NULL", {
  test_vroom("a\nb\nc\nb\n", col_names = FALSE,
    col_types = list(X1 = col_factor(levels = NULL)),
    equals = tibble::tibble(X1 = factor(c("a", "b", "c", "b")))
  )
})

test_that("levels = NULL orders by data", {
  test_vroom("b\na\nc\nb\n", col_names = FALSE,
    col_types = list(X1 = col_factor(levels = NULL)),
    equals = tibble::tibble(X1 = factor(c("b", "a", "c", "b"), levels = c("b", "a", "c")))
  )
})

test_that("levels = NULL is the default", {
  test_vroom("a\nb\nc\nd\n", col_names = FALSE,
    col_types = list(X1 = "f"),
    equals = tibble::tibble(X1 = factor(c("a", "b", "c", "d")))
  )
})

test_that("NAs included in levels if desired", {
  test_vroom("NA\nb\na\n", col_names = FALSE,
    col_types = list(X1 = col_factor(levels = c("a", "b", NA))),
    equals = tibble::tibble(X1 = factor(c(NA, "b", "a"), levels = c("a", "b", NA), exclude = NULL))
  )

  test_vroom("NA\nb\na\n", col_names = FALSE,
    col_types = list(X1 = col_factor(levels = c("a", "b", NA), include_na = TRUE)),
    equals = tibble::tibble(X1 = factor(c(NA, "b", "a"), levels = c("a", "b", NA), exclude = NULL))
  )

  test_vroom("NA\nb\na\n", col_names = FALSE,
    col_types = list(X1 = col_factor(levels = NULL, include_na = FALSE)),
    equals = tibble::tibble(X1 = factor(c(NA, "b", "a"), levels = c("b", "a")))
  )

  test_vroom("NA\nb\na\n", col_names = FALSE,
    col_types = list(X1 = col_factor(levels = NULL, include_na = TRUE)),
    equals = tibble::tibble(X1 = factor(c(NA, "b", "a"), levels = c(NA, "b", "a"), exclude = NULL))
  )
})

test_that("Factors handle encodings properly (#615)", {
  encoded <- function(x, encoding) {
    Encoding(x) <- encoding
    x
  }
  f <- tempfile()
  on.exit(unlink(f))
  writeBin(charToRaw(encoded("test\nA\n\xC4\n", "latin1")), f)
  x <- test_vroom(f,
    delim = ",",
    col_types = cols(col_factor(c("A", "\uC4"))),
    locale = locale(encoding = "latin1"),
    equals = tibble::tibble(test = factor(c("A", "\uC4"), levels = c("A", "\uC4")))
  )
})

test_that("factors parse like factor if trim_ws = FALSE", {
  test_vroom("a\na \n", col_names = FALSE, trim_ws = FALSE,
    col_types = list(X1 = col_factor(levels = "a")),
    equals = tibble::tibble(X1 = factor(c("a", "a "), levels = c("a")))
  )
  test_vroom("a\na \n", col_names = FALSE, trim_ws = FALSE,
    col_types = list(X1 = col_factor(levels = "a ")),
    equals = tibble::tibble(X1 = factor(c("a", "a "), levels = c("a ")))
  )
  test_vroom("a\na \n", col_names = FALSE, trim_ws = FALSE,
    col_types = list(X1 = col_factor(levels = c("a ", "a"))),
    equals = tibble::tibble(X1 = factor(c("a", "a "), levels = c("a ", "a")))
  )
})

test_that("Can parse a factor with levels of NA and empty string", {
  x <- c("NC", "NC", "NC", "", "", "NB", "NA", "", "", "NB", "NA",
    "NA", "NC", "NB", "NB", "NC", "NB", "NA", "NA")

  x_in <- paste0(paste(x, collapse = "\n"), "\n")

  test_vroom(x_in, col_names = FALSE,
    col_types = list(X1 = col_factor(levels = c("NA", "NB", "NC", ""))),
    na = character(),
    skip_empty_rows = FALSE,
    equals = tibble::tibble(X1 = factor(x, levels = c("NA", "NB", "NC", "")))
  )
})


test_that("encodings are respected", {
  loc <- locale(encoding = "ISO-8859-1")
  expected <- c("fran\u00e7ais", "\u00e9l\u00e8ve")

  x <- vroom(test_path("enc-iso-8859-1.txt"), delim = "\n", locale = loc, col_types = c(X1 = "f"), col_names = FALSE)
  expect_equal(x[[1]], factor(expected, levels = expected))

  y <- vroom(
    test_path("enc-iso-8859-1.txt"),
    delim = "\n",
    locale = loc,
    col_types = list(X1 = col_factor(levels = expected)),
    col_names = FALSE
  )

  expect_equal(y[[1]], factor(expected, levels = expected))
})

test_that("Results are correct with backslash escapes", {
  obj <- vroom(I("A,T\nB,F\n"), col_names = FALSE, col_types = list("f", "f"), escape_backslash = TRUE)
  exp <- tibble::tibble(X1 = factor(c("A", "B")), X2 = factor(c("T", "F"), levels = c("T", "F")))
  expect_equal(obj, exp)

  obj2 <- vroom(I("A,T\nB,F\n"), col_names = FALSE, col_types = list("f", "f"), escape_backslash = FALSE)
  expect_equal(obj2, exp)
})


test_that("subsetting works with both double and integer indexes", {
  x <- vroom(I("X1\nfoo"), delim = ",", col_types = "f")
  expect_equal(x$X1[1L], factor("foo"))
  expect_equal(x$X1[1], factor("foo"))
  expect_equal(x$X1[NA_integer_], factor(NA_character_, levels = "foo"))
  expect_equal(x$X1[NA_real_], factor(NA_character_, levels = "foo"))
})

test_that("results are correct even with quoted values", {
  expect_equal(
    vroom(I('day\n"Sun"\n"Sat"\n"Sat"'), altrep = FALSE, col_types = "f", delim = ",")$day,
    factor(c("Sun", "Sat", "Sat"), levels = c("Sun", "Sat"))
  )
})

test_that("NAs are correctly encoded and not included in levels unless desired", {
  test_vroom("X\nunknown\nb\na\n",
    col_types = list(X = col_factor(levels = NULL, include_na = FALSE)),
    na = "unknown",
    equals = tibble::tibble(X = factor(c(NA, "b", "a"), levels = c("b", "a"), exclude = NULL))
  )
  test_vroom("X\nunknown\nb\na\nmissing\n",
    col_types = list(X = col_factor(levels = NULL, include_na = FALSE)),
    na = c("unknown", "missing"),
    equals = tibble::tibble(X = factor(c(NA, "b", "a", NA), levels = c("b", "a"), exclude = NULL))
  )
  test_vroom("X\nunknown\nb\na\n",
    col_types = list(X = col_factor(levels = NULL, include_na = TRUE)),
    na = "unknown",
    equals = tibble::tibble(X = factor(c(NA, "b", "a"), levels = c(NA, "b", "a"), exclude = NULL))
  )
  test_vroom("X\nunknown\nb\na\nmissing\n",
    col_types = list(X = col_factor(levels = NULL, include_na = TRUE)),
    na = c("unknown", "missing"),
    equals = tibble::tibble(X = factor(c(NA, "b", "a", NA), levels = c(NA, "b", "a"), exclude = NULL))
  )
})

Try the vroom package in your browser

Any scripts or data that you put into this service are public.

vroom documentation built on July 4, 2024, 9:08 a.m.