tests/testthat/test-missing-values.R

test_that("blanks in different rows read as missing [xlsx]", {
  blanks <- read_excel(test_sheet("blanks.xlsx"), sheet = "different_rows")
  expect_equal(blanks$x, c(NA, 1))
  expect_equal(blanks$y, c("a", NA))
})

test_that("blanks read as missing [xls]", {
  blanks <- read_excel(test_sheet("blanks.xls"), sheet = "different_rows")
  expect_equal(blanks$x, c(NA, 1))
  expect_equal(blanks$y, c("a", NA))
})

test_that("blanks in same middle row are read as missing [xlsx]", {
  blanks <- read_excel(test_sheet("blanks.xlsx"), sheet = "same_row_middle")
  expect_equal(blanks$x, c(1, NA, 2))
  expect_equal(blanks$y, c("a", NA, "b"))
})

test_that("blanks in same middle row are read as missing [xls]", {
  blanks <- read_excel(test_sheet("blanks.xls"), sheet = "same_row_middle")
  expect_equal(blanks$x, c(1, NA, 2))
  expect_equal(blanks$y, c("a", NA, "b"))
})

test_that("blanks in same, first row are read as missing [xlsx]", {
  blanks <- read_excel(test_sheet("blanks.xlsx"), sheet = "same_row_first")
  expect_equal(blanks$x, c(NA, 1))
  expect_equal(blanks$y, c(NA, "a"))
})

test_that("blanks in same, first row are read as missing [xls]", {
  blanks <- read_excel(test_sheet("blanks.xls"), sheet = "same_row_first")
  expect_equal(blanks$x, c(NA, 1))
  expect_equal(blanks$y, c(NA, "a"))
})

test_that("By default, NA read as text", {
  df <- read_xls(test_sheet("missing-values-xls.xls"))
  expect_equal(df$x, c("NA", "1", "2"))
})

test_that("na arg can be used to map 'NA' to NA [xls]", {
  df <- read_excel(test_sheet("missing-values-xls.xls"), na = "NA")
  expect_equal(df$x, c(NA, 1, 2))
  expect_equal(df$y, c(NA, 1, 2)) # formula column
})

test_that("na arg can be used to map 'NA' to NA [xlsx]", {
  df <- read_excel(test_sheet("missing-values-xlsx.xlsx"), na = "NA")
  expect_equal(df$x, c(NA, 1, 2))
  expect_equal(df$y, c(NA, 1, 2)) # formula column
})

## #401 make sure check against NA strings is done after shared string table
## lookup for affected cells vs. on the index into the shared string table
test_that("na arg works with multiple strings and for shared strings [xlsx]", {
  df <- read_excel(
    test_sheet("numbers-as-na-and-shared-strings-xlsx.xlsx"),
    na = c("1", "d")
  )
  ## shared string table is:
  ## sst[0] = "a"
  ## sst[1] = "b" <-- make sure this does not become NA!
  ## sst[2] = "c"
  ## sst[3] = "d"
  expect_identical(df, tibble::tribble(
    ~a, ~c,
    "b", NA
  ))
})

test_that("na arg works with multiple strings and for shared strings [xls]", {
  df <- read_excel(
    test_sheet("numbers-as-na-and-shared-strings-xls.xls"),
    na = c("1", "d")
  )
  expect_identical(df, tibble::tribble(
    ~a, ~c,
    "b", NA
  ))
})

test_that("empty first column gives valid data.frame [xls]", {
  df <- read_excel(test_sheet("missing-first-column.xls"), col_names = FALSE)
  expect_s3_class(df, "tbl_df")
  expect_identical(dim(df), c(3L, 1L))
})

test_that("empty first column gives valid data.frame [xlsx]", {
  df <- read_excel(test_sheet("missing-first-column.xlsx"), col_names = FALSE)
  expect_s3_class(df, "tbl_df")
  expect_identical(dim(df), c(3L, 1L))
})

test_that("empty named column gives NA column", {
  df1 <- read_excel(test_sheet("empty-named-column.xlsx"), col_names = TRUE)
  df2 <- read_excel(test_sheet("empty-named-column.xls"), col_names = TRUE)
  expect_equal(ncol(df1), 4)
  expect_equal(names(df1)[2], "y")
  expect_true(all(is.na(df1$y)))
  expect_true(all(is.logical(df1$y)))
  expect_equal(ncol(df2), 4)
  expect_equal(names(df2)[2], "y")
  expect_true(all(is.na(df2$y)))
  expect_true(all(is.logical(df2$y)))
})

test_that("empty (styled) cells are not loaded, but can survive as NA [xlsx]", {
  ## what's important about this sheet?
  ## contains empty cells with a custom format
  ## therefore they appear in the xml and have a style attribute
  ## where are they?
  ## in the embedded empty columns, w/ and w/o a name
  ## in a trailing empty column WHICH SHOULD BE DROPPED
  ## in some trailing rows WHICH SHOULD BE DROPPED
  out <- read_excel(test_sheet("style-only-cells.xlsx"))
  df <- tibble::tibble(
    var1 = c("val1,1", "val2,1", "val3,1"),
    var2 = NA,
    var3 = c("aa", "bb", "cc"),
    var4 = NA,
    var5 = c(1, 2, 3)
  )
  ## this is not a test re: column names; be robust to tibble version
  names(out)[4] <- "var4"
  expect_equal(out, df)
})

test_that("empty (styled) cells are not loaded, but can survive as NA [xls]", {
  out <- read_excel(test_sheet("style-only-cells.xls"))
  df <- tibble::tibble(
    var1 = c("val1,1", "val2,1", "val3,1"),
    var2 = NA,
    var3 = c("aa", "bb", "cc"),
    var4 = NA,
    var5 = c(1, 2, 3)
  )
  ## this is not a test re: column names; be robust to tibble version
  names(out)[4] <- "var4"
  expect_equal(out, df)
})
hadley/readxl documentation built on Oct. 15, 2023, 10:28 a.m.