tests/testthat/test-haven-stata.R

# read_stata --------------------------------------------------------------

test_that("stata data types read into expected types (#45)", {
  df <- read_stata(test_path("stata/types.dta"))
  types <- vapply(df, typeof, character(1))

  expect_equal(types, c(
    vfloat = "double",
    vdouble = "double",
    vlong = "double",
    vint = "double",
    vbyte = "double",
    vstr = "character",
    vdate = "double",
    vdatetime = "double"
  ))
})

test_that("Stata %td (date) and %tc (datetime) read into expected classes", {
  df <- read_stata(test_path("stata/types.dta"))

  expect_s3_class(df$vdate, "Date")
  expect_s3_class(df$vdatetime, "POSIXct")
})


test_that("Old %d format read into Date class", {
  df <- zap_formats(read_stata(test_path("stata/datetime-d.dta")))
  expect_equal(df$date, as.Date("2015-11-02"))
})


test_that("tagged double missings are read correctly", {
  x <- read_dta(test_path("stata/tagged-na-double.dta"))$x
  expect_equal(na_tag(x), c(rep(NA, 5), "a", "h", "z"))

  labels <- attr(x, "labels")
  expect_equal(na_tag(labels), c("a", "z"))
})

test_that("tagged integer missings are read correctly", {
  x <- read_dta(test_path("stata/tagged-na-int.dta"))$x
  expect_equal(na_tag(x), c(rep(NA, 5), "a", "h", "z"))

  labels <- attr(x, "labels")
  expect_equal(na_tag(labels), c("a", "z"))
})

test_that("file label and notes stored as attributes", {
  df <- read_dta(test_path("stata/notes.dta"))

  expect_equal(attr(df, "label"), "This is a test dataset.")
  expect_length(attr(df, "notes"), 2)
})

test_that("only selected columns are read", {
  out <- read_dta(test_path("stata/notes.dta"), col_select = "id")
  expect_named(out, "id")
})

test_that("using skip returns correct number of rows", {
  rows_after_skipping <- function(n) {
    nrow(read_dta(test_path("stata/notes.dta"), skip = n))
  }

  n <- rows_after_skipping(0)

  expect_equal(rows_after_skipping(1), n - 1)
  expect_equal(rows_after_skipping(n - 1), 1)
  expect_equal(rows_after_skipping(n + 0), 0)
  expect_equal(rows_after_skipping(n + 1), 0)
})

test_that("can limit the number of rows to read", {
  rows_with_limit <- function(n) {
    nrow(read_dta(test_path("stata/notes.dta"), n_max = n))
  }

  n <- rows_with_limit(Inf)
  expect_equal(rows_with_limit(0), 0)
  expect_equal(rows_with_limit(1), 1)
  expect_equal(rows_with_limit(n), n)
  expect_equal(rows_with_limit(n + 1), n)

  # alternatives for unlimited rows
  expect_equal(rows_with_limit(NA), n)
  expect_equal(rows_with_limit(-1), n)
})

# write_dta ---------------------------------------------------------------

test_that("can roundtrip basic types", {
  x <- runif(10)
  expect_equal(roundtrip_var(x, "dta"), x)
  expect_equal(roundtrip_var(1:10, "dta"), 1:10)
  expect_equal(roundtrip_var(c(TRUE, FALSE), "dta"), c(1, 0))
  expect_equal(roundtrip_var(letters, "dta"), letters)
})

test_that("can roundtrip missing values (as much as possible)", {
  expect_equal(roundtrip_var(NA, "dta"), NA_integer_)
  expect_equal(roundtrip_var(NA_real_, "dta"), NA_real_)
  expect_equal(roundtrip_var(NA_integer_, "dta"), NA_integer_)
  expect_equal(roundtrip_var(NA_character_, "dta"), "")
})

test_that("can roundtrip date times", {
  x1 <- c(as.Date("2010-01-01"), NA)
  expect_equal(roundtrip_var(x1, "dta"), x1)

  # converted to same time in UTC
  x2 <- as.POSIXct("2010-01-01 09:00", tz = "Pacific/Auckland")
  expect_equal(
    roundtrip_var(x2, "dta"),
    as.POSIXct("2010-01-01 09:00", tz = "UTC")
  )

  x2_utc <- x2
  attr(x2_utc, "tzone") <- "UTC"
  expect_equal(
    roundtrip_var(x2, "sav", adjust_tz = FALSE),
    x2_utc
  )

  attr(x2, "label") <- "abc"
  expect_equal(attr(roundtrip_var(x2, "dta"), "label"), "abc")
})

test_that("can roundtrip tagged NAs", {
  x <- c(1, 2, tagged_na("a", "b"), NA)
  expect_equal(roundtrip_var(x, "dta"), x)

  tags <- tagged_na("a", "b")
  y <- labelled(
    c(1, 2, 1, tags[1], tags[2]),
    c("ABC" = tags[1], "DEF" = tags[2])
  )
  expect_equal(roundtrip_var(y, "dta"), y)
})

test_that("infinity gets converted to NA", {
  expect_equal(roundtrip_var(c(Inf, 0, -Inf), "dta"), c(NA, 0, NA))
})

test_that("factors become labelleds", {
  f <- factor(c("a", "b"), levels = letters[1:3])
  rt <- roundtrip_var(f, "dta")

  expect_s3_class(rt, "haven_labelled")
  expect_equal(as.vector(rt), 1:2)
  expect_equal(attr(rt, "labels"), c(a = 1, b = 2, c = 3))
})

test_that("labels are preserved", {
  x <- 1:10
  attr(x, "label") <- "abc"

  expect_equal(attr(roundtrip_var(x, "dta"), "label"), "abc")
})

test_that("labelleds are round tripped", {
  int <- labelled(c(1L, 2L), c(a = 1L, b = 3L))
  num <- labelled(c(1, 2), c(a = 1, b = 3))
  chr <- labelled(c("a", "b"), c(a = "b", b = "a"))

  expect_equal(roundtrip_var(num, "dta"), num)
  # FIXME!
  # expect_equal(roundtrip_var(chr, "dta"), chr)
})

test_that("can write labelled with NULL labels", {
  int <- labelled(c(1L, 2L), NULL)
  num <- labelled(c(1, 2), NULL)
  chr <- labelled(c("a", "b"), NULL)

  expect_equal(roundtrip_var(int, "dta"), c(1L, 2L))
  expect_equal(roundtrip_var(num, "dta"), c(1L, 2L))
  expect_equal(roundtrip_var(chr, "dta"), c("a", "b"))
})

test_that("factors become labelleds", {
  f <- factor(c("a", "b"), levels = letters[1:3])
  rt <- roundtrip_var(f, "dta")

  expect_s3_class(rt, "haven_labelled")
  expect_equal(as.vector(rt), 1:2)
  expect_equal(attr(rt, "labels"), c(a = 1, b = 2, c = 3))
})

test_that("labels are converted to utf-8", {
  labels_utf8 <- c("\u00e9\u00e8", "\u00e0", "\u00ef")
  labels_latin1 <- iconv(labels_utf8, "utf-8", "latin1")

  v_utf8 <- labelled(3:1, setNames(1:3, labels_utf8))
  v_latin1 <- labelled(3:1, setNames(1:3, labels_latin1))

  expect_equal(names(attr(roundtrip_var(v_utf8, "dta"), "labels")), labels_utf8)
  expect_equal(names(attr(roundtrip_var(v_latin1, "dta"), "labels")), labels_utf8)
})

test_that("supports stata version 15", {
  df <- tibble(x = factor(letters), y = runif(26))

  path <- tempfile()
  write_dta(df, path, version = 15)
  df2 <- read_dta(path)

  df2$x <- as_factor(df2$x)
  df2$y <- zap_formats(df2$y)
  expect_equal(df2, df)
})

test_that("can roundtrip file labels", {
  df <- tibble(x = 1)
  expect_null(attr(roundtrip_dta(df), "label"))
  expect_equal(attr(roundtrip_dta(df, label = "abcd"), "label"), "abcd")

  attr(df, "label") <- "abc"
  expect_equal(attr(roundtrip_dta(df), "label"), "abc")
  expect_equal(attr(roundtrip_dta(df, label = "abcd"), "label"), "abcd")
  expect_null(attr(roundtrip_dta(df, label = NULL), "label"))
})

test_that("invalid files generate informative errors", {
  expect_snapshot(error = TRUE, {
    long <- paste(rep("a", 100), collapse = "")
    write_dta(data.frame(x = 1), tempfile(), label = long)

    df <- data.frame(1)
    names(df) <- "x y"
    write_dta(df, tempfile(), version = 13)

    names(df) <- long
    write_dta(df, tempfile(), version = 13)
    write_dta(df, tempfile(), version = 14)
  })
})

test_that("can't write non-integer labels (#401)", {
  expect_snapshot(error = TRUE, {
    df <- data.frame(x = labelled(c(1, 2.5, 3), c("b" = 1.5)))
    write_dta(df, tempfile())
  })
})

test_that("can roundtrip long strings (strL)", {
  long_string <- function(n, m) {
    do.call("paste0", replicate(m, sample(LETTERS, n, TRUE), simplify = FALSE))
  }

  x <- long_string(10, 400)
  expect_equal(roundtrip_var(x, "dta"), x)
  x <- long_string(10, 1000)
  expect_equal(roundtrip_var(x, "dta"), x)
  x <- long_string(10, 3000)
  expect_equal(roundtrip_var(x, "dta"), x)
})


test_that("invisibly returns original data unaltered", {

  df <- tibble(
    x = 1:5,
    dt = seq(
      as.POSIXct("2022-01-01 12:00:00", tz = "America/Chicago"),
      by = "days",
      length.out = 5
    )
  )

  path <- tempfile()
  df_returned <- write_dta(df, path)

  expect_identical(df, df_returned)
})

Try the haven package in your browser

Any scripts or data that you put into this service are public.

haven documentation built on July 10, 2023, 2:04 a.m.