# tests/testthat/test_micro.R

# Expected values for the example extracts ----
# These are set by hand; the tests below compare reader output against them.

# Row counts used for household (h) and person (p) records
rows_h <- 3385
rows_p <- 7668

# Column counts: per record type, for the combined hierarchical read, and
# for the rectangular read
vars_h <- 6
vars_p <- 6
vars_all <- 9
vars_rect <- 8

# Variable-level metadata expected on YEAR
YEAR_label <- "Survey year"
YEAR_var_desc <- paste(
  c(
    "YEAR reports the year in which the survey was conducted.  ",
    "YEARP is repeated on person records."
  ),
  collapse = ""
)

# First two value labels expected on STATEFIP
STATEFIP_val_labels <- setNames(c(1, 2), c("Alabama", "Alaska"))

# Leading data values
YEAR_first5_values <- rep(1962, times = 5)

# imp_dec
# NOTE(review): presumably ASECWTH exercises implied-decimal handling --
# confirm against the DDI.
ASECWTH_first5_values <- c(1475.59, 1475.59, 1475.59, 1597.61, 1706.65)

test_that("Can read Rectangular .dat.gz", {
  # Read the gzipped fixed-width extract, naming the data file explicitly.
  ddi_path <- ipums_example("cps_00157.xml")
  data_path <- ipums_example("cps_00157.dat.gz")
  rect <- read_ipums_micro(ddi_path, data_file = data_path, verbose = FALSE)

  # Dimensions
  expect_equal(nrow(rect), rows_p)
  expect_equal(ncol(rect), vars_rect)

  # Variable metadata attached from the DDI
  year_col <- rect[["YEAR"]]
  expect_equal(attr(year_col, "label"), YEAR_label)
  expect_equal(attr(year_col, "var_desc"), YEAR_var_desc)
  expect_equal(attr(rect[["STATEFIP"]], "labels")[1:2], STATEFIP_val_labels)

  # Leading data values
  expect_equal(rect$YEAR[1:5], YEAR_first5_values)
  expect_equal(rect$ASECWTH[1:5], ASECWTH_first5_values)
})

test_that("Can read Rectangular .dat", {
  # Copy the bundled gzipped example into an uncompressed .dat file so that
  # the plain-text data file path is exercised.
  temp_file <- tempfile(fileext = ".dat")
  # Register cleanup before anything is written so the file is removed even
  # if the copy below fails part-way through.
  on.exit(unlink(temp_file), add = TRUE, after = FALSE)

  temp <- readr::read_lines(ipums_example("cps_00157.dat.gz"))
  readr::write_lines(temp, temp_file)

  cps <- read_ipums_micro(
    ipums_example("cps_00157.xml"),
    data_file = temp_file,
    verbose = FALSE
  )

  # Same expectations as for the compressed rectangular extract
  expect_equal(nrow(cps), rows_p)
  expect_equal(ncol(cps), vars_rect)
  expect_equal(attr(cps[["YEAR"]], "label"), YEAR_label)
  expect_equal(attr(cps[["YEAR"]], "var_desc"), YEAR_var_desc)
  expect_equal(attr(cps[["STATEFIP"]], "labels")[1:2], STATEFIP_val_labels)
  expect_equal(cps$YEAR[1:5], YEAR_first5_values)
  expect_equal(cps$ASECWTH[1:5], ASECWTH_first5_values)
})

test_that("Can read Rectangular .csv.gz", {
  # Read the gzipped CSV extract, naming the data file explicitly.
  ddi_path <- ipums_example("cps_00158.xml")
  csv_path <- ipums_example("cps_00158.csv.gz")
  rect <- read_ipums_micro(ddi_path, data_file = csv_path, verbose = FALSE)

  # Dimensions
  expect_equal(nrow(rect), rows_p)
  expect_equal(ncol(rect), vars_rect)

  # Variable metadata attached from the DDI
  year_col <- rect[["YEAR"]]
  expect_equal(attr(year_col, "label"), YEAR_label)
  expect_equal(attr(year_col, "var_desc"), YEAR_var_desc)
  expect_equal(attr(rect[["STATEFIP"]], "labels")[1:2], STATEFIP_val_labels)

  # Leading data values
  expect_equal(rect$YEAR[1:5], YEAR_first5_values)
  expect_equal(rect$ASECWTH[1:5], ASECWTH_first5_values)
})

test_that("Can read Hierarchical into long format", {
  # No data file is given here; only the DDI path is passed to the reader.
  ddi_path <- ipums_example("cps_00159.xml")
  long <- read_ipums_micro(ddi_path, verbose = FALSE)

  # Household and person records are expected together in one data frame
  expect_equal(nrow(long), rows_h + rows_p)
  expect_equal(ncol(long), vars_all)

  # Variable metadata attached from the DDI
  year_col <- long[["YEAR"]]
  expect_equal(attr(year_col, "label"), YEAR_label)
  expect_equal(attr(year_col, "var_desc"), YEAR_var_desc)
  expect_equal(attr(long[["STATEFIP"]], "labels")[1:2], STATEFIP_val_labels)

  # Only the first record is compared against the expected values
  expect_equal(long$YEAR[1], YEAR_first5_values[1])
  expect_equal(long$ASECWTH[1], ASECWTH_first5_values[1])
})

test_that("Can read Hierarchical into list format", {
  # The list reader returns one data frame per record type.
  hier <- read_ipums_micro_list(
    ipums_example("cps_00159.xml"),
    verbose = FALSE
  )
  households <- hier$HOUSEHOLD
  persons <- hier$PERSON

  # Dimensions of each record type
  expect_equal(nrow(households), rows_h)
  expect_equal(nrow(persons), rows_p)
  expect_equal(ncol(households), vars_h)
  expect_equal(ncol(persons), vars_p)

  # YEAR metadata is checked on both record types
  expect_equal(attr(households[["YEAR"]], "label"), YEAR_label)
  expect_equal(attr(persons[["YEAR"]], "label"), YEAR_label)
  expect_equal(attr(households[["YEAR"]], "var_desc"), YEAR_var_desc)
  expect_equal(attr(persons[["YEAR"]], "var_desc"), YEAR_var_desc)

  # Value labels and leading values are checked on the household records
  household_state_labels <- attr(households[["STATEFIP"]], "labels")
  expect_equal(household_state_labels[1:2], STATEFIP_val_labels)
  expect_equal(households$YEAR[1], YEAR_first5_values[1])
  expect_equal(households$ASECWTH[1], ASECWTH_first5_values[1])
})

test_that("Can't read Rectangular into list format", {
  # Copy the bundled gzipped rectangular example into an uncompressed .dat
  # file to use as the data file.
  temp_file <- tempfile(fileext = ".dat")
  # Register cleanup before anything is written so the file is removed even
  # if the copy below fails part-way through.
  on.exit(unlink(temp_file), add = TRUE, after = FALSE)

  temp <- readr::read_lines(ipums_example("cps_00157.dat.gz"))
  readr::write_lines(temp, temp_file)

  # The list reader is expected to reject a rectangular extract. The result
  # is not assigned because the call must error before returning.
  expect_error(
    read_ipums_micro_list(
      ipums_example("cps_00157.xml"),
      data_file = temp_file,
      verbose = FALSE
    ),
    regexp = "must be hierarchical"
  )
})

test_that("Can read microdata from an `ipums_ddi` object", {
  ddi_path <- ipums_example("cps_00157.xml")
  citation_msg <- "Use of data from IPUMS CPS"

  # Read once from the DDI file path...
  expect_message(
    from_path <- read_ipums_micro(ddi_path),
    citation_msg
  )

  # ...and once from a DDI object that was parsed first
  expect_message(
    from_ddi <- read_ipums_micro(read_ipums_ddi(ddi_path)),
    citation_msg
  )

  # Both routes must give the same result
  expect_identical(from_path, from_ddi)
})

test_that("Arguments n_max and vars work", {
  # Limit the read to 100 rows and two selected variables.
  ddi_path <- ipums_example("cps_00159.xml")
  subset_read <- read_ipums_micro(
    ddi_path,
    vars = c(RECTYPE, STATEFIP),
    n_max = 100,
    verbose = FALSE
  )

  expect_equal(nrow(subset_read), 100)
  expect_equal(ncol(subset_read), 2)
})

test_that("Arguments n_max and vars work for csv files (#26)", {
  ddi_path <- ipums_example("cps_00158.xml")
  csv_path <- ipums_example("cps_00158.csv.gz")

  # The data file is passed as the second unnamed argument, as in the
  # original call; the remaining arguments are named.
  subset_read <- read_ipums_micro(
    ddi_path,
    csv_path,
    vars = c(YEAR, SERIAL),
    n_max = 100,
    verbose = FALSE
  )

  expect_equal(nrow(subset_read), 100)
  expect_equal(ncol(subset_read), 2)
})

test_that("Setting argument var_attrs to NULL works", {
  ddi_path <- ipums_example("cps_00157.xml")
  data_path <- ipums_example("cps_00157.dat.gz")
  bare <- read_ipums_micro(
    ddi_path,
    data_file = data_path,
    var_attrs = NULL,
    verbose = FALSE
  )

  # Every column must come back without any attributes at all
  has_no_attrs <- purrr::map_lgl(bare, function(col) is.null(attributes(col)))
  expect_true(all(has_no_attrs))
})

test_that("Informative errors when improper ddi file", {
  # Paths that do not exist: one relative, one absolute
  expect_error(
    read_ipums_micro("FAKE_FILE.xml"),
    "Could not find file .+/FAKE_FILE.xml`"
  )
  expect_error(
    read_ipums_micro("C:/FAKE_FOLDER/FAKE_FILE.xml"),
    "Could not find file `C:/FAKE_FOLDER/FAKE_FILE.xml`"
  )

  # Try to read through a directory/zip:
  vcr_dir <- vcr::vcr_test_path("fixtures")
  zip_path <- file.path(vcr_dir, "zipped_ddi.zip")
  bad_ddi_msg <- "Expected `ddi` to be an `ipums_ddi` object or the path"

  # A directory given as the DDI
  expect_error(read_ipums_micro(vcr_dir), bad_ddi_msg)
  expect_error(read_ipums_micro_list(vcr_dir), bad_ddi_msg)

  # A zip archive given as the DDI
  expect_error(read_ipums_micro_yield(zip_path), bad_ddi_msg)
  expect_error(read_ipums_micro_chunked(zip_path), bad_ddi_msg)
})

test_that("keyvar is loaded regardless of selection in hierarchical", {
  # SERIAL is not part of the selection but must still appear in the output
  # for both record types.
  hier <- read_ipums_micro_list(
    ipums_example("cps_00159.xml"),
    vars = c(STATEFIP, INCTOT),
    verbose = FALSE
  )

  household_cols <- names(hier$HOUSEHOLD)
  person_cols <- names(hier$PERSON)
  expect_true("SERIAL" %in% household_cols)
  expect_true("SERIAL" %in% person_cols)
})


test_that("Don't duplicate rectype vars in ATUS hierarchical extracts (#43)", {
  fixture_dir <- vcr::vcr_test_path("fixtures")
  ddi <- read_ipums_ddi(file.path(fixture_dir, "atus_00025.xml"))

  # Small hand-built data set; `x` is an extra column alongside the two
  # variables whose labels are checked below
  raw <- dplyr::tibble(
    RECTYPE = c(1, 1, 2, 3, 3, 3),
    REGION = c(1, 2, 3, 4, 1, 2),
    x = 1:6
  )

  labelled <- set_ipums_var_attributes(raw, ddi, var_attrs = "val_labels")

  # Removed duplicates from rectype
  expected_rectype <- tibble::tibble(
    val = c(1, 2, 3, 4, 5),
    lbl = c(
      "Household Record", "Person Record", "Activity Record",
      "Who Record", "Elder Care Record"
    )
  )
  expect_equal(ipums_val_labels(labelled$RECTYPE), expected_rectype)

  # Didn't affect the region labels
  expected_region <- tibble::tibble(
    val = c(1, 2, 3, 4),
    lbl = c("Northeast", "Midwest", "South", "West")
  )
  expect_equal(ipums_val_labels(labelled$REGION), expected_region)
})


test_that("Empty vars selection is handled", {
  ddi_path <- ipums_example("cps_00157.xml")
  data_path <- ipums_example("cps_00157.dat.gz")

  # A selection that matches no variable must raise an error
  expect_error(
    unread <- read_ipums_micro(
      ddi_path,
      data_file = data_path,
      vars = starts_with("MISSING"),
      verbose = FALSE
    ),
    regexp = "did not match any variables"
  )
})

# Try the ipumsr package in your browser
#
# Any scripts or data that you put into this service are public.
#
# ipumsr documentation built on Sept. 12, 2024, 7:38 a.m.