tests/testthat/test_micro_chunked.R

# B & list
# csv chunk reading is equivalent
# biglm is equivalent to lm

test_that("basic chunk reading - long .dat.gz", {
  cps_full <- read_ipums_micro(
    ipums_example("cps_00157.xml"),
    data_file = ipums_example("cps_00157.dat.gz"),
    verbose = FALSE
  )
  cps_full <- cps_full[cps_full$STATEFIP == 19, ]

  cps_chunked <- read_ipums_micro_chunked(
    ipums_example("cps_00157.xml"),
    IpumsDataFrameCallback$new(function(x, ...) x[x$STATEFIP == 19, ]),
    data_file = ipums_example("cps_00157.dat.gz"),
    verbose = FALSE
  )

  expect_equal(cps_full, cps_chunked)
})

test_that("basic chunk reading - long .csv.gz", {
  skip_on_cran() # Don't want to lock readr into hipread's callback API
  cps_full <- read_ipums_micro(
    ipums_example("cps_00158.xml"),
    data_file = ipums_example("cps_00158.csv.gz"),
    verbose = FALSE
  )
  cps_full <- cps_full[cps_full$STATEFIP == 19, ]

  cps_chunked <- read_ipums_micro_chunked(
    ipums_example("cps_00158.xml"),
    IpumsDataFrameCallback$new(function(x, ...) x[x$STATEFIP == 19, ]),
    data_file = ipums_example("cps_00158.csv.gz"),
    verbose = FALSE
  )

  expect_equal(cps_full, cps_chunked)
})

test_that("basic chunk reading - list", {
  cps_full <- read_ipums_micro_list(
    ipums_example("cps_00159.xml"),
    verbose = FALSE
  )

  cps_chunked <- read_ipums_micro_list_chunked(
    ipums_example("cps_00159.xml"),
    IpumsListCallback$new(function(x, ...) x),
    verbose = FALSE
  )

  cps_chunked_combined <- list(
    HOUSEHOLD = ipums_bind_rows(
      lapply(cps_chunked, function(x) x$HOUSEHOLD)
    ),
    PERSON = ipums_bind_rows(
      lapply(cps_chunked, function(x) x$PERSON)
    )
  )

  expect_equal(cps_full, cps_chunked_combined)
})

test_that("biglm is equivalent to lm", {
  skip_if_not_installed("biglm")
  biglm_results <- read_ipums_micro_chunked(
    ipums_example("cps_00160.xml"),
    IpumsBiglmCallback$new(
      INCTOT ~ AGE + HEALTH,
      function(x, ...) {
        x[x$INCTOT < 900000, ]
      }
    ),
    chunk_size = 1000,
    var_attrs = NULL,
    verbose = FALSE
  )

  data <- read_ipums_micro(
    ipums_example("cps_00160.xml"),
    verbose = FALSE,
    var_attrs = NULL
  )
  data <- data[data$INCTOT < 900000, ]

  lm_results <- lm(INCTOT ~ AGE + HEALTH, data)

  expect_equal(coef(biglm_results), coef(lm_results))
  expect_equal(vcov(biglm_results), vcov(lm_results))
})

Try the ipumsr package in your browser

Any scripts or data that you put into this service are public.

ipumsr documentation built on Sept. 12, 2024, 7:38 a.m.