Nothing
test_that("read_nhgis() is deprecated", {
lifecycle::expect_deprecated(
read_nhgis(ipums_example("nhgis0972_csv.zip"), verbose = FALSE)
)
})
test_that("Can read NHGIS extract: single dataset", {
rlang::local_options(lifecycle_verbosity = "quiet")
nhgis_single_csv <- ipums_example("nhgis0972_csv.zip")
# Use snapshot since several forms of output are produced by default
# (IPUMS conditions, col spec, and additional col spec info)
expect_snapshot(
nhgis_csv <- read_nhgis(nhgis_single_csv)
)
expect_equal(nrow(nhgis_csv), 71)
expect_equal(ncol(nhgis_csv), 25)
expect_equal(
attr(nhgis_csv$D6Z001, "label"),
"Total area: 1989 to March 1990"
)
expect_equal(
attr(nhgis_csv$D6Z001, "var_desc"),
"Table D6Z: Year Structure Built (Universe: Housing Units)"
)
expect_equal(
nhgis_csv$PMSA[1:2],
c("Akron, OH PMSA", "Anaheim--Santa Ana, CA PMSA")
)
expect_s3_class(nhgis_csv, c("spec_tbl_df", "tbl_df", "tbl", "data.frame"))
})
test_that("Can read NHGIS extract: fixed-width files", {
rlang::local_options(lifecycle_verbosity = "quiet")
nhgis_multi_ds <- ipums_example("nhgis0731_csv.zip")
nhgis_multi_fwf <- ipums_example("nhgis0730_fixed.zip")
expect_error(
read_nhgis(nhgis_multi_fwf, verbose = FALSE),
"Multiple files found"
)
nhgis_fwf1 <- read_nhgis(nhgis_multi_fwf, file_select = 1, verbose = FALSE)
nhgis_csv1 <- read_nhgis(nhgis_multi_ds, file_select = 1, verbose = FALSE)
# Test that .do file col typing is working.
# These coltypes differ from what would be expected by readr defaults.
expect_type(nhgis_fwf1$AIHHTLI, "character")
expect_type(nhgis_fwf1$MEMI, "character")
expect_type(nhgis_fwf1$PCI, "character")
expect_type(nhgis_fwf1$NATIONA, "character")
# Test that .do file recoding is working:
# This should be coercible to integer because it was not rescaled on import:
expect_true(
dplyr::near(
nhgis_fwf1$AJWBE001,
as.integer(nhgis_fwf1$AJWBE001)
)
)
# This was rescaled and should be a decimal:
expect_false(
dplyr::near(
nhgis_fwf1$AJWCE001,
as.integer(nhgis_fwf1$AJWCE001)
)
)
expect_equal(nrow(nhgis_fwf1), nrow(nhgis_csv1))
expect_equal(ncol(nhgis_fwf1), ncol(nhgis_csv1) - 1)
expect_equal(
colnames(nhgis_fwf1),
read_nhgis_codebook(nhgis_multi_fwf, file_select = 1)$var_info$var_name
)
# This has some coltype differences which are to be expected
# as we can directly provide coltypes in read_nhgis_fwf()
# whereas read_nhgis_csv() uses readr defaults
expect_identical(
purrr::map(nhgis_fwf1, attributes),
purrr::map(dplyr::select(nhgis_csv1, -"GISJOIN"), attributes)
)
expect_identical(
purrr::map(zap_ipums_attributes(nhgis_fwf1), "identity"),
purrr::map(
zap_ipums_attributes(dplyr::select(nhgis_csv1, -"GISJOIN")),
"identity"
)
)
expect_error(
read_nhgis(
nhgis_multi_fwf,
file_select = 1,
do_file = FALSE,
verbose = FALSE
)
)
expect_error(
read_nhgis(
nhgis_multi_fwf,
file_select = 1,
do_file = "asdf",
verbose = FALSE
),
"Could not find the provided `do_file`."
)
expect_equal(
attributes(nhgis_fwf1$AJWBE001),
list(
label = "Estimates: Total",
var_desc = "Table AJWB: Sex by Age (Universe: Total population)"
)
)
expect_equal(attr(nhgis_fwf1$YEAR, "label"), "Data File Year")
expect_s3_class(nhgis_fwf1, c("spec_tbl_df", "tbl_df", "tbl", "data.frame"))
})
test_that("Can read NHGIS extract: single time series table", {
rlang::local_options(lifecycle_verbosity = "quiet")
nhgis_multi_ds <- ipums_example("nhgis0731_csv.zip")
nhgis_multi_fwf <- ipums_example("nhgis0730_fixed.zip")
tst <- read_nhgis(
nhgis_multi_fwf,
file_select = contains("ts"),
verbose = FALSE
)
tst2 <- read_nhgis(
nhgis_multi_ds,
file_select = contains("ts"),
verbose = FALSE
)
expect_identical(
purrr::map(tst, attributes),
purrr::map(tst2, attributes)
)
expect_identical(
purrr::map(zap_ipums_attributes(tst), "identity"),
purrr::map(zap_ipums_attributes(tst2), "identity")
)
expect_equal(nrow(tst), 84)
expect_equal(tst$GISJOIN[[1]], "G010")
expect_equal(tst$STATE[[1]], "Alabama")
expect_equal(tst$A00AA1820[[1]], 127901)
expect_equal(
attributes(tst$A00AA2020),
list(
label = "2020: Persons: Total",
var_desc = "Table A00: Total Population"
)
)
})
test_that("Can read IHGIS extract", {
fp <- ipums_example("ihgis0014.zip")
x <- read_ipums_agg(fp, file_select = matches("AAA_g0"), verbose = FALSE)
expect_s3_class(x, "tbl_df")
expect_equal(dim(x), c(1, 11))
expect_equal(zap_ipums_attributes(x[["GISJOIN"]]), "KZ")
expect_equal(zap_ipums_attributes(x[["g0"]]), "Kazakhstan")
expect_true(all(fostr_detect(colnames(x)[3:ncol(x)], "AAA")))
expect_identical(
attributes(x$AAA001),
list(
label = "Total population : 1999",
var_desc = "Table AAA: Urban and rural population (Universe: Total population)"
)
)
expect_false(is.null(ipums_var_info(x)))
x <- read_ipums_agg(
fp,
file_select = matches("AAA_g0"),
vars = c(GISJOIN, "AAA003"),
remove_extra_header = FALSE,
var_attrs = "var_desc",
verbose = FALSE
)
expect_s3_class(x, "tbl_df")
expect_equal(dim(x), c(1, 2))
expect_equal(zap_ipums_attributes(x[["GISJOIN"]]), "KZ")
expect_equal(zap_ipums_attributes(x[["AAA003"]]), 106.9)
expect_identical(
attributes(x$AAA003),
list(
var_desc = "Table AAA: Urban and rural population (Universe: Total population)"
)
)
})
test_that("Can select data files from an extract", {
rlang::local_options(lifecycle_verbosity = "quiet")
nhgis_multi_ds <- ipums_example("nhgis0731_csv.zip")
nhgis1 <- read_nhgis(nhgis_multi_ds, file_select = 1, verbose = FALSE)
nhgis2 <- read_nhgis(nhgis_multi_ds, file_select = 2, verbose = FALSE)
expect_equal(dim(nhgis1), c(1, 115))
expect_equal(dim(nhgis2), c(84, 28))
expect_true("AJWBE001" %in% colnames(nhgis1))
expect_true("A00AA1790" %in% colnames(nhgis2))
expect_equal(
attributes(nhgis1$GISJOIN),
list(label = "GIS Join Match Code", var_desc = "")
)
expect_equal(
attributes(nhgis2$GISJOIN),
list(label = "GIS Join Match Code", var_desc = "")
)
expect_identical(
nhgis1,
read_nhgis(
nhgis_multi_ds,
file_select = contains("ds239"),
verbose = FALSE
)
)
expect_identical(
nhgis2,
read_nhgis(
nhgis_multi_ds,
file_select = contains("ts_nominal"),
verbose = FALSE
)
)
})
test_that("Don't show metadata files for IHGIS extract file selection", {
fp <- ipums_example("ihgis0014.zip")
expect_error(
read_ipums_agg(fp),
paste0(
"Multiple files found.+KZ2009popAAA_g0.+KZ2009popAAA_g1.+",
"KZ2009popAAB_g0.+KZ2009popAAB_g1"
)
)
expect_error(
read_ipums_agg(fp, file_select = 5),
"There are only 4 files"
)
expect_error(
read_ipums_agg(fp, file_select = matches("datadict")),
"did not select any of the available files"
)
})
test_that("Can still find codebook if file_select matches data file only", {
rlang::local_options(lifecycle_verbosity = "quiet")
nhgis_multi_ds <- ipums_example("nhgis0731_csv.zip")
nhgis <- read_nhgis(
nhgis_multi_ds,
file_select = matches("nation.csv"),
verbose = FALSE
)
nhgis2 <- read_nhgis(
nhgis_multi_ds,
file_select = "nhgis0731_csv/nhgis0731_ds239_20185_nation.csv",
verbose = FALSE
)
expect_true(all.equal(nhgis, nhgis2))
expect_equal(
attributes(nhgis$GISJOIN),
list(label = "GIS Join Match Code", var_desc = "")
)
})
test_that("Can still find codebook if direct data file path provided", {
rlang::local_options(lifecycle_verbosity = "quiet")
temp_dir <- tempfile()
dir.create(temp_dir)
unzipped <- utils::unzip(ipums_example("nhgis0972_csv.zip"), exdir = temp_dir)
on.exit(unlink(temp_dir, recursive = TRUE), add = TRUE, after = FALSE)
data_file <- unzipped[fostr_detect(unzipped, ".csv$")]
x <- read_nhgis(data_file, verbose = FALSE)
expect_equal(
ipums_var_info(x)$var_name,
colnames(x)
)
expect_equal(
ipums_var_info(x)$var_label,
unname(purrr::map_chr(x, ~ attributes(.x)$label))
)
})
test_that("We can specify available readr options in read_nhgis()", {
rlang::local_options(lifecycle_verbosity = "quiet")
nhgis_multi_fwf <- ipums_example("nhgis0730_fixed.zip")
expect_silent(
nhgis_data <- read_nhgis(
ipums_example("nhgis0972_csv.zip"),
verbose = FALSE,
col_types = readr::cols(.default = readr::col_character()),
remove_extra_header = TRUE
)
)
expect_warning(
nhgis_data_fwf <- read_nhgis(
nhgis_multi_fwf,
file_select = 1,
verbose = FALSE,
vars = 1:4,
col_types = "ccil"
),
"One or more parsing issues"
)
nhgis_data_fwf2 <- read_nhgis(
nhgis_multi_fwf,
file_select = 1,
verbose = FALSE,
vars = c(YEAR, STUSAB)
)
expect_equal(nrow(nhgis_data), 71)
expect_equal(ncol(nhgis_data), 25)
expect_equal(unique(purrr::map_chr(nhgis_data, class)), "character")
expect_equal(nrow(nhgis_data_fwf), 1)
expect_equal(ncol(nhgis_data_fwf), 4)
expect_equal(
unname(purrr::map_chr(nhgis_data_fwf, class)),
c("character", "character", "integer", "logical")
)
expect_equal(colnames(nhgis_data_fwf2), c("YEAR", "STUSAB"))
expect_equal(
nrow(
read_nhgis(ipums_example("nhgis0972_csv.zip"), n_max = 10, verbose = FALSE)
),
10
)
})
test_that("We get informative error messages when reading NHGIS extracts", {
rlang::local_options(lifecycle_verbosity = "quiet")
skip_if_not_installed("tidyselect", "1.2.1")
nhgis_multi_ds <- ipums_example("nhgis0731_csv.zip")
nhgis_multi_fwf <- ipums_example("nhgis0730_fixed.zip")
expect_error(
read_nhgis("FAKE_FILE.zip", verbose = FALSE),
"Could not find file"
)
expect_error(
read_nhgis("~/Desktop/FAKE_FILE.zip", verbose = FALSE),
"Could not find file .+/Desktop/FAKE_FILE.zip"
)
expect_error(
nhgis <- read_nhgis(nhgis_multi_ds, verbose = FALSE),
"Multiple files found, please use the `file_select`"
)
expect_error(
nhgis <- read_nhgis(
nhgis_multi_ds,
file_select = contains("nhgis"),
verbose = FALSE
),
"Multiple files found, please use the `file_select`"
)
expect_error(
nhgis <- read_nhgis(
nhgis_multi_ds,
file_select = 1:2,
verbose = FALSE
),
"Multiple files found, please use the `file_select`"
)
expect_snapshot(
read_nhgis(nhgis_multi_ds, file_select = 3, verbose = FALSE),
error = TRUE
)
expect_error(
read_nhgis(
nhgis_multi_ds,
file_select = contains("not-in-file"),
verbose = FALSE
),
"The provided `file_select` did not select any of the available files:"
)
})
test_that("Can read NHGIS codebook", {
rlang::local_options(lifecycle_verbosity = "quiet")
nhgis_single_csv <- ipums_example("nhgis0972_csv.zip")
nhgis_multi_ds <- ipums_example("nhgis0731_csv.zip")
nhgis_multi_fwf <- ipums_example("nhgis0730_fixed.zip")
expect_error(
read_nhgis_codebook(nhgis_multi_ds),
"Multiple files found"
)
# Single dataset
d_csv <- read_nhgis(nhgis_single_csv, verbose = FALSE)
cb_csv <- read_nhgis_codebook(nhgis_single_csv)
# FWF with ACS
# Includes multiple data types which changes codebook slightly
d_fwf <- read_nhgis(
nhgis_multi_fwf,
file_select = 1,
verbose = FALSE
)
cb_fwf <- read_nhgis_codebook(nhgis_multi_fwf, file_select = 1)
# Time series table
d_fwf_tst <- read_nhgis(
nhgis_multi_fwf,
file_select = 2,
verbose = FALSE
)
cb_fwf_ts <- read_nhgis_codebook(nhgis_multi_fwf, file_select = 2)
expect_equal(cb_csv$var_info$var_name, colnames(d_csv))
expect_equal(cb_fwf$var_info$var_name, colnames(d_fwf))
expect_equal(cb_fwf_ts$var_info$var_name, colnames(d_fwf_tst))
# Raw cb lines:
expect_equal(
length(read_nhgis_codebook(nhgis_single_csv, raw = TRUE)),
117
)
# Confirm that data types and universe are added to labels
expect_equal(
attr(d_fwf$AJWBE009, "label"),
"Estimates: Male: 21 years"
)
expect_equal(
attr(d_fwf$AJWBE009, "var_desc"),
"Table AJWB: Sex by Age (Universe: Total population)"
)
})
test_that("Can read IHGIS metadata from extract", {
cb <- read_ihgis_codebook(ipums_example("ihgis0014.zip"))
expect_s3_class(cb, "ipums_ddi")
expect_equal(dim(cb$var_info), c(18, 10))
expect_true(all(fostr_detect(cb$var_info$var_name, "GISJOIN|^g|AAA|AAB")))
expect_true(
all(
fostr_detect(
cb$var_info$var_desc[which(!fostr_detect(cb$var_info$var_name, "GISJOIN|^g"))]
, "Table AA"
)
)
)
expect_equal(cb$var_info[4, ]$var_name, "AAA001")
expect_equal(cb$var_info[4, ]$var_label, "Total population : 1999")
expect_equal(cb$var_info[4, ]$var_desc, "Table AAA: Urban and rural population (Universe: Total population)")
})
test_that("Can read IHGIS metadata if no txt codebook", {
fp <- file.path(
vcr::vcr_test_path("fixtures", "ihgis0014"),
"ihgis0014_datadict.csv"
)
expect_warning(
cb <- read_ihgis_codebook(fp),
"Unable to load IPUMS conditions"
)
cb2 <- read_ihgis_codebook(ipums_example("ihgis0014.zip"))
expect_null(cb$conditions)
expect_false(is.null(cb2$conditions))
expect_identical(cb$var_info, cb2$var_info)
})
test_that("Correctly handle missing files when loading IHGIS metadata", {
fixtures <- vcr::vcr_test_path("fixtures")
fp1 <- file.path(fixtures, "ihgis0014/ihgis0014_datadict.csv")
fp2 <- file.path(fixtures, "ihgis0014_incomplete/ihgis0014_datadict.csv")
fp3 <- file.path(fixtures, "ihgis0014_incomplete/KZ2009popAAA_g0.csv")
expect_error(
read_ihgis_codebook(ipums_example("nhgis0972_csv.zip")),
"Could not find `_datadict\\.csv`"
)
expect_warning(
d1 <- read_ihgis_codebook(fp1),
"Unable to load IPUMS conditions"
)
expect_error(
read_ihgis_codebook(fp2),
"Could not find `_tables.csv`"
)
expect_warning(
read_ipums_agg(fp3, verbose = FALSE),
"Unable to read codebook"
)
expect_error(
read_ihgis_codebook(fp2, tbls_file = "foobar/ihgis0014_tables.csv"),
"Could not find file `foobar/ihgis0014_tables.csv`"
)
expect_warning(
d2 <- read_ihgis_codebook(
fp2,
tbls_file = file.path(fixtures, "ihgis0014/ihgis0014_tables.csv")
),
"Unable to load tabulation geography metadata for this file"
)
expect_equal(
setdiff(d1$var_info$var_name, d2$var_info$var_name),
c("g0", "g1")
)
expect_equal(dim(d1$var_info), c(18, 10))
expect_equal(dim(d2$var_info), c(16, 10))
expect_true(!is.null(d2$conditions))
})
test_that("Error on bad file arguments for cb vs. data readers", {
fp <- vcr::vcr_test_path("fixtures", "ihgis0014")
expect_error(
read_ihgis_codebook(file.path(fp, "KZ2009popAAA_g0.csv")),
"Expected `cb_file` to be a zipped IPUMS extract or a `_datadict\\.csv`"
)
expect_error(
read_ipums_agg(file.path(fp, "ihgis0014_datadict.csv")),
"Unexpected data file"
)
expect_error(
read_ipums_agg(vcr::vcr_test_path("fixtures", "ihgis0014")),
"Expected file `.+` to be a file path, but got a directory"
)
})
test_that("Can read raw IHGIS codebook", {
fp <- vcr::vcr_test_path("fixtures", "ihgis0014_incomplete")
expect_error(
read_ihgis_codebook(file.path(fp, "ihgis0014_codebook.txt")),
"Expected `cb_file` to be a zipped IPUMS extract or a `_datadict\\.csv`"
)
x <- read_ihgis_codebook(ipums_example("ihgis0014.zip"), raw = TRUE)
expect_equal(length(x), 70)
expect_equal(class(x), "character")
})
test_that("We get relevant subset of IHGIS metadata after attaching to data", {
fp <- ipums_example("ihgis0014.zip")
d1 <- read_ipums_agg(fp, file_select = 1, verbose = FALSE)
d2 <- read_ipums_agg(fp, file_select = 1, verbose = FALSE, var_attrs = NULL)
c1 <- read_ihgis_codebook(fp)
expect_true(all(is.na(ipums_var_info(d2)$var_label)))
expect_equal(dim(ipums_var_info(c1)), c(18, 10))
expect_equal(nrow(ipums_var_info(d1)), ncol(d1))
expect_equal(nrow(ipums_var_info(d1)), 11)
expect_true(all(ipums_var_info(d1)$var_name %in% colnames(d1)))
expect_true(
all(
fostr_detect(
setdiff(ipums_var_info(c1)$var_name, ipums_var_info(d1)$var_name),
"g1|^AAB"
)
)
)
})
test_that("Can read and find metadata for unzipped IHGIS files", {
fp <- vcr::vcr_test_path("fixtures", "ihgis0014")
# Warning suppression b/c this test case uses a file path with no txt codebook
suppressWarnings(
d <- read_ipums_agg(file.path(fp, "KZ2009popAAB_g1.csv"), verbose = FALSE)
)
expect_identical(
d,
read_ipums_agg(
ipums_example("ihgis0014.zip"),
file_select = "KZ2009popAAB_g1.csv",
verbose = FALSE
)
)
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.